PaddlePaddle / Paddle-Lite · Commit 0e554f5a

Merge branch 'develop' into develop

Authored on July 30, 2018 by smilejames; committed by GitHub on July 30, 2018.
Parents: f9cc310b, 3d3614f0

Showing 4 changed files with 117 additions and 108 deletions (+117 -108)
src/fpga/api/fpga_api.h    +59 -27
src/io/executor.cpp        +2  -2
src/operators/op_param.h   +56 -77
tools/op.cmake             +0  -2
src/fpga/api/fpga_api.h

@@ -14,36 +14,35 @@ limitations under the License. */

 #pragma once

 #include <stdint.h>
 #include <cstddef>
 #include <iostream>
 #include <limits>

 // memory management;

-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
 namespace fpga {
-namespace api {

 int open_device();
 int close_device();

 void *fpga_malloc(size_t size);
 void fpga_free(void *ptr);
 void fpga_copy(void *dst, const void *src, size_t num);

 struct FpgaVersionArgs {
   void *buf;
 };

 struct MemoryToPhysicalArgs {
   const void *src;
   uint64_t physical;
 };

 struct MemoryCopyArgs {
   void *src;
   void *dst;
   size_t size;
 };

@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
   float scale;
 };

-struct FpgaBNArgs {};
+struct FpgaBNArgs {
+  bool enabled = false;
+  void *bias_addr;
+  void *scale_addr;
+};
+
+struct FpgaKernelArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t stride_h;
+  uint32_t stride_w;
+};
+
+struct FpgaImageArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t channels;
+  uint32_t pad_h;
+  uint32_t pad_w;
+};

 struct FpgaConvArgs {
-  bool enable_BN = false;
-  bool enable_Relu = false;
-  struct FpgaBNParam bn_parm;
+  bool relu_enabled;
+  struct FpgaBNArgs BNargs;
   void *image_addr;
   void *filter_addr;
   void *bias_addr;
   void *output_addr;
   float quant_scale;
+  struct FpgaImageArgs image;
   uint32_t filter_num;
   uint32_t group_num;
+  struct FpgaKernelArgs kernel;
 };

 struct FpgaPoolArgs {
-  bool enable_BN = false;
-  struct FpgaBNParam bn_parm;
   void *image_addr;
   void *output_addr;
+  struct FpgaImageArgs image;
+  struct FpgaKernelArgs kernel;
 };

-struct FpgaEWAddArgs {  // only support X + Y
-  bool enable_Relu = false;
+struct FpgaEWAddArgs {
+  bool relu_enabled;
   void *image0_addr;
   void *image1_addr;
   void *result_addr;
   uint32_t const0;
   uint32_t const1;
   uint32_t data_len;  // aligned element count
 };

-int ComputeFpgaConv(struct FpgaConvArgs);
-int ComputeFpgaPool(struct FpgaPoolArgs);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
+int ComputeFpgaConv(struct FpgaConvArgs args);
+int ComputeFpgaPool(struct FpgaPoolArgs args);
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);

-#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_FPGA_MAGIC 'CNN'

 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
 #define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
+#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
 #define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)

-}  // namespace api
 }  // namespace fpga
-}  // namespace mobile
-}  // namespace paddle
+}  // namespace paddle_mobile
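Taken together, the reorganized header reads as a small C-style driver interface: open the device, allocate device-accessible buffers, fill in one of the *Args descriptors, and hand it to the matching Compute* entry point. The sketch below only illustrates that flow using the declarations above; the tensor dimensions, byte counts, and the RunConvOnce wrapper are invented for the example and are not part of the commit.

    // Illustrative usage sketch built from the declarations in fpga_api.h.
    #include "fpga/api/fpga_api.h"

    using namespace paddle_mobile::fpga;

    int RunConvOnce() {
      if (open_device() != 0) return -1;

      // Example sizes only: a 32x32x3 input convolved with eight 3x3 filters.
      void *image = fpga_malloc(32 * 32 * 3 * sizeof(float));
      void *filter = fpga_malloc(3 * 3 * 3 * 8 * sizeof(float));
      void *bias = fpga_malloc(8 * sizeof(float));
      void *output = fpga_malloc(32 * 32 * 8 * sizeof(float));

      struct FpgaConvArgs args;
      args.relu_enabled = true;
      args.BNargs.enabled = false;       // no batch-norm folding in this example
      args.BNargs.bias_addr = nullptr;
      args.BNargs.scale_addr = nullptr;
      args.image_addr = image;
      args.filter_addr = filter;
      args.bias_addr = bias;
      args.output_addr = output;
      args.quant_scale = 1.0f;
      args.image = {32, 32, 3, 1, 1};    // width, height, channels, pad_h, pad_w
      args.filter_num = 8;
      args.group_num = 1;
      args.kernel = {3, 3, 1, 1};        // width, height, stride_h, stride_w

      int ret = ComputeFpgaConv(args);   // descriptor is handed to the driver

      fpga_free(image);
      fpga_free(filter);
      fpga_free(bias);
      fpga_free(output);
      close_device();
      return ret;
    }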
src/io/executor.cpp

@@ -419,7 +419,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
 }
 template class Executor<CPU, Precision::FP32>;
-template class Executor<FPGA, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP16>;
 template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<FPGA, Precision::FP16>;

 }  // namespace paddle_mobile
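The executor change only swaps which specializations are explicitly instantiated. Because Executor's member definitions live in this .cpp file rather than in a header, only the <device, precision> combinations listed here are emitted into the object file; a combination that is not listed compiles in client code but fails at link time. A self-contained sketch of that mechanism with an invented Holder template (not paddle_mobile code):

    // holder.h -- template declared here, member defined elsewhere.
    template <typename T>
    struct Holder {
      T value;
      T Twice() const;  // definition is not visible to users of this header
    };

    // holder.cpp -- definition plus explicit instantiations, mirroring executor.cpp.
    template <typename T>
    T Holder<T>::Twice() const {
      return value + value;
    }

    template struct Holder<int>;    // code for Holder<int>::Twice emitted here
    template struct Holder<float>;  // likewise for float
    // No "template struct Holder<double>;" -- a caller using Holder<double>::Twice
    // would compile but fail to link with an undefined-reference error.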
src/operators/op_param.h

@@ -22,6 +22,9 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "framework/variable.h"
+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif

 namespace paddle_mobile {
 namespace operators {

@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
   Tensor *input_y_;
   Tensor *out_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaEWAddArgs fpga_EW_add_args;
+
+ public:
+  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+#endif
 };

 #ifdef FUSION_ELEMENTWISEADDRELU_OP

@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
   vector<int> paddings_;
   bool ceil_mode_;
   bool global_pooling_ = false;
-};
-#endif
-
-#ifdef FUSION_POOLBN_OP
-class FusionPoolBNParam : OpParam {
- public:
-  FusionPoolBNParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-                    const AttributeMap &attrs, const Scope &scope) {
-    input_ = InputXFrom<LoDTensor>(inputs, scope);
-    pooling_type_ = GetAttr<string>("pooling_type", attrs);
-    ksize_ = GetAttr<vector<int>>("ksize", attrs);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
-    global_pooling_ = GetAttr<bool>("global_pooling", attrs);
-    output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
-    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
-    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
-    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
-    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
-  }
-
-  const Tensor *Input() const { return input_; }
-  const string &PoolingType() const { return pooling_type_; }
-  const vector<int> &Ksize() const { return ksize_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  bool isCeilMode() const { return ceil_mode_; }
-  bool isGlobalPooling() const { return global_pooling_; }
-  Tensor *OutputY() const { return output_y_; }
-  const Tensor *InputBias() const { return input_bias_; }
-  const Tensor *InputMean() const { return input_mean_; }
-  const Tensor *InputScale() const { return input_scale_; }
-  const Tensor *InputVariance() const { return input_variance_; }
-  const float &Epsilon() const { return epsilon_; }
-  const float &Momentum() const { return momentum_; }
-  const bool &IsTest() const { return is_test_; }
-  const string &DataFormat() const { return data_format_; }
-
- private:
-  Tensor *input_;
-  string pooling_type_;
-  vector<int> ksize_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  bool ceil_mode_;
-  bool global_pooling_ = false;
-  Tensor *output_y_;
-  Tensor *input_bias_;
-  Tensor *input_mean_;
-  Tensor *input_scale_;
-  Tensor *input_variance_;
-  float epsilon_;
-  float momentum_;
-  bool is_test_;
-  string data_format_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaPoolArgs fpga_pool_args;
+
+ public:
+  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+#endif
 };
 #endif

@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
 class FeedParam : public OpParam {
  public:
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope &scope) {
+            const AttributeMap &attrs, Scope const &scope) {
     input_x_ = InputXFrom<LoDTensor>(inputs, scope);
     out_ = OutFrom<LoDTensor>(outputs, scope);
     auto var = scope.Var("batch_size");

@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
   int x_num_col_dims_;
   int y_num_col_dims_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };

 #ifdef FUSION_FCRELU_OP

@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
   vector<int> paddings_;
   vector<int> dilations_;
   int groups;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };

 Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);

@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif

@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif

@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
 };
 #endif

-#ifdef REGION_OP
-class RegionParam : public OpParam {};
-#endif
-
 }  // namespace operators
 }  // namespace paddle_mobile
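The pattern repeated across these param classes is uniform: under PADDLE_MOBILE_FPGA each param gains a private fpga::*Args descriptor plus a FpgaArgs()/SetFpgaArgs() accessor pair. The kernels that consume it are not part of this commit; the fragment below is only a guess at the intended wiring (InitConvAddOnFpga and ComputeConvAddOnFpga are invented names), showing a kernel preparing the descriptor once and then replaying it on every run.

    #ifdef PADDLE_MOBILE_FPGA
    #include "fpga/api/fpga_api.h"
    #include "operators/op_param.h"

    namespace paddle_mobile {
    namespace operators {

    // One-time setup: build the hardware descriptor and stash it on the param.
    bool InitConvAddOnFpga(FusionConvAddParam *param) {
      fpga::FpgaConvArgs args;
      args.relu_enabled = false;
      // ... fill the image/filter/bias/output addresses, image shape, and
      //     kernel geometry from the param's tensors ...
      param->SetFpgaArgs(args);
      return true;
    }

    // Per-inference path: hand the stored descriptor straight to the driver.
    void ComputeConvAddOnFpga(const FusionConvAddParam &param) {
      fpga::ComputeFpgaConv(param.FpgaArgs());
    }

    }  // namespace operators
    }  // namespace paddle_mobile
    #endif  // PADDLE_MOBILE_FPGA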
tools/op.cmake

@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
   set(FUSION_CONVADDRELU_OP ON)
   set(FUSION_CONVADDBNRELU_OP ON)
   set(FUSION_CONVADDBN_OP ON)
-  set(FUSION_POOLBN_OP ON)
   set(FUSION_ELEMENTWISEADDRELU_OP ON)
   set(FUSION_FC_OP ON)
   set(FUSION_FCRELU_OP ON)
-  set(REGION_OP ON)
   set(POOL_OP ON)
   set(CONCAT_OP ON)
   set(SOFTMAX_OP ON)
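Dropping FUSION_POOLBN_OP and REGION_OP here keeps the build configuration consistent with the classes removed from op_param.h: each XXX_OP flag switched on in this file gates the corresponding #ifdef-guarded operator code, so the FPGA op set no longer advertises operators that no longer exist. The op set itself is chosen when configuring the build, presumably by passing the NET list seen in the guard above (something like cmake -DNET=FPGAnets ..); the exact invocation is not part of this diff.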