Commit 4ba33e24

Authored Mar 14, 2019 by xiebaiyuan; committed via GitHub on Mar 14, 2019.

Merge pull request #1500 from hjchen2/backup

Fix compile with opencl

Parents: 73ceb48a, ef17bf2f

Showing 10 changed files with 72 additions and 53 deletions (+72 / -53):
src/io/paddle_mobile.h                           +1   -1
src/operators/activation_op.cpp                  +5   -6
src/operators/elementwise_add_op.cpp             +0   -1
src/operators/kernel/arm/activation_kernel.cpp   +15  -10
src/operators/kernel/arm/softmax_kernel.cpp      +1   -0
src/operators/kernel/cl/feed_kernel.cpp          +2   -1
src/operators/kernel/cl/fetch_kernel.cpp         +2   -1
src/operators/math/gemm/gemm_kernel.h            +9   -8
src/operators/op_param.h                         +37  -24
src/operators/softmax_op.cpp                     +0   -1
src/io/paddle_mobile.h

@@ -46,7 +46,7 @@ class PaddleMobile {
     PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
 #endif
   }
-  ~PaddleMobile() {}
+  virtual ~PaddleMobile() { Clear(); }

   PMStatus Load(const std::string &dirname, const bool optimize = false,
                 const bool quantification = false, const int batch_size = 1,
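The destructor is now virtual and releases resources through Clear(). A minimal, self-contained sketch of why this matters (hypothetical Base/Derived types, not repo code): without a virtual destructor, deleting a derived object through a base pointer skips the derived cleanup.

#include <iostream>

struct Base {
  virtual ~Base() { std::cout << "~Base\n"; }  // virtual, as ~PaddleMobile() now is
};

struct Derived : Base {
  ~Derived() override { std::cout << "~Derived\n"; }  // runs only because ~Base is virtual
};

int main() {
  Base *p = new Derived();
  delete p;  // prints "~Derived" then "~Base"
  return 0;
}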
src/operators/activation_op.cpp

@@ -17,12 +17,11 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {

-#define DEFINE_ACTIVATION_INFERSHAPE(OpName)                   \
-  template <typename Dtype, typename T>                        \
-  void OpName##Op<Dtype, T>::InferShape() const {              \
-    const auto &input_dims = this->param_.InputX()->dims();    \
-    this->param_.Out()->Resize(input_dims);                    \
-    this->param_.Out()->set_lod(this->param_.InputX()->lod()); \
+#define DEFINE_ACTIVATION_INFERSHAPE(OpName)                \
+  template <typename Dtype, typename T>                     \
+  void OpName##Op<Dtype, T>::InferShape() const {           \
+    const auto &input_dims = this->param_.InputX()->dims(); \
+    this->param_.Out()->Resize(input_dims);                 \
   }

 #ifdef RELU_OP
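The macro body drops its set_lod line; LoD propagation moves into the per-kernel Compute() functions changed below. For readers unfamiliar with the ##-pasting idiom the macro relies on, here is a self-contained sketch (hypothetical DEFINE_GREETER macro, not repo code) showing how one macro argument splices into a class name:

#include <iostream>

// OpName##Op splices the argument into a type name, so invoking the macro
// with Relu defines ReluOp, with Tanh defines TanhOp, and so on.
#define DEFINE_GREETER(Name)                 \
  struct Name##Op {                          \
    void InferShape() const {                \
      std::cout << #Name << " InferShape\n"; \
    }                                        \
  };

DEFINE_GREETER(Relu)
DEFINE_GREETER(Tanh)

int main() {
  ReluOp{}.InferShape();  // prints "Relu InferShape"
  TanhOp{}.InferShape();  // prints "Tanh InferShape"
  return 0;
}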
src/operators/elementwise_add_op.cpp

@@ -23,7 +23,6 @@ template <typename Dtype, typename T>
 void ElementwiseAddOp<Dtype, T>::InferShape() const {
   auto x_dim = this->param_.InputX()->dims();
   this->param_.Out()->Resize(x_dim);
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }

 }  // namespace operators
src/operators/kernel/arm/activation_kernel.cpp

@@ -71,9 +71,10 @@ bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void ReluKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU>()(input, output);
+  output->set_lod(input->lod());
 }

 template <>

@@ -83,9 +84,10 @@ bool Relu6Kernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void Relu6Kernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU6>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif

@@ -97,9 +99,10 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam<CPU> *param) {
 template <>
 void SigmoidKernel<CPU, float>::Compute(const SigmoidParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, SIGMOID>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif

@@ -111,9 +114,10 @@ bool TanhKernel<CPU, float>::Init(TanhParam<CPU> *param) {
 template <>
 void TanhKernel<CPU, float>::Compute(const TanhParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, TANH>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif

@@ -125,9 +129,10 @@ bool LogKernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void LogKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, LOG>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif
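Each activation kernel now works on LoDTensor pointers and copies the input's LoD (sequence offset information) onto the output after computing, presumably because the runtime LoD is not yet known when InferShape() runs, which is why the set_lod calls were removed from the InferShape paths above. A self-contained sketch of the pattern, with a hypothetical FakeLoDTensor standing in for the framework type:

#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for the framework's LoDTensor (hypothetical, simplified).
using LoD = std::vector<std::vector<size_t>>;

struct FakeLoDTensor {
  std::vector<float> data;
  LoD lod_;
  const LoD &lod() const { return lod_; }
  void set_lod(const LoD &lod) { lod_ = lod; }
};

// The shape each kernel now follows: compute the activation, then copy the
// input's LoD onto the output.
void ReluCompute(const FakeLoDTensor *input, FakeLoDTensor *output) {
  output->data.resize(input->data.size());
  for (size_t i = 0; i < input->data.size(); ++i)
    output->data[i] = input->data[i] > 0.f ? input->data[i] : 0.f;
  output->set_lod(input->lod());  // propagate sequence structure, as in the diff
}

int main() {
  FakeLoDTensor in{{-1.f, 2.f}, {{0, 1, 2}}};
  FakeLoDTensor out;
  ReluCompute(&in, &out);
  std::cout << out.data[0] << " " << out.data[1] << "\n";  // prints "0 2"
  return 0;
}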
src/operators/kernel/arm/softmax_kernel.cpp

@@ -28,6 +28,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam<CPU> *param) {
 template <>
 void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam<CPU> &param) {
   SoftmaxCompute<float>(param);
+  param.Out()->set_lod(param.InputX()->lod());
 }

 template class SoftmaxKernel<CPU, float>;
src/operators/kernel/cl/feed_kernel.cpp

@@ -27,13 +27,14 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
 template <>
 void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
+  const int col = param.Col();
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
   cl_int status;
   param.Out()->InitEmptyImage(cl_helper_.CLContext(),
                               cl_helper_.CLCommandQueue(), param.Out()->dims());
   auto output = param.Out();
-  const Tensor *input = param.InputX();
+  const Tensor *input = &param.InputX()->at(col);
   // DLOG << *input;
   const float *input_data = input->data<float>();
   int numel = input->numel();
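The feed (and, below, fetch) kernel now binds a vector of tensors and selects one slot with the op's "col" attribute, mirroring `&param.InputX()->at(col)`. A self-contained sketch of the slot-selection pattern with hypothetical stand-in types:

#include <cassert>
#include <vector>

// Hypothetical stand-ins for the framework's tensor and tensor-array types.
struct FakeTensor {
  std::vector<float> data;
};
using FakeTensorArray = std::vector<FakeTensor>;

// Pick the tensor slot named by the "col" attribute; at() bounds-checks.
const FakeTensor *SelectSlot(const FakeTensorArray *array, int col) {
  assert(col >= 0 && static_cast<size_t>(col) < array->size());
  return &array->at(col);
}

int main() {
  FakeTensorArray feeds{{{1.f}}, {{2.f}}};
  const FakeTensor *t = SelectSlot(&feeds, 1);
  return t->data[0] == 2.f ? 0 : 1;  // exits 0: slot 1 holds 2.0f
}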
src/operators/kernel/cl/fetch_kernel.cpp

@@ -35,8 +35,9 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
+  const int col = param.Col();
   auto input = param.InputX()->GetCLImage();
-  auto *out = param.Out();
+  auto *out = &param.Out()->at(col);
   out->Resize(param.InputX()->dims());
   out->mutable_data<float>();
   const auto &dim = param.InputX()->dims();
src/operators/math/gemm/gemm_kernel.h

@@ -57,12 +57,12 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
       "dup v28.4s, wzr                                 \n\t"
       "dup v29.4s, wzr                                 \n\t"

-      "subs %[kc1], %[kc1], #1                         \n\t"
-      "blt 2f                                          \n\t"
-      "1:                                              \n\t"
-      "prfm pldl1keep, [%[lhs], #24]                   \n\t"
-      "prfm pldl1keep, [%[rhs], #64]                   \n\t"
+      "subs %[kc1], %[kc1], #1                         \n\t"
+      "blt 2f                                          \n\t"
+      "1:                                              \n\t"
+      "prfm pldl1keep, [%[lhs], #32]                   \n\t"
+      "prfm pldl1keep, [%[rhs], #64]                   \n\t"

       "ld1 {v0.4s, v1.4s}, [%[lhs]], %[step1]          \n\t"
       "ld1 {v2.4s, v3.4s, v4.4s, v5.4s}, [%[rhs]], #64 \n\t"

@@ -109,9 +109,10 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
       "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [%[c]], %[step] \n\t"
       : [lhs] "+r"(lhs), [rhs] "+r"(rhs), [c] "+r"(output), [kc1] "+r"(kc1)
       : [step] "r"(step), [step1] "r"(step1)
-      : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
-        "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
-        "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29");
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
+        "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
+        "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28",
+        "v29");
 }

 #else
 void sgemm_6x8(const float *lhs, const float *rhs, const int k, float *output,
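Besides bumping the lhs prefetch offset from #24 to #32, the clobber list gains "cc". That matters because `subs` writes the NZCV condition flags, so without "cc" the compiler may assume the flags survive the asm block and miscompile surrounding code. A minimal sketch of the idiom (hypothetical `decrement` function; the asm path compiles only for AArch64, with a portable fallback):

#include <cstdio>

long decrement(long x) {
#if defined(__aarch64__)
  asm volatile(
      "subs %[x], %[x], #1 \n\t"  // updates x and the condition flags
      : [x] "+r"(x)
      :
      : "cc");  // declare the flags clobbered, as the commit now does
#else
  x -= 1;  // fallback so the sketch builds elsewhere
#endif
  return x;
}

int main() {
  printf("%ld\n", decrement(5));  // prints 4
  return 0;
}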
src/operators/op_param.h

@@ -1224,19 +1224,19 @@ class FeedParam : public OpParam {
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensorArray>(inputs, *scope);
+    input_x_ = InputXFrom<std::vector<LoDTensor>>(inputs, *scope);
     out_ = OutFrom<GType>(outputs, *scope);
     col_ = GetAttr<int>("col", attrs);
     auto var = scope->FindVar("batch_size");
     batch_size = var->GetValue<int>();
   }

-  const framework::LoDTensorArray *InputX() const { return input_x_; }
+  const std::vector<LoDTensor> *InputX() const { return input_x_; }
   GType *Out() const { return out_; }
   const int Col() const { return col_; }
   const int BatchSize() const { return batch_size; }

  private:
-  framework::LoDTensorArray *input_x_;
+  std::vector<LoDTensor> *input_x_;
   GType *out_;
   int col_;
   int batch_size;
@@ -1251,18 +1251,18 @@ class FetchParam : public OpParam {
   FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensor>(inputs, *scope);
-    out_ = OutFrom<framework::LoDTensorArray>(outputs, *scope);
+    input_x_ = InputXFrom<GType>(inputs, *scope);
+    out_ = OutFrom<std::vector<LoDTensor>>(outputs, *scope);
     col_ = GetAttr<int>("col", attrs);
   }

-  const framework::LoDTensor *InputX() const { return input_x_; }
-  framework::LoDTensorArray *Out() const { return out_; }
+  const GType *InputX() const { return input_x_; }
+  std::vector<LoDTensor> *Out() const { return out_; }
   const int Col() const { return col_; }

  private:
-  framework::LoDTensor *input_x_;
-  framework::LoDTensorArray *out_;
+  GType *input_x_;
+  std::vector<LoDTensor> *out_;
   int col_;
 #ifdef PADDLE_MOBILE_FPGA
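In the framework, LoDTensorArray is presumably an alias for std::vector<LoDTensor>; spelling out the vector type (and GType) here makes FeedParam and FetchParam expose the multi-slot arrays that the col-indexed feed and fetch kernels above index into.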
@@ -2371,6 +2371,15 @@ class ConvTransposeParam : public OpParam {
   const int &Groups() const { return groups; }

+  enum ExecMode {
+    EXEC_INVALID = 0,
+    EXEC_GEMM_FLOAT,
+    EXEC_DECONV3X3_FLOAT,
+    EXEC_DECONV4X4_FLOAT,
+  };
+
+  ExecMode &ExecMode() const { return exec_mode_; }
+
  private:
   GType *input_;
   GType *output_;
@@ -2379,6 +2388,7 @@ class ConvTransposeParam : public OpParam {
   vector<int> paddings_;
   vector<int> dilations_;
   int groups;
+  mutable enum ExecMode exec_mode_;
 #ifdef PADDLE_MOBILE_FPGA
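Note the accessor `ExecMode &ExecMode() const` returns a non-const reference from a const method, which is legal only because exec_mode_ is declared `mutable`; and since the function name hides the enum name inside the class, the member must use the elaborated `enum ExecMode` specifier. A self-contained sketch of the caching pattern, with hypothetical names to keep the function and enum distinct:

#include <iostream>

class FakeParam {
 public:
  enum ExecMode { EXEC_INVALID = 0, EXEC_GEMM_FLOAT };
  // A const accessor can hand out a mutable reference to a `mutable` field,
  // letting a kernel cache its chosen execution path on a const param object.
  ExecMode &Mode() const { return exec_mode_; }

 private:
  mutable ExecMode exec_mode_ = EXEC_INVALID;
};

int main() {
  const FakeParam p;
  p.Mode() = FakeParam::EXEC_GEMM_FLOAT;  // mutates through a const object
  std::cout << p.Mode() << "\n";          // prints 1
  return 0;
}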
@@ -3214,43 +3224,46 @@ class LogicalUnaryParam : public OpParam {
 #ifdef WRITE_TO_ARRAY_OP
 template <typename Dtype>
 class WriteToArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
  public:
   WriteToArrayParam(const VariableNameMap &inputs,
                     const VariableNameMap &outputs, const AttributeMap &attrs,
                     Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_ = OpParam::GetVarValue<framework::LoDTensor>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ =
-        OpParam::GetVarValue<framework::LoDTensorArray>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<GType>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<std::vector<GType>>("Out", outputs, *scope);
   }

  public:
-  framework::LoDTensor *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensorArray *output_;
+  GType *input_;
+  GType *index_;
+  std::vector<GType> *output_;
 };
 #endif

 #ifdef READ_FROM_ARRAY_OP
 template <typename Dtype>
 class ReadFromArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
  public:
   ReadFromArrayParam(const VariableNameMap &inputs,
                      const VariableNameMap &outputs, const AttributeMap &attrs,
                      Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_ = OpParam::GetVarValue<framework::LoDTensorArray>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ = OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<std::vector<GType>>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<GType>("Out", outputs, *scope);
   }

  public:
-  framework::LoDTensorArray *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensor *output_;
+  std::vector<GType> *input_;
+  GType *index_;
+  GType *output_;
 };
 #endif
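Both param classes switch from hard-coded framework types to a GType drawn from DtypeTensorTrait, so the tensor type follows the Dtype template argument instead of being pinned to framework::LoDTensor. A simplified, hypothetical sketch of how such a trait indirection works (FakeDtypeTensorTrait and friends are illustration only, not the repo's definitions):

#include <vector>

struct CPU {};
struct FakeLoDTensor {};

// The trait maps a device tag to the tensor type an op should use.
template <typename Device>
struct FakeDtypeTensorTrait;

template <>
struct FakeDtypeTensorTrait<CPU> {
  typedef FakeLoDTensor gtype;
};

template <typename Dtype>
class FakeWriteToArrayParam {
  typedef typename FakeDtypeTensorTrait<Dtype>::gtype GType;

 public:
  GType *input_ = nullptr;                // one tensor read from "X"
  std::vector<GType> *output_ = nullptr;  // tensor array written to "Out"
};

int main() {
  FakeWriteToArrayParam<CPU> p;
  (void)p;  // the point: GType resolves per device tag at compile time
  return 0;
}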
src/operators/softmax_op.cpp

@@ -21,7 +21,6 @@ namespace operators {
 template <typename DeviceType, typename T>
 void SoftmaxOp<DeviceType, T>::InferShape() const {
   this->param_.Out()->Resize(this->param_.InputX()->dims());
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }

 }  // namespace operators