Commit 7d02bc6d

Authored Nov 20, 2018 by hjchen2

    Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into dev-latest

Parents: 3e999f40, f06abb02

Showing 36 changed files with 1280 additions and 33 deletions (+1280 -33)
src/common/types.cpp  +5 -1
src/common/types.h  +3 -0
src/framework/executor.cpp  +3 -0
src/framework/loader.cpp  +12 -6
src/io/jni/paddle_mobile_jni.cpp  +1 -1
src/io/paddle_mobile.cpp  +8 -4
src/operators/feed_op.cpp  +7 -1
src/operators/fusion_conv_add_op.cpp  +3 -1
src/operators/fusion_conv_add_relu_op.h  +3 -2
src/operators/fusion_deconv_add_op.cpp  +33 -0
src/operators/fusion_deconv_add_op.h  +108 -0
src/operators/fusion_deconv_add_relu_op.cpp  +34 -0
src/operators/fusion_deconv_add_relu_op.h  +110 -0
src/operators/kernel/central-arm-func/conv_add_arm_func.h  +1 -0
src/operators/kernel/cl/cl_kernel/conv_bn_relu_kernel.cl  +18 -0
src/operators/kernel/cl/cl_kernel/fetch_kernel.cl  +12 -3
src/operators/kernel/cl/cl_kernel/prior_box_kernel.cl  +100 -0
src/operators/kernel/cl/conv_add_kernel.cpp  +3 -3
src/operators/kernel/cl/conv_bn_relu_kernel.cpp  +174 -1
src/operators/kernel/cl/dwconv_bn_relu_kernel.cpp  +140 -1
src/operators/kernel/cl/feed_kernel.cpp  +2 -0
src/operators/kernel/cl/fetch_kernel.cpp  +5 -3
src/operators/kernel/cl/prior_box_kernel.cpp  +149 -2
src/operators/kernel/deconv_add_kernel.h  +39 -0
src/operators/kernel/deconv_add_relu_kernel.h  +39 -0
src/operators/kernel/fpga/V2/conv_add_kernel.cpp  +61 -0
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp  +36 -0
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp  +37 -0
src/operators/kernel/fpga/V2/split_kernel.cpp  +30 -0
src/operators/kernel/fpga/V2/transpose2_kernel.cpp  +36 -0
src/operators/op_param.h  +36 -1
src/operators/split_op.cpp  +4 -1
src/operators/tanh_op.cpp  +1 -1
src/operators/transpose2_op.cpp  +3 -0
test/CMakeLists.txt  +8 -0
tools/op.cmake  +16 -1
src/common/types.cpp

@@ -73,6 +73,8 @@ const char *G_OP_TYPE_QUANTIZE = "quantize";
 const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
 extern const char *G_OP_TYPE_TANH = "tanh";
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU = "fusion_deconv_relu";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD = "fusion_deconv_add";
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";

 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -133,5 +135,7 @@ std::unordered_map<
         {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
         {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}},
         {G_OP_TYPE_TANH, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}}};
+        {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD_RELU, {{"Input"}, {"Out"}}}};

}  // namespace paddle_mobile
src/common/types.h

@@ -142,6 +142,9 @@ extern const char *G_OP_TYPE_DEQUANTIZE;
 extern const char *G_OP_TYPE_TANH;
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU;

 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
src/framework/executor.cpp

@@ -280,6 +280,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
+    if (loddable_) {
+      ops[i]->InferShape();
+    }
     // to Run
     ops[i]->Run();
 #ifdef PADDLE_MOBILE_PROFILE
src/framework/loader.cpp

@@ -43,15 +43,21 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
       tensor->Resize(make_ddim(dim));
     } else {
       auto dim = var_desc->Tensor_desc().Dims();
-      PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
+      // PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
       // dim[0] = 1;
-      for (auto &d : dim) {
-        if (d < 0) {
-          d *= -1;
+      if (dim.size() == 0) {
+        auto tensor = var->GetMutable<LoDTensor>();
+        framework::DDim dDim = {0};
+        tensor->Resize(dDim);
+      } else {
+        for (auto &d : dim) {
+          if (d < 0) {
+            d *= -1;
+          }
         }
+        auto tensor = var->GetMutable<LoDTensor>();
+        tensor->Resize(make_ddim(dim));
       }
-      auto tensor = var->GetMutable<LoDTensor>();
-      tensor->Resize(make_ddim(dim));
     }
   } else {
     // TODO(codeWorm): some.
src/io/jni/paddle_mobile_jni.cpp

@@ -405,9 +405,9 @@ Java_com_baidu_paddle_PML_predictLod(JNIEnv *env, jclass thiz, jlongArray buf) {
  ANDROIDLOGE("predict nlp size %d", count);
  result = env->NewLongArray(count);
  env->SetLongArrayRegion(result, 0, count, vec_result->data<int64_t>());
  env->ReleaseLongArrayElements(buf, ddim_ptr, 0);
  return result;
}
src/io/paddle_mobile.cpp

@@ -122,9 +122,12 @@ void PaddleMobile<Dtype, P>::Clear() {
   executor_ = nullptr;
   loader_ = nullptr;
 }

 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {}
+
+#ifdef PADDLE_MOBILE_CPU
+template <>
+double PaddleMobile<CPU, Precision::FP32>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
@@ -147,8 +150,8 @@ double PaddleMobile<Dtype, P>::GetPredictTime() {
   }
   paddle_mobile::operators::math::Gemm gemm;
   auto time1 = paddle_mobile::time();
-  // gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
-  //            static_cast<float>(0), c, ldc, false, nullptr);
+  gemm.Sgemm(m, n, k, static_cast<float>(1), a, lda, b, ldb,
+             static_cast<float>(0), c, ldc, false, nullptr);
   auto time2 = paddle_mobile::time();
   double cost = paddle_mobile::time_diff(time1, time2);
   paddle_mobile::memory::Free(a);
@@ -156,6 +159,7 @@ double PaddleMobile<Dtype, P>::GetPredictTime() {
   paddle_mobile::memory::Free(c);
   return cost;
 }
+#endif

 template <typename Dtype, Precision P>
 PaddleMobile<Dtype, P>::~PaddleMobile() {
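A quick aside on the CPU GetPredictTime() specialization above: it estimates device speed by timing a single small Sgemm call with m = 32, n = 224 * 224, k = 27. The sketch below shows the same measurement idea with standard C++ timing and a naive triple-loop GEMM in place of the library's optimized Sgemm; all names in it are illustrative and not part of the commit.

// benchmark_sketch.cpp -- standalone sketch, not part of this commit.
// Times one multiply of an (m x k) by a (k x n) matrix, mirroring the
// m = 32, n = 224 * 224, k = 27 shape used by GetPredictTime().
#include <chrono>
#include <iostream>
#include <vector>

static double TimeNaiveGemmMs(int m, int n, int k) {
  std::vector<float> a(m * k, 1.0f), b(k * n, 1.0f), c(m * n, 0.0f);
  auto t0 = std::chrono::steady_clock::now();
  for (int i = 0; i < m; ++i)
    for (int p = 0; p < k; ++p)
      for (int j = 0; j < n; ++j)
        c[i * n + j] += a[i * k + p] * b[p * n + j];
  auto t1 = std::chrono::steady_clock::now();
  return std::chrono::duration<double, std::milli>(t1 - t0).count();
}

int main() {
  // Same problem size the diff benchmarks with gemm.Sgemm(...).
  std::cout << TimeNaiveGemmMs(32, 224 * 224, 27) << " ms\n";
  return 0;
}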
src/operators/feed_op.cpp

@@ -21,7 +21,13 @@ template <typename DeviceType, typename T>
 void FeedOp<DeviceType, T>::InferShape() const {
   auto out_dims = this->param_.Out()->dims();
   out_dims[0] = this->param_.BatchSize();
-  this->param_.Out()->Resize(out_dims);
+  auto input_dims = this->param_.InputX()->dims();
+  DLOG << input_dims.size();
+  if (input_dims.size() == 4) {
+    this->param_.Out()->Resize(input_dims);
+  } else {
+    this->param_.Out()->Resize(out_dims);
+  }
 }

 }  // namespace operators
src/operators/fusion_conv_add_op.cpp

@@ -61,5 +61,7 @@ REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
 #ifdef PADDLE_MOBILE_CL
 REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add, ops::FusionConvAddOp);
+#endif

 #endif
src/operators/fusion_conv_add_relu_op.h

@@ -29,8 +29,9 @@ namespace operators {
 class FusionConvAddReluOpMatcher : public framework::FusionOpMatcher {
  public:
   FusionConvAddReluOpMatcher() {
-    // node_ = framework::Node(G_OP_TYPE_FUSION_CONV_ADD);
-    // node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }

   void FolderNodes(
src/operators/fusion_deconv_add_op.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP
#include "operators/fusion_deconv_add_op.h"
namespace paddle_mobile {
namespace operators {}
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp);
#endif

#endif
src/operators/fusion_deconv_add_op.h (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/deconv_add_kernel.h"
namespace paddle_mobile {
namespace operators {

using std::string;
using std::vector;

class FusionDeconvAddMatcher : public framework::FusionOpMatcher {
 public:
  FusionDeconvAddMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD; }
};

template <typename DeviceType, typename T>
class FusionDeconvAddOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionDeconvAddParam<DeviceType>,
          operators::DeconvAddKernel<DeviceType, T>> {
 public:
  FusionDeconvAddOp(const string &type, const VariableNameMap &inputs,
                    const VariableNameMap &outputs,
                    const framework::AttributeMap &attrs,
                    std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDeconvAddParam<DeviceType>,
            operators::DeconvAddKernel<DeviceType, T>>(type, inputs, outputs,
                                                       attrs, scope) {}

  void InferShape() const {
    auto input = this->param_.Input();
    auto in_dims = input->dims();

    auto filter = this->param_.Filter();
    auto filter_dims = filter->dims();

    std::vector<int> strides = this->param_.Strides();
    std::vector<int> paddings = this->param_.Paddings();
    std::vector<int> dilations = this->param_.Dilations();
    int groups = this->param_.Groups();

    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == 4 || in_dims.size() == 5,
        "ConvTransposeOp intput should be 4-D or 5-D tensor.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == filter_dims.size(),
        "ConvTransposeOp input dimension and filter dimension "
        "should be the same.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() - strides.size() == 2U,
        "ConvTransposeOp input dimension and strides dimension should "
        "be consistent.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
                          "ConvTransposeOp paddings dimension and strides "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
                          "ConvTransposeOp paddings dimension and dilations "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(in_dims[1] == filter_dims[0],
                          "In ConvTransposeOp, The number of input channels "
                          "should be equal to the number of filter's channels.");

    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
    for (size_t i = 0; i < strides.size(); ++i) {
      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
                             2 * paddings[i] + filter_extent);
    }
    this->param_.Output()->Resize(framework::make_ddim(output_shape));
  }

 protected:
};

}  // namespace operators
}  // namespace paddle_mobile

#endif  // FUSION_DECONV_ADD_OP
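For reference, the InferShape() above applies the standard transposed-convolution output-size rule per spatial dimension: out = (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1. A small self-contained check of that rule follows; the function and file names are illustrative, not part of the commit.

// deconv_shape_check.cpp -- sketch of the output-size rule used in InferShape().
#include <cassert>

// out = (in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1
static int DeconvOutSize(int in, int kernel, int stride, int pad, int dilation) {
  int filter_extent = dilation * (kernel - 1) + 1;
  return (in - 1) * stride - 2 * pad + filter_extent;
}

int main() {
  // A typical 2x-upsampling deconv: 4x4 input, 4x4 kernel, stride 2, pad 1 -> 8x8 output.
  assert(DeconvOutSize(4, 4, 2, 1, 1) == 8);
  return 0;
}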
src/operators/fusion_deconv_add_relu_op.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP
#include "operators/fusion_deconv_add_relu_op.h"
namespace paddle_mobile {
namespace operators {}
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add_relu, ops::FusionDeconvAddReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_relu, ops::FusionDeconvAddReluOp);
#endif

#endif
src/operators/fusion_deconv_add_relu_op.h (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/deconv_add_relu_kernel.h"
namespace paddle_mobile {
namespace operators {

using std::string;
using std::vector;

class FusionDeconvAddReluMatcher : public framework::FusionOpMatcher {
 public:
  FusionDeconvAddReluMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_RELU; }
};

template <typename DeviceType, typename T>
class FusionDeconvAddReluOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionDeconvAddReluParam<DeviceType>,
          operators::DeconvAddReluKernel<DeviceType, T>> {
 public:
  FusionDeconvAddReluOp(const string &type, const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
                        std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDeconvAddReluParam<DeviceType>,
            operators::DeconvAddReluKernel<DeviceType, T>>(type, inputs,
                                                           outputs, attrs,
                                                           scope) {}

  void InferShape() const {
    auto input = this->param_.Input();
    auto in_dims = input->dims();

    auto filter = this->param_.Filter();
    auto filter_dims = filter->dims();

    std::vector<int> strides = this->param_.Strides();
    std::vector<int> paddings = this->param_.Paddings();
    std::vector<int> dilations = this->param_.Dilations();
    int groups = this->param_.Groups();

    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == 4 || in_dims.size() == 5,
        "ConvTransposeOp intput should be 4-D or 5-D tensor.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() == filter_dims.size(),
        "ConvTransposeOp input dimension and filter dimension "
        "should be the same.");
    PADDLE_MOBILE_ENFORCE(
        in_dims.size() - strides.size() == 2U,
        "ConvTransposeOp input dimension and strides dimension should "
        "be consistent.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
                          "ConvTransposeOp paddings dimension and strides "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
                          "ConvTransposeOp paddings dimension and dilations "
                          "dimension should be the same.");
    PADDLE_MOBILE_ENFORCE(in_dims[1] == filter_dims[0],
                          "In ConvTransposeOp, The number of input channels "
                          "should be equal to the number of filter's channels.");

    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
    for (size_t i = 0; i < strides.size(); ++i) {
      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
                             2 * paddings[i] + filter_extent);
    }
    this->param_.Output()->Resize(framework::make_ddim(output_shape));
  }

 protected:
};

}  // namespace operators
}  // namespace paddle_mobile

#endif  // FUSION_DECONV_ADD_RELU_OP
src/operators/kernel/central-arm-func/conv_add_arm_func.h

@@ -115,6 +115,7 @@ void ConvAddBasic(const FusionConvAddParam<CPU> &param) {
 template <typename P>
 void ConvAddCompute(const FusionConvAddParam<CPU> &param) {
+  param.Output()->mutable_data<float>();
   if (param.Groups() == param.Input()->dims()[1] &&
       param.Input()->dims()[1] == param.Output()->dims()[1] &&
       param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
src/operators/kernel/cl/cl_kernel/conv_bn_relu_kernel.cl (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define BATCH_NORM
#define RELU

#include "conv_kernel.inc.cl"
src/operators/kernel/cl/cl_kernel/fetch_kernel.cl

@@ -20,7 +20,8 @@ __kernel void fetch(__private const int in_height,
                     __global float* out,
                     __private const int size_ch,
                     __private const int size_block,
-                    __private const int size_batch) {
+                    __private const int size_batch,
+                    __private const int C) {
   const int in_c = get_global_id(0);
   const int in_w = get_global_id(1);
   const int in_nh = get_global_id(2);
@@ -35,9 +36,17 @@ __kernel void fetch(__private const int in_height,
   const int index =
       in_n * size_batch + in_c * size_block + in_h * in_width + in_w;

   out[index] = convert_float(in.x);
-  out[index + size_ch] = convert_float(in.y);
+  if (C - 4 * in_c >= 2) {
+    out[index + size_ch] = convert_float(in.y);
+  }
+  if (C - 4 * in_c >= 3) {
   out[index + size_ch * 2] = convert_float(in.z);
-  out[index + size_ch * 3] = convert_float(in.w);
+  }
+  if (C - 4 * in_c >= 4) {
+    out[index + size_ch * 3] = convert_float(in.w);
+  }
 }

 __kernel void fetch_2d(__private const int in_height,
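The new C argument and the C - 4 * in_c >= n guards above exist because the CL image layout packs channels in groups of four (one RGBA texel per group), so the last group may contain padding lanes when C is not a multiple of 4; the guards stop the kernel from copying those padding lanes into the output buffer. A host-side sketch of the same tail handling, in plain C++ with illustrative names:

// channel_tail_sketch.cpp -- why the fetch kernel checks C - 4 * in_c.
#include <iostream>
#include <vector>

int main() {
  const int C = 6;                   // channel count, not a multiple of 4
  const int c_blocks = (C + 3) / 4;  // channel groups of 4, as in the CL image layout
  std::vector<int> valid_lanes;
  for (int block = 0; block < c_blocks; ++block) {
    int remaining = C - 4 * block;   // same expression as the kernel's guard
    valid_lanes.push_back(remaining >= 4 ? 4 : remaining);
  }
  // Prints "4 2": only two lanes of the last group carry real channels.
  for (int lanes : valid_lanes) std::cout << lanes << " ";
  std::cout << "\n";
  return 0;
}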
src/operators/kernel/cl/cl_kernel/prior_box_kernel.cl (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

__kernel void prior_box(__private const int global_size_dim0,
                        __private const int global_size_dim1,
                        __private const int global_size_dim2,
                        __global float *box_width,
                        __global float *box_height,
                        __write_only image2d_t output_image,
                        __private const float step_width,
                        __private const float step_height,
                        __private const float offset,
                        __private const int img_width,
                        __private const int img_height,
                        __private const int num_priors,
                        __private const int C) {
  const int out_c = get_global_id(0);
  const int out_nh = get_global_id(1);
  const int out_n = out_nh / num_priors;
  const int out_h = out_nh % num_priors;

  if (out_c >= global_size_dim0 || out_nh >= global_size_dim2) {
    return;
  }

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int2 output_pos;
  output_pos.x = out_c * 4;
  output_pos.y = out_nh;

  float center_x0 = (offset + out_c * 4) * step_width;
  float center_x1 = (offset + out_c * 4 + 1) * step_width;
  float center_x2 = (offset + out_c * 4 + 2) * step_width;
  float center_x3 = (offset + out_c * 4 + 3) * step_width;
  float center_y = (out_n + offset) * step_height;

  half4 output[4];
  output[0].x = convert_half((center_x0 - box_width[out_h]) / img_width);
  output[1].x = convert_half((center_y - box_height[out_h]) / img_height);
  output[2].x = convert_half((center_x0 + box_width[out_h]) / img_width);
  output[3].x = convert_half((center_y + box_height[out_h]) / img_height);

  if (C - 4 * out_c >= 2) {
    output[0].y = convert_half((center_x1 - box_width[out_h]) / img_width);
    output[1].y = convert_half((center_y - box_height[out_h]) / img_height);
    output[2].y = convert_half((center_x1 + box_width[out_h]) / img_width);
    output[3].y = convert_half((center_y + box_height[out_h]) / img_height);
  } else {
    output[0].y = 0.0f;
    output[1].y = 0.0f;
    output[2].y = 0.0f;
    output[3].y = 0.0f;
  }
  if (C - 4 * out_c >= 3) {
    output[0].z = convert_half((center_x2 - box_width[out_h]) / img_width);
    output[1].z = convert_half((center_y - box_height[out_h]) / img_height);
    output[2].z = convert_half((center_x2 + box_width[out_h]) / img_width);
    output[3].z = convert_half((center_y + box_height[out_h]) / img_height);
  } else {
    output[0].z = 0.0f;
    output[1].z = 0.0f;
    output[2].z = 0.0f;
    output[3].z = 0.0f;
  }
  if (C - 4 * out_c >= 4) {
    output[0].w = convert_half((center_x3 - box_width[out_h]) / img_width);
    output[1].w = convert_half((center_y - box_height[out_h]) / img_height);
    output[2].w = convert_half((center_x3 + box_width[out_h]) / img_width);
    output[3].w = convert_half((center_y + box_height[out_h]) / img_height);
  } else {
    output[0].z = 0.0f;
    output[1].z = 0.0f;
    output[2].z = 0.0f;
    output[3].z = 0.0f;
  }

  output[0] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[0]),
                  (half4)(1.0f, 1.0f, 1.0f, 1.0f));
  output[1] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[1]),
                  (half4)(1.0f, 1.0f, 1.0f, 1.0f));
  output[2] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[2]),
                  (half4)(1.0f, 1.0f, 1.0f, 1.0f));
  output[3] = min(max((half4)(0.0f, 0.0f, 0.0f, 0.0f), output[3]),
                  (half4)(1.0f, 1.0f, 1.0f, 1.0f));

  write_imageh(output_image, (int2)(output_pos.x + 1, output_pos.y), output[0]);
  write_imageh(output_image, (int2)(output_pos.x + 2, output_pos.y), output[1]);
  write_imageh(output_image, (int2)(output_pos.x + 3, output_pos.y), output[2]);
  write_imageh(output_image, (int2)(output_pos.x + 4, output_pos.y), output[3]);
}
\ No newline at end of file
src/operators/kernel/cl/conv_add_kernel.cpp

@@ -68,10 +68,10 @@ void ConvAddKernel<GPU_CL, float>::Compute(
  int nh = default_work_size[2];
  auto input = param.Input()->GetCLImage();
  auto filter = param.Filter()->GetCLImage();
  DLOG << "---yangfei30---";
  DLOG << *param.Filter();
  DLOG << param.Paddings();
  auto biase = param.Bias()->GetCLImage();
  param.Output()->InitEmptyImage(cl_helper_.CLContext(),
                                 cl_helper_.CLCommandQueue(),
                                 param.Output()->dims());
  auto output = param.Output()->GetCLImage();
  int stride = param.Strides()[0];
  int offset = param.Offset();
src/operators/kernel/cl/conv_bn_relu_kernel.cpp

@@ -22,12 +22,185 @@ namespace operators {
template <>
bool ConvBNReluKernel<GPU_CL, float>::Init(
    FusionConvBNReluParam<GPU_CL> *param) {
  PADDLE_MOBILE_ENFORCE(
      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
          param->Paddings()[0] == param->Paddings()[1],
      "need equal");

  const framework::CLImage *mean = param->InputMean();
  const framework::CLImage *variance = param->InputVariance();
  const framework::CLImage *scale = param->InputScale();
  const framework::CLImage *bias = param->InputBias();
  const float epsilon = param->Epsilon();

  const int C = mean->numel();

  auto mean_ptr = mean->data<float>();
  auto variance_ptr = variance->data<float>();
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();

  float inv_std_ptr[C];
  for (int i = 0; i < C; i++) {
    inv_std_ptr[i] =
        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
  }

  float *new_scale_ptr = new float[C];
  float *new_bias_ptr = new float[C];

  for (int i = 0; i < C; i++) {
    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
  }

  framework::CLImage *new_scale = new framework::CLImage();

  //  for (int j = 0; j < C; ++j) {
  //    DLOG << " new scale - " << j << new_scale_ptr[j];
  //  }
  //
  //  for (int j = 0; j < C; ++j) {
  //    DLOG << " new bias - " << j << new_bias_ptr[j];
  //  }

  new_scale->SetTensorData(new_scale_ptr, variance->dims());
  new_scale->InitCLImage(this->cl_helper_.CLContext(),
                         cl_helper_.CLCommandQueue());

  //  DLOG << " climage - y bias: " << *(param->Bias());
  //
  //  DLOG << " climage - new scale: " << *new_scale;

  framework::CLImage *new_bias = new framework::CLImage();
  new_bias->SetTensorData(new_bias_ptr, variance->dims());
  new_bias->InitCLImage(this->cl_helper_.CLContext(),
                        cl_helper_.CLCommandQueue());

  //  DLOG << " climage - new bias: " << *new_bias;
  //
  //  DLOG << " climage - filter: " << *(param->Filter());

  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  delete[](new_scale_ptr);
  delete[](new_bias_ptr);

  PADDLE_MOBILE_ENFORCE(
      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
          param->Paddings()[0] == param->Paddings()[1],
      "need equal");

  int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
               static_cast<int>(param->Paddings()[1]);
  param->SetOffset(offset);

  if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
    param->Filter()->InitNImage(cl_helper_.CLContext(),
                                cl_helper_.CLCommandQueue());
    this->cl_helper_.AddKernel("conv_1x1", "conv_bn_relu_kernel.cl");
    DLOG << " conv bn relu conv 1x1";
  } else if (param->Filter()->dims()[1] == 1 &&
             param->Input()->dims()[1] == param->Output()->dims()[1] &&
             param->Filter()->dims()[2] == 3) {
    param->Filter()->InitDWImage(cl_helper_.CLContext(),
                                 cl_helper_.CLCommandQueue());
    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_bn_relu_kernel.cl");
    DLOG << " conv bn relu depth_conv_3x3";
  } else if (param->Filter()->dims()[2] == 3 &&
             param->Filter()->dims()[3] == 3) {
    param->Filter()->InitCLImage(cl_helper_.CLContext(),
                                 cl_helper_.CLCommandQueue());
    this->cl_helper_.AddKernel("conv_3x3", "conv_bn_relu_kernel.cl");
    DLOG << " conv bn relu conv_3x3";
  } else {
    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
  }
  return true;
}

template <>
void ConvBNReluKernel<GPU_CL, float>::Compute(
-    const FusionConvBNReluParam<GPU_CL> &param) {}
+    const FusionConvBNReluParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
  int c_block = default_work_size[0];
  int w = default_work_size[1];
  int nh = default_work_size[2];
  auto input = param.Input()->GetCLImage();
  auto filter = param.Filter()->GetCLImage();
  auto new_scale = param.NewScale()->GetCLImage();
  auto new_bias = param.NewBias()->GetCLImage();
  auto output = param.Output()->GetCLImage();
  int stride = param.Strides()[0];
  int offset = param.Offset();
  int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
                    param.Input()->Converter())
                    ->GetCBlock();
  int dilation = param.Dilations()[0];

  int input_width = param.Input()->dims()[3];
  int input_height = param.Input()->dims()[2];
  int output_width = param.Output()->dims()[3];
  int output_height = param.Output()->dims()[2];

  cl_int status;

  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &new_scale);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &new_bias);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &output);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 8, sizeof(int), &stride);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 9, sizeof(int), &offset);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 10, sizeof(int), &input_c);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 11, sizeof(int), &dilation);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 12, sizeof(int), &input_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 13, sizeof(int), &input_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 14, sizeof(int), &output_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 15, sizeof(int), &output_height);
  CL_CHECK_ERRORS(status);

  status = clEnqueueNDRangeKernel(
      this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(),
      NULL, default_work_size.data(), NULL, 0, NULL, NULL);
  CL_CHECK_ERRORS(status);
}

template class ConvBNReluKernel<GPU_CL, float>;

}  // namespace operators
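The Init() above folds the batch-norm parameters into a per-channel scale and bias so the fused kernel only applies one affine transform after the convolution: new_scale = gamma / sqrt(variance + epsilon) and new_bias = beta - mean * new_scale. A standalone sketch of that folding, with illustrative values rather than real model weights:

// bn_fold_sketch.cpp -- the same per-channel folding Init() performs.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Per-channel batch-norm parameters (illustrative values only).
  std::vector<float> mean = {0.1f, -0.2f}, var = {1.0f, 4.0f};
  std::vector<float> gamma = {1.5f, 0.5f}, beta = {0.0f, 1.0f};
  const float epsilon = 1e-5f;

  for (size_t i = 0; i < mean.size(); ++i) {
    float inv_std = 1.0f / std::sqrt(var[i] + epsilon);
    float new_scale = inv_std * gamma[i];            // folded multiplier
    float new_bias = beta[i] - mean[i] * new_scale;  // folded offset
    // The conv output y is then post-processed as relu(y * new_scale + new_bias).
    std::printf("channel %zu: scale=%f bias=%f\n", i, new_scale, new_bias);
  }
  return 0;
}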
src/operators/kernel/cl/dwconv_bn_relu_kernel.cpp

@@ -22,12 +22,151 @@ namespace operators {
template <>
bool DWConvBNReluKernel<GPU_CL, float>::Init(
    FusionDWConvBNReluParam<GPU_CL> *param) {
  PADDLE_MOBILE_ENFORCE(
      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
          param->Paddings()[0] == param->Paddings()[1],
      "need equal");

  const framework::CLImage *mean = param->InputMean();
  const framework::CLImage *variance = param->InputVariance();
  const framework::CLImage *scale = param->InputScale();
  const framework::CLImage *bias = param->InputBias();
  const float epsilon = param->Epsilon();

  const int C = mean->numel();

  auto mean_ptr = mean->data<float>();
  auto variance_ptr = variance->data<float>();
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();

  float inv_std_ptr[C];
  for (int i = 0; i < C; i++) {
    inv_std_ptr[i] =
        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
  }

  float *new_scale_ptr = new float[C];
  float *new_bias_ptr = new float[C];

  for (int i = 0; i < C; i++) {
    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
  }

  framework::CLImage *new_scale = new framework::CLImage();
  new_scale->SetTensorData(new_scale_ptr, variance->dims());
  new_scale->InitCLImage(this->cl_helper_.CLContext(),
                         cl_helper_.CLCommandQueue());

  framework::CLImage *new_bias = new framework::CLImage();
  new_bias->SetTensorData(new_bias_ptr, variance->dims());
  new_bias->InitCLImage(this->cl_helper_.CLContext(),
                        cl_helper_.CLCommandQueue());

  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);

  delete[](new_scale_ptr);
  delete[](new_bias_ptr);

  PADDLE_MOBILE_ENFORCE(
      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
          param->Paddings()[0] == param->Paddings()[1],
      "need equal");

  int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
               static_cast<int>(param->Paddings()[1]);
  param->SetOffset(offset);

  param->Filter()->InitDWImage(cl_helper_.CLContext(),
                               cl_helper_.CLCommandQueue());
  this->cl_helper_.AddKernel("depth_conv_3x3", "conv_bn_relu_kernel.cl");
  DLOG << " conv bn relu depth_conv_3x3";

  return true;
}

template <>
void DWConvBNReluKernel<GPU_CL, float>::Compute(
-    const FusionDWConvBNReluParam<GPU_CL> &param) {}
+    const FusionDWConvBNReluParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
  int c_block = default_work_size[0];
  int w = default_work_size[1];
  int nh = default_work_size[2];
  auto input = param.Input()->GetCLImage();
  auto filter = param.Filter()->GetCLImage();
  auto new_scale = param.NewScale()->GetCLImage();
  auto new_bias = param.NewBias()->GetCLImage();
  auto output = param.Output()->GetCLImage();
  int stride = param.Strides()[0];
  int offset = param.Offset();
  int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
                    param.Input()->Converter())
                    ->GetCBlock();
  int dilation = param.Dilations()[0];

  int input_width = param.Input()->dims()[3];
  int input_height = param.Input()->dims()[2];
  int output_width = param.Output()->dims()[3];
  int output_height = param.Output()->dims()[2];

  cl_int status;

  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &new_scale);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &new_bias);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &output);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 8, sizeof(int), &stride);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 9, sizeof(int), &offset);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 10, sizeof(int), &input_c);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 11, sizeof(int), &dilation);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 12, sizeof(int), &input_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 13, sizeof(int), &input_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 14, sizeof(int), &output_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 15, sizeof(int), &output_height);
  CL_CHECK_ERRORS(status);

  status = clEnqueueNDRangeKernel(
      this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(),
      NULL, default_work_size.data(), NULL, 0, NULL, NULL);
  CL_CHECK_ERRORS(status);
}

template class DWConvBNReluKernel<GPU_CL, float>;

}  // namespace operators
src/operators/kernel/cl/feed_kernel.cpp

@@ -28,6 +28,8 @@ template <>
 void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   auto kernel = this->cl_helper_.KernelAt(0);
   cl_int status;
+  param.Out()->InitEmptyImage(cl_helper_.CLContext(),
+                              cl_helper_.CLCommandQueue(), param.Out()->dims());
   auto output = param.Out();
   const Tensor *input = param.InputX();
   //  DLOG << *input;
src/operators/kernel/cl/fetch_kernel.cpp

@@ -27,8 +27,6 @@ bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
   } else {
     this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
   }
-  auto *out = param->Out();
-  out->mutable_data<float>();
   return true;
 }
@@ -39,7 +37,7 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   auto input = param.InputX()->GetCLImage();
   auto *out = param.Out();
   out->mutable_data<float>();
   const auto &dim = param.InputX()->dims();
   size_t new_dims[] = {1, 1, 1, 1};
@@ -70,9 +68,11 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
     int size_ch = in_height * in_width;
     int size_block = size_ch * 4;
     int size_batch = size_ch * C;
+    int out_c = new_dims[1];
     clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
     clSetKernelArg(kernel, 5, sizeof(int), &size_block);
     clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
+    clSetKernelArg(kernel, 7, sizeof(int), &out_c);
   }
   // cl_event wait_event = param.InpdutX()->GetClEvent();
@@ -93,6 +93,8 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   //           << "ms" << std::endl;
   memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
+  DLOG << *param.InputX();
+  DLOG << *out;
 }

 template class FetchKernel<GPU_CL, float>;
src/operators/kernel/cl/prior_box_kernel.cpp

@@ -15,18 +15,165 @@ limitations under the License. */
#ifdef PRIORBOX_OP

#include "operators/kernel/prior_box_kernel.h"
+#include "framework/cl/cl_tensor.h"

namespace paddle_mobile {
namespace operators {

template <>
bool PriorBoxKernel<GPU_CL, float>::Init(PriorBoxParam<GPU_CL> *param) {
+  this->cl_helper_.AddKernel("prior_box", "prior_box_kernel.cl");
  return true;
}

template <>
void PriorBoxKernel<GPU_CL, float>::Compute(
-    const PriorBoxParam<GPU_CL> &param) {}
+    const PriorBoxParam<GPU_CL> &param) {
  const auto *input_ = param.Input();
  const auto &input_dims = input_->dims();

  const auto &input_image_dims = param.InputImage()->dims();

  const auto &min_sizes = param.MinSizes();
  const auto &max_sizes = param.MaxSizes();
  const auto &variances = param.Variances();
  const auto &input_aspect_ratio = param.AspectRatios();
  const bool &flip = param.Flip();
  const bool &clip = param.Clip();
  const float &step_w = param.StepW();
  const float &step_h = param.StepH();
  const float &offset = param.Offset();
  const int C = param.OutputBoxes()->dims()[1];

  auto output_boxes = param.OutputBoxes()->GetCLImage();
  auto output_variances = param.OutputVariances()->GetCLImage();

  std::vector<float> aspect_ratios;
  ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);

  auto img_width = input_image_dims[3];
  auto img_height = input_image_dims[2];

  auto feature_width = input_dims[3];
  auto feature_height = input_dims[2];

  float step_width, step_height;
  /// 300 / 19
  if (step_w == 0 || step_h == 0) {
    step_width = static_cast<float>(img_width) / feature_width;
    step_height = static_cast<float>(img_height) / feature_height;
  } else {
    step_width = step_w;
    step_height = step_h;
  }

  int num_priors = aspect_ratios.size() * min_sizes.size();
  if (!max_sizes.empty()) {
    num_priors += max_sizes.size();
  }

  float *box_width = static_cast<float *>(
      paddle_mobile::memory::Alloc(sizeof(float) * num_priors));
  float *box_height = static_cast<float *>(
      paddle_mobile::memory::Alloc(sizeof(float) * num_priors));

  int idx = 0;
  for (size_t s = 0; s < min_sizes.size(); ++s) {
    auto min_size = min_sizes[s];
    if (param.MinMaxAspectRatiosOrder()) {
      box_width[idx] = box_height[idx] = min_size / 2.;
      idx++;
      if (max_sizes.size() > 0) {
        auto max_size = max_sizes[s];
        box_width[idx] = box_height[idx] = sqrt(min_size * max_size) / 2.;
        idx++;
      }
      for (float ar : aspect_ratios) {
        if (fabs(ar - 1.) < 1e-6) {
          continue;
        }
        box_width[idx] = min_size * sqrt(ar) / 2.;
        box_height[idx] = min_size / sqrt(ar) / 2.;
        idx++;
      }
    } else {
      for (float ar : aspect_ratios) {
        box_width[idx] = min_size * sqrt(ar) / 2.;
        box_height[idx] = min_size / sqrt(ar) / 2.;
        idx++;
      }
      if (!max_sizes.empty()) {
        auto max_size = max_sizes[s];
        box_width[idx] = box_height[idx] = sqrt(min_size * max_size) / 2.;
        idx++;
      }
    }
  }

  cl_int status;
  auto kernel = this->cl_helper_.KernelAt(0);
  auto default_work_size =
      this->cl_helper_.DefaultWorkSize(*param.OutputBoxes());
  int c_block = default_work_size[0];
  int w = default_work_size[1];
  int nh = default_work_size[2];

  std::vector<int64_t> box_shape({1, 1, 1, num_priors});
  framework::DDim ddim = framework::make_ddim(box_shape);

  framework::CLTensor box_width_cl_tensor(this->cl_helper_.CLContext(),
                                          this->cl_helper_.CLCommandQueue());
  box_width_cl_tensor.Resize(ddim);
  cl_mem box_width_Buffer =
      box_width_cl_tensor.mutable_with_data<float>(box_width);

  framework::CLTensor box_height_cl_tensor(this->cl_helper_.CLContext(),
                                           this->cl_helper_.CLCommandQueue());
  box_height_cl_tensor.Resize(ddim);
  cl_mem box_height_Buffer =
      box_height_cl_tensor.mutable_with_data<float>(box_height);

  DLOG << "c_block:" << c_block;
  DLOG << "w:" << w;
  DLOG << "nh:" << nh;
  DLOG << "step_width:" << step_width;
  DLOG << "step_height:" << step_height;
  DLOG << "offset:" << offset;
  DLOG << "img_width:" << img_width;
  DLOG << "img_height:" << img_height;
  DLOG << "num_priors:" << num_priors;
  DLOG << "C:" << C;

  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &box_width_Buffer);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &box_height_Buffer);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output_boxes);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 6, sizeof(float), &step_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 7, sizeof(float), &step_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 8, sizeof(float), &offset);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 9, sizeof(int), &img_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 10, sizeof(int), &img_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 11, sizeof(int), &num_priors);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 12, sizeof(int), &C);
  CL_CHECK_ERRORS(status);

  size_t global_work_size[2] = {c_block, nh};
  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                  NULL, global_work_size, NULL, 0, NULL, NULL);
  CL_CHECK_ERRORS(status);

  paddle_mobile::memory::Free(box_width);
  paddle_mobile::memory::Free(box_height);
}

template class PriorBoxKernel<GPU_CL, float>;

}  // namespace operators
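The box_width/box_height tables built above follow the SSD prior-box rule: for each min_size there is a square box of side min_size, an optional sqrt(min_size * max_size) box, and one box per extra aspect ratio with width min_size * sqrt(ar) and height min_size / sqrt(ar). The kernel stores half-sizes (hence the / 2.) because it later adds and subtracts them around each box center. A compact sketch with illustrative values:

// prior_box_sizes_sketch.cpp -- the half-width/half-height tables built in Compute().
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const float min_size = 60.f, max_size = 110.f;  // illustrative SSD settings
  const std::vector<float> aspect_ratios = {1.f, 2.f, 0.5f};
  std::vector<float> half_w, half_h;

  half_w.push_back(min_size / 2.f);  // square box from min_size
  half_h.push_back(min_size / 2.f);
  half_w.push_back(std::sqrt(min_size * max_size) / 2.f);  // box between min and max
  half_h.push_back(std::sqrt(min_size * max_size) / 2.f);
  for (float ar : aspect_ratios) {
    if (std::fabs(ar - 1.f) < 1e-6) continue;  // ar == 1 already covered above
    half_w.push_back(min_size * std::sqrt(ar) / 2.f);
    half_h.push_back(min_size / std::sqrt(ar) / 2.f);
  }
  for (size_t i = 0; i < half_w.size(); ++i)
    std::printf("box %zu: half_w=%.1f half_h=%.1f\n", i, half_w[i], half_h[i]);
  return 0;
}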
src/operators/kernel/deconv_add_kernel.h (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

using framework::OpKernelBase;

template <typename DeviceType, typename T>
class DeconvAddKernel
    : public OpKernelBase<DeviceType, FusionDeconvAddParam<DeviceType>> {
 public:
  void Compute(const FusionDeconvAddParam<DeviceType> &param);
  bool Init(FusionDeconvAddParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/deconv_add_relu_kernel.h (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

using framework::OpKernelBase;

template <typename DeviceType, typename T>
class DeconvAddReluKernel
    : public OpKernelBase<DeviceType, FusionDeconvAddReluParam<DeviceType>> {
 public:
  void Compute(const FusionDeconvAddReluParam<DeviceType> &param);
  bool Init(FusionDeconvAddReluParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/conv_add_kernel.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
namespace paddle_mobile {
namespace operators {

template <>
bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
  bool relu_enabled = false;
  auto input = const_cast<Tensor *>(param->Input());
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  auto filter = const_cast<Tensor *>(param->Filter());
  auto out = param->Output();

  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                        "Output channel should be equal to bias number");
  int channel = out->dims()[1];
  auto bs_ptr =
      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
  for (int i = 0; i < channel; i++) {
    bs_ptr[i + channel] = 1;
    bs_ptr[i] = bias_ptr[i];
  }

  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());

  fpga::SplitConvArgs conv_arg = {0};
  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0],
                       param->Strides()[1], param->Paddings()[0],
                       param->Paddings()[1], bs_ptr);
  param->SetFpgaArgs(conv_arg);
  return true;
}

template <>
void ConvAddKernel<FPGA, float>::Compute(
    const FusionConvAddParam<FPGA> &param) {
  fpga::ComputeFpgaConv(param.FpgaArgs());
}

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/deconv_add_kernel.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADD_OP
#include "operators/kernel/deconv_add_kernel.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

template <>
bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
  return true;
}

template <>
void DeconvAddKernel<FPGA, float>::Compute(
    const FusionDeconvAddParam<FPGA> &param) {}

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP
#include "operators/kernel/deconv_add_relu_kernel.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

template <>
bool DeconvAddReluKernel<FPGA, float>::Init(
    FusionDeconvAddReluParam<FPGA> *param) {
  return true;
}

template <>
void DeconvAddReluKernel<FPGA, float>::Compute(
    const FusionDeconvAddReluParam<FPGA> &param) {}

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/split_kernel.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SPLIT_OP
#include "operators/kernel/split_kernel.h"
namespace paddle_mobile {
namespace operators {

template <>
bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
  return true;
}
template <>
void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/transpose2_kernel.cpp (new file, mode 100644)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#include "operators/kernel/transpose2_kernel.h"
#include "operators/kernel/central-arm-func/transpose2_arm_func.h"
namespace paddle_mobile {
namespace operators {

template <>
bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
  return true;
}

template <>
void Transpose2Kernel<FPGA, float>::Compute(
    const Transpose2Param<FPGA> &param) {
  //  Transpose2Compute<float>(param);
}

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/op_param.h

@@ -2234,7 +2234,10 @@ class ConvTransposeParam : public OpParam {
                      const Scope &scope) {
     filter_ = FilterFrom<GType>(inputs, scope);
     input_ = InputFrom<GType>(inputs, scope);
-    output_ = OutputFrom<GType>(outputs, scope);
+    //    output_ = OutputFrom<GType>(outputs, scope);
+    if (outputs.count("Output")) {
+      output_ = OpParam::OutputFrom<GType>(outputs, scope);
+    }
     strides_ = GetAttr<vector<int>>("strides", attrs);
     paddings_ = GetAttr<vector<int>>("paddings", attrs);
     dilations_ = GetAttr<vector<int>>("dilations", attrs);
@@ -2275,6 +2278,38 @@ class ConvTransposeParam : public OpParam {
 #endif
 };
 #endif

+#ifdef FUSION_DECONVADD_OP
+template <typename Dtype>
+class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDeconvAddParam(const VariableNameMap &inputs,
+                       const VariableNameMap &outputs,
+                       const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+  }
+
+  RType *Bias() const { return bias_; }
+
+  const int &Axis() const { return axis_; }
+
+  RType *Output() const { return output_; }
+
+ protected:
+  RType *bias_;
+  int axis_;
+  RType *output_;
+};
+#endif
+
+#ifdef FUSION_DECONVADDRELU_OP
+template <typename Dtype>
+using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
+#endif

 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
src/operators/split_op.cpp

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */

#ifdef SPLIT_OP

#include "operators/split_op.h"
#include <vector>

namespace paddle_mobile {
namespace operators {
@@ -83,5 +83,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(split, ops::SplitOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(split, ops::SplitOp);
+#endif

 #endif  // SPLIT_OP
src/operators/tanh_op.cpp

@@ -29,7 +29,7 @@ void TanhOp<DeviceType, T>::InferShape() const {
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_FPGA
-REGISTER_OPERATOR_FPGA(Tanh, ops::TanhOp);
+REGISTER_OPERATOR_FPGA(tanh, ops::TanhOp);
 #endif

 #endif
src/operators/transpose2_op.cpp

@@ -60,5 +60,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(transpose2, ops::Transpose2Op);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(transpose2, ops::Transpose2Op);
+#endif

 #endif  // TRANSPOSE_OP
test/CMakeLists.txt

@@ -66,6 +66,10 @@ list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
     ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet50 paddle-mobile)
+
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
+
     set(FOUND_MATCH ON)
 endif ()
@@ -76,6 +80,10 @@ if (CON GREATER -1)
     ADD_EXECUTABLE(test-pe fpga/test_pe.cpp)
     target_link_libraries(test-pe paddle-mobile)
+
+    ADD_EXECUTABLE(test-densebox net/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-densebox paddle-mobile)
+
     set(FOUND_MATCH ON)
 endif ()
tools/op.cmake

@@ -127,6 +127,8 @@ endif()
 list(FIND NET "FPGA_NET_V2" CON)
 if (CON GREATER -1)
     message("FPGA_NET_V2 enabled")
     set(FEED_OP ON)
     set(FUSION_CONVADDRELU_OP ON)
     set(FUSION_ELEMENTWISEADDRELU_OP ON)
     set(FUSION_FC_OP ON)
     set(POOL_OP ON)
@@ -135,9 +137,16 @@ if (CON GREATER -1)
     set(FUSION_CONVBN_OP ON)
     set(CONV_TRANSPOSE_OP ON)
     set(FUSION_DECONVRELU_OP ON)
-    set(SLICE_OP ON)
+    #    set(SLICE_OP ON)
     set(TANH_OP ON)
     set(ELEMENTWISEADD_OP ON)
     set(TRANSPOSE2_OP ON)
     set(FUSION_CONVADD_OP ON)
     set(SPLIT_OP ON)
     set(FUSION_DECONVADD_OP ON)
     set(FUSION_DECONVADDRELU_OP ON)
     set(FOUND_MATCH ON)
 endif ()
@@ -452,4 +461,10 @@ if (TANH_OP)
 endif ()
 if (FUSION_DECONVRELU_OP)
     add_definitions(-DFUSION_DECONVRELU_OP)
 endif ()
+if (FUSION_DECONVADD_OP)
+    add_definitions(-DFUSION_DECONVADD_OP)
+endif ()
+if (FUSION_DECONVADDRELU_OP)
+    add_definitions(-DFUSION_DECONVADDRELU_OP)
+endif ()
\ No newline at end of file