Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
14114fcf
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
14114fcf
编写于
6月 13, 2018
作者:
E
eclipsess
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'upstream/develop' into develop
上级
1f680e4c
663d0fd4
变更
39
展开全部
隐藏空白更改
内联
并排
Showing
39 changed file
with
877 addition
and
478 deletion
+877
-478
CMakeLists.txt
CMakeLists.txt
+31
-6
src/common/types.h
src/common/types.h
+1
-0
src/framework/op_registry.h
src/framework/op_registry.h
+31
-16
src/framework/operator.cpp
src/framework/operator.cpp
+5
-0
src/framework/operator.h
src/framework/operator.h
+1
-0
src/framework/tensor.h
src/framework/tensor.h
+0
-1
src/io/io.cpp
src/io/io.cpp
+28
-0
src/operators/batchnorm_op.cpp
src/operators/batchnorm_op.cpp
+8
-2
src/operators/box_coder_op.cpp
src/operators/box_coder_op.cpp
+8
-2
src/operators/concat_op.cpp
src/operators/concat_op.cpp
+8
-2
src/operators/conv_op.cpp
src/operators/conv_op.cpp
+12
-2
src/operators/depthwise_conv_op.cpp
src/operators/depthwise_conv_op.cpp
+8
-2
src/operators/elementwise_add_op.cpp
src/operators/elementwise_add_op.cpp
+8
-2
src/operators/feed_op.h
src/operators/feed_op.h
+8
-2
src/operators/fetch_op.h
src/operators/fetch_op.h
+8
-2
src/operators/fusion_conv_add.cpp
src/operators/fusion_conv_add.cpp
+33
-4
src/operators/fusion_conv_add.h
src/operators/fusion_conv_add.h
+27
-7
src/operators/fusion_conv_add_relu_op.h
src/operators/fusion_conv_add_relu_op.h
+8
-2
src/operators/fusion_fc_op.cpp
src/operators/fusion_fc_op.cpp
+8
-2
src/operators/fusion_fc_op.h
src/operators/fusion_fc_op.h
+8
-3
src/operators/kernel/arm/conv_add_kernel.cpp
src/operators/kernel/arm/conv_add_kernel.cpp
+138
-0
src/operators/kernel/arm/relu_kernel.cpp
src/operators/kernel/arm/relu_kernel.cpp
+64
-6
src/operators/kernel/conv_add_kernel.h
src/operators/kernel/conv_add_kernel.h
+57
-0
src/operators/kernel/fpga/conv_kernel.cpp
src/operators/kernel/fpga/conv_kernel.cpp
+7
-6
src/operators/lrn_op.cpp
src/operators/lrn_op.cpp
+8
-2
src/operators/mul_op.cpp
src/operators/mul_op.cpp
+8
-2
src/operators/multiclass_nms_op.cpp
src/operators/multiclass_nms_op.cpp
+8
-2
src/operators/op_param.cpp
src/operators/op_param.cpp
+26
-0
src/operators/op_param.h
src/operators/op_param.h
+51
-0
src/operators/pool_op.cpp
src/operators/pool_op.cpp
+8
-2
src/operators/prior_box_op.cpp
src/operators/prior_box_op.cpp
+8
-2
src/operators/relu_op.cpp
src/operators/relu_op.cpp
+8
-2
src/operators/reshape_op.cpp
src/operators/reshape_op.cpp
+8
-2
src/operators/sigmoid_op.cpp
src/operators/sigmoid_op.cpp
+8
-2
src/operators/softmax_op.cpp
src/operators/softmax_op.cpp
+8
-2
src/operators/transpose_op.cpp
src/operators/transpose_op.cpp
+8
-2
test/net/test_googlenet.cpp
test/net/test_googlenet.cpp
+3
-3
tools/build.sh
tools/build.sh
+3
-5
tools/ios-cmake/ios.toolchain.cmake
tools/ios-cmake/ios.toolchain.cmake
+199
-381
未找到文件。
CMakeLists.txt
浏览文件 @
14114fcf
cmake_minimum_required
(
VERSION 3.0
)
project
(
paddle-mobile
)
option
(
DEBUGING
"enable debug mode"
O
FF
)
option
(
DEBUGING
"enable debug mode"
O
N
)
option
(
USE_OPENMP
"openmp support"
OFF
)
option
(
USE_EXCEPTION
"use std exception"
OFF
)
option
(
USE_EXCEPTION
"use std exception"
ON
)
option
(
LOG_PROFILE
"log profile"
ON
)
# select the platform to build
option
(
CPU
"cpu"
ON
)
option
(
MALI_GPU
"mali gpu"
OFF
)
option
(
FPGA
"fpga"
OFF
)
if
(
CPU
)
add_definitions
(
-DPADDLE_MOBILE_CPU
)
elseif
(
MALI_GPU
)
add_definitions
(
-DPADDLE_MOBILE_MALI_GPU
)
elseif
(
FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
endif
()
set
(
CMAKE_CXX_FLAGS
"-std=c++14 -O3 -s
${
CMAKE_CXX_FLAGS
}
"
)
if
(
DEBUGING
)
set
(
CMAKE_BUILD_TYPE Debug
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"
${
CMAKE_CXX_FLAGS
}
"
)
else
()
set
(
CMAKE_BUILD_TYPE Release
)
endif
()
...
...
@@ -24,12 +39,17 @@ else()
endif
()
if
(
USE_EXCEPTION
)
message
(
STATUS
"use exception"
)
add_definitions
(
-DENABLE_EXCEPTION
)
add_definitions
(
-fexceptions
)
else
()
add_definitions
(
-fno-exceptions
)
endif
()
if
(
LOG_PROFILE
)
add_definitions
(
-DPADDLE_MOBILE_PROFILE
)
endif
()
if
(
IS_MAC
)
add_definitions
(
-DX86
)
elseif
(
IS_IOS
)
...
...
@@ -42,7 +62,6 @@ else ()
add_definitions
(
-DX86
)
endif
()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++14"
)
set
(
CMAKE_VERBOSE_MAKEFILE ON
)
set
(
CMAKE_EXPORT_COMPILE_COMMANDS ON
)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY build
)
...
...
@@ -74,6 +93,7 @@ if (googlenet)
add_definitions
(
-DFUSION_FC_OP
)
add_definitions
(
-DPOOL_OP
)
add_definitions
(
-DRELU_OP
)
add_definitions
(
-DFUSION_CONVADD_OP
)
elseif
(
mobilenet
)
add_definitions
(
-DCONV_OP
)
add_definitions
(
-DELEMENTWISEADD_OP
)
...
...
@@ -112,7 +132,7 @@ else ()
add_definitions
(
-DCONV_OP
)
add_definitions
(
-DDEPTHWISECONV_OP
)
add_definitions
(
-DELEMENTWISEADD_OP
)
add_definitions
(
-DFUSIONCONVADD_OP
)
add_definitions
(
-DFUSION
_
CONVADD_OP
)
add_definitions
(
-DCONVADDRELU_OP
)
add_definitions
(
-DFUSION_FC_OP
)
add_definitions
(
-DLRN_OP
)
...
...
@@ -127,8 +147,13 @@ else ()
add_definitions
(
-DTRANSPOSE_OP
)
endif
()
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
if
(
IS_IOS
)
add_library
(
paddle-mobile STATIC
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
elseif
(
ANDROID
)
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
else
()
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
endif
()
if
(
DEBUGING
)
add_subdirectory
(
test
)
...
...
src/common/types.h
浏览文件 @
14114fcf
...
...
@@ -99,6 +99,7 @@ static std::unordered_map<
std
::
string
,
std
::
pair
<
std
::
vector
<
std
::
string
>
,
std
::
vector
<
std
::
string
>>>
op_input_output_key
=
{
{
G_OP_TYPE_CONV
,
{{
"Input"
},
{
"Output"
}}},
{
G_OP_TYPE_CONV_ADD
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_RELU
,
{{
"X"
},
{
"Out"
}}},
{
G_OP_TYPE_SOFTMAX
,
{{
"X"
},
{
"Out"
}}},
{
G_OP_TYPE_MUL
,
{{
"X"
},
{
"Out"
}}},
...
...
src/framework/op_registry.h
浏览文件 @
14114fcf
...
...
@@ -96,24 +96,39 @@ class OpRegistry {
}
};
#define REGISTER_OPERATOR(op_type, op_class
)
\
template <typename Dtype, typename T>
\
class _OpClass_##op_type##_
: public op_class<Dtype, T> {
\
public:
\
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_
, op_class);
\
};
\
static paddle_mobile::framework::OperatorRegistrar<
\
paddle_mobile::CPU, _OpClass_##op_type##_<paddle_mobile::CPU
, float>> \
__op_registrar_##op_type##_
_(#op_type);
\
int TouchOpRegistrar_##op_type
() {
\
__op_registrar_##op_type##_
_.Touch();
\
return 0;
\
#define REGISTER_OPERATOR(op_type, op_class
, device_name, device_type)
\
template <typename Dtype, typename T> \
class _OpClass_##op_type##_
##device_name : public op_class<Dtype, T> {
\
public: \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_
##device_name, op_class);
\
}; \
static paddle_mobile::framework::OperatorRegistrar< \
device_type, _OpClass_##op_type##_##device_name<device_type
, float>> \
__op_registrar_##op_type##_
##device_name(#op_type);
\
int TouchOpRegistrar_##op_type
##_##device_name() {
\
__op_registrar_##op_type##_
##device_name.Touch();
\
return 0; \
}
#define USE_OP(op_type) \
extern int TouchOpRegistrar_##op_type(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type()
#define REGISTER_OPERATOR_CPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
#define USE_OP(op_type, device_name) \
extern int TouchOpRegistrar_##op_type##_##device_name(); \
static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##device_name()
#define USE_OP_CPU(op_type) USE_OP(op_type, cpu);
#define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu);
#define USE_OP_FPGA(op_type) USE_OP(op_type, fpga);
}
// namespace framework
}
// namespace paddle_mobile
src/framework/operator.cpp
浏览文件 @
14114fcf
...
...
@@ -58,7 +58,12 @@ void OperatorBase<Dtype>::Run() const {
}
template
class
OperatorBase
<
CPU
>;
template
class
OperatorBase
<
FPGA
>;
template
class
OperatorBase
<
GPU_MALI
>;
template
class
OperatorWithKernel
<
CPU
>;
template
class
OperatorWithKernel
<
FPGA
>;
template
class
OperatorWithKernel
<
GPU_MALI
>;
}
// namespace framework
}
// namespace paddle_mobile
src/framework/operator.h
浏览文件 @
14114fcf
...
...
@@ -153,6 +153,7 @@ class FusionOpMatcher {
std
::
string
BeginType
()
{
return
node_
.
Type
();
}
// virtual bool Fusion();
protected:
Node
node_
;
std
::
string
type_
;
...
...
src/framework/tensor.h
浏览文件 @
14114fcf
...
...
@@ -131,7 +131,6 @@ class Tensor {
}
PADDLE_MOBILE_ENFORCE
(
numel
()
>=
0
,
"the Tensor'snumel must >=0."
)
int64_t
size
=
numel
()
*
SizeOfType
(
type
);
/* some versions of boost::variant don't have operator!= */
if
(
holder_
==
nullptr
||
holder_
->
size
()
<
size
+
offset_
)
{
holder_
.
reset
(
new
PlaceholderImpl
(
size
,
type
));
offset_
=
0
;
...
...
src/io/io.cpp
浏览文件 @
14114fcf
...
...
@@ -14,6 +14,10 @@ limitations under the License. */
#include "io.h"
#include <vector>
#ifdef PADDLE_MOBILE_PROFILE
#include <ctime>
#include <map>
#endif
#include "common/enforce.h"
#include "common/log.h"
...
...
@@ -336,10 +340,34 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
feed_tensor
->
ShareDataWith
(
t
);
std
::
shared_ptr
<
framework
::
BlockDesc
>
to_predict_block
=
to_predict_program_
->
Block
(
0
);
#ifdef PADDLE_MOBILE_PROFILE
std
::
map
<
std
::
string
,
clock_t
>
_profile
;
#endif
for
(
int
j
=
0
;
j
<
ops_of_block_
[
*
to_predict_block
.
get
()].
size
();
++
j
)
{
auto
op
=
ops_of_block_
[
*
to_predict_block
.
get
()][
j
];
#ifdef PADDLE_MOBILE_PROFILE
_profile
[
op
->
Type
()]
=
clock
();
#endif
op
->
Run
();
#ifdef PADDLE_MOBILE_PROFILE
_profile
[
op
->
Type
()]
=
clock
()
-
_profile
[
op
->
Type
()];
#endif
}
#ifdef PADDLE_MOBILE_PROFILE
{
DLOG
<<
"========================[ profile ]=========================="
;
clock_t
_ptotal
=
0
;
for
(
auto
const
&
p
:
_profile
)
{
_ptotal
+=
p
.
second
;
}
for
(
auto
const
&
p
:
_profile
)
{
DLOG
<<
p
.
first
<<
std
::
string
(
16
-
p
.
first
.
size
(),
' '
)
<<
"
\t
"
<<
(
float
)
p
.
second
<<
"
\t\t
"
<<
(
float
)
p
.
second
/
(
float
)
_ptotal
*
100.0
;
}
DLOG
<<
"========================[ ]=========================="
;
}
#endif
auto
ops
=
ops_of_block_
[
*
to_predict_program_
->
Block
(
0
)];
auto
last_op
=
ops
.
rbegin
();
auto
output_map
=
(
*
last_op
)
->
Outputs
();
...
...
src/operators/batchnorm_op.cpp
浏览文件 @
14114fcf
...
...
@@ -31,7 +31,13 @@ template class BatchNormOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
batch_norm
);
REGISTER_OPERATOR
(
batch_norm
,
ops
::
BatchNormOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
batch_norm
);
REGISTER_OPERATOR_CPU
(
batch_norm
,
ops
::
BatchNormOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/box_coder_op.cpp
浏览文件 @
14114fcf
...
...
@@ -52,7 +52,13 @@ template class BoxCoderOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
box_coder
);
REGISTER_OPERATOR
(
box_coder
,
ops
::
BoxCoderOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
box_coder
);
REGISTER_OPERATOR_CPU
(
box_coder
,
ops
::
BoxCoderOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/concat_op.cpp
浏览文件 @
14114fcf
...
...
@@ -62,7 +62,13 @@ template class ConcatOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
concat
);
REGISTER_OPERATOR
(
concat
,
ops
::
ConcatOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
concat
);
REGISTER_OPERATOR_CPU
(
concat
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/conv_op.cpp
浏览文件 @
14114fcf
...
...
@@ -53,7 +53,17 @@ template class ConvOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
conv2d
);
REGISTER_OPERATOR
(
conv2d
,
ops
::
ConvOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
conv2d
);
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU
(
conv2d
);
REGISTER_OPERATOR_MALI_GPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA
(
conv2d
);
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
#endif
#endif
src/operators/depthwise_conv_op.cpp
浏览文件 @
14114fcf
...
...
@@ -54,7 +54,13 @@ template class DepthwiseConvOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
depthwise_conv2d
);
REGISTER_OPERATOR
(
depthwise_conv2d
,
ops
::
DepthwiseConvOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
depthwise_conv2d
);
REGISTER_OPERATOR_CPU
(
depthwise_conv2d
,
ops
::
DepthwiseConvOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/elementwise_add_op.cpp
浏览文件 @
14114fcf
...
...
@@ -29,7 +29,13 @@ template class ElementwiseAddOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
elementwise_add
);
REGISTER_OPERATOR
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
elementwise_add
);
REGISTER_OPERATOR_CPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/feed_op.h
浏览文件 @
14114fcf
...
...
@@ -43,8 +43,14 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
};
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
feed
);
REGISTER_OPERATOR
(
feed
,
ops
::
FeedOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
feed
);
REGISTER_OPERATOR_CPU
(
feed
,
ops
::
FeedOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
}
// namespace operators
}
// namespace paddle_mobile
src/operators/fetch_op.h
浏览文件 @
14114fcf
...
...
@@ -43,8 +43,14 @@ class FetchOp : public framework::OperatorBase<DeviceType> {
};
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
fetch
);
REGISTER_OPERATOR
(
fetch
,
ops
::
FetchOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
fetch
);
REGISTER_OPERATOR_CPU
(
fetch
,
ops
::
FetchOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
}
// namespace operators
}
// namespace paddle_mobile
src/operators/fusion_conv_add.cpp
浏览文件 @
14114fcf
...
...
@@ -12,20 +12,49 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSIONCONVADD_OP
#ifdef FUSION
_
CONVADD_OP
#include "operators/fusion_conv_add.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
Dtype
,
typename
T
>
void
FushionConvAddOp
<
Dtype
,
T
>::
InferShape
()
const
{}
void
FushionConvAddOp
<
Dtype
,
T
>::
InferShape
()
const
{
auto
in_dims
=
param_
.
Input
()
->
dims
();
auto
filter_dims
=
param_
.
Filter
()
->
dims
();
const
std
::
vector
<
int
>
&
strides
=
param_
.
Strides
();
std
::
vector
<
int
>
paddings
=
param_
.
Paddings
();
int
groups
=
param_
.
Groups
();
std
::
vector
<
int
>
dilations
=
param_
.
Dilations
();
PADDLE_MOBILE_ENFORCE
((
in_dims
.
size
()
==
filter_dims
.
size
()
&&
dilations
.
size
()
==
paddings
.
size
()
&&
paddings
.
size
()
==
strides
.
size
()),
"ConvParam is not suitable"
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
output_shape
.
push_back
(
ConvOutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
dilations
[
i
],
paddings
[
i
],
strides
[
i
]));
}
framework
::
DDim
ddim
=
framework
::
make_ddim
(
output_shape
);
param_
.
Output
()
->
Resize
(
ddim
);
}
template
class
FushionConvAddOp
<
CPU
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
conv_add
);
REGISTER_OPERATOR
(
conv_add
,
ops
::
FushionConvAddOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
conv_add
);
REGISTER_OPERATOR_CPU
(
conv_add
,
ops
::
FushionConvAddOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/fusion_conv_add.h
浏览文件 @
14114fcf
...
...
@@ -11,16 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSIONCONVADD_OP
#define FUSION_CONVADD_OP
#ifdef FUSION
_
CONVADD_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
...
...
@@ -53,18 +54,37 @@ class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
const
framework
::
AttributeMap
&
attrs
,
std
::
shared_ptr
<
framework
::
Scope
>
scope
)
:
framework
::
OperatorWithKernel
<
DeviceType
>
(
type
,
inputs
,
outputs
,
attrs
,
scope
)
{}
scope
),
param_
(
inputs
,
outputs
,
attrs
,
*
scope
)
{}
void
RunImpl
()
const
{}
void
RunImpl
()
const
{
operators
::
ConvAddKernel
<
DeviceType
,
T
>
kernel
;
kernel
.
Compute
(
param_
);
this
->
ClearVariables
({
"Filter"
,
"Input"
,
"Y"
});
}
using
framework
::
OperatorWithKernel
<
DeviceType
>::
OperatorWithKernel
;
void
InferShape
()
const
override
;
protected:
// FushionFc
Param param_;
FushionConvAdd
Param
param_
;
};
// static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher());
/// Computes the output extent of a convolution along one spatial axis.
///
/// @param input_size   extent of the input along the axis
/// @param filter_size  extent of the (undilated) filter along the axis
/// @param dilation     spacing between filter taps
/// @param padding      padding applied to each side of the input
/// @param stride       step between successive filter applications
/// @return (input_size + 2 * padding - dilated_filter) / stride + 1,
///         using truncating integer division.
inline int ConvOutputSize(int input_size, int filter_size, int dilation,
                          int padding, int stride) {
  // Extent covered by the filter once the gaps introduced by dilation are
  // taken into account.
  const int dilated_filter = dilation * (filter_size - 1) + 1;
  const int slidable_range = input_size + 2 * padding - dilated_filter;
  return slidable_range / stride + 1;
}
#ifdef PADDLE_MOBILE_CPU
static
framework
::
FusionOpRegistrar
convadd_registrar
(
new
FusionConvAddMatcher
());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/fusion_conv_add_relu_op.h
浏览文件 @
14114fcf
...
...
@@ -46,8 +46,14 @@ class ConvAddReluOp {
private:
};
// static framework::FusionOpRegistrar fc_registrar(
// new FushionConvAddReluOpMatcher());
#ifdef PADDLE_MOBILE_CPU
// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
// new FushionConvAddReluOpMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/fusion_fc_op.cpp
浏览文件 @
14114fcf
...
...
@@ -54,7 +54,13 @@ template class FushionFcOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
fc
);
REGISTER_OPERATOR
(
fc
,
ops
::
FushionFcOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
fc
);
REGISTER_OPERATOR_CPU
(
fc
,
ops
::
FushionFcOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/fusion_fc_op.h
浏览文件 @
14114fcf
...
...
@@ -37,8 +37,6 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
void
FolderNodes
(
framework
::
Node
*
node
,
std
::
vector
<
std
::
shared_ptr
<
framework
::
Node
>>
*
removed_nodes
)
{
vector
<
std
::
shared_ptr
<
framework
::
OpDesc
>>
origin_descs
=
node
->
OpDescs
(
node_
.
Depth
());
node
->
Folder
(
node_
.
Depth
(),
Type
(),
{{
G_OP_TYPE_ELEMENTWISE_ADD
,
{
"Y"
,
"Z"
}}},
removed_nodes
);
}
...
...
@@ -69,7 +67,14 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
FushionFcParam
param_
;
};
// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#ifdef PADDLE_MOBILE_CPU
static
framework
::
FusionOpRegistrar
fc_registrar
(
new
FusionFcMatcher
());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
static
framework
::
FusionOpRegistrar
fc_registrar
(
new
FusionFcMatcher
());
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/kernel/arm/conv_add_kernel.cpp
0 → 100644
浏览文件 @
14114fcf
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
/// Broadcasts a 1-D bias tensor to the shape `dDim`, in place.
///
/// On return `bias` has shape `dDim`, and every element whose index along
/// `axis` is `c` holds the original `bias[c]`.
///
/// @param bias  1-D tensor of float values; resized to `dDim` by this call.
/// @param axis  axis of `dDim` that the bias values are aligned with.
/// @param dDim  target shape.
void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
  PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1,
                        "the bias tensor's dims size != 1")

  const int axis_size = dDim[axis];
  // Guard the reads from the snapshot below: indices run up to axis_size - 1.
  PADDLE_MOBILE_ENFORCE(bias.numel() >= axis_size,
                        "the bias tensor has fewer elements than the axis")

  // Snapshot the original values before touching the tensor. Resize() +
  // mutable_data() may either replace the underlying buffer (leaving a
  // pointer obtained earlier dangling) or hand back the same buffer (so the
  // writes below would overwrite values that are still to be read).
  const float *bias_ptr = bias.data<float>();
  const std::vector<float> bias_values(bias_ptr, bias_ptr + bias.numel());

  // outer_size: number of index combinations over dims [0, axis].
  // inner_size: number of elements behind each such combination.
  DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1);
  DDim inner_ddim =
      paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
  int outer_size = paddle_mobile::framework::product(outer_ddim);
  int inner_size = paddle_mobile::framework::product(inner_ddim);

  bias.Resize(dDim);
  auto new_ptr = bias.mutable_data<float>();

  for (int i = 0; i < outer_size; ++i) {
    // `i` enumerates dims [0, axis] in row-major order, so the position along
    // `axis` is i % axis_size. This equals the previous
    // `i * axis_size / outer_size` whenever the leading dims have product 1,
    // and is also correct for larger leading dims (e.g. batch > 1).
    const float v_bias = bias_values[i % axis_size];
    for (int j = 0; j < inner_size; ++j) {
      new_ptr[i * inner_size + j] = v_bias;
    }
  }
}
/// CPU/float implementation of the fused convolution + bias-add kernel.
///
/// The bias is first broadcast to the output shape and the output tensor is
/// made to share that data; the convolution result is then accumulated on
/// top of it (matmul is invoked with both scale factors equal to 1).
/// The convolution itself is carried out per batch item and per group as
/// im2col (or vol2col) followed by a matrix multiplication.
///
/// @param param  operator parameters: input, filter, bias, axis, output,
///               groups, strides, paddings and dilations.
template <>
void ConvAddKernel<CPU, float>::Compute(
    const FushionConvAddParam &param) const {
  DLOG << param;

  const Tensor *src = param.Input();
  Tensor weights = *param.Filter();
  Tensor expanded_bias = *param.Bias();
  int bias_axis = param.Axis();
  Tensor *dst = param.Output();

  // Seed the output with the broadcast bias.
  expand_bias(expanded_bias, bias_axis, dst->dims());
  dst->ShareDataWith(expanded_bias);

  int group_count = param.Groups();
  std::vector<int> stride_vec = param.Strides();
  std::vector<int> padding_vec = param.Paddings();
  std::vector<int> dilation_vec = param.Dilations();

  const int batch_count = static_cast<int>(src->dims()[0]);

  std::vector<int64_t> weight_dims(framework::vectorize(weights.dims()));
  std::vector<int64_t> dst_dims(framework::vectorize(dst->dims()));

  // Number of spatial dimensions (filter rank minus the two channel dims).
  size_t spatial_rank = weight_dims.size() - 2;

  // Column buffer shape: [in_channels / groups, filter spatial..., output
  // spatial...].
  std::vector<int64_t> col_dims(1 + 2 * spatial_rank);
  col_dims[0] = src->dims()[1] / group_count;
  for (size_t d = 0; d < spatial_rank; ++d) {
    col_dims[d + 1] = weight_dims[d + 2];
    col_dims[d + 1 + spatial_rank] = dst_dims[d + 2];
  }
  framework::DDim col_shape(framework::make_ddim(col_dims));
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, spatial_rank + 1);

  const bool needs_expansion =
      IsExpand(weight_dims, stride_vec, padding_vec, dilation_vec);

  Tensor col;
  Tensor col_matrix;
  if (needs_expansion) {
    col.mutable_data<float>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }

  // Shape of a single batch item of the input (batch dimension dropped).
  framework::DDim src_item_shape = framework::slice_ddim(
      src->dims(), 1, static_cast<int>(src->dims().size()));

  framework::DDim weight_matrix_shape = {
      weights.dims()[0], weights.numel() / weights.dims()[0]};
  weights.Resize(weight_matrix_shape);

  framework::DDim dst_matrix_shape = {
      dst->dims()[1],
      dst->numel() / (dst->dims()[0] * dst->dims()[1])};

  // convolution operator: im2col(or vol2col) + gemm
  int src_channels_per_group =
      static_cast<int>(src->dims()[1]) / group_count;
  int dst_channels_per_group =
      static_cast<int>(dst->dims()[1]) / group_count;

  math::Vol2ColFunctor<CPU, float> vol2col;
  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;

  for (int b = 0; b < batch_count; ++b) {
    Tensor src_item = src->Slice(b, b + 1).Resize(src_item_shape);
    Tensor dst_item = dst->Slice(b, b + 1).Resize(dst_matrix_shape);

    for (int grp = 0; grp < group_count; ++grp) {
      const int src_begin = grp * src_channels_per_group;
      const int src_end = (grp + 1) * src_channels_per_group;
      Tensor src_group = src_item.Slice(src_begin, src_end);

      if (needs_expansion) {
        if (spatial_rank == 2U) {
          // im2col
          im2col(src_group, dilation_vec, stride_vec,
                 std::vector<int>{padding_vec[0], padding_vec[1],
                                  padding_vec[0], padding_vec[1]},
                 &col);
        } else if (spatial_rank == 3U) {
          // vol2col
          vol2col(src_group, dilation_vec, stride_vec, padding_vec, &col);
        }
      } else {
        // No expansion needed: the input slice is used directly as the
        // column matrix.
        col.ShareDataWith(src_group);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
      }

      // gemm
      const int dst_begin = grp * dst_channels_per_group;
      const int dst_end = (grp + 1) * dst_channels_per_group;
      Tensor dst_group = dst_item.Slice(dst_begin, dst_end);
      Tensor weight_group = weights.Slice(dst_begin, dst_end);
      math::matmul<float>(weight_group, false, col_matrix, false,
                          static_cast<float>(1), &dst_group,
                          static_cast<float>(1));
    }
  }
}
template
class
ConvAddKernel
<
CPU
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/arm/relu_kernel.cpp
浏览文件 @
14114fcf
...
...
@@ -37,13 +37,71 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
auto
*
out
=
param
.
Out
();
auto
*
out_ptr
=
out
->
mutable_data
<
float
>
();
ReluFunctor
<
float
>
func_
;
math
::
Transform
trans
;
trans
(
input_x_ptr
,
input_x_ptr
+
input_x
->
numel
(),
out_ptr
,
func_
);
int
numel
=
input_x
->
numel
();
if
(
numel
>
32
)
{
asm
volatile
(
"pld [%[input_x_ptr], #0]
\n\t
"
"vmov.f32 q8, #0.0
\n\t
"
"subs %[num], %[num], #32
\n\t
"
"blt end_num_%=
\n\t
"
"loop_num_%=:
\n\t
"
"pld [%[input_x_ptr], #1024]
\n\t
"
// for (int i = 0; i < input_x->numel(); i++) {
// out_ptr[i] = input_x_ptr[i] > 0 ? input_x_ptr[i] : 0;
// }
"vld1.32 {q0, q1}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q2, q3}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q4, q5}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q6, q7}, [%[input_x_ptr]]!
\n\t
"
"vmax.f32 q0, q0, q8
\n\t
"
"vmax.f32 q1, q1, q8
\n\t
"
"vmax.f32 q2, q2, q8
\n\t
"
"vmax.f32 q3, q3, q8
\n\t
"
"vmax.f32 q4, q4, q8
\n\t
"
"vmax.f32 q5, q5, q8
\n\t
"
"vmax.f32 q6, q6, q8
\n\t
"
"vmax.f32 q7, q7, q8
\n\t
"
"vst1.32 {q0, q1}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q2, q3}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q4, q5}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q6, q7}, [%[out_ptr]]!
\n\t
"
"subs %[num], %[num], #32
\n\t
"
"bge loop_num_%=
\n\t
"
"end_num_%=:
\n\t
"
"cmp %[num], #0
\n\t
"
"bge end_%=
\n\t
"
"mov r6, #4
\n\t
"
"mul r5, %[num], r6
\n\t
"
"add %[input_x_ptr], %[input_x_ptr], r5
\n\t
"
"vld1.32 {q0, q1}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q2, q3}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q4, q5}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q6, q7}, [%[input_x_ptr]]!
\n\t
"
"vmax.f32 q0, q0, q8
\n\t
"
"vmax.f32 q1, q1, q8
\n\t
"
"vmax.f32 q2, q2, q8
\n\t
"
"vmax.f32 q3, q3, q8
\n\t
"
"vmax.f32 q4, q4, q8
\n\t
"
"vmax.f32 q5, q5, q8
\n\t
"
"vmax.f32 q6, q6, q8
\n\t
"
"vmax.f32 q7, q7, q8
\n\t
"
"add %[out_ptr], %[out_ptr], r5
\n\t
"
"vst1.32 {q0, q1}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q2, q3}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q4, q5}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q6, q7}, [%[out_ptr]]!
\n\t
"
"end_%=:
\n\t
"
:
:
[
out_ptr
]
"r"
(
out_ptr
),
[
input_x_ptr
]
"r"
(
input_x_ptr
),
[
num
]
"r"
(
numel
)
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6"
,
"q7"
,
"q8"
,
"r5"
,
"r6"
);
}
else
{
ReluFunctor
<
float
>
func_
;
math
::
Transform
trans
;
trans
(
input_x_ptr
,
input_x_ptr
+
numel
,
out_ptr
,
func_
);
}
}
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/kernel/conv_add_kernel.h
0 → 100644
浏览文件 @
14114fcf
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#pragma once
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
using
framework
::
DDim
;
using
framework
::
OpKernelBase
;
// Kernel class for the fused convolution + add operator.
// Derives from OpKernelBase parameterized on the device type and on
// FushionConvAddParam; only the Compute entry point is declared here, its
// definition is not part of this class body.
template
<
typename
DeviceType
,
typename
T
>
class
ConvAddKernel
:
public
OpKernelBase
<
DeviceType
,
FushionConvAddParam
>
{
public:
// Runs the kernel with the given parameters (declaration only).
void
Compute
(
const
FushionConvAddParam
&
param
)
const
;
};
/// Tells whether the input has to be expanded (im2col / vol2col) before the
/// matrix multiplication.
///
/// Expansion can be skipped only when, for every spatial axis, the filter
/// extent is 1, the stride is 1, the padding is 0 and the dilation is 1.
///
/// @param filter_dim  full filter shape; spatial extents start at index 2
/// @param strides     per-axis strides (its size defines the number of axes)
/// @param paddings    per-axis paddings
/// @param dilations   per-axis dilations
/// @return false when every axis is trivial as described above, true
///         otherwise.
inline bool IsExpand(const std::vector<int64_t> &filter_dim,
                     const std::vector<int> &strides,
                     const std::vector<int> &paddings,
                     const std::vector<int> &dilations) {
  const size_t axis_count = strides.size();
  for (size_t axis = 0; axis != axis_count; ++axis) {
    const bool unit_filter = static_cast<int>(filter_dim[axis + 2]) == 1;
    const bool unit_stride = strides[axis] == 1;
    const bool zero_padding = paddings[axis] == 0;
    const bool unit_dilation = dilations[axis] == 1;
    if (!(unit_filter && unit_stride && zero_padding && unit_dilation)) {
      // A single non-trivial axis is enough to require expansion.
      return true;
    }
  }
  return false;
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/fpga/conv_kernel.cpp
浏览文件 @
14114fcf
...
...
@@ -14,15 +14,16 @@ limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// template<>
// void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const
// {}
//
// template class ConvKernel<FPGA, float>;
}
// Explicit specialization of ConvKernel::Compute for the FPGA device with
// float data. The body is empty, so invoking it performs no work.
template
<
>
void
ConvKernel
<
FPGA
,
float
>::
Compute
(
const
ConvParam
&
param
)
const
{}
template
class
ConvKernel
<
FPGA
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/lrn_op.cpp
浏览文件 @
14114fcf
...
...
@@ -29,7 +29,13 @@ template class LrnOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
lrn
);
REGISTER_OPERATOR
(
lrn
,
ops
::
LrnOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
lrn
);
REGISTER_OPERATOR_CPU
(
lrn
,
ops
::
LrnOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/mul_op.cpp
浏览文件 @
14114fcf
...
...
@@ -55,7 +55,13 @@ template class MulOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
mul
);
REGISTER_OPERATOR
(
mul
,
ops
::
MulOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
mul
);
REGISTER_OPERATOR_CPU
(
mul
,
ops
::
MulOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/multiclass_nms_op.cpp
浏览文件 @
14114fcf
...
...
@@ -39,7 +39,13 @@ template class MultiClassNMSOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
multiclass_nms
);
REGISTER_OPERATOR
(
multiclass_nms
,
ops
::
MultiClassNMSOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
multiclass_nms
);
REGISTER_OPERATOR_CPU
(
multiclass_nms
,
ops
::
MultiClassNMSOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/op_param.cpp
浏览文件 @
14114fcf
...
...
@@ -39,5 +39,31 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) {
}
#endif
#ifdef FUSION_CONVADD_OP
/**
 * Streams a human-readable description of a conv_add parameter set.
 *
 * Writes, one per line: the strides, paddings and dilations (first two
 * elements of each, formatted as "(a, b)"), the group count, and the dims of
 * the input, filter, bias and output tensors.
 *
 * The two components of each pair are separated by ", " so that values such
 * as (1, 12) and (11, 2) no longer render identically.
 *
 * Preconditions (not checked here): Strides(), Paddings() and Dilations()
 * each hold at least two elements, and Input(), Filter(), Bias() and
 * Output() return non-null pointers, because elements 0/1 are indexed and
 * the pointers are dereferenced directly.
 *
 * @param printer    destination stream-like object.
 * @param conv_param parameters to describe.
 * @return `printer`, to allow chaining.
 */
Print &operator<<(Print &printer, const FushionConvAddParam &conv_param) {
  const auto &strides = conv_param.Strides();
  const auto &paddings = conv_param.Paddings();
  const auto &dilations = conv_param.Dilations();

  printer << "parameter of conv_add: "
          << "\n";
  printer << " stride: "
          << " (" << strides[0] << ", " << strides[1] << ") "
          << "\n";
  printer << " paddings: "
          << " (" << paddings[0] << ", " << paddings[1] << ") "
          << "\n";
  printer << " dilations: "
          << " (" << dilations[0] << ", " << dilations[1] << ") "
          << "\n";
  printer << " groups: " << conv_param.Groups() << "\n";
  printer << " input dims: " << conv_param.Input()->dims() << "\n";
  printer << " filter dims: " << conv_param.Filter()->dims() << "\n";
  printer << " bias dims: " << conv_param.Bias()->dims() << "\n";
  // The final line is intentionally left without a trailing newline.
  printer << " output dims: " << conv_param.Output()->dims();
  return printer;
}
#endif
}
// namespace operators
}
// namespace paddle_mobile
src/operators/op_param.h
浏览文件 @
14114fcf
...
...
@@ -165,6 +165,8 @@ class OpParam {
template
<
typename
T
>
static
T
*
GetVarValue
(
const
string
&
key
,
const
VariableNameMap
&
var_map
,
const
Scope
&
scope
)
{
PADDLE_MOBILE_ENFORCE
(
var_map
.
count
(
key
)
>
0
,
"%s is not contained in var_map"
,
key
.
c_str
())
auto
var_vec
=
var_map
.
at
(
key
);
if
(
!
var_vec
.
empty
())
{
auto
var
=
scope
.
FindVar
(
var_vec
[
0
]);
...
...
@@ -787,5 +789,54 @@ class FushionFcParam : public OpParam {
};
#endif
#ifdef FUSION_CONVADD_OP
/**
 * Parameter holder for the conv_add fusion operator.
 *
 * On construction it resolves the tensors (bias, filter, input, output) from
 * the operator's variable maps and scope, and reads the "axis", "strides",
 * "paddings", "dilations" and "groups" attributes. Afterwards it exposes them
 * through read-only accessors.
 */
class FushionConvAddParam : public OpParam {
 public:
  /**
   * @param inputs  input variable-name map; used to look up bias, filter and
   *                input tensors.
   * @param outputs output variable-name map; used to look up the output
   *                tensor.
   * @param attrs   attribute map supplying axis/strides/paddings/dilations/
   *                groups.
   * @param scope   scope in which the variables are resolved.
   */
  FushionConvAddParam(const VariableNameMap &inputs,
                      const VariableNameMap &outputs,
                      const AttributeMap &attrs, const Scope &scope) {
    // Lookup order is kept exactly as in the original implementation.
    bias_ = InputYFrom<LoDTensor>(inputs, scope);
    axis_ = GetAttr<int>("axis", attrs);
    filter_ = FilterFrom<LoDTensor>(inputs, scope);
    input_ = InputFrom<LoDTensor>(inputs, scope);
    output_ = OutFrom<LoDTensor>(outputs, scope);
    strides_ = GetAttr<vector<int>>("strides", attrs);
    paddings_ = GetAttr<vector<int>>("paddings", attrs);
    dilations_ = GetAttr<vector<int>>("dilations", attrs);
    groups_ = GetAttr<int>("groups", attrs);
  }

  // ---- Tensor accessors ----

  /// Input tensor (read-only).
  const Tensor *Input() const { return input_; }

  /// Filter tensor (read-only).
  const Tensor *Filter() const { return filter_; }

  /// Bias tensor, taken from the operator's "Y" input lookup.
  Tensor *Bias() const { return bias_; }

  /// Output tensor (mutable).
  Tensor *Output() const { return output_; }

  // ---- Attribute accessors ----

  /// Value of the "axis" attribute.
  const int &Axis() const { return axis_; }

  /// Value of the "strides" attribute.
  const vector<int> &Strides() const { return strides_; }

  /// Value of the "paddings" attribute.
  const vector<int> &Paddings() const { return paddings_; }

  /// Value of the "dilations" attribute.
  const vector<int> &Dilations() const { return dilations_; }

  /// Value of the "groups" attribute.
  const int &Groups() const { return groups_; }

 private:
  Tensor *bias_;
  int axis_;
  Tensor *input_;
  Tensor *output_;
  Tensor *filter_;
  vector<int> strides_;
  vector<int> paddings_;
  vector<int> dilations_;
  int groups_;
};
Print
&
operator
<<
(
Print
&
printer
,
const
FushionConvAddParam
&
conv_param
);
#endif
}
// namespace operators
}
// namespace paddle_mobile
src/operators/pool_op.cpp
浏览文件 @
14114fcf
...
...
@@ -59,7 +59,13 @@ template class PoolOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
pool2d
);
REGISTER_OPERATOR
(
pool2d
,
ops
::
PoolOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
pool2d
);
REGISTER_OPERATOR_CPU
(
pool2d
,
ops
::
PoolOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/prior_box_op.cpp
浏览文件 @
14114fcf
...
...
@@ -49,7 +49,13 @@ template class PriorBoxOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
prior_box
);
REGISTER_OPERATOR
(
prior_box
,
ops
::
PriorBoxOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
prior_box
);
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/relu_op.cpp
浏览文件 @
14114fcf
...
...
@@ -33,7 +33,13 @@ template class ReluOp<CPU, float>;
* 都是需要和model中类型对应起来的
* */
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
relu
);
REGISTER_OPERATOR
(
relu
,
ops
::
ReluOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
relu
);
REGISTER_OPERATOR_CPU
(
relu
,
ops
::
ReluOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/reshape_op.cpp
浏览文件 @
14114fcf
...
...
@@ -32,7 +32,13 @@ template class ReshapeOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
reshape
);
REGISTER_OPERATOR
(
reshape
,
ops
::
ReshapeOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
reshape
);
REGISTER_OPERATOR_CPU
(
reshape
,
ops
::
ReshapeOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/sigmoid_op.cpp
浏览文件 @
14114fcf
...
...
@@ -27,7 +27,13 @@ template class SigmoidOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
sigmoid
);
REGISTER_OPERATOR
(
sigmoid
,
ops
::
SigmoidOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
sigmoid
);
REGISTER_OPERATOR_CPU
(
sigmoid
,
ops
::
SigmoidOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/softmax_op.cpp
浏览文件 @
14114fcf
...
...
@@ -27,7 +27,13 @@ template class SoftmaxOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
softmax
);
REGISTER_OPERATOR
(
softmax
,
ops
::
SoftmaxOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
softmax
);
REGISTER_OPERATOR_CPU
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
src/operators/transpose_op.cpp
浏览文件 @
14114fcf
...
...
@@ -52,7 +52,13 @@ template class TransposeOp<CPU, float>;
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
USE_OP
(
transpose
);
REGISTER_OPERATOR
(
transpose
,
ops
::
TransposeOp
);
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU
(
transpose
);
REGISTER_OPERATOR_CPU
(
transpose
,
ops
::
TransposeOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
test/net/test_googlenet.cpp
浏览文件 @
14114fcf
...
...
@@ -20,9 +20,9 @@ int main() {
paddle_mobile
::
Loader
<
paddle_mobile
::
CPU
>
loader
;
bool
optimize
=
false
;
auto
time1
=
time
();
//
auto program = loader.Load(g_googlenet, optimize);
auto
program
=
loader
.
Load
(
g_googlenet_combine
+
"/model"
,
g_googlenet_combine
+
"/params"
,
optimize
);
auto
program
=
loader
.
Load
(
g_googlenet
,
optimize
);
//
auto program = loader.Load(g_googlenet_combine + "/model",
//
g_googlenet_combine + "/params", optimize);
auto
time2
=
time
();
DLOG
<<
"load cost :"
<<
time_diff
(
time1
,
time2
)
<<
"ms
\n
"
;
paddle_mobile
::
Executor
<
paddle_mobile
::
CPU
>
executor
(
program
,
1
,
optimize
);
...
...
tools/build.sh
浏览文件 @
14114fcf
...
...
@@ -15,7 +15,6 @@ build_for_mac() {
fi
PLATFORM
=
"x86"
MODE
=
"Release"
CXX_FLAGS
=
"-std=c++11 -O3 -s"
BUILD_DIR
=
../build/release/
"
${
PLATFORM
}
"
mkdir
-p
${
BUILD_DIR
}
/build
...
...
@@ -25,7 +24,6 @@ build_for_mac() {
cmake ..
\
-B
"
${
BUILD_DIR
}
"
\
-DCMAKE_BUILD_TYPE
=
"
${
MODE
}
"
\
-DCMAKE_CXX_FLAGS
=
"
${
CXX_FLAGS
}
"
\
-DIS_MAC
=
true
cd
${
BUILD_DIR
}
...
...
@@ -46,11 +44,11 @@ build_for_android() {
if
[
"
${
PLATFORM
}
"
=
"arm-v7a"
]
;
then
ABI
=
"armeabi-v7a with NEON"
ARM_PLATFORM
=
"V7"
CXX_FLAGS
=
"-
O3 -std=c++11 -s -
march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
CXX_FLAGS
=
"-march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
elif
[
"
${
PLATFORM
}
"
=
"arm-v8a"
]
;
then
ABI
=
"arm64-v8a"
ARM_PLATFORM
=
"V8"
CXX_FLAGS
=
"-
O3 -std=c++11 -s -
march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
CXX_FLAGS
=
"-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
else
echo
"unknown platform!"
exit
-1
...
...
@@ -98,7 +96,7 @@ build_for_ios() {
BUILD_DIR
=
../build/release/
"
${
PLATFORM
}
"
TOOLCHAIN_FILE
=
"./tools/ios-cmake/ios.toolchain.cmake"
C_FLAGS
=
"-fobjc-abi-version=2 -fobjc-arc -isysroot
${
CMAKE_OSX_SYSROOT
}
"
CXX_FLAGS
=
"-fobjc-abi-version=2 -fobjc-arc -std=gnu++1
1
-stdlib=libc++ -isysroot
${
CMAKE_OSX_SYSROOT
}
"
CXX_FLAGS
=
"-fobjc-abi-version=2 -fobjc-arc -std=gnu++1
4
-stdlib=libc++ -isysroot
${
CMAKE_OSX_SYSROOT
}
"
mkdir
-p
"
${
BUILD_DIR
}
"
if
[
$#
-eq
1
]
;
then
NET
=
$1
...
...
tools/ios-cmake/ios.toolchain.cmake
浏览文件 @
14114fcf
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录