Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
89bb5717
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
89bb5717
编写于
7月 11, 2018
作者:
W
WangLiu
提交者:
GitHub
7月 11, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #547 from codeWorm2015/develop
fix
#546
fix docker complile error
上级
3e9af702
85d1a559
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
109 addition
and
79 deletion
+109
-79
CMakeLists.txt
CMakeLists.txt
+63
-54
README.md
README.md
+2
-2
src/common/variant.h
src/common/variant.h
+2
-0
src/framework/attribute.h
src/framework/attribute.h
+1
-0
src/framework/data_layout.h
src/framework/data_layout.h
+1
-0
src/framework/ddim.h
src/framework/ddim.h
+2
-0
src/framework/dim.h
src/framework/dim.h
+1
-0
src/framework/tensor.h
src/framework/tensor.h
+1
-1
src/operators/kernel/arm/sigmoid_kernel.cpp
src/operators/kernel/arm/sigmoid_kernel.cpp
+1
-1
src/operators/kernel/central-arm-func/sigmoid_arm_func.h
src/operators/kernel/central-arm-func/sigmoid_arm_func.h
+4
-2
src/operators/kernel/conv_add_kernel.h
src/operators/kernel/conv_add_kernel.h
+1
-1
src/operators/math/conv_func.h
src/operators/math/conv_func.h
+2
-2
src/operators/math/depthwise_conv_3x3.cpp
src/operators/math/depthwise_conv_3x3.cpp
+14
-1
src/operators/math/pool_2x2.cpp
src/operators/math/pool_2x2.cpp
+2
-2
src/operators/math/pool_2x2.h
src/operators/math/pool_2x2.h
+1
-1
src/operators/math/pool_3x3.cpp
src/operators/math/pool_3x3.cpp
+7
-8
src/operators/math/pool_3x3.h
src/operators/math/pool_3x3.h
+1
-1
src/operators/math/softmax.cpp
src/operators/math/softmax.cpp
+3
-3
未找到文件。
CMakeLists.txt
浏览文件 @
89bb5717
...
@@ -10,20 +10,56 @@ option(CPU "armv7 with neon" ON)
...
@@ -10,20 +10,56 @@ option(CPU "armv7 with neon" ON)
option
(
MALI_GPU
"mali gpu"
OFF
)
option
(
MALI_GPU
"mali gpu"
OFF
)
option
(
FPGA
"fpga"
OFF
)
option
(
FPGA
"fpga"
OFF
)
if
(
ARM_LINUX
)
include
(
"
${
CMAKE_CURRENT_LIST_DIR
}
/tools/arm-platform.cmake"
)
endif
()
file
(
GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm
)
file
(
GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm
)
file
(
GLOB_RECURSE PADDLE_MOBILE_H src/*.h
)
file
(
GLOB_RECURSE PADDLE_MOBILE_H src/*.h
)
include_directories
(
src/
)
set
(
CMAKE_CXX_FLAGS
"-std=c++14 -O3 -s
${
CMAKE_CXX_FLAGS
}
"
)
if
(
DEBUGING
)
message
(
STATUS
"debug"
)
set
(
CMAKE_BUILD_TYPE Debug
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"-g -DNDEBUG"
)
add_definitions
(
-DPADDLE_MOBILE_DEBUG
)
else
()
set
(
CMAKE_BUILD_TYPE Release
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"-DNDEBUG"
)
add_definitions
(
-fvisibility=hidden -fvisibility-inlines-hidden
)
endif
()
if
(
USE_EXCEPTION
)
message
(
STATUS
"use exception"
)
add_definitions
(
-DENABLE_EXCEPTION
)
add_definitions
(
-fexceptions
)
else
()
add_definitions
(
-fno-exceptions
)
endif
()
if
(
LOG_PROFILE
)
add_definitions
(
-DPADDLE_MOBILE_PROFILE
)
endif
()
if
(
USE_OPENMP
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fopenmp"
)
add_definitions
(
-DPADDLE_MOBILE_USE_OPENMP
)
endif
()
# platform control
if
(
ARM_LINUX
)
include
(
"
${
CMAKE_CURRENT_LIST_DIR
}
/tools/arm-platform.cmake"
)
endif
()
if
(
CPU
)
if
(
CPU
)
add_definitions
(
-DPADDLE_MOBILE_CPU
)
add_definitions
(
-DPADDLE_MOBILE_CPU
)
else
()
else
()
list
(
REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.h
)
file
(
GLOB_RECURSE _tmp_list src/operators/kernel/arm/*.cpp src/operators/kernel/arm/*.cc
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cc
)
foreach
(
f
${
_tmp_list
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cpp
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
f
}
)
endforeach
()
file
(
GLOB_RECURSE _tmp_list_h src/operators/kernel/arm/*.h
)
foreach
(
f
${
_tmp_list_h
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
f
}
)
endforeach
()
endif
()
endif
()
if
(
MALI_GPU
)
if
(
MALI_GPU
)
...
@@ -54,55 +90,33 @@ endif()
...
@@ -54,55 +90,33 @@ endif()
if
(
FPGA
)
if
(
FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
else
()
else
()
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/kernel/fpga/*.h
)
file
(
GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/kernel/fpga/*.cc
)
foreach
(
f
${
_tmp_list
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/kernel/fpga/*.cpp
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
f
}
)
endforeach
()
file
(
GLOB_RECURSE _tmp_list_h src/operators/kernel/fpga/*.h
)
foreach
(
f
${
_tmp_list_h
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
f
}
)
endforeach
()
endif
()
endif
()
set
(
CMAKE_CXX_FLAGS
"-std=c++14 -O3 -s
${
CMAKE_CXX_FLAGS
}
"
)
if
(
ANDROID_NDK_TOOLCHAIN_INCLUDED
)
if
(
DEBUGING
)
message
(
STATUS
"debug"
)
set
(
CMAKE_BUILD_TYPE Debug
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"-g -DNDEBUG"
)
add_definitions
(
-DPADDLE_MOBILE_DEBUG
)
if
(
ANDROID_NDK_TOOLCHAIN_INCLUDED
)
add_definitions
(
-DARMV7
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-llog"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-llog"
)
endif
()
add_definitions
(
-DARMV7
)
else
()
set
(
CMAKE_BUILD_TYPE Release
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"-DNDEBUG"
)
add_definitions
(
-fvisibility=hidden -fvisibility-inlines-hidden
)
endif
()
if
(
USE_EXCEPTION
)
message
(
STATUS
"use exception"
)
add_definitions
(
-DENABLE_EXCEPTION
)
add_definitions
(
-fexceptions
)
else
()
else
()
add_definitions
(
-fno-exceptions
)
endif
()
if
(
LOG_PROFILE
)
add_definitions
(
-DPADDLE_MOBILE_PROFILE
)
endif
()
if
(
USE_OPENMP
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fopenmp"
)
add_definitions
(
-DPADDLE_MOBILE_USE_OPENMP
)
endif
()
if
(
NOT ANDROID_NDK_TOOLCHAIN_INCLUDED
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/jni/paddle_mobile_jni.h
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/jni/paddle_mobile_jni.h
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/jni/paddle_mobile_jni.cpp
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/jni/paddle_mobile_jni.cpp
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/math/math_func_neon.h
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/math/math_func_neon.h
)
endif
()
endif
()
include_directories
(
src/
)
if
(
IS_IOS
)
else
()
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/PaddleMobile.h
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/PaddleMobile.mm
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/op_symbols.h
)
endif
()
set
(
CMAKE_VERBOSE_MAKEFILE ON
)
set
(
CMAKE_VERBOSE_MAKEFILE ON
)
set
(
CMAKE_EXPORT_COMPILE_COMMANDS ON
)
set
(
CMAKE_EXPORT_COMPILE_COMMANDS ON
)
...
@@ -113,25 +127,20 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
...
@@ -113,25 +127,20 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
# NET default
# NET default
set
(
NET
"defult"
CACHE STRING
"select net type"
)
set
(
NET
"defult"
CACHE STRING
"select net type"
)
set_property
(
CACHE NET PROPERTY STRINGS
"defult"
"googlenet"
"mobilenet"
"yolo"
"squeezenet"
)
set_property
(
CACHE NET PROPERTY STRINGS
"defult"
"googlenet"
"mobilenet"
"yolo"
"squeezenet"
)
include
(
"
${
CMAKE_CURRENT_LIST_DIR
}
/tools/op.cmake"
)
include
(
"
${
CMAKE_CURRENT_LIST_DIR
}
/tools/op.cmake"
)
if
(
IS_IOS
)
add_library
(
paddle-mobile STATIC
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
else
()
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/PaddleMobile.h
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/PaddleMobile.mm
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/ios_io/op_symbols.h
)
endif
()
# build library
if
(
ANDROID_NDK_TOOLCHAIN_INCLUDED
)
if
(
ANDROID_NDK_TOOLCHAIN_INCLUDED
)
list
(
REMOVE_DUPLICATES CMAKE_CXX_FLAGS
)
list
(
REMOVE_DUPLICATES CMAKE_CXX_FLAGS
)
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
elseif
(
IS_IOS
)
elseif
(
IS_IOS
)
add_library
(
paddle-mobile STATIC
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
else
()
else
()
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
add_library
(
paddle-mobile SHARED
${
PADDLE_MOBILE_CC
}
${
PADDLE_MOBILE_H
}
)
endif
()
endif
()
# unit test
if
(
DEBUGING
)
if
(
DEBUGING
)
if
(
IS_IOS
)
if
(
IS_IOS
)
else
()
else
()
...
...
README.md
浏览文件 @
89bb5717
...
@@ -91,8 +91,8 @@ ONNX全称为“Open Neural Network Exchange”,即“开放的神经网络切
...
@@ -91,8 +91,8 @@ ONNX全称为“Open Neural Network Exchange”,即“开放的神经网络切
![](
http://7xop3k.com1.z0.glb.clouddn.com/15311951836000.jpg
)
![](
http://7xop3k.com1.z0.glb.clouddn.com/15311951836000.jpg
)
### 4. 部分测试模型下载
### 4. 部分测试模型
和测试图片
下载
[
下载链接
](
http
s://mms-mis.cdn.bcebos.com/paddle-mobile/model
s.zip
)
[
下载链接
](
http
://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImage
s.zip
)
## 问题解决
## 问题解决
...
...
src/common/variant.h
浏览文件 @
89bb5717
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <cstdlib>
#include "common/enforce.h"
#include "common/enforce.h"
#include "common/log.h"
#include "common/log.h"
...
...
src/framework/attribute.h
浏览文件 @
89bb5717
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <cstdlib>
#include <string>
#include <string>
#include <typeinfo>
#include <typeinfo>
#include <unordered_map>
#include <unordered_map>
...
...
src/framework/data_layout.h
浏览文件 @
89bb5717
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <cctype>
#include <cctype>
#include <cstdlib>
#include <string>
#include <string>
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
...
src/framework/ddim.h
浏览文件 @
89bb5717
...
@@ -14,9 +14,11 @@ limitations under the License. */
...
@@ -14,9 +14,11 @@ limitations under the License. */
#pragma once
#pragma once
#include <cstdlib>
#include <initializer_list>
#include <initializer_list>
#include <typeinfo>
#include <typeinfo>
#include <vector>
#include <vector>
#include "common/enforce.h"
#include "common/enforce.h"
#include "common/variant.h"
#include "common/variant.h"
#include "dim.h"
#include "dim.h"
...
...
src/framework/dim.h
浏览文件 @
89bb5717
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <cstdlib>
#include "common/enforce.h"
#include "common/enforce.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
framework
{
namespace
framework
{
...
...
src/framework/tensor.h
浏览文件 @
89bb5717
...
@@ -152,7 +152,7 @@ class Tensor {
...
@@ -152,7 +152,7 @@ class Tensor {
if
(
holder_
!=
nullptr
)
{
if
(
holder_
!=
nullptr
)
{
holder_
->
set_type
(
type
);
holder_
->
set_type
(
type
);
}
}
PADDLE_MOBILE_ENFORCE
(
numel
()
>=
0
,
"the Tensor'snumel must >=0."
)
PADDLE_MOBILE_ENFORCE
(
numel
()
>=
0
,
"the Tensor's
numel must >=0."
)
int64_t
size
=
numel
()
*
SizeOfType
(
type
);
int64_t
size
=
numel
()
*
SizeOfType
(
type
);
if
(
holder_
==
nullptr
||
holder_
->
size
()
<
size
+
offset_
)
{
if
(
holder_
==
nullptr
||
holder_
->
size
()
<
size
+
offset_
)
{
holder_
.
reset
(
new
PlaceholderImpl
(
size
,
type
));
holder_
.
reset
(
new
PlaceholderImpl
(
size
,
type
));
...
...
src/operators/kernel/arm/sigmoid_kernel.cpp
浏览文件 @
89bb5717
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "../sigmoid_kernel.h"
#include "../sigmoid_kernel.h"
#include "../central-arm-func/sigmoid_arm_func.h"
#include "../central-arm-func/sigmoid_arm_func.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include "../../math/math_func_neon.h"
#include "../../math/math_func_neon.h"
#endif
#endif
#include <cmath>
#include <cmath>
...
...
src/operators/kernel/central-arm-func/sigmoid_arm_func.h
浏览文件 @
89bb5717
...
@@ -14,8 +14,10 @@ limitations under the License. */
...
@@ -14,8 +14,10 @@ limitations under the License. */
#ifdef SIGMOID_OP
#ifdef SIGMOID_OP
#pragma once
#pragma once
#include <cmath>
#include "operators/op_param.h"
#include "operators/op_param.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#include "operators/math/math_func_neon.h"
#include "operators/math/math_func_neon.h"
#endif
#endif
...
@@ -24,7 +26,7 @@ namespace paddle_mobile {
...
@@ -24,7 +26,7 @@ namespace paddle_mobile {
namespace
operators
{
namespace
operators
{
using
framework
::
DDim
;
using
framework
::
DDim
;
void
sigmoid
(
const
Tensor
*
X
,
Tensor
*
Y
)
{
void
sigmoid
(
const
Tensor
*
X
,
Tensor
*
Y
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
float
*
input
=
X
->
data
<
float
>
();
const
float
*
input
=
X
->
data
<
float
>
();
float
*
output
=
Y
->
mutable_data
<
float
>
();
float
*
output
=
Y
->
mutable_data
<
float
>
();
const
DDim
&
dDim
=
X
->
dims
();
const
DDim
&
dDim
=
X
->
dims
();
...
...
src/operators/kernel/conv_add_kernel.h
浏览文件 @
89bb5717
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <vector>
#include <vector>
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
#include "common/common.h"
#include "common/common.h"
...
...
src/operators/math/conv_func.h
浏览文件 @
89bb5717
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#endif
...
@@ -49,7 +49,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
...
@@ -49,7 +49,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
auto
new_ptr
=
bias
.
mutable_data
<
float
>
();
auto
new_ptr
=
bias
.
mutable_data
<
float
>
();
int
axis_size
=
dDim
[
axis
];
int
axis_size
=
dDim
[
axis
];
#if __ARM_NEON
#if
def
__ARM_NEON
for
(
int
i
=
0
;
i
<
outer_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
outer_size
;
++
i
)
{
int
inner_num
=
inner_size
>>
4
;
int
inner_num
=
inner_size
>>
4
;
int
remain
=
inner_size
-
(
inner_num
<<
4
);
int
remain
=
inner_size
-
(
inner_num
<<
4
);
...
...
src/operators/math/depthwise_conv_3x3.cpp
浏览文件 @
89bb5717
...
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/depthwise_conv_3x3.h"
#ifdef __ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif
#include <vector>
#include <vector>
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
@@ -21,7 +23,7 @@ namespace math {
...
@@ -21,7 +23,7 @@ namespace math {
void
DepthwiseConv3x3
(
const
Tensor
*
input
,
vector
<
int
>
strides
,
void
DepthwiseConv3x3
(
const
Tensor
*
input
,
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
filter
,
Tensor
*
bias
,
vector
<
int
>
paddings
,
const
Tensor
*
filter
,
Tensor
*
bias
,
Tensor
*
output
,
bool
if_bias
)
{
Tensor
*
output
,
bool
if_bias
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
...
@@ -242,6 +244,7 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
...
@@ -242,6 +244,7 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
void
DepthwiseConv3x3s1p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
void
DepthwiseConv3x3s1p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
Tensor
*
output
,
Tensor
*
bias
,
bool
if_bias
)
{
Tensor
*
output
,
Tensor
*
bias
,
bool
if_bias
)
{
#ifdef __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
...
@@ -511,11 +514,13 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
...
@@ -511,11 +514,13 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
filter_data_tmp
+=
9
;
filter_data_tmp
+=
9
;
}
}
}
}
#endif
}
}
void
DepthwiseConvAddBNRelu3x3s1p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
void
DepthwiseConvAddBNRelu3x3s1p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
const
Tensor
*
new_bias
,
bool
if_relu
)
{
const
Tensor
*
new_bias
,
bool
if_relu
)
{
#ifdef __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
...
@@ -813,11 +818,14 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
...
@@ -813,11 +818,14 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
filter_data_tmp
+=
9
;
filter_data_tmp
+=
9
;
}
}
}
}
#endif
}
}
void
DepthwiseConvAddBNRelu3x3s2p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
void
DepthwiseConvAddBNRelu3x3s2p1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
const
Tensor
*
new_bias
,
bool
if_relu
)
{
const
Tensor
*
new_bias
,
bool
if_relu
)
{
#ifdef __ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
...
@@ -1009,10 +1017,12 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
...
@@ -1009,10 +1017,12 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
input_data
+=
input_batch_stride
;
input_data
+=
input_batch_stride
;
output_data
+=
output_batch_stride
;
output_data
+=
output_batch_stride
;
}
}
#endif
}
}
void
DepthwiseConv3x3s2p1v2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
void
DepthwiseConv3x3s2p1v2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
Tensor
*
output
,
Tensor
bias
,
bool
if_bias
)
{
Tensor
*
output
,
Tensor
bias
,
bool
if_bias
)
{
#ifdef __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
...
@@ -1209,11 +1219,13 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
...
@@ -1209,11 +1219,13 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
input_data
+=
inhxw
*
c
;
input_data
+=
inhxw
*
c
;
output_data
+=
outhxw
*
c
;
output_data
+=
outhxw
*
c
;
}
}
#endif
}
}
void
DepthwiseConvAddBNRelu3x3s2p1v2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
void
DepthwiseConvAddBNRelu3x3s2p1v2
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
Tensor
*
output
,
const
Tensor
*
new_scale
,
const
Tensor
*
new_bias
,
bool
if_relu
)
{
const
Tensor
*
new_bias
,
bool
if_relu
)
{
#ifdef __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
float
*
output_data
=
output
->
data
<
float
>
();
...
@@ -1444,6 +1456,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
...
@@ -1444,6 +1456,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
input_data
+=
inhxw
*
c
;
input_data
+=
inhxw
*
c
;
output_data
+=
outhxw
*
c
;
output_data
+=
outhxw
*
c
;
}
}
#endif
}
}
}
// namespace math
}
// namespace math
...
...
src/operators/math/pool_2x2.cpp
浏览文件 @
89bb5717
...
@@ -21,7 +21,7 @@ namespace math {
...
@@ -21,7 +21,7 @@ namespace math {
void
Pool2x2Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool2x2Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
#ifdef ARMV7
#ifdef ARMV7
...
@@ -99,7 +99,7 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -99,7 +99,7 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
void
Pool2x2Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool2x2Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
#ifdef ARMV7
#ifdef ARMV7
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
...
...
src/operators/math/pool_2x2.h
浏览文件 @
89bb5717
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
#pragma once
#pragma once
#include "framework/tensor.h"
#include "framework/tensor.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif // __ARM_NEON
#endif // __ARM_NEON
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
...
src/operators/math/pool_3x3.cpp
浏览文件 @
89bb5717
...
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
...
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef POOL_OP
#ifdef POOL_OP
#define __ARM_NEON true
#ifdef _OPENMP
#ifdef _OPENMP
#include <omp.h>
#include <omp.h>
#endif
#endif
#include "framework/tensor.h"
#include "framework/tensor.h"
#include "pool_3x3.h"
#include "pool_3x3.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif // __ARM_NEON
#endif // __ARM_NEON
#include <climits>
#include <climits>
...
@@ -31,7 +30,7 @@ using std::max;
...
@@ -31,7 +30,7 @@ using std::max;
using
std
::
min
;
using
std
::
min
;
using
std
::
vector
;
using
std
::
vector
;
void
Pool3x3Avgs1p1
(
const
Tensor
*
input
,
Tensor
*
output
)
{
void
Pool3x3Avgs1p1
(
const
Tensor
*
input
,
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
h_in
=
input
->
dims
()[
2
];
const
int
h_in
=
input
->
dims
()[
2
];
...
@@ -281,7 +280,7 @@ void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
...
@@ -281,7 +280,7 @@ void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
}
}
void
Pool3x3Maxs1p1
(
const
Tensor
*
input
,
Tensor
*
output
)
{
void
Pool3x3Maxs1p1
(
const
Tensor
*
input
,
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
h_in
=
input
->
dims
()[
2
];
const
int
h_in
=
input
->
dims
()[
2
];
...
@@ -524,7 +523,7 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
...
@@ -524,7 +523,7 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
void
Pool3x3Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool3x3Max
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
...
@@ -583,7 +582,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -583,7 +582,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
}
}
output_seg
[
ph
*
output_width
+
pw
]
=
max_value
;
output_seg
[
ph
*
output_width
+
pw
]
=
max_value
;
}
else
{
}
else
{
#if
defined(ARMV7)
#if
def ARMV7
asm
volatile
(
asm
volatile
(
"vld1.32 {q1}, [%[pos1]]
\n\t
"
"vld1.32 {q1}, [%[pos1]]
\n\t
"
"vld1.32 {q2}, [%[pos2]]
\n\t
"
"vld1.32 {q2}, [%[pos2]]
\n\t
"
...
@@ -623,7 +622,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -623,7 +622,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
void
Pool3x3Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
void
Pool3x3Avg
(
vector
<
int
>
strides
,
vector
<
int
>
paddings
,
const
Tensor
*
input
,
Tensor
*
output
)
{
Tensor
*
output
)
{
#if __ARM_NEON
#if
def
__ARM_NEON
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
batch_size
=
input
->
dims
()[
0
];
const
int
input_height
=
input
->
dims
()[
2
];
const
int
input_height
=
input
->
dims
()[
2
];
...
@@ -677,7 +676,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
...
@@ -677,7 +676,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
}
}
output_seg
[
ph
*
output_width
+
pw
]
=
sum
/
9.0
;
output_seg
[
ph
*
output_width
+
pw
]
=
sum
/
9.0
;
}
else
{
}
else
{
#if
defined(ARMV7)
#if
def ARMV7
asm
volatile
(
asm
volatile
(
"vld1.32 {q1}, [%[pos1]]
\n\t
"
"vld1.32 {q1}, [%[pos1]]
\n\t
"
...
...
src/operators/math/pool_3x3.h
浏览文件 @
89bb5717
...
@@ -21,7 +21,7 @@ limitations under the License. */
...
@@ -21,7 +21,7 @@ limitations under the License. */
#include <algorithm>
#include <algorithm>
#include <vector>
#include <vector>
#include "framework/tensor.h"
#include "framework/tensor.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include <arm_neon.h>
#include <arm_neon.h>
#endif // __ARM_NEON
#endif // __ARM_NEON
...
...
src/operators/math/softmax.cpp
浏览文件 @
89bb5717
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/math/softmax.h"
#include "operators/math/softmax.h"
#include "common/types.h"
#include "common/types.h"
#if __ARM_NEON
#if
def
__ARM_NEON
#include <math.h>
#include <math.h>
#include <algorithm>
#include <algorithm>
#include "operators/math/math_func_neon.h"
#include "operators/math/math_func_neon.h"
...
@@ -29,7 +29,7 @@ using framework::DDim;
...
@@ -29,7 +29,7 @@ using framework::DDim;
using
framework
::
Tensor
;
using
framework
::
Tensor
;
template
<
typename
T
>
template
<
typename
T
>
class
SoftmaxFuntor
<
CPU
,
T
>
{
class
SoftmaxFuntor
<
CPU
,
T
>
{
#if __ARM_NEON
#if
def
__ARM_NEON
void
sum
(
float
*
input
,
float
*
sumptr
,
int
inner_size
,
int
outter_size
)
{
void
sum
(
float
*
input
,
float
*
sumptr
,
int
inner_size
,
int
outter_size
)
{
float32x4_t
acc
=
vdupq_n_f32
(
0
);
float32x4_t
acc
=
vdupq_n_f32
(
0
);
float
sum_
=
0
;
float
sum_
=
0
;
...
@@ -144,7 +144,7 @@ class SoftmaxFuntor<CPU, T> {
...
@@ -144,7 +144,7 @@ class SoftmaxFuntor<CPU, T> {
framework
::
Tensor
sub_X
=
X
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
sub_X
=
X
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
sub_Y
=
Y
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
sub_Y
=
Y
->
Slice
(
i
,
i
+
1
);
#if __ARM_NEON
#if
def
__ARM_NEON
SoftmaxCacl
(
&
sub_X
,
&
sub_Y
);
SoftmaxCacl
(
&
sub_X
,
&
sub_Y
);
#endif
#endif
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录