Commit 89bb5717 authored by WangLiu, committed by GitHub

Merge pull request #547 from codeWorm2015/develop

fix #546 fix docker compile error
@@ -10,20 +10,56 @@ option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
 
-if (ARM_LINUX)
-    include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
-endif ()
-
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
+include_directories(src/)
+
+set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
+if (DEBUGING)
+    message(STATUS "debug")
+    set(CMAKE_BUILD_TYPE Debug)
+    set(CMAKE_CXX_FLAGS_DEBUG "-g -DNDEBUG")
+    add_definitions(-DPADDLE_MOBILE_DEBUG)
+else ()
+    set(CMAKE_BUILD_TYPE Release)
+    set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
+    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
+endif ()
+
+if (USE_EXCEPTION)
+    message(STATUS "use exception")
+    add_definitions(-DENABLE_EXCEPTION)
+    add_definitions(-fexceptions)
+else()
+    add_definitions(-fno-exceptions)
+endif ()
+
+if (LOG_PROFILE)
+    add_definitions(-DPADDLE_MOBILE_PROFILE)
+endif()
+
+if(USE_OPENMP)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
+endif()
+
+# platform control
+if (ARM_LINUX)
+    include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
+endif ()
 
 if (CPU)
     add_definitions(-DPADDLE_MOBILE_CPU)
 else()
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cc)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cpp)
+    file(GLOB_RECURSE _tmp_list src/operators/kernel/arm/*.cpp src/operators/kernel/arm/*.cc)
+    foreach(f ${_tmp_list})
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+    endforeach()
+
+    file(GLOB_RECURSE _tmp_list_h src/operators/kernel/arm/*.h)
+    foreach(f ${_tmp_list_h})
+        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+    endforeach()
 endif()
 
 if (MALI_GPU)

@@ -54,55 +90,33 @@ endif()
 if(FPGA)
     add_definitions(-DPADDLE_MOBILE_FPGA)
 else()
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.cc)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.cpp)
+    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
+    foreach(f ${_tmp_list})
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+    endforeach()
+
+    file(GLOB_RECURSE _tmp_list_h src/operators/kernel/fpga/*.h)
+    foreach(f ${_tmp_list_h})
+        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+    endforeach()
 endif()
 
-set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
-if (DEBUGING)
-    message(STATUS "debug")
-    set(CMAKE_BUILD_TYPE Debug)
-    set(CMAKE_CXX_FLAGS_DEBUG "-g -DNDEBUG")
-    add_definitions(-DPADDLE_MOBILE_DEBUG)
-    if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
-        add_definitions(-DARMV7)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
-    endif ()
-else ()
-    set(CMAKE_BUILD_TYPE Release)
-    set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
-    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
-endif ()
-
-if (USE_EXCEPTION)
-    message(STATUS "use exception")
-    add_definitions(-DENABLE_EXCEPTION)
-    add_definitions(-fexceptions)
-else()
-    add_definitions(-fno-exceptions)
-endif ()
-
-if (LOG_PROFILE)
-    add_definitions(-DPADDLE_MOBILE_PROFILE)
-endif()
-
-if(USE_OPENMP)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
-    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
-endif()
-
-if (NOT ANDROID_NDK_TOOLCHAIN_INCLUDED)
+if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
+    add_definitions(-DARMV7)
+else()
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h)
     list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp)
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)
 endif ()
 
-include_directories(src/)
+if (IS_IOS)
+else()
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
+    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
+endif ()
 
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

@@ -113,25 +127,20 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 # NET default
 set(NET "defult" CACHE STRING "select net type")
 set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
 
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
 
-if (IS_IOS)
-    add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
-else()
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
-endif ()
-
+# build library
 if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
     list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
     add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 elseif(IS_IOS)
+    add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 else ()
     add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 endif ()
 
+# unit test
 if(DEBUGING)
     if(IS_IOS)
     else()
......
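A note on the CMake change above: `list(REMOVE_ITEM ...)` compares list entries as literal strings and never expands wildcards, so the old `list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cpp)` lines matched nothing and the supposedly excluded kernel sources stayed in non-CPU and non-FPGA builds. The replacement expands each pattern first with `file(GLOB_RECURSE ...)` and then removes every matched path explicitly inside a `foreach` loop, which also lets the corresponding headers be dropped from `PADDLE_MOBILE_H`.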
@@ -91,8 +91,8 @@ ONNX stands for "Open Neural Network Exchange"
 ![](http://7xop3k.com1.z0.glb.clouddn.com/15311951836000.jpg)
 
-### 4. Download some test models
+### 4. Download some test models and test images
 
-[Download link](https://mms-mis.cdn.bcebos.com/paddle-mobile/models.zip)
+[Download link](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)
 
 ## Troubleshooting
......
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <cstdlib>
+
 #include "common/enforce.h"
 #include "common/log.h"
......
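On the `<cstdlib>` includes added in this and the next few files: when exceptions are compiled out (`-fno-exceptions` in the CMake hunk above), an enforce-style macro typically has to terminate via `std::abort()` or `std::exit()`, both declared in `<cstdlib>`. A minimal sketch of such a macro; the name and message format are illustrative, not paddle-mobile's actual API:

```cpp
// Hedged sketch of an abort-based enforce for a no-exception build.
#include <cstdio>
#include <cstdlib>

#define MY_ENFORCE(cond, msg)                                \
  do {                                                       \
    if (!(cond)) {                                           \
      std::fprintf(stderr, "enforce failed: %s\n", (msg));   \
      std::abort(); /* declared in <cstdlib> */              \
    }                                                        \
  } while (0)
```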
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
+#include <cstdlib>
 #include <string>
 #include <typeinfo>
 #include <unordered_map>
......
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include <cctype>
+#include <cstdlib>
 #include <string>
 
 namespace paddle_mobile {
......
@@ -14,9 +14,11 @@ limitations under the License. */
 #pragma once
 
+#include <cstdlib>
 #include <initializer_list>
 #include <typeinfo>
 #include <vector>
+
 #include "common/enforce.h"
 #include "common/variant.h"
 #include "dim.h"
......
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
+#include <cstdlib>
 #include "common/enforce.h"
 
 namespace paddle_mobile {
 namespace framework {
......
@@ -152,7 +152,7 @@ class Tensor {
     if (holder_ != nullptr) {
       holder_->set_type(type);
     }
-    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.")
+    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
     int64_t size = numel() * SizeOfType(type);
     if (holder_ == nullptr || holder_->size() < size + offset_) {
       holder_.reset(new PlaceholderImpl(size, type));
......
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "../sigmoid_kernel.h"
 #include "../central-arm-func/sigmoid_arm_func.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include "../../math/math_func_neon.h"
 #endif
 #include <cmath>
......
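The recurring change in the kernel sources starts here: `#if __ARM_NEON` becomes `#ifdef __ARM_NEON`. Both forms skip the block when the macro is undefined (an undefined identifier evaluates to 0 inside `#if`), but `#if` on an undefined macro trips `-Wundef`, which breaks builds that treat warnings as errors, while `#ifdef` never does. A compilable sketch of the guard idiom, with an illustrative helper name:

```cpp
// When __ARM_NEON is defined (the compiler defines it as 1 on NEON
// targets) both guard styles behave identically; the #ifdef form is
// simply silent when the macro is absent, e.g. in an x86 Docker build.
#ifdef __ARM_NEON
#include <arm_neon.h>
static float sum4(const float *p) {
  float32x4_t v = vld1q_f32(p);  // load four floats into one q register
  float32x2_t s = vadd_f32(vget_low_f32(v), vget_high_f32(v));
  return vget_lane_f32(vpadd_f32(s, s), 0);  // pairwise horizontal add
}
#else
static float sum4(const float *p) { return p[0] + p[1] + p[2] + p[3]; }
#endif
```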
@@ -14,8 +14,10 @@ limitations under the License. */
 #ifdef SIGMOID_OP
 
 #pragma once
 
+#include <cmath>
+
 #include "operators/op_param.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #include "operators/math/math_func_neon.h"
 #endif

@@ -24,7 +26,7 @@ namespace paddle_mobile {
 namespace operators {
 using framework::DDim;
 
 void sigmoid(const Tensor *X, Tensor *Y) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const float *input = X->data<float>();
   float *output = Y->mutable_data<float>();
   const DDim &dDim = X->dims();
......
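For context, the body this guard protects computes y = 1/(1+e^(-x)) over the tensor. A hedged sketch of the same split, vectorized bulk plus scalar tail, over a raw buffer; the exponentials are computed in scalar for clarity, whereas the real kernel uses the polynomial exp approximation from operators/math/math_func_neon.h:

```cpp
#include <cmath>
#include <cstddef>
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif

// Illustrative helper, not paddle-mobile's sigmoid() signature.
void sigmoid_buffer(const float *x, float *y, std::size_t n) {
  std::size_t i = 0;
#ifdef __ARM_NEON
  for (; i + 4 <= n; i += 4) {
    float d[4] = {1.f + std::exp(-x[i]), 1.f + std::exp(-x[i + 1]),
                  1.f + std::exp(-x[i + 2]), 1.f + std::exp(-x[i + 3])};
    float32x4_t denom = vld1q_f32(d);
    float32x4_t r = vrecpeq_f32(denom);       // ~8-bit reciprocal estimate
    r = vmulq_f32(r, vrecpsq_f32(denom, r));  // one Newton-Raphson refinement
    vst1q_f32(y + i, r);                      // y ~= 1 / (1 + e^-x)
  }
#endif
  for (; i < n; ++i) y[i] = 1.f / (1.f + std::exp(-x[i]));  // scalar tail
}
```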
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 
 #include <vector>
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif
 #include "common/common.h"
......
@@ -14,7 +14,7 @@ limitations under the License. */
 #pragma once
 
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif

@@ -49,7 +49,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
   auto new_ptr = bias.mutable_data<float>();
   int axis_size = dDim[axis];
-#if __ARM_NEON
+#ifdef __ARM_NEON
   for (int i = 0; i < outer_size; ++i) {
     int inner_num = inner_size >> 4;
     int remain = inner_size - (inner_num << 4);
......
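The `>> 4` / `<< 4` pair in expand_bias is the usual power-of-two blocking arithmetic: sixteen floats (four q registers) per vectorized iteration, with the remainder handled in scalar. A worked instance:

```cpp
#include <cassert>

int main() {
  int inner_size = 37;
  int inner_num = inner_size >> 4;             // 37 / 16 = 2 full 16-float blocks
  int remain = inner_size - (inner_num << 4);  // 37 - 2 * 16 = 5 scalar leftovers
  assert(inner_num == 2 && remain == 5);
}
```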
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "operators/math/depthwise_conv_3x3.h"
+#ifdef __ARM_NEON
 #include <arm_neon.h>
+#endif
 #include <vector>
 
 namespace paddle_mobile {

@@ -21,7 +23,7 @@ namespace math {
 void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
                       vector<int> paddings, const Tensor *filter, Tensor *bias,
                       Tensor *output, bool if_bias) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int input_height = input->dims()[2];

@@ -242,6 +244,7 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
 void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
                           Tensor *output, Tensor *bias, bool if_bias) {
+#ifdef __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
   float *output_data = output->data<float>();

@@ -511,11 +514,13 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
       filter_data_tmp += 9;
     }
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
                                    Tensor *output, const Tensor *new_scale,
                                    const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
   float *output_data = output->data<float>();

@@ -813,11 +818,14 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
       filter_data_tmp += 9;
     }
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
                                    Tensor *output, const Tensor *new_scale,
                                    const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int input_height = input->dims()[2];

@@ -1009,10 +1017,12 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
     input_data += input_batch_stride;
     output_data += output_batch_stride;
   }
+#endif
 }
 
 void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
                             Tensor *output, Tensor bias, bool if_bias) {
+#ifdef __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
   float *output_data = output->data<float>();

@@ -1209,11 +1219,13 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
     input_data += inhxw * c;
     output_data += outhxw * c;
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                      Tensor *output, const Tensor *new_scale,
                                      const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
   const float *input_data = input->data<float>();
   const float *filter_data = filter->data<float>();
   float *output_data = output->data<float>();

@@ -1444,6 +1456,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
     input_data += inhxw * c;
     output_data += outhxw * c;
   }
+#endif
 }
 
 }  // namespace math
......
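One behavioral consequence of the guards added above is worth noting: each depthwise function's body is wrapped whole, so a build without NEON gets an empty stub rather than a compile error on the intrinsics. A sketch of the resulting shape, with a hypothetical name and signature:

```cpp
// Shape of every depthwise entry point after this diff: the NEON body is
// compiled only on ARM; elsewhere the function silently does nothing, so
// callers on non-NEON hosts must route around these kernels.
void DepthwiseConv3x3Sketch(const float *input, float *output) {
#ifdef __ARM_NEON
  // ... hand-scheduled NEON implementation as in the hunks above ...
#else
  (void)input;   // silence unused-parameter warnings
  (void)output;  // output is left untouched off ARM
#endif
}
```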
@@ -21,7 +21,7 @@ namespace math {
 void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #ifdef ARMV7

@@ -99,7 +99,7 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
 void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #ifdef ARMV7
   const int batch_size = input->dims()[0];
......
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 
 #include "framework/tensor.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif  // __ARM_NEON
 
 namespace paddle_mobile {
......
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #ifdef POOL_OP
-#define __ARM_NEON true
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 #include "framework/tensor.h"
 #include "pool_3x3.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif  // __ARM_NEON
 #include <climits>

@@ -31,7 +30,7 @@ using std::max;
 using std::min;
 using std::vector;
 
 void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int h_in = input->dims()[2];

@@ -281,7 +280,7 @@ void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
 }
 
 void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int h_in = input->dims()[2];

@@ -524,7 +523,7 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
 void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int input_height = input->dims()[2];

@@ -583,7 +582,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
         }
         output_seg[ph * output_width + pw] = max_value;
       } else {
-#if defined(ARMV7)
+#ifdef ARMV7
         asm volatile(
             "vld1.32 {q1}, [%[pos1]] \n\t"
             "vld1.32 {q2}, [%[pos2]] \n\t"

@@ -623,7 +622,7 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
 void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
   const int input_height = input->dims()[2];

@@ -677,7 +676,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
         }
         output_seg[ph * output_width + pw] = sum / 9.0;
       } else {
-#if defined(ARMV7)
+#ifdef ARMV7
         asm volatile(
             "vld1.32 {q1}, [%[pos1]] \n\t"
......
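The one-line deletion at the top of this file is plausibly the root cause of #546: `#define __ARM_NEON true` forced the NEON paths on unconditionally, so an x86 Docker build reached `#include <arm_neon.h>` and failed. Feature-test macros like `__ARM_NEON` are owned by the compiler; code should only probe them, for example:

```cpp
// Probe, never define, the compiler-owned feature macro. Checking both
// spellings also covers older ARM toolchains that define only __ARM_NEON__.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
constexpr bool kHasNeon = true;
#else
constexpr bool kHasNeon = false;
#endif
```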
@@ -21,7 +21,7 @@ limitations under the License. */
 #include <algorithm>
 #include <vector>
 #include "framework/tensor.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <arm_neon.h>
 #endif  // __ARM_NEON
......
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "operators/math/softmax.h"
 #include "common/types.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include <math.h>
 #include <algorithm>
 #include "operators/math/math_func_neon.h"

@@ -29,7 +29,7 @@ using framework::DDim;
 using framework::Tensor;
 
 template <typename T>
 class SoftmaxFuntor<CPU, T> {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   void sum(float *input, float *sumptr, int inner_size, int outter_size) {
     float32x4_t acc = vdupq_n_f32(0);
     float sum_ = 0;

@@ -144,7 +144,7 @@ class SoftmaxFuntor<CPU, T> {
       framework::Tensor sub_X = X->Slice(i, i + 1);
       framework::Tensor sub_Y = Y->Slice(i, i + 1);
 
-#if __ARM_NEON
+#ifdef __ARM_NEON
       SoftmaxCacl(&sub_X, &sub_Y);
 #endif
     }
......
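The `float32x4_t acc = vdupq_n_f32(0)` visible in the sum hunk is the standard vector-accumulator reduction. A self-contained sketch of that idiom, simplified relative to SoftmaxFuntor::sum, which tracks per-row sums:

```cpp
#include <cstddef>
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif

// Sketch of the vector-accumulator reduction used by SoftmaxFuntor::sum.
float reduce_sum(const float *x, std::size_t n) {
  std::size_t i = 0;
  float sum = 0.f;
#ifdef __ARM_NEON
  float32x4_t acc = vdupq_n_f32(0.f);         // four running partial sums
  for (; i + 4 <= n; i += 4) acc = vaddq_f32(acc, vld1q_f32(x + i));
  float32x2_t s = vadd_f32(vget_low_f32(acc), vget_high_f32(acc));
  sum = vget_lane_f32(vpadd_f32(s, s), 0);    // fold four lanes to one scalar
#endif
  for (; i < n; ++i) sum += x[i];             // scalar tail (whole loop off ARM)
  return sum;
}
```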