diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fd20c438bb7c34c01228698a7c831f90b9d0374..f5b9ec25aa1451f466d26b2edc79296a5579b2f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,20 +10,56 @@ option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
-if (ARM_LINUX)
-include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
-endif ()
-
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
+include_directories(src/)
+
+set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
+if (DEBUGING)
+    message(STATUS "debug")
+    set(CMAKE_BUILD_TYPE Debug)
+    set(CMAKE_CXX_FLAGS_DEBUG "-g -DNDEBUG")
+    add_definitions(-DPADDLE_MOBILE_DEBUG)
+else ()
+    set(CMAKE_BUILD_TYPE Release)
+    set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
+    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
+endif ()
+
+if (USE_EXCEPTION)
+    message(STATUS "use exception")
+    add_definitions(-DENABLE_EXCEPTION)
+    add_definitions(-fexceptions)
+else()
+    add_definitions(-fno-exceptions)
+endif ()
+
+if (LOG_PROFILE)
+    add_definitions(-DPADDLE_MOBILE_PROFILE)
+endif()
+
+if(USE_OPENMP)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
+endif()
+
+# platform control
+if (ARM_LINUX)
+    include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
+endif ()
 
 if (CPU)
     add_definitions(-DPADDLE_MOBILE_CPU)
 else()
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cc)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ./src/operators/kernel/arm/*.cpp)
+    file(GLOB_RECURSE _tmp_list src/operators/kernel/arm/*.cpp src/operators/kernel/arm/*.cc)
+    foreach(f ${_tmp_list})
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+    endforeach()
+    file(GLOB_RECURSE _tmp_list_h src/operators/kernel/arm/*.h)
+    foreach(f ${_tmp_list_h})
+        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+    endforeach()
 endif()
 
 if (MALI_GPU)
@@ -54,55 +90,33 @@ endif()
 if(FPGA)
     add_definitions(-DPADDLE_MOBILE_FPGA)
 else()
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.cc)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/fpga/*.cpp)
+    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
+    foreach(f ${_tmp_list})
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+    endforeach()
+
+    file(GLOB_RECURSE _tmp_list_h src/operators/kernel/fpga/*.h)
+    foreach(f ${_tmp_list_h})
+        list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+    endforeach()
 endif()
-set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
-if (DEBUGING)
-    message(STATUS "debug")
-    set(CMAKE_BUILD_TYPE Debug)
-    set(CMAKE_CXX_FLAGS_DEBUG "-g -DNDEBUG")
-    add_definitions(-DPADDLE_MOBILE_DEBUG)
-    if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
-        add_definitions(-DARMV7)
+if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
-    endif ()
-else ()
-    set(CMAKE_BUILD_TYPE Release)
-    set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
-    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
-endif ()
-
-if (USE_EXCEPTION)
-    message(STATUS "use exception")
-    add_definitions(-DENABLE_EXCEPTION)
-    add_definitions(-fexceptions)
+    add_definitions(-DARMV7)
 else()
-    add_definitions(-fno-exceptions)
-endif ()
-
-if (LOG_PROFILE)
-    add_definitions(-DPADDLE_MOBILE_PROFILE)
-endif()
-
-if(USE_OPENMP)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
-    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
-endif()
-
-
-
-
-if (NOT ANDROID_NDK_TOOLCHAIN_INCLUDED)
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h)
     list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp)
     list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)
 endif ()
-include_directories(src/)
+if (IS_IOS)
+else()
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
+    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
+endif ()
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -113,25 +127,20 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 # NET default
 set(NET "defult" CACHE STRING "select net type")
 set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
-
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
-if (IS_IOS)
-    add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
-else()
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.h)
-    list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobile.mm)
-    list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h)
-endif ()
+# build library
 if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
     list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
     add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 elseif(IS_IOS)
+    add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 else ()
     add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 endif ()
+# unit test
 if(DEBUGING)
     if(IS_IOS)
     else()
diff --git a/README.md b/README.md
index dfba5f18ad4258e7db0b87b73f68ce5413bb7c20..91b4f886a31bb839a7e513185464260b1e95c453 100644
--- a/README.md
+++ b/README.md
@@ -91,8 +91,8 @@ ONNX全称为“Open Neural Network Exchange”,即“开放的神经网络切换
 ![](http://7xop3k.com1.z0.glb.clouddn.com/15311951836000.jpg)
 
-### 4. 部分测试模型下载
-[下载链接](https://mms-mis.cdn.bcebos.com/paddle-mobile/models.zip)
+### 4. 部分测试模型和测试图片下载
+[下载链接](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)
 
 ## 问题解决
diff --git a/src/common/variant.h b/src/common/variant.h
index b87a5e67a76f4c616f2c450ef4527bcf6c16286b..9d0aa3019fbfdd5acbaed8a1140bc58c33f7f438 100644
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include
+
 #include "common/enforce.h"
 #include "common/log.h"
diff --git a/src/framework/attribute.h b/src/framework/attribute.h
index f0519a35b3ed2a02e35f1ef0d6a718efb7b76095..ed264057be6810d8bae29e0117fa4f6d91067cc1 100644
--- a/src/framework/attribute.h
+++ b/src/framework/attribute.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
+#include
 #include
 #include
 #include
diff --git a/src/framework/data_layout.h b/src/framework/data_layout.h
index f1249008f088dce48ed040e47900121c2eb41af1..0ba31ef9b7016b453b34cc4a023b0841b2110540 100644
--- a/src/framework/data_layout.h
+++ b/src/framework/data_layout.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include
+#include
 #include
 
 namespace paddle_mobile {
diff --git a/src/framework/ddim.h b/src/framework/ddim.h
index 833bc2783f855fd9d6df50d21345539fbe2ca6c4..db240b260185bb8ac2ba1fe84d3390bedac5c36d 100644
--- a/src/framework/ddim.h
+++ b/src/framework/ddim.h
@@ -14,9 +14,11 @@ limitations under the License. */
 #pragma once
+#include
 #include
 #include
 #include
+
 #include "common/enforce.h"
 #include "common/variant.h"
 #include "dim.h"
diff --git a/src/framework/dim.h b/src/framework/dim.h
index dd7610de65d4a4c93402cf49b0fdbdc7995610c0..0d3e86e92289da155843e1a9959d5ea67a73c060 100644
--- a/src/framework/dim.h
+++ b/src/framework/dim.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
+#include
 #include "common/enforce.h"
 namespace paddle_mobile {
 namespace framework {
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 9bbd81aa30f6fa0188dacd0dce01813e17b9e339..56e6d6bf18740489c195a66db70331cbab42aeea 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -152,7 +152,7 @@ class Tensor {
     if (holder_ != nullptr) {
       holder_->set_type(type);
     }
-    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.")
+    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
     int64_t size = numel() * SizeOfType(type);
     if (holder_ == nullptr || holder_->size() < size + offset_) {
       holder_.reset(new PlaceholderImpl(size, type));
diff --git a/src/operators/kernel/arm/sigmoid_kernel.cpp b/src/operators/kernel/arm/sigmoid_kernel.cpp
index eb67de153ddb13fb48e42c28d6ec2270b0bc59b4..9f5e6a2048d940ddc4592777a773c69d976033bd 100644
--- a/src/operators/kernel/arm/sigmoid_kernel.cpp
+++ b/src/operators/kernel/arm/sigmoid_kernel.cpp
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "../sigmoid_kernel.h"
 #include "../central-arm-func/sigmoid_arm_func.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include "../../math/math_func_neon.h"
 #endif
 #include
diff --git a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h
index eb0e4ab7e4b4f18f8ede4d85b859e68f7d58bda2..daf6ad0e472515c8034a400dfc73de608f5b12d2 100644
--- a/src/operators/kernel/central-arm-func/sigmoid_arm_func.h
+++ b/src/operators/kernel/central-arm-func/sigmoid_arm_func.h
@@ -14,8 +14,10 @@ limitations under the License. */
 #ifdef SIGMOID_OP
 #pragma once
+#include
+
 #include "operators/op_param.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #include "operators/math/math_func_neon.h"
 #endif
@@ -24,7 +26,7 @@ namespace paddle_mobile {
 namespace operators {
 using framework::DDim;
 void sigmoid(const Tensor *X, Tensor *Y) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const float *input = X->data();
   float *output = Y->mutable_data();
   const DDim &dDim = X->dims();
diff --git a/src/operators/kernel/conv_add_kernel.h b/src/operators/kernel/conv_add_kernel.h
index 465d8bdd8cfd71d678eb2816cae10ea6a06cec35..5a351f8afcf7b73fb6c56dff48c08d7b5204ca10 100644
--- a/src/operators/kernel/conv_add_kernel.h
+++ b/src/operators/kernel/conv_add_kernel.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 #include
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #endif
 #include "common/common.h"
diff --git a/src/operators/math/conv_func.h b/src/operators/math/conv_func.h
index 3d23f6c8a24be7f52e1b322e07addb47ccd8b056..d9e2da0db5c50e0b0f9b11d5584bfce8b75777cd 100644
--- a/src/operators/math/conv_func.h
+++ b/src/operators/math/conv_func.h
@@ -14,7 +14,7 @@ limitations under the License. */
 #pragma once
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #endif
@@ -49,7 +49,7 @@ inline void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
   auto new_ptr = bias.mutable_data();
   int axis_size = dDim[axis];
-#if __ARM_NEON
+#ifdef __ARM_NEON
   for (int i = 0; i < outer_size; ++i) {
     int inner_num = inner_size >> 4;
     int remain = inner_size - (inner_num << 4);
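Every -#if __ARM_NEON / +#ifdef __ARM_NEON pair above and below makes the same change. The distinction: #if expands the macro and evaluates it as an integer expression, so an undefined __ARM_NEON quietly becomes 0 and also trips -Wundef (fatal in -Werror builds), whereas #ifdef only asks whether the macro is defined and never evaluates it. A minimal illustration of the two forms, not taken from the patch:

    // Illustration only, not part of the patch.
    #if __ARM_NEON         // old form: evaluates a possibly undefined macro,
    #endif                 // which compiles as 0 but warns under -Wundef

    #ifdef __ARM_NEON      // new form: tests definedness only
    #include <arm_neon.h>  // reached only when the compiler declared NEON support
    #endif
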
diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp
index c8a6473567e6572d506aa5339f8e647c5c25fd5d..5db676564e190bf40e8af437ba68aee80b5a5af3 100644
--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "operators/math/depthwise_conv_3x3.h"
+#ifdef __ARM_NEON
 #include
+#endif
 #include
 
 namespace paddle_mobile {
@@ -21,7 +23,7 @@ namespace math {
 void DepthwiseConv3x3(const Tensor *input, vector strides,
                       vector paddings, const Tensor *filter, Tensor *bias,
                       Tensor *output, bool if_bias) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
 
   const int input_height = input->dims()[2];
@@ -242,6 +244,7 @@ void DepthwiseConv3x3(const Tensor *input, vector strides,
 
 void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
                           Tensor *output, Tensor *bias, bool if_bias) {
+#ifdef __ARM_NEON
   const float *input_data = input->data();
   const float *filter_data = filter->data();
   float *output_data = output->data();
@@ -511,11 +514,13 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
       filter_data_tmp += 9;
     }
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
                                    Tensor *output, const Tensor *new_scale,
                                    const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
   const float *input_data = input->data();
   const float *filter_data = filter->data();
   float *output_data = output->data();
@@ -813,11 +818,14 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
       filter_data_tmp += 9;
     }
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
                                    Tensor *output, const Tensor *new_scale,
                                    const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
+
   const int batch_size = input->dims()[0];
 
   const int input_height = input->dims()[2];
@@ -1009,10 +1017,12 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
     input_data += input_batch_stride;
     output_data += output_batch_stride;
   }
+#endif
 }
 
 void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
                             Tensor *output, Tensor bias, bool if_bias) {
+#ifdef __ARM_NEON
   const float *input_data = input->data();
   const float *filter_data = filter->data();
   float *output_data = output->data();
@@ -1209,11 +1219,13 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
     input_data += inhxw * c;
     output_data += outhxw * c;
   }
+#endif
 }
 
 void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                      Tensor *output, const Tensor *new_scale,
                                      const Tensor *new_bias, bool if_relu) {
+#ifdef __ARM_NEON
   const float *input_data = input->data();
   const float *filter_data = filter->data();
   float *output_data = output->data();
@@ -1444,6 +1456,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
     input_data += inhxw * c;
     output_data += outhxw * c;
   }
+#endif
 }
 
 }  // namespace math
diff --git a/src/operators/math/pool_2x2.cpp b/src/operators/math/pool_2x2.cpp
index e0fd5da57cc91d4f1c55c560134398126517db29..c86003f6f96b632efd50bbb156293510e3d8521c 100644
--- a/src/operators/math/pool_2x2.cpp
+++ b/src/operators/math/pool_2x2.cpp
@@ -21,7 +21,7 @@ namespace math {
 
 void Pool2x2Max(vector strides, vector paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
 
 #ifdef ARMV7
@@ -99,7 +99,7 @@ void Pool2x2Max(vector strides, vector paddings, const Tensor *input,
 
 void Pool2x2Avg(vector strides, vector paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
 
 #ifdef ARMV7
   const int batch_size = input->dims()[0];
diff --git a/src/operators/math/pool_2x2.h b/src/operators/math/pool_2x2.h
index 3fb0d24ba2ce854e8e63c066222e355e2c84dabb..ae32a3912b677efb50d8558700741a225e3eb3f8 100644
--- a/src/operators/math/pool_2x2.h
+++ b/src/operators/math/pool_2x2.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once
 #include "framework/tensor.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #endif // __ARM_NEON
 namespace paddle_mobile {
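In depthwise_conv_3x3.cpp the patch does more than rewrite the guard: each kernel body is now wrapped in #ifdef __ARM_NEON ... #endif, so a build without NEON still compiles and links, but those functions return without writing to their output tensor. A schematic of that shape, with an illustrative name and signature rather than the real ones:

    // Schematic only; the name and signature are illustrative.
    void DepthwiseKernelSketch(const float *input, float *output, int n) {
    #ifdef __ARM_NEON
      for (int i = 0; i < n; ++i) {
        output[i] = input[i];  // stands in for the real NEON implementation
      }
    #endif
      // No scalar fallback: on a non-NEON target the body compiles away and
      // the call becomes a silent no-op.
    }
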
diff --git a/src/operators/math/pool_3x3.cpp b/src/operators/math/pool_3x3.cpp
index cb0de199f11ffcd1f798ac5afada31e666c0570b..1a743f7a31546253e79cfd2d888d0607bf8935ff 100644
--- a/src/operators/math/pool_3x3.cpp
+++ b/src/operators/math/pool_3x3.cpp
@@ -13,13 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #ifdef POOL_OP
-#define __ARM_NEON true
 #ifdef _OPENMP
 #include
 #endif
 #include "framework/tensor.h"
 #include "pool_3x3.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #endif // __ARM_NEON
 #include
@@ -31,7 +30,7 @@ using std::max;
 using std::min;
 using std::vector;
 void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
 
   const int h_in = input->dims()[2];
@@ -281,7 +280,7 @@ void Pool3x3Avgs1p1(const Tensor *input, Tensor *output) {
 }
 
 void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
 
   const int h_in = input->dims()[2];
@@ -524,7 +523,7 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
 
 void Pool3x3Max(vector strides, vector paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
 
   const int input_height = input->dims()[2];
@@ -583,7 +582,7 @@ void Pool3x3Max(vector strides, vector paddings, const Tensor *input,
           }
           output_seg[ph * output_width + pw] = max_value;
         } else {
-#if defined(ARMV7)
+#ifdef ARMV7
           asm volatile(
               "vld1.32 {q1}, [%[pos1]] \n\t"
               "vld1.32 {q2}, [%[pos2]] \n\t"
@@ -623,7 +622,7 @@ void Pool3x3Max(vector strides, vector paddings, const Tensor *input,
 
 void Pool3x3Avg(vector strides, vector paddings, const Tensor *input,
                 Tensor *output) {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   const int batch_size = input->dims()[0];
 
   const int input_height = input->dims()[2];
@@ -677,7 +676,7 @@ void Pool3x3Avg(vector strides, vector paddings, const Tensor *input,
           }
           output_seg[ph * output_width + pw] = sum / 9.0;
         } else {
-#if defined(ARMV7)
+#ifdef ARMV7
           asm volatile(
               "vld1.32 {q1}, [%[pos1]] \n\t"
diff --git a/src/operators/math/pool_3x3.h b/src/operators/math/pool_3x3.h
index ac1eb16a4c0e077c625267545767b8f29144b8f1..1cf0c37c2c7c22c41be47b3d737a3c31ffc459ac 100644
--- a/src/operators/math/pool_3x3.h
+++ b/src/operators/math/pool_3x3.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include
 #include
 #include "framework/tensor.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #endif // __ARM_NEON
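pool_3x3.cpp previously forced the feature test on with "#define __ARM_NEON true", which made the NEON branches compile even when the toolchain had not enabled NEON at all; the patch removes that line and relies on the compiler's own definition (for example, the one produced by -mfpu=neon on 32-bit ARM). As general background rather than something this patch adds, some older 32-bit toolchains spell the macro __ARM_NEON__, so portable detection occasionally checks both spellings; the PADDLE_MOBILE_HAS_NEON flag below is hypothetical:

    // Background sketch, not part of the patch: never define __ARM_NEON by
    // hand; let the compiler provide it, and optionally accept the legacy
    // __ARM_NEON__ spelling used by some older 32-bit ARM toolchains.
    #if defined(__ARM_NEON) || defined(__ARM_NEON__)
    #include <arm_neon.h>
    #define PADDLE_MOBILE_HAS_NEON 1  // hypothetical project-local flag
    #else
    #define PADDLE_MOBILE_HAS_NEON 0
    #endif
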
diff --git a/src/operators/math/softmax.cpp b/src/operators/math/softmax.cpp
index a1eb4f13d82376d86da258101b15e6ae5e8bdc97..968915f21e08fce9f25ceb63831ee40ecba9cee6 100644
--- a/src/operators/math/softmax.cpp
+++ b/src/operators/math/softmax.cpp
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "operators/math/softmax.h"
 #include "common/types.h"
-#if __ARM_NEON
+#ifdef __ARM_NEON
 #include
 #include
 #include "operators/math/math_func_neon.h"
@@ -29,7 +29,7 @@ using framework::DDim;
 using framework::Tensor;
 template
 class SoftmaxFuntor {
-#if __ARM_NEON
+#ifdef __ARM_NEON
   void sum(float *input, float *sumptr, int inner_size, int outter_size) {
     float32x4_t acc = vdupq_n_f32(0);
     float sum_ = 0;
@@ -144,7 +144,7 @@ class SoftmaxFuntor {
       framework::Tensor sub_X = X->Slice(i, i + 1);
      framework::Tensor sub_Y = Y->Slice(i, i + 1);
 
-#if __ARM_NEON
+#ifdef __ARM_NEON
      SoftmaxCacl(&sub_X, &sub_Y);
 #endif
    }
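Taken together, the pattern the patch settles on is a guarded include plus guarded intrinsics. A self-contained sketch of that pattern with an explicit scalar fallback; scale_buffer is a hypothetical helper, not a function from this repository, and the fallback branch is an addition of this sketch rather than something the patch introduces:

    #ifdef __ARM_NEON
    #include <arm_neon.h>
    #endif
    #include <cstddef>

    // Hypothetical helper: multiply a buffer in place, using NEON when the
    // compiler provides it and a plain loop otherwise.
    inline void scale_buffer(float *data, std::size_t n, float factor) {
    #ifdef __ARM_NEON
      std::size_t i = 0;
      float32x4_t vfactor = vdupq_n_f32(factor);
      for (; i + 4 <= n; i += 4) {
        vst1q_f32(data + i, vmulq_f32(vld1q_f32(data + i), vfactor));
      }
      for (; i < n; ++i) data[i] *= factor;  // leftover tail elements
    #else
      for (std::size_t i = 0; i < n; ++i) data[i] *= factor;  // portable path
    #endif
    }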