Update Build docs for ARM Linux CPU and GPU(OpenCL) (#1583)

* Format CMakeLists * BugFix: add missing header files for memset, strlen, stable_sort, copy_n * Update ARM Linux CPU, GPU build docs for rk3399

Update Build docs for ARM Linux CPU and GPU(OpenCL) (#1583)
* Format CMakeLists * BugFix: add missing header files for memset, strlen, stable_sort, copy_n * Update ARM Linux CPU, GPU build docs for rk3399
15794c0a · Shuai Yuan · GitHub · 5a6bfa4a · 15794c0a · 15794c0a
6 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
 cmake_minimum_required(VERSION 3.0.0)

+# basic build option
 option(USE_OPENMP       "build with openmp support"      ON)
 option(USE_EXCEPTION    "build with exception"           ON)
 option(WITH_LOGGING     "print logging for debug"        OFF)
@@ -7,7 +8,7 @@ option(WITH_SYMBOL   "build with all symbols" ON) # turn off if use jni or ios i
 option(WITH_PROFILE     "print op profile for debug"     OFF)
 option(WITH_TEST        "build with unit tests"          ON)

-# select the platform to build
+# select platform: CPU, GPU_CL, FPGA
 option(CPU              "build with arm CPU support"     ON)
 option(GPU_CL           "build with OpenCL support"      OFF)
 option(FPGA             "build with FPGA support"        OFF)
@@ -19,10 +20,12 @@ endif()

 project(paddle-mobile)

+# source code
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)

+# build flags
 set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
 if(IS_IOS)
    set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
@@ -32,13 +35,14 @@ else()
    set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
 endif()

+# others
 if(USE_OPENMP)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()

 if(WITH_LOGGING)
-    message(STATUS "debugging mode")
+    message(STATUS "Debugging mode")
    add_definitions(-DPADDLE_MOBILE_DEBUG)
 else()
 endif()
@@ -48,7 +52,7 @@ if(NOT WITH_SYMBOL)
 endif()

 if(USE_EXCEPTION)
-    message(STATUS "use exception")
+    message(STATUS "Use exception")
    add_definitions(-DENABLE_EXCEPTION -fexceptions)
 else()
    add_definitions(-fno-exceptions)
@@ -270,4 +274,3 @@ if(WITH_TEST AND WITH_SYMBOL)
 elseif(FPGA)
    add_subdirectory(test)
 endif()
-
--- a/doc/development_arm_linux.md
+++ b/doc/development_arm_linux.md
-# ARM_LINUX开发文档
-目前支持直接在arm_linux平台上编译paddle-mobile
+# ARM Linux开发文档

-## 以Raspberrypi3为例：
-### 执行编译
-在paddle-mobile根目录中，执行以下命令：
-```
-cd tools
-/bin/bash build.sh arm_linux googlenet
+本文介绍如何在ARM Linux设备（如Raspberry Pi 3或Firefly-RK3399）上编译paddle-mobile。
+
+## 预先安装
+
+```shell
+$ sudo apt update
+$ sudo apt-get install -y cmake git
+$ git clone https://github.com/PaddlePaddle/paddle-mobile.git
 ```
-执行完毕后，生成的so位于paddle-mobile/build/release/arm-linux/build目录中，单测可执行文件位于test/build目录中。

-### 运行
+## 编译
+
+在paddle-mobile根目录中，执行以下命令：
+
+```shell
+# 进入paddle-mobile根目录
+$ cd <your-paddle-mobile>
+
+# 可选：如需开启GPU支持，请将CMakeLists.txt中的GPU_CL选项设为ON，并执行以下命令
+$ cp /usr/lib/aarch64-linux-gnu/libMali.so ./third_party/opencl/
+$ cp /usr/lib/aarch64-linux-gnu/libOpenCL.so ./third_party/opencl/
+$ ln -s ./third_party/opencl/libMali.so ./third_party/opencl/
+
+# 编译
+$ cd ./tools
+$ /bin/bash build.sh arm_linux
 ```
-cd ../build/release/arm-linux/build
-export LD_LIBRARY_PATH=.
-cd ../../../../test/build/
-./test-googlenet
+
+- 动态库`so`文件位于`<paddle-mobile-repo>/build/release/arm-linux/build`目录；  
+- 单元测试位于`<paddle-mobile-repo>/test/build`目录，若只需编译某个模型（如`googlenet`），可以执行`bash build.sh arm_linux googlenet`。
+
+## 运行
+
+接着刚刚的命令，执行MobileNet模型：
+
+```shell
+# 导入编译好的动态库路径到LD_LIBRARY_PATH中
+$ cd ../build/release/arm-linux/build
+$ export LD_LIBRARY_PATH=.
+
+# 执行MobileNet
+# 可选：GPU执行./test-mobilenetgpu
+$ cd ../../../../test/build/
+$ ./test-mobilenet
+
+# 执行顺利会打印如下日志
+load cost :0ms
+ Max element is 0.985921 at position 954
+predict cost :121.462ms
+如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana 是否存在?
 ```
-*注1：如果本地test目录下没有模型的话，会自动下载官方demo模型并解压.*

-*注2：因为arm_linux设备算力限制,建议编译时,根据需要指定编译某个模型（如googlenet）或扩大系统的swap交换空间，避免编译时卡死.*
+注意：  
+1. 如果本地仓库中`test`目录下没有模型，脚本会自动下载官方demo模型并解压；  
+2. 因为ARM Linux设备算力有限，若编译时卡死，可重启机器后尝试单线程编译（将`tools/build.sh`中`build_for_arm_linux`的编译命令改为`make -j1`），或指定只编译某个模型（如googlenet），或扩大系统的swap交换空间。

-## 其他ARM_LINUX平台
+## 其它

-其他的arm_linux平台可以修改 tools/build.sh中的相关编译参数进行编译。可以参考对应平台的编译选项。
-特别说明的是Android平台请参考Android开发文档.
+- 若编译中提示有不识别的编译选项等ARM Linux平台的编译问题，可尝试修改`tools/build.sh`中的相关编译参数；  
+- Android平台请参考Android开发文档。

--- a/src/framework/cl/cl_engine.h
+++ b/src/framework/cl/cl_engine.h
@@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once

+#include <cstring>
 #include <memory>
 #include <string>


--- a/src/operators/kernel/cl/multiclass_nms_kernel.cpp
+++ b/src/operators/kernel/cl/multiclass_nms_kernel.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef MULTICLASSNMS_OP

 #include "operators/kernel/multiclass_nms_kernel.h"
+#include <algorithm>
 #include "operators/math/poly_util.h"
 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/math/gemm/gemm_kernel.h
+++ b/src/operators/math/gemm/gemm_kernel.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #if defined(__ARM_NEON__) || defined(__ARM_NEON)

 #include <arm_neon.h>
-#include <memory>
+#include <string.h>
 #include "operators/math/math.h"

 namespace paddle_mobile {

--- a/tools/build.sh
+++ b/tools/build.sh
@@ -94,7 +94,6 @@ build_for_android() {
    cp ../../../src/operators/kernel/cl/cl_kernel/*  ./build/cl_kernel/
 }

-
 build_for_arm_linux() {
    MODE="Release"
    ARM_LINUX="arm-linux"
@@ -104,7 +103,7 @@ build_for_arm_linux() {
            -B"../build/release/arm-linux" \
            -DCMAKE_BUILD_TYPE="${MODE}" \
            -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -mfpu=neon-vfpv4 -mfloat-abi=hard -ftree-vectorize -funsafe-math-optimizations  -pipe -mlittle-endian -munaligned-access" \
+            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -ftree-vectorize -funsafe-math-optimizations  -pipe -mlittle-endian " \
            -DNET="${NETS}" \
            -D"V7"=true
    else
@@ -112,7 +111,7 @@ build_for_arm_linux() {
            -B"../build/release/arm-linux" \
            -DCMAKE_BUILD_TYPE="${MODE}" \
            -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -mfpu=neon-vfpv4 -mfloat-abi=hard -ftree-vectorize -funsafe-math-optimizations  -pipe -mlittle-endian -munaligned-access" \
+            -DCMAKE_CXX_FLAGS="-std=c++14 -mcpu=cortex-a53 -mtune=cortex-a53 -ftree-vectorize -funsafe-math-optimizations -pipe -mlittle-endian " \
            -DNET="${NETS}" \
            -D"V7"=true
    fi