merge baidu/develop

11688829 · qijun · 418917e8 · 0fdbd40f · 11688829 · 11688829
6 changed file
--- a/doc/howto/raspberry/build_for_raspberry.md
+++ b/doc/howto/raspberry/build_for_raspberry.md
+# 如何构建Raspberry pi下运行的PaddlePaddle
+这里考虑的是交叉编译方式，即在Linux-x86环境下构建Raspberry pi下可运行的PaddlePaddle。
+## 下载交叉编译环境
+```
+git clone https://github.com/raspberrypi/tools
+```
+如果host是x86-64环境，选用`arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`下的工具链作为编译工具。注意，需要系统的glibc版本在2.14以上。
+## 编译第三方库
+cmake编译PaddlePaddle时候会自动下载编译依赖的第三方库，不过openblas和protobuf最好还是在编译PaddlePaddle之前先编译好，这样可以保证编译PaddlePaddle的时候更加顺畅。
+### 编译OpenBLAS
+```
+git clone https://github.com/xianyi/OpenBLAS.git
+cd OpenBLAS
+make TARGET=ARMV7 HOSTCC=gcc CC=arm-linux-gnueabihf-gcc NOFORTRAN=1 USE_THREAD=0
+```
+### 编译protobuf
+```
+git clone https://github.com/google/protobuf.git
+cd protobuf
+git checkout 9f75c5aa851cd877fb0d93ccc31b8567a6706546
+mkdir ../protobuf-build && cd ../protobuf-build
+cmake ../protobuf/cmake \
+-Dprotobuf_BUILD_TESTS=OFF \
+-DCMAKE_CXX_COMPILER=arm-linux-gnueabihf-g++ \
+-DCMAKE_C_COMPILER=arm-linux-gnueabihf-gcc \
+-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+-DCMAKE_BUILD_TYPE=Release \
+-DCMAKE_INSTALL_LIBDIR=lib
+```
+注意：这样编译出来的`libprotobuf.a`和`protoc`都是ARM版本的，而我们需要的是一个x86-64版本的`protoc`，所以需要用host gcc再编译一遍protobuf然后使用其中的`protoc`。
+## 编译PaddlePaddle
+cmake参数如下；其中`WITH_C_API`设置为ON，编译输出的output目录中会包含`include`和`lib`目录，其中`include`中包含CAPI的头文件，`lib`中包含一个ARM版本的库。另外，`CMAKE_BUILD_TYPE`设置为`MinSizeRel`可以减小编译的库的大小。
+```
+cmake .. -DWITH_GPU=OFF -DWITH_C_API=ON -DWITH_PYTHON=OFF -DWITH_SWIG_PY=OFF \
+-DCMAKE_CXX_COMPILER:FILEPATH=arm-linux-gnueabihf-g++ \
+-DCMAKE_C_COMPILER:FILEPATH=arm-linux-gnueabihf-gcc \
+-DCMAKE_C_FLAGS="-mfpu=neon" \
+-DCMAKE_CXX_FLAGS="-mfpu=neon" \
+-DOPENBLAS_ROOT=openblas \
+-DCMAKE_PREFIX_PATH=protobuf \
+-DCMAKE_BUILD_TYPE=MinSizeRel
+```
--- a/paddle/api/CMakeLists.txt
+++ b/paddle/api/CMakeLists.txt
@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
 SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
 SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
-SET(CMAKE_CXX_FLAGS "-std=c++11 -fPIC -Wall")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fPIC -Wall")
 IF(WITH_COVERAGE)
    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
 ENDIF(WITH_COVERAGE)

--- a/paddle/majel/CMakeLists.txt
+++ b/paddle/majel/CMakeLists.txt
@@ -9,6 +9,13 @@ if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
    get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
    set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
+    # enable boost: locate Boost and add its include directories for the
+    # targets defined in this directory.
+    find_package(Boost REQUIRED)
+    if(NOT Boost_FOUND)
+        # Defensive check; REQUIRED already aborts configuration when Boost is
+        # missing. The mode keyword must be FATAL_ERROR: a bare "FATAL" is not
+        # a message mode, so it would be printed as part of the text and the
+        # configure step would continue.
+        message(FATAL_ERROR "Cannot find Boost library.")
+    endif()
+    include_directories(${Boost_INCLUDE_DIRS})
    # enable c++11
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

--- a/paddle/majel/README.md
+++ b/paddle/majel/README.md
@@ -93,6 +93,19 @@ typedef boost::variant<
 Because `variant` may be thought of as "multi-type, single value", we can utilize it to implement unified interfaces for PaddlePaddle.
+`DDim` plays two roles in Majel. First, it is used to indicate the size of a tensor. For example, we can construct a new `DArray` in the following way:
+ ```c++
+ DArray arr = make_darray(make_ddim({2,3}), 0.0f);
+ ```
+ It means that `arr` will be a two-dimensional tensor, i.e. a matrix. The size of its first dimension is 2 and that of the second is 3. All elements of `arr` are initialized to 0.0.
+ The second role of `DDim` is as a tensor index. For example, to access the element in the 1st row and 2nd column of `arr` and set it to 1.0, we can write:
+ ```c++
+ arr[make_ddim({0, 1})] = 1.0;
+ ```
 ## Implement Tensor in Paddle
 We want to create a Tensor class to replace Vector and Matrix, and to support high-dimensional data. The operations on Tensor are implemented in both CPU and GPU. We also want to make sure that the Tensor interface is friendly to its callers.
@@ -142,8 +155,6 @@ You must appoint the type and dimension of a Array, whereas DArray can represent
 Please reference the section of `Learn from Majel` for more details.
 ### ArrayView
 `ViewIterator` is a class template which implements basic iterator operation, including increment(++), decrement(--), dereference(*), equalit comparisons(==) and so on.

--- a/paddle/majel/test/CMakeLists.txt
+++ b/paddle/majel/test/CMakeLists.txt
@@ -3,7 +3,6 @@ file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
 add_executable(majel_tests ${ALL_TEST_FILES})
 add_dependencies(majel_tests majel)
 target_link_libraries(majel_tests     
-                      ${Boost_LIBRARIES}
                      ${GTEST_LIBRARIES}
                      ${GTEST_MAIN_LIBRARIES}
                      majel

--- a/paddle/utils/CpuId.cpp
+++ b/paddle/utils/CpuId.cpp
@@ -19,19 +19,22 @@ limitations under the License. */
 /// for MSVC
 #define CPUID(info, x) __cpuidex(info, x, 0)
-#elif !defined(__ANDROID__)
+#else
+#if !defined(__arm__)
 #include <cpuid.h>
 /// for GCC/Clang
 #define CPUID(info, x) __cpuid_count(x, 0, info[0], info[1], info[2], info[3])
+#endif
 #endif
 namespace paddle {
 SIMDFlags::SIMDFlags() {
-#if !defined(__ANDROID__)
+#if defined(__arm__)
+  simd_flags_ = SIMD_NEON;
+#else
  unsigned int cpuInfo[4];
  // CPUID: https://en.wikipedia.org/wiki/CPUID
  // clang-format off
@@ -52,8 +55,6 @@ SIMDFlags::SIMDFlags() {
  CPUID(cpuInfo, 0x80000001);
  simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4  : SIMD_NONE;
  // clang-fotmat on
-#else
-  simd_flags_ = SIMD_NEON;
 #endif
 }