Commit 0e358ccc, authored by dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into image_v2

...@@ -48,7 +48,7 @@ before_install:
  - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
  # protobuf version.
-  - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
+  - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
  - |
    function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
......
...@@ -92,6 +92,7 @@ include(external/swig)    # download, build, install swig
include(external/warpctc)  # download, build, install warpctc
include(external/any)      # download libn::any
+include(generic)           # simplify cmake module
include(package)           # set paddle packages
include(cpplint)           # set paddle c++ style
include(ccache)            # set ccache for compilation
......
File mode changed from 100755 to 100644
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# To simplify the build process of PaddlePaddle, we define a couple of
# fundamental abstractions, e.g., how to build libraries, binaries and
# tests in C++, CUDA and Go.
#
# -------------------------------------------
# C++ CUDA C++ Go
# -------------------------------------------
# cc_library nv_library go_library
# cc_binary nv_binary go_binary
# cc_test nv_test go_test
# -------------------------------------------
#
# cmake_parse_arguments can help us to achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
# cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_library(tensor
# SRCS
# tensor.cc
# DEPS
# variant)
function(cc_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${cc_library_OPTIONAL} STREQUAL "SHARED")
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_binary(tensor
# SRCS
# tensor.cc)
function(cc_binary TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)
# The dependency to target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
# cc_test(tensor_test
# SRCS
# tensor_test.cc
# DEPS
# tensor)
function(cc_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depend on tensor. This implies that if
# any of tensor.{h,cc}, ops.{h,cu} is changed, ops need to be re-built.
# nv_library(ops
# SRCS
# ops.cu
# DEPS
# tensor)
function(nv_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)
function(nv_binary TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
endfunction(nv_binary)
# The dependency to target tensor implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test need to be re-built.
# nv_test(ops_test
# SRCS
# ops_test.cu
# DEPS
# ops)
function(nv_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
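Taken together, a downstream CMakeLists.txt would use these helpers roughly as follows. This is only a minimal sketch based on the usage shown in the comments above; `tensor`, `ops`, and the `variant` dependency are illustrative placeholders, not files added by this commit:

```cmake
# Static C++ library built from tensor.cc; also rebuilt when the variant target changes.
cc_library(tensor
    SRCS tensor.cc
    DEPS variant)

# gtest-based unit test; cc_test links gtest and registers the test via add_test.
cc_test(tensor_test
    SRCS tensor_test.cc
    DEPS tensor)

# CUDA library whose kernels take Tensor arguments, plus its test binary.
nv_library(ops
    SRCS ops.cu
    DEPS tensor)

nv_test(ops_test
    SRCS ops_test.cu
    DEPS ops)
```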
-import sys
import math
import numpy as np
+import gzip
+import logging
import paddle.v2.dataset.conll05 as conll05
+import paddle.v2.evaluator as evaluator
import paddle.v2 as paddle

+logger = logging.getLogger('paddle')

-def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
+default_std = 1 / math.sqrt(hidden_dim) / 3.0
+mix_hidden_lr = 1e-3


def d_type(size):
    return paddle.data_type.integer_value_sequence(size)


+def db_lstm():
    #8 features
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
...@@ -31,11 +38,7 @@ def db_lstm():
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

-    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
-    default_std = 1 / math.sqrt(hidden_dim) / 3.0
-    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
+    emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)
...@@ -63,7 +66,6 @@ def db_lstm():
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

-    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)
...@@ -111,6 +113,21 @@ def db_lstm():
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

+    return feature_out
+
+
+def load_parameter(file_name, h, w):
+    with open(file_name, 'rb') as f:
+        f.read(16)  # skip header.
+        return np.fromfile(f, dtype=np.float32).reshape(h, w)
+
+
+def train():
+    paddle.init(use_gpu=False, trainer_count=1)
+
+    # define network topology
+    feature_out = db_lstm()
+    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
    crf_cost = paddle.layer.crf(size=label_dict_len,
                                input=feature_out,
                                label=target,
...@@ -120,29 +137,15 @@ def db_lstm():
                                    learning_rate=mix_hidden_lr))

    crf_dec = paddle.layer.crf_decoding(
-        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))
+    evaluator.sum(input=crf_dec)

-    return crf_cost, crf_dec
-
-
-def load_parameter(file_name, h, w):
-    with open(file_name, 'rb') as f:
-        f.read(16)  # skip header.
-        return np.fromfile(f, dtype=np.float32).reshape(h, w)
-
-
-def main():
-    paddle.init(use_gpu=False, trainer_count=1)
-
-    # define network topology
-    crf_cost, crf_dec = db_lstm()

    # create parameters
-    parameters = paddle.parameters.create([crf_cost, crf_dec])
+    parameters = paddle.parameters.create(crf_cost)
+    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
...@@ -152,18 +155,12 @@ def main():
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-
    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
-                                 update_equation=optimizer)
+                                 update_equation=optimizer,
+                                 extra_layers=crf_dec)
-    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

-    trn_reader = paddle.batch(
+    reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)
...@@ -179,12 +176,102 @@ def main():
        'target': 8
    }

+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 100 == 0:
+                logger.info("Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics))
+            if event.batch_id and event.batch_id % 1000 == 0:
+                result = trainer.test(reader=reader, feeding=feeding)
+                logger.info("\nTest with Pass %d, Batch %d, %s" %
+                            (event.pass_id, event.batch_id, result.metrics))
+
+        if isinstance(event, paddle.event.EndPass):
+            # save parameters
+            with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
+                parameters.to_tar(f)
+
+            result = trainer.test(reader=reader, feeding=feeding)
+            logger.info("\nTest with Pass %d, %s" %
+                        (event.pass_id, result.metrics))

    trainer.train(
-        reader=trn_reader,
+        reader=reader,
        event_handler=event_handler,
-        num_passes=10000,
+        num_passes=10,
        feeding=feeding)
+
+def infer_a_batch(inferer, test_data, word_dict, pred_dict, label_dict):
+    probs = inferer.infer(input=test_data, field='id')
+    assert len(probs) == sum(len(x[0]) for x in test_data)
+
+    start_id = 0  # running offset into the flattened label ids returned by infer()
+    for idx, test_sample in enumerate(test_data):
+        pred_str = "%s\t" % (pred_dict[test_sample[6][0]])
+
+        for w, tag in zip(test_sample[0],
+                          probs[start_id:start_id + len(test_sample[0])]):
+            pred_str += "%s[%s] " % (word_dict[w], label_dict[tag])
+        print(pred_str.strip())
+        start_id += len(test_sample[0])
+
+
+def infer():
+    label_dict_reverse = dict((value, key)
+                              for key, value in label_dict.iteritems())
+    word_dict_reverse = dict((value, key)
+                             for key, value in word_dict.iteritems())
+    pred_dict_reverse = dict((value, key)
+                             for key, value in verb_dict.iteritems())
+
+    test_creator = paddle.dataset.conll05.test()
+
+    paddle.init(use_gpu=False, trainer_count=1)
+
+    # define network topology
+    feature_out = db_lstm()
+    predict = paddle.layer.crf_decoding(
+        size=label_dict_len,
+        input=feature_out,
+        param_attr=paddle.attr.Param(name='crfw'))
+
+    test_pass = 0
+    with gzip.open('params_pass_%d.tar.gz' % (test_pass)) as f:
+        parameters = paddle.parameters.Parameters.from_tar(f)
+
+    inferer = paddle.inference.Inference(
+        output_layer=predict, parameters=parameters)
+
+    # prepare test data
+    test_data = []
+    test_batch_size = 50
+    for idx, item in enumerate(test_creator()):
+        test_data.append(item[0:8])
+
+        if idx and (not idx % test_batch_size):
+            infer_a_batch(
+                inferer,
+                test_data,
+                word_dict_reverse,
+                pred_dict_reverse,
+                label_dict_reverse, )
+            test_data = []
+    infer_a_batch(
+        inferer,
+        test_data,
+        word_dict_reverse,
+        pred_dict_reverse,
+        label_dict_reverse, )
+    test_data = []
+
+
+def main(is_inferring=False):
+    if is_inferring:
+        infer()
+    else:
+        train()
+

if __name__ == '__main__':
-    main()
+    main(is_inferring=False)
# Build the PaddlePaddle library for Android

The PaddlePaddle library for Android can be built by cross-compiling on a development platform you are familiar with (Linux, Mac OS X or Windows). This document uses Linux x86-64 as an example and walks through the steps for cross-compiling the PaddlePaddle library for Android.

## Prepare the cross-compilation environment

To cross-compile PaddlePaddle from source, you first need a cross-compilation environment. The C/C++ cross-compilation toolchain for Android is the [Android NDK](https://developer.android.com/ndk/downloads/index.html?hl=zh-cn). You can download a prebuilt package yourself, or fetch it with:
```bash
wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip
unzip -q android-ndk-r14b-linux-x86_64.zip
```
The Android NDK contains the compilers and system libraries for every Android API level and every architecture (arm/arm64/x86/mips). Based on your target architecture and the minimum Android API level you need to support, build a [standalone toolchain](https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn).

For example:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/my_standalone_toolchain
```
This command generates a toolchain under your/path/to/my_standalone_toolchain that targets 32-bit ARM, supports Android API level 21 and above, and uses arm-linux-androideabi-gcc (GCC) 4.9 as the compiler.

Note: **PaddlePaddle requires a toolchain whose supported Android API level is at least 21.**
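If you want to double-check which compiler the generated standalone toolchain provides, one optional sanity check (assuming the standard standalone-toolchain layout, with the compilers under `bin/`; this step is not part of the original instructions) is:

```bash
# Print the compiler version and target of the freshly generated toolchain.
your/path/to/my_standalone_toolchain/bin/arm-linux-androideabi-gcc --version
```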
## Configure the cross-compilation

CMake supports cross-compiling via [cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling). To simplify the cmake configuration, PaddlePaddle ships a toolchain file, [cmake/cross_compiling/android.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake), which provides default compiler and compile-flag settings. Note that starting with CMake 3.7, CMake itself offers generic support for cross-compiling to Android; if PaddlePaddle detects that your CMake version is 3.7 or newer, it forwards your configuration options to CMake and lets CMake handle them. See [cmake-toolchains](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling) for the details of those options.
When cross-compiling the PaddlePaddle library for Android, a few options are mandatory:

- `CMAKE_SYSTEM_NAME`, the target platform for CMake, must be set to `Android`. Only when `CMAKE_SYSTEM_NAME=Android` does PaddlePaddle's CMake setup treat the build as an Android cross-compilation; it then builds a host-side protoc executable, a target-side protobuf library, and a target-side OpenBLAS library from the `arm_soft_fp_abi` branch that Android needs. It also forces several PaddlePaddle options (`WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, `WITH_RDMA=OFF`).
- `WITH_C_API` must be set to `ON`, because only C-API-based inference is supported on Android.
- `WITH_SWIG_PY` must be set to `OFF`, because training or inference through the SWIG bindings is not supported on Android.
Optional Android-specific options:

- `ANDROID_STANDALONE_TOOLCHAIN`: absolute path of the standalone toolchain, or a path relative to the build directory. PaddlePaddle's CMake setup derives the cross-compiler, sysroot and Android API level from this value; otherwise you have to set them manually when running cmake. No default value.
- `ANDROID_ABI`: target ABI. Currently only `armeabi-v7a` is supported; the default is `armeabi-v7a`.
- `ANDROID_NATIVE_API_LEVEL`: Android API level of the toolchain. If not set explicitly, PaddlePaddle derives it from `ANDROID_STANDALONE_TOOLCHAIN`.
- `ANDROID_ARM_MODE`: whether to compile in ARM mode. Can be `ON/OFF`; the default is `ON`.
- `ANDROID_ARM_NEON`: whether to use NEON instructions. Currently this must be `ON`; the default is `ON`.
Other options:

- `HOST_C/CXX_COMPILER`: the host-side C/C++ compiler, used to build the host-side protoc executable and the target-side OpenBLAS library. Defaults to the value of the `CC` environment variable, or to the `cc` compiler if `CC` is unset.

A typical cmake configuration looks like this:
```bash
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/my_standalone_toolchain \
-DANDROID_ABI=armeabi-v7a \
-DANDROID_ARM_NEON=ON \
-DANDROID_ARM_MODE=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
```
You can set further build options as needed. For example, set `CMAKE_BUILD_TYPE` to `MinSizeRel` to minimize the size of the generated library, or to `Release` for the fastest execution speed. You can also set `CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE` manually to influence how PaddlePaddle is compiled.
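As an illustration, a size-optimized configuration only adds that one flag to the invocation shown above (same hypothetical paths as in the earlier example):

```bash
cmake -DCMAKE_SYSTEM_NAME=Android \
      -DANDROID_STANDALONE_TOOLCHAIN=your/path/to/my_standalone_toolchain \
      -DANDROID_ABI=armeabi-v7a \
      -DANDROID_ARM_NEON=ON \
      -DANDROID_ARM_MODE=ON \
      -DCMAKE_INSTALL_PREFIX=your/path/to/install \
      -DWITH_C_API=ON \
      -DWITH_SWIG_PY=OFF \
      -DCMAKE_BUILD_TYPE=MinSizeRel \
      ..
```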
## Build and install

After cmake has been configured, run the following commands. PaddlePaddle will automatically download and build all third-party dependencies, then build and install the PaddlePaddle inference library.
```bash
make
make install
```
Note: if you have previously built PaddlePaddle for another platform in the same source tree, first delete the `third_party` and `build` directories with `rm -rf`, so that all third-party dependencies and PaddlePaddle itself are rebuilt against the new cmake configuration.
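For example, run this from the root of the source tree before reconfiguring (a plain cleanup, exactly as the note above describes):

```bash
rm -rf third_party/ build/
```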
After the install command finishes, `your/path/to/install` will contain `include` and `lib` directories: `include` holds the C-API headers and `lib` holds the Android build of the library. PaddlePaddle is now installed; you can use the files under `your/path/to/install` in your deep-learning Android app, calling the library as described in the C-API documentation.
# Build the PaddlePaddle library for Raspberry Pi

On a Raspberry Pi itself you can log in (e.g. via ssh) and build the PaddlePaddle library directly, following the documentation on [building PaddlePaddle from source](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html).

You can also cross-compile on a development platform you are familiar with. This document uses Linux x86-64 as an example and describes how to cross-compile the PaddlePaddle library for Raspberry Pi.

## Prepare the cross-compilation environment

To cross-compile PaddlePaddle from source, you first need a cross-compilation environment. The C/C++ cross-compilation toolchain for Raspberry Pi can be downloaded from [github](https://github.com/raspberrypi/tools), or fetched with:
```bash
git clone https://github.com/raspberrypi/tools.git
```
The repository contains several prebuilt toolchains for different host platforms. On a Linux x86-64 host, use the toolchain under `arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`, whose compiler is arm-linux-gnueabihf-gcc 4.8.3.

Note that this toolchain requires glibc 2.14 or newer on the host system.
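One quick way to check the host glibc version (an optional sanity check, not part of the original instructions) is:

```bash
# Prints something like "ldd (GNU libc) 2.23"; the version must be >= 2.14.
ldd --version | head -n 1
```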
## Configure the cross-compilation

CMake supports cross-compiling via [cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling). To simplify the cmake configuration, PaddlePaddle ships a toolchain file, [cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake), which provides default compiler and compile-flag settings.

When cross-compiling the PaddlePaddle library for Raspberry Pi, one option is mandatory:

- `CMAKE_SYSTEM_NAME`, the target platform for CMake, must be set to `RPi`. Only when `CMAKE_SYSTEM_NAME=RPi` does PaddlePaddle's CMake setup treat the build as a Raspberry Pi cross-compilation; it then builds a host-side protoc executable, a target-side protobuf library, and a target-side OpenBLAS library.
Optional Raspberry Pi-specific options:

- `RPI_TOOLCHAIN`: absolute path of the toolchain, or a path relative to the build directory. PaddlePaddle's CMake setup derives the cross-compiler from this value; otherwise you have to set it manually when running cmake. No default value.
- `RPI_ARM_NEON`: whether to use NEON instructions. Currently this must be `ON`; the default is `ON`.

Other options:

- `HOST_C/CXX_COMPILER`: the host-side C/C++ compiler, used to build the host-side protoc executable and the target-side OpenBLAS library. Defaults to the value of the `CC` environment variable, or to the `cc` compiler if `CC` is unset.

A typical cmake invocation looks like this:
```bash
cmake -DCMAKE_SYSTEM_NAME=RPi \
-DRPI_TOOLCHAIN=your/path/to/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 \
-DRPI_ARM_NEON=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_GPU=OFF \
-DWITH_C_API=ON \
-DWITH_PYTHON=OFF \
-DWITH_SWIG_PY=OFF \
..
```
You can set further build options as needed. For example, set `CMAKE_BUILD_TYPE` to `MinSizeRel` to minimize the size of the generated library, or to `Release` for the fastest execution speed. You can also set `CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE` manually to influence how PaddlePaddle is compiled.

## Build and install

After cmake has been configured, run the following commands. PaddlePaddle will automatically download and build all third-party dependencies, then build and install PaddlePaddle.
```bash
make
make install
```
Note: if you have previously built PaddlePaddle for another platform in the same source tree, first delete the `third_party` and `build` directories with `rm -rf`, so that all third-party dependencies and PaddlePaddle itself are rebuilt against the new cmake configuration.

After the install command finishes, and because `WITH_C_API` was set to `ON` in the cmake configuration above, `your/path/to/install` will contain `include` and `lib` directories: `include` holds the C-API headers and `lib` holds the Raspberry Pi build of the library.

See the documentation on [building PaddlePaddle from source](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html) for more build options.
...@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)

SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
-SET(CMAKE_CXX_FLAGS "-std=c++11 -fPIC -Wall")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fPIC -Wall")
IF(WITH_COVERAGE)
    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
ENDIF(WITH_COVERAGE)
......
-cmake_minimum_required(VERSION 3.0)
+cc_library(majel SRCS place.cc)

-if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
-    # find #include <majel/xx.h>
-    get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
-    include_directories(${PARENT_DIR})
-    # find cmake directory modules
-    get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
-    set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
-    # enable c++11
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-    # enable gtest
-    set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
-    set(WITH_TESTING ON)
-    include(external/gtest)
-else()
-    message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})")
-endif()
-
-########################### Build Majel #############################
-set(MAJEL_CXX_FILES place.cc)
-set(MAJEL_CUDA_FILES "")
-
-if(CUDA_FOUND)
-    cuda_add_library(majel ${MAJEL_CUDA_FILES} ${MAJEL_CXX_FILES})
-else()
-    add_library(majel ${MAJEL_CXX_FILES})
-endif()
-
-add_dependencies(majel ${external_project_dependencies})
-#####################################################################

if(WITH_TESTING)
    add_subdirectory(test)
......
...@@ -93,6 +93,19 @@ typedef boost::variant<
Because `variant` may be thought of as "multi-type, single value", we can utilize it to implement unified interfaces for PaddlePaddle.
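For readers unfamiliar with `boost::variant`, here is a minimal, self-contained sketch of the "multi-type, single value" idea. The `ToyValue` typedef below is purely illustrative and is not part of Majel or PaddlePaddle:

```c++
#include <boost/variant.hpp>
#include <iostream>

// One variable that may hold either an int or a float -- but only one at a time.
typedef boost::variant<int, float> ToyValue;

int main() {
  ToyValue v = 3;   // currently holds an int
  v = 2.5f;         // the same variable now holds a float
  std::cout << boost::get<float>(v) << "\n";  // prints 2.5
  return 0;
}
```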
`DDim` plays two kinds of roles in Majel. First, it is used to indicate the size of a tensor. For example, we can construct a new `DArray` in the following way:
```c++
DArray arr = make_darray(make_ddim({2,3}), 0.0f);
```
This means `arr` will be a two-dimensional tensor, i.e. a matrix. The size of its first dimension is 2 and of the second is 3. All elements of `arr` are initialized to 0.0.

The second meaning of `DDim` is as a tensor index. For example, if we want to access the value in the 1st row and 2nd column of `arr` and set it to 1.0, we can do it like this:
```c++
arr[make_ddim({0, 1})] = 1.0;
```
## Implement Tensor in Paddle

Before writing code, please make sure you have already looked through the Majel source code and grasped the design philosophy of `DArray` in Majel.
...@@ -113,7 +126,7 @@ To assign subtasks to our colleagues, we have to discuss how to divide it to ind
- [ ] 3. Re-implement `Dim`.
`Dim` is an excellent implementation in Majel.
> ???
......
-file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
+cc_test(place_test
+    SRCS place_test.cc
+    DEPS majel)

-add_executable(majel_tests ${ALL_TEST_FILES})
-add_dependencies(majel_tests majel)
-target_link_libraries(majel_tests
-    ${Boost_LIBRARIES}
-    ${GTEST_LIBRARIES}
-    ${GTEST_MAIN_LIBRARIES}
-    majel
-)
-add_test(majel_tests majel_tests)
+if(WITH_GPU)
+    nv_test(cuda_test SRCS cuda_test.cu)
+endif()
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x) \
if (x != cudaSuccess) { \
fprintf(stderr, \
"%s in %s at line %d\n", \
cudaGetErrorString(err), \
__FILE__, \
__LINE__); \
exit(-1); \
}
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
d_C[i] = d_A[i] + d_B[i];
}
}
TEST(Cuda, Equality) {
int n = 10;
// Memory allocation for h_A, h_B and h_C (in the host)
float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
float h_C[10];
float *d_A, *d_B, *d_C;
cudaError_t err;
// Memory allocation for d_A, d_B and d_C (in the device)
err = cudaMalloc((void **)&d_A, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_B, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_C, sizeof(float) * n);
CHECK_ERR(err);
// Copying memory to device
err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
// Calling the kernel
vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
// Copying results back to host
err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
CHECK_ERR(err);
EXPECT_EQ(h_C[0], 1.0);
for (int i = 1; i < n - 1; ++i) {
EXPECT_EQ(h_C[i], 11.0);
}
EXPECT_EQ(h_C[9], 1.0);
}
...@@ -60,7 +60,6 @@ function deploy_docs() {
deploy_docs "master" "."
deploy_docs "develop" "./develop/"
-deploy_docs "release/0.10.0" "./release/0.10.0/"
# Check whether anything has changed.
set +e
......
...@@ -19,19 +19,22 @@ limitations under the License. */
/// for MSVC
#define CPUID(info, x) __cpuidex(info, x, 0)

-#elif !defined(__ANDROID__)
+#else

+#if !defined(__arm__)
#include <cpuid.h>

/// for GCC/Clang
#define CPUID(info, x) __cpuid_count(x, 0, info[0], info[1], info[2], info[3])
+#endif

#endif

namespace paddle {

SIMDFlags::SIMDFlags() {
-#if !defined(__ANDROID__)
+#if defined(__arm__)
+    simd_flags_ = SIMD_NEON;
+#else
    unsigned int cpuInfo[4];
    // CPUID: https://en.wikipedia.org/wiki/CPUID
    // clang-format off
...@@ -52,8 +55,6 @@ SIMDFlags::SIMDFlags() {
    CPUID(cpuInfo, 0x80000001);
    simd_flags_ |= cpuInfo[2] & (1 << 16) ? SIMD_FMA4 : SIMD_NONE;
    // clang-format on
-#else
-    simd_flags_ = SIMD_NEON;
#endif
}
......
...@@ -2320,6 +2320,9 @@ def Memory(name,
        memory_name = name + "+delay1"
    agent_name = memory_name
    if is_sequence:
+        config_assert(
+            boot_layer is not None,
+            "there must be boot_layer in network when is_sequence = True")
        agent_layer = SequenceAgentLayer(agent_name, size)
    else:
        agent_layer = AgentLayer(agent_name, size)
......