提交 15318aeb 编写于 作者: Y Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/clean_sparse_row_matrix

......@@ -9,6 +9,8 @@ build/
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
third_party/
[submodule "warp-ctc"]
path = warp-ctc
url = https://github.com/baidu-research/warp-ctc.git
......@@ -2,7 +2,7 @@
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
files: (?!.*warp-ctc)^.*$
files: (?!.*third_party)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
......@@ -15,7 +15,7 @@
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*warp-ctc)^.*$
files: (?!.*third_party)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
......
language: cpp
cache: ccache
cache:
directories:
- $HOME/third_party
- $HOME/.ccache
- $HOME/.cache/pip
sudo: required
dist: trusty
os:
......@@ -21,28 +25,21 @@ addons:
packages:
- gcc-4.8
- g++-4.8
- wget
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev
- m4
- libprotobuf-dev
- doxygen
- protobuf-compiler
- python-protobuf
- python-numpy
- python-wheel
- libgoogle-glog-dev
- libgflags-dev
- libgtest-dev
- curl
- lcov
- graphviz
- swig
- graphviz
- clang-format-3.8
- automake
- libtool
- ccache
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
......@@ -55,10 +52,9 @@ before_install:
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
- pip install numpy wheel protobuf sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
script:
- paddle/scripts/travis/main.sh
notifications:
......
cmake_minimum_required(VERSION 2.8)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.0)
project(paddle CXX C)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package)
find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
# Check protobuf library version.
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
OUTPUT_VARIABLE PROTOBUF_VERSION)
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
set(PROTOBUF_3 OFF)
if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
set(PROTOBUF_3 ON)
endif()
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(ZLIB REQUIRED)
find_package(NumPy REQUIRED)
find_package(Threads REQUIRED)
find_package(AVX QUIET)
find_package(Glog)
find_package(Gflags QUIET)
find_package(GTest)
find_package(Sphinx)
find_package(Doxygen)
include(cblas)
find_program(M4_EXECUTABLE m4)
###################### Configurations ###########################
option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON)
option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND})
option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
include(cpplint)
include(ccache)
if(WITH_RDMA)
include(rdma)
endif()
include(util)
include(flags)
include(cudnn)
include(FindPythonModule)
include(check_packages)
include(swig)
include(coveralls)
# Set PaddlePaddle version to Git tag name or Git commit ID.
find_package(CUDA QUIET)
find_package(Git REQUIRED)
# version.cmake will get the current PADDLE_VERSION
include(version)
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE)
set(ACCURACY double)
else(WITH_DOUBLE)
set(ACCURACY float)
endif(WITH_DOUBLE)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
if(WITH_PYTHON)
include_directories(${PYTHON_INCLUDE_DIR})
include_directories(${PYTHON_NUMPY_INCLUDE_DIR})
else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON)
if(WITH_RDMA)
include_directories("${RDMA_INC_DIR}")
else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
include_directories(${LIBGLOG_INCLUDE_DIR})
endif()
find_package(Threads REQUIRED)
if(WITH_GFLAGS)
add_definitions(-DPADDLE_USE_GFLAGS)
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
include(system)
include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
if(WITH_TESTING)
enable_testing()
include_directories(${GTEST_INCLUDE_DIRS})
endif()
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
########################################################################################
include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog
include(external/gtest) # download, build, install gtest
include(external/protobuf) # download, build, install protobuf
include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(rdma) # set rdma libraries
include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(configure) # add paddle env configuration
include_directories("${CBLAS_INC_DIR}")
include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories(${PROTOBUF_INCLUDE_DIRS})
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
if(EXISTS "${PROJ_ROOT}/paddle/internals/CMakeLists.txt")
set(PADDLE_WITH_INTERNAL ON)
include(paddle/internals/CMakeLists.txt)
else()
set(PADDLE_WITH_INTERNAL OFF)
set(INTERNAL_PROTO_PATH "")
endif()
set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES}
)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
add_subdirectory(doc_cn)
endif()
./doc/howto/dev/contribute_to_paddle_en.md
# External dependency to Google protobuf.
http_archive(
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix="protobuf-3.1.0", )
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
new_http_archive(
name="gtest",
url="https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file="third_party/gtest.BUILD",
strip_prefix="googletest-release-1.7.0", )
......@@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
)
set_property(
......@@ -143,4 +144,4 @@ function( Sphinx_add_targets target_base_name conf source base_destination )
add_dependencies( ${target_base_name}_linkcheck ${_dependencies} )
endif()
endfunction()
\ No newline at end of file
endfunction()
......@@ -13,6 +13,7 @@
# system paths.
#
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
......@@ -35,11 +36,12 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_LIBS ${MKL_INTEL_LP64}
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return() # return file.
endif()
......@@ -68,9 +70,10 @@ find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return()
endif()
......@@ -98,8 +101,9 @@ find_library(OPENBLAS_LIB NAMES openblas
if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return()
endif()
......@@ -130,9 +134,7 @@ find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
set(CBLAS_LIBS ${REFERENCE_CBLAS_LIBRARY})
return()
set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY})
message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
set(CBLAS_FOUND ON)
endif()
message(FATAL_ERROR "CBlas must be set. Paddle support MKL, ATLAS, OpenBlas, reference-cblas."
" Try set MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT or REFERENCE_CBLAS_ROOT.")
# Check package for each cmake option
if(WITH_GPU)
find_package(CUDA REQUIRED) # CUDA is required when use gpu
endif()
if(WITH_PYTHON)
find_package(PythonLibs 2.6 REQUIRED)
find_package(PythonInterp REQUIRED)
find_package(NumPy REQUIRED)
endif()
if(WITH_STYLE_CHECK)
find_package(PythonInterp REQUIRED)
endif()
if(WITH_GLOG)
find_package(Glog REQUIRED)
endif()
if(WITH_GFLAGS)
find_package(Gflags REQUIRED)
endif()
if(WITH_TESTING)
find_package(GTest REQUIRED)
endif()
if(WITH_DOC)
find_package(Sphinx REQUIRED)
find_package(Doxygen REQUIRED)
find_python_module(recommonmark REQUIRED)
endif()
if(WITH_SWIG_PY)
if(NOT SWIG_FOUND)
message(FATAL_ERROR "SWIG is not found. Please install swig or disable WITH_SWIG_PY")
endif()
find_python_module(wheel REQUIRED) # package wheel
endif()
if(NOT M4_EXECUTABLE)
message(FATAL_ERROR "Paddle need m4 to generate proto file.")
endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(NOT WITH_PYTHON)
if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE)
endif(WITH_DOUBLE)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
......@@ -53,7 +53,7 @@ macro(add_style_check_target TARGET_NAME)
if(LINT MATCHES ON)
add_custom_command(TARGET ${TARGET_NAME}
PRE_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}" ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
endif()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags)
SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
IF(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ELSE(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
ExternalProject_Add(
gflags
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_TESTING=OFF
)
LIST(APPEND external_project_dependencies gflags)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(GLOG_SOURCES_DIR ${THIRD_PARTY_PATH}/glog)
SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog)
SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE)
IF(WIN32)
SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.lib" CACHE FILEPATH "glog library." FORCE)
ELSE(WIN32)
SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=OFF
CMAKE_ARGS -DBUILD_TESTING=OFF
)
LIST(APPEND external_project_dependencies glog)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(WITH_TESTING)
ENABLE_TESTING()
INCLUDE(ExternalProject)
SET(GTEST_SOURCES_DIR ${THIRD_PARTY_PATH}/gtest)
SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest)
SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE)
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR})
IF(WIN32)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE)
ELSE(WIN32)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
gtest
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/google/googletest.git"
GIT_TAG "release-1.8.0"
PREFIX ${GTEST_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_GMOCK=ON
CMAKE_ARGS -Dgtest_disable_pthreads=ON
CMAKE_ARGS -Dgtest_force_shared_crt=ON
)
LIST(APPEND external_project_dependencies gtest)
ENDIF(WITH_TESTING)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
IF(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE)
ELSE(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
)
LIST(APPEND external_project_dependencies openblas)
ENDIF()
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
IF(${HOST_SYSTEM} STREQUAL "centos")
SET(LIB "lib64")
ELSE()
SET(LIB "lib")
ENDIF()
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
)
LIST(APPEND external_project_dependencies protobuf)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
INCLUDE(python_module)
FIND_PACKAGE(PythonInterp 2.7)
FIND_PACKAGE(PythonLibs 2.7)
SET(py_env "")
IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
find_python_module(pip REQUIRED)
find_python_module(numpy REQUIRED)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
SET(_python_DIR ${PYTHON_INSTALL_DIR})
IF(UNIX)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSEIF(WIN32)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSE()
MESSAGE(FATAL_ERROR "Unknown system !")
ENDIF()
IF(APPLE)
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS
-DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON
)
ENDIF()
SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS)
# Force Python build to "Release".
IF(CMAKE_CONFIGURATION_TYPES)
SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR})
SET(CMAKE_CFG_INTDIR "Release")
ELSE()
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
)
ENDIF()
ExternalProject_Add(python
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git"
PREFIX ${PYTHON_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DPYTHON_VERSION=2.7.12
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR}
-DBUILD_LIBPYTHON_SHARED:BOOL=OFF
-DUSE_SYSTEM_LIBRARIES:BOOL=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR}
-DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES}
-DDOWNLOAD_SOURCES:BOOL=ON
-DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS}
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS}
DEPENDS zlib
)
SET(py_env
PATH=${PYTHON_INSTALL_DIR}/bin
PYTHONHOME=${PYTHON_INSTALL_DIR}
PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH})
####################################################################################
##################################### SETUPTOOLS ###################################
SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools)
ExternalProject_Add(setuptools
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SETUPTOOLS_SOURCES_DIR}
URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz"
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python zlib
)
#####################################################################################
##################################### SIX ###########################################
SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six)
ExternalProject_Add(six
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SIX_SOURCES_DIR}
URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools
)
#####################################################################################
##################################### CYTHON ########################################
SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython)
ExternalProject_Add(cython
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${CYTHON_SOURCES_DIR}
URL https://github.com/cython/cython/archive/0.25.2.tar.gz
GIT_TAG 0.25.2
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
PATCH_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python
)
####################################################################################
##################################### NUMPY ########################################
SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy)
SET(NUMPY_TAG_VERSION "v1.11.3")
SET(NUMPY_VERSION "1.11.3")
SET(EGG_NAME "")
SET(PYTHON_NUMPY_INCLUDE_DIR "")
IF(WIN32)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg")
ELSE(WIN32)
IF(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}")
ELSE(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
ENDIF(APPLE)
FOREACH(suffix x86_64 intel fat64 fat32 universal)
LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include)
ENDFOREACH()
ENDIF(WIN32)
ExternalProject_Add(numpy
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/numpy/numpy.git
GIT_TAG ${NUMPY_TAG_VERSION}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
PREFIX ${NUMPY_SOURCES_DIR}
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools cython
)
####################################################################################
##################################### WHEEL ########################################
SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel)
ExternalProject_Add(wheel
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz
PREFIX ${WHEEL_SOURCES_DIR}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools
)
####################################################################################
################################### PROTOBUF #######################################
SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf)
ExternalProject_Add(python-protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz
URL_MD5 38b5fb160c768d2f8444d0c6d637ff91
PREFIX ${PY_PROTOBUF_SOURCES_DIR}
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools six
)
####################################################################################
LIST(APPEND external_project_dependencies python setuptools six cython wheel python-protobuf numpy)
ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}")
MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}")
MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FIND_PACKAGE(SWIG)
IF(NOT SWIG_FOUND)
# build swig as an external project
INCLUDE(ExternalProject)
SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
SET(SWIG_TARGET_VERSION "3.0.2")
SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
IF(WIN32)
# swig.exe available as pre-built binary on Windows:
ExternalProject_Add(swig
URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5}
SOURCE_DIR ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# From PCRE configure
ExternalProject_Add(pcre
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/svn2github/pcre.git
PREFIX ${SWIG_SOURCES_DIR}/pcre
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SWIG_INSTALL_DIR}/pcre
)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
# From SWIG configure
ExternalProject_Add(swig
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && ./autogen.sh
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig &&
env "PCRE_LIBS=${SWIG_INSTALL_DIR}/pcre/lib/libpcre.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcrecpp.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcreposix.a"
./configure
--prefix=${SWIG_INSTALL_DIR}
--with-pcre-prefix=${SWIG_INSTALL_DIR}/pcre
BUILD_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make
INSTALL_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make install
UPDATE_COMMAND ""
DEPENDS pcre
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
ENDIF(WIN32)
LIST(APPEND external_project_dependencies swig)
ENDIF(NOT SWIG_FOUND)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR})
SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE)
IF(WIN32)
SET(WARPCTC_LIBRARIES
"${WARPCTC_INSTALL_DIR}/lib/warpctc.dll" CACHE FILEPATH "Warp-ctc Library" FORCE)
ELSE(WIN32)
IF(APPLE)
SET(_warpctc_SHARED_SUFFIX dylib)
ELSE(APPLE)
SET(_warpctc_SHARED_SUFFIX so)
ENDIF(APPLE)
SET(WARPCTC_LIBRARIES
"${WARPCTC_INSTALL_DIR}/lib/libwarpctc.${_warpctc_SHARED_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE)
ENDIF(WIN32)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" )
SET(USE_OMP OFF)
ELSE()
SET(USE_OMP ON)
ENDIF()
ExternalProject_Add(
warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DBUILD_SHARED=ON
)
LIST(APPEND external_project_dependencies warpctc)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(ZLIB_SOURCES_DIR ${THIRD_PARTY_PATH}/zlib)
SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib)
SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE)
SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." FORCE)
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
ExternalProject_Add(
zlib
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/madler/zlib.git"
GIT_TAG "v1.2.8"
PREFIX ${ZLIB_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON
)
LIST(APPEND external_project_dependencies zlib)
......@@ -3,12 +3,6 @@ include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
function(CheckCompilerCXX11Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
......
......@@ -26,5 +26,18 @@ function(find_python_module module)
if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
message(FATAL_ERROR "python module ${module} is not found")
endif()
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import sys, ${module}; sys.stdout.write(${module}.__version__)"
OUTPUT_VARIABLE _${module}_version
RESULT_VARIABLE _${module}_status
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
"Version of Python module ${module}")
endif(NOT _${module}_status)
set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
endfunction(find_python_module)
......@@ -5,72 +5,76 @@
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# we use static output in svn repositories to avoid implict bugs from not standard runtime env.
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
if(WITH_RDMA)
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
return()
endif()
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
include_directories("${RDMA_INC_DIR}")
else()
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
endif()
else(WITH_RDMA)
set(RDMA_LIBS "")
set(RDMA_LD_FLAGS "")
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py)
endfunction(generate_python_api)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
IF(APPLE)
EXEC_PROGRAM (sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" VERSION "${MACOSX_VERSION}")
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ELSEIF(LINUX_ISSUE MATCHES "Debian")
SET(HOST_SYSTEM "debian")
ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
SET(HOST_SYSTEM "ubuntu")
ENDIF()
ENDIF(EXISTS "/etc/issue")
ENDIF(APPLE)
ENDIF(WIN32)
# query number of logical cores
CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
)
......@@ -24,7 +24,7 @@ function(target_circle_link_libraries TARGET_NAME)
list(APPEND libsInArgn ${arg})
endif()
endforeach()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
list(APPEND LIBS "-undefined dynamic_lookup")
endif()
list(REVERSE libsInArgn)
......@@ -65,7 +65,7 @@ endmacro()
# link_paddle_exe
# add paddle library for a paddle executable, such as trainer, pserver.
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
# It will handle WITH_PYTHON etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
......@@ -81,21 +81,10 @@ function(link_paddle_exe TARGET_NAME)
set(METRIC_LIBS "")
endif()
if(PADDLE_WITH_INTERNAL)
set(INTERAL_LIBS paddle_internal_gserver paddle_internal_parameter)
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_internal_gserver
paddle_internal_owlqn
ARCHIVE_END
paddle_internal_parameter)
else()
set(INTERAL_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
${METRIC_LIBS}
ARCHIVE_END
paddle_pserver
......@@ -107,39 +96,23 @@ function(link_paddle_exe TARGET_NAME)
paddle_proto
paddle_cuda
${METRIC_LIBS}
${PROTOBUF_LIBRARY}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${ZLIB_LIBRARIES}
${INTERAL_LIBS}
${CMAKE_DL_LIBS})
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(WITH_RDMA)
target_link_libraries(${TARGET_NAME}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES})
endif()
if(WITH_GLOG)
target_link_libraries(${TARGET_NAME}
${LIBGLOG_LIBRARY})
endif()
if(WITH_GFLAGS)
target_link_libraries(${TARGET_NAME}
${GFLAGS_LIBRARIES})
${PYTHON_LIBRARIES} util)
endif()
if(WITH_GPU)
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY})
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
......@@ -149,10 +122,7 @@ function(link_paddle_exe TARGET_NAME)
endif()
endif()
if(NOT WITH_DSO)
target_link_libraries(${TARGET_NAME}
${WARPCTC_LIBRARY})
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()
# link_paddle_test
......@@ -161,8 +131,10 @@ endfunction()
# Rest Arguemnts: not used.
function(link_paddle_test TARGET_NAME)
link_paddle_exe(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} ${GTEST_MAIN_LIBRARIES}
${GTEST_LIBRARIES})
target_link_libraries(${TARGET_NAME}
paddle_test_main
paddle_test_util
${GTEST_LIBRARIES})
endfunction()
# add_unittest_without_exec
......@@ -206,5 +178,5 @@ function(create_resources res_file output)
# Convert hex data for C compatibility
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
# Append data to output file
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
endfunction()
......@@ -21,4 +21,5 @@ while ("${PADDLE_VERSION}" STREQUAL "")
endif()
endwhile()
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
message(STATUS "Paddle version is ${PADDLE_VERSION}")
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
文件模式从 100644 更改为 100755
......@@ -87,9 +87,9 @@ def conv_bn(input,
print(imgSize, output_x, stride, filter_size, padding)
if trans:
nameApx = "_conv"
else:
nameApx = "_convt"
else:
nameApx = "_conv"
if bn:
conv = img_conv_layer(
......
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
......@@ -21,7 +21,7 @@ from paddle.trainer.PyDataProvider2 import *
#
# {'img_size': 32,
# 'settings': <paddle.trainer.PyDataProviderWrapper.Cls instance at 0x7fea27cb6050>,
# 'settings': a global object,
# 'color': True,
# 'mean_img_size': 32,
# 'meta': './data/cifar-out/batches/batches.meta',
......@@ -50,10 +50,10 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
integer_value(settings.num_classes)
] # labels
settings.input_types = {
'image': dense_vector(settings.img_raw_size),
'label': integer_value(settings.num_classes)
}
settings.logger.info('DataProvider Initialization finished')
......@@ -83,4 +83,7 @@ def processData(settings, file_list):
img, settings.img_mean, settings.img_size,
settings.is_train, settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
yield {
'image': img_feat.astype('float32'),
'label': int(label)
}
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
dataprovider.pyc
empty.list
train.log
output
train.list
......@@ -17,8 +17,10 @@ import random
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
@provider(
input_types={'x': dense_vector(1),
'y': dense_vector(1)}, use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2 * x + 0.3]
yield {'x': [x], 'y': [2 * x + 0.3]}
......@@ -19,3 +19,4 @@ paddle train \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
......@@ -15,11 +15,8 @@
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f:
f.writelines(' ')
define_py_data_sources2(
train_list=data_file,
train_list=['no_matter.txt'],
test_list=None,
module='dataprovider',
obj='process',
......
......@@ -4,3 +4,4 @@ mnist_vgg_model
plot.png
train.log
*pyc
.ipynb_checkpoints
"""
A very basic example for how to use current Raw SWIG API to train mnist network.
Current implementation uses Raw SWIG, which means the API call is directly \
passed to C++ side of Paddle.
The user api could be simpler and carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *
def optimizer_config():
settings(
learning_rate=1e-4,
learning_method=AdamOptimizer(),
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)
def init_parameter(network):
assert isinstance(network, api.GradientMachine)
for each_param in network.getParameters():
assert isinstance(each_param, api.Parameter)
array_size = len(each_param)
array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
def generator_to_batch(generator, batch_size):
ret_val = list()
for each_item in generator:
ret_val.append(each_item)
if len(ret_val) == batch_size:
yield ret_val
ret_val = list()
if len(ret_val) != 0:
yield ret_val
class BatchPool(object):
def __init__(self, generator, batch_size):
self.data = list(generator)
self.batch_size = batch_size
def __call__(self):
random.shuffle(self.data)
for offset in xrange(0, len(self.data), self.batch_size):
limit = min(offset + self.batch_size, len(self.data))
yield self.data[offset:limit]
def input_order_converter(generator):
for each_item in generator:
yield each_item['pixel'], each_item['label']
def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
# get enable_types for each optimizer.
# enable_types = [value, gradient, momentum, etc]
# For each optimizer(SGD, Adam), GradientMachine should enable different
# buffers.
opt_config_proto = parse_optimizer_config(optimizer_config)
opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
enable_types = _temp_optimizer_.getParameterTypes()
# Create Simple Gradient Machine.
model_config = parse_network_config(network_config)
m = api.GradientMachine.createFromConfigProto(
model_config, api.CREATE_MODE_NORMAL, enable_types)
# This type check is not useful. Only enable type hint in IDE.
# Such as PyCharm
assert isinstance(m, api.GradientMachine)
# Initialize Parameter by numpy.
init_parameter(network=m)
# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = api.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, api.ParameterUpdater)
# Initialize ParameterUpdater.
updater.init(m)
# DataProvider Converter is a utility convert Python Object to Paddle C++
# Input. The input format is as same as Paddle's DataProvider.
converter = DataProviderConverter(
input_types=[dp.dense_vector(784), dp.integer_value(10)])
train_file = './data/raw_data/train'
test_file = './data/raw_data/t10k'
# start gradient machine.
# the gradient machine must be started before invoke forward/backward.
# not just for training, but also for inference.
m.start()
# evaluator can print error rate, etc. It is a C++ class.
batch_evaluator = m.makeEvaluator()
test_evaluator = m.makeEvaluator()
# Get Train Data.
# TrainData will stored in a data pool. Currently implementation is not care
# about memory, speed. Just a very naive implementation.
train_data_generator = input_order_converter(read_from_mnist(train_file))
train_data = BatchPool(train_data_generator, 512)
# outArgs is Neural Network forward result. Here is not useful, just passed
# to gradient_machine.forward
outArgs = api.Arguments.createArguments(0)
for pass_id in xrange(2): # we train 2 passes.
updater.startPass()
for batch_id, data_batch in enumerate(train_data()):
# data_batch is input images.
# here, for online learning, we could get data_batch from network.
# Start update one batch.
pass_type = updater.startBatch(len(data_batch))
# Start BatchEvaluator.
# batch_evaluator can be used between start/finish.
batch_evaluator.start()
# forwardBackward is a shortcut for forward and backward.
# It is sometimes faster than invoke forward/backward separately,
# because in GradientMachine, it may be async.
m.forwardBackward(converter(data_batch), outArgs, pass_type)
for each_param in m.getParameters():
updater.update(each_param)
# Get cost. We use numpy to calculate total cost for this batch.
cost_vec = outArgs.getSlotValue(0)
cost_vec = cost_vec.copyToNumpyMat()
cost = cost_vec.sum() / len(data_batch)
# Make evaluator works.
m.eval(batch_evaluator)
# Print logs.
print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
cost, batch_evaluator
batch_evaluator.finish()
# Finish batch.
# * will clear gradient.
# * ensure all values should be updated.
updater.finishBatch(cost)
# testing stage. use test data set to test current network.
updater.apply()
test_evaluator.start()
test_data_generator = input_order_converter(read_from_mnist(test_file))
for data_batch in generator_to_batch(test_data_generator, 512):
# in testing stage, only forward is needed.
m.forward(converter(data_batch), outArgs, api.PASS_TEST)
m.eval(test_evaluator)
# print error rate for test data set
print 'Pass', pass_id, ' test evaluator: ', test_evaluator
test_evaluator.finish()
updater.restore()
updater.catchUpWith()
params = m.getParameters()
for each_param in params:
assert isinstance(each_param, api.Parameter)
value = each_param.getBuf(api.PARAMETER_VALUE)
value = value.copyToNumpyArray()
# Here, we could save parameter to every where you want
print each_param.getName(), value
updater.finishPass()
m.finish()
if __name__ == '__main__':
main()
from paddle.trainer.PyDataProvider2 import *
from mnist_util import read_from_mnist
# Define a py data provider
@provider(
input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)})
'label': integer_value(10)},
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
f = open(imgf, "rb")
l = open(labelf, "rb")
f.read(16)
l.read(8)
# Define number of samples for train/test
if "train" in filename:
n = 60000
else:
n = 10000
for i in range(n):
label = ord(l.read(1))
pixels = []
for j in range(28 * 28):
pixels.append(float(ord(f.read(1))) / 255.0)
yield {"pixel": pixels, 'label': label}
f.close()
l.close()
for each in read_from_mnist(filename):
yield each
import numpy
__all__ = ['read_from_mnist']
def read_from_mnist(filename):
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
f = open(imgf, "rb")
l = open(labelf, "rb")
f.read(16)
l.read(8)
# Define number of samples for train/test
if "train" in filename:
n = 60000
else:
n = 10000
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield {"pixel": images[i, :], 'label': labels[i]}
f.close()
l.close()
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=100 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
......@@ -14,9 +14,19 @@
# limitations under the License.
set -e
set -x
BASE_URL='http://paddlepaddle.cdn.bcebos.com/model_zoo/embedding'
# download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file
DOWNLOAD_ITEMS=(baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb)
ITEM_MD5=(fa03a12321eaab6c30a8fcc9442eaea3
f88c8325ee6da6187f1080e8fe66c1cd
927cf70f27f860aff1a5703ebf7f1584
a52e43655cd25d279777ed509a1ae27b
b92c67fe9ff70fea53596080e351ac80)
for ((i=0; i<${#ITEM_MD5[@]}; i++))
do
FILENAME=${DOWNLOAD_ITEMS[${i}]}
REAL_MD5=`wget ${BASE_URL}/${FILENAME} -O - | tee ${FILENAME} | md5sum | cut -d ' ' -f 1`
EXPECTED_MD5=${ITEM_MD5[${i}]}
[ "${EXPECTED_MD5}" = "${REAL_MD5}" ]
done
......@@ -8,6 +8,8 @@ data/test.list
data/test.txt
data/train.list
data/train.txt
data/pred.list
data/pred.txt
dataprovider_copy_1.py
train.log
output
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, sys
import numpy as np
from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import sparse_binary_vector
from paddle.trainer.config_parser import parse_config
"""
Usage: run following command to show help message.
python api_predict.py -h
"""
class QuickStartPrediction():
def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
model_dir: directory of model.
"""
self.train_conf = train_conf
self.dict_file = dict_file
self.word_dict = {}
self.dict_dim = self.load_dict()
self.model_dir = model_dir
if model_dir is None:
self.model_dir = os.path.dirname(train_conf)
self.label = None
if label_file is not None:
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [sparse_binary_vector(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""
Load dictionary from self.dict_file.
"""
for line_count, line in enumerate(open(self.dict_file, 'r')):
self.word_dict[line.strip().split('\t')[0]] = line_count
return len(self.word_dict)
def load_label(self, label_file):
"""
Load label.
"""
self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
def get_index(self, data):
"""
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
input = self.converter(data_batch)
output = self.network.forwardTest(input)
prob = output[0]["id"].tolist()
print("predicting labels is:")
print prob
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-n",
"--tconf",
action="store",
dest="train_conf",
help="network config")
parser.add_option(
"-d",
"--dict",
action="store",
dest="dict_file",
help="dictionary file")
parser.add_option(
"-b",
"--label",
action="store",
dest="label",
default=None,
help="dictionary file")
parser.add_option(
"-c",
"--batch_size",
type="int",
action="store",
dest="batch_size",
default=1,
help="the batch size for prediction")
parser.add_option(
"-w",
"--model",
action="store",
dest="model_path",
default=None,
help="model path")
return parser.parse_args()
def main():
options, args = option_parser()
train_conf = options.train_conf
batch_size = options.batch_size
dict_file = options.dict_file
model_path = options.model_path
label = options.label
swig_paddle.initPaddle("--use_gpu=0")
predict = QuickStartPrediction(train_conf, dict_file, model_path, label)
batch = []
labels = []
for line in sys.stdin:
[label, text] = line.split("\t")
labels.append(int(label))
batch.append([predict.get_index(text)])
print("labels is:")
print labels
predict.batch_predict(batch)
if __name__ == '__main__':
main()
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
#Note the default model is pass-00002, you shold make sure the model path
#exists or change the mode path.
#only test on trainer_config.lr.py
model=output/model/pass-00001/
config=trainer_config.lr.py
label=data/labels.list
dict=data/dict.txt
batch_size=20
head -n$batch_size data/test.txt | python api_predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=$dict \
--batch_size=$batch_size
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Should run pserver.sh before run this script.
bin_dir=$(cd `dirname $0`; pwd)
home_dir=$(cd "${bin_dir}/.."; pwd)
source "$bin_dir/env.sh"
model_dir="$bin_dir/output"
log_file="$bin_dir/train.log"
pushd "$home_dir"
cfg=trainer_config.lr.py
paddle train \
--start_pserver=false \
--config=$cfg \
--save_dir=${model_dir} \
--trainer_count=4 \
--local=0 \
--log_period=100 \
--num_passes=15 \
--use_gpu=false \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
--num_gradient_servers=1 \
--nics=`get_nics` \
--port=7164 \
--ports_num=1 \
--pservers="127.0.0.1" \
--comment="paddle_trainer" \
2>&1 | tee "$log_file"
popd
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function get_nics() {
machine=`uname -s`
local nics=""
if [ "$machine" == "Linux" ]; then
nics="lo"
elif [ "$machine" == "Darwin" ]; then
nics="lo0"
else
nics="unsupport"
fi
echo $nics
}
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
bin_dir=$(cd `dirname $0`; pwd)
source "$bin_dir/env.sh"
paddle pserver \
--nics=`get_nics` \
--port=7164 \
--ports_num=1 \
--ports_num_for_sparse=1 \
--num_gradient_servers=1 \
--comment="paddle_pserver" \
2>&1 | tee 'pserver.log'
......@@ -31,16 +31,16 @@ def initializer(settings, dictionary, **kwargs):
# setting.input_types specifies what the data types the data provider
# generates.
settings.input_types = [
settings.input_types = {
# The first input is a sparse_binary_vector,
# which means each dimension of the vector is either 0 or 1. It is the
# bag-of-words (BOW) representation of the texts.
sparse_binary_vector(len(dictionary)),
'word': sparse_binary_vector(len(dictionary)),
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
integer_value(2)
]
'label': integer_value(2)
}
# Delaring a data provider. It has an initializer 'data_initialzer'.
......@@ -67,12 +67,12 @@ def process(settings, file_name):
# Return the features for the current comment. The first is a list
# of ids representing a 0-1 binary sparse vector of the text,
# the second is the integer id of the label.
yield word_vector, int(label)
yield {'word': word_vector, 'label': int(label)}
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [sparse_binary_vector(len(dictionary))]
settings.input_types = {'word': sparse_binary_vector(len(dictionary))}
# Declaring a data provider for prediction. The difference with process
......@@ -83,4 +83,4 @@ def process_predict(settings, file_name):
for line in f:
comment = line.strip().split()
word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment]
yield word_vector
yield {'word': word_vector}
......@@ -19,13 +19,13 @@ UNK_IDX = 0
def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
settings.input_types = {
# Define the type of the first input as sequence of integer.
# The value of the integers range from 0 to len(dictrionary)-1
integer_value_sequence(len(dictionary)),
'word': integer_value_sequence(len(dictionary)),
# Define the second input for label id
integer_value(2)
]
'label': integer_value(2)
}
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
......@@ -35,15 +35,12 @@ def process(settings, file_name):
label, comment = line.strip().split('\t')
words = comment.split()
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
yield {'word': word_slot, 'label': int(label)}
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
integer_value(
len(dictionary), seq_type=SequenceType.SEQUENCE)
]
settings.input_types = {'word': integer_value_sequence(len(dictionary))}
@provider(init_hook=predict_initializer, should_shuffle=False)
......@@ -52,4 +49,4 @@ def process_predict(settings, file_name):
for line in f:
comment = line.strip().split()
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment]
yield word_slot
yield {'word': word_slot}
......@@ -26,5 +26,7 @@ paddle train \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=. \
2>&1 | tee 'predict.log'
paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
mv rank-00000 result.txt
......@@ -31,3 +31,4 @@ paddle train \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
......@@ -17,13 +17,14 @@ from paddle.trainer.PyDataProvider2 import *
def meta_to_header(meta, name):
metas = meta[name]['__meta__']['raw_meta']
for each_meta in metas:
slot_name = each_meta.get('name', '%s_id' % name)
if each_meta['type'] == 'id':
yield integer_value(each_meta['max'])
yield slot_name, integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
yield integer_value(
yield slot_name, integer_value(
len(each_meta['dict']),
seq_type=SequenceType.SEQUENCE
if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict']))
yield slot_name, dense_vector(len(each_meta['dict']))
......@@ -16,6 +16,14 @@ from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
def __list_to_map__(lst):
ret_val = dict()
for each in lst:
k, v = each
ret_val[k] = v
return ret_val
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
......@@ -34,12 +42,16 @@ def hook(settings, meta, **kwargs):
# second part is user features.
# final part is rating score.
# header is a list of [USE_SEQ_OR_NOT?, SlotType]
headers = list(common_utils.meta_to_header(meta, 'movie'))
headers.extend(list(common_utils.meta_to_header(meta, 'user')))
headers.append(dense_vector(1)) # Score
movie_headers = list(common_utils.meta_to_header(meta, 'movie'))
settings.movie_names = [h[0] for h in movie_headers]
headers = movie_headers
user_headers = list(common_utils.meta_to_header(meta, 'user'))
settings.user_names = [h[0] for h in user_headers]
headers.extend(user_headers)
headers.append(("rating", dense_vector(1))) # Score
# slot types.
settings.input_types = headers
settings.input_types = __list_to_map__(headers)
settings.meta = meta
......@@ -57,20 +69,20 @@ def process(settings, filename):
movie_meta = settings.meta['movie'][movie_id]
user_meta = settings.meta['user'][user_id]
outputs = [movie_id - 1]
outputs = [('movie_id', movie_id - 1)]
# Then add movie features
for each_meta in movie_meta:
outputs.append(each_meta)
for i, each_meta in enumerate(movie_meta):
outputs.append((settings.movie_names[i + 1], each_meta))
# Then add user id.
outputs.append(user_id - 1)
outputs.append(('user_id', user_id - 1))
# Then add user features.
for each_meta in user_meta:
outputs.append(each_meta)
for i, each_meta in enumerate(user_meta):
outputs.append((settings.user_names[i + 1], each_meta))
# Finally, add score
outputs.append([score])
outputs.append(('rating', [score]))
# Return data to paddle
yield outputs
yield __list_to_map__(outputs)
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import math
def get_best_pass(log_filename):
with open(log_filename, 'r') as f:
text = f.read()
pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
sorted_results = sorted(results, key=lambda result: float(result[0]))
return sorted_results[0]
log_filename = sys.argv[1]
log = get_best_pass(log_filename)
predict_error = math.sqrt(float(log[0])) / 2
print 'Best pass is %s, error is %s, which means predict get error as %f' % (
log[1], log[0], predict_error)
evaluate_pass = "output/pass-%s" % log[1]
print "evaluating from pass %s" % evaluate_pass
......@@ -34,8 +34,8 @@ if __name__ == '__main__':
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
meta = pickle.load(f)
headers = list(meta_to_header(meta, 'movie'))
headers.extend(list(meta_to_header(meta, 'user')))
headers = [h[1] for h in meta_to_header(meta, 'movie')]
headers.extend([h[1] for h in meta_to_header(meta, 'user')])
cvt = DataProviderConverter(headers)
while True:
movie_id = int(raw_input("Input movie_id: "))
......
......@@ -14,6 +14,15 @@
# limitations under the License.
set -e
UNAME_STR=`uname`
if [[ ${UNAME_STR} == 'Linux' ]]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
cd "$(dirname "$0")"
delimiter='::'
dir=ml-1m
......@@ -25,7 +34,7 @@ python meta_generator.py $dir meta.bin --config=meta_config.json
echo 'split train/test file'
python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1
echo 'shuffle train file'
shuf $dir/ratings.dat.train > ratings.dat.train
${SHUF_PROG} $dir/ratings.dat.train > ratings.dat.train
cp $dir/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
......@@ -22,3 +22,4 @@ paddle train \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
......@@ -8,3 +8,7 @@ data/test.wsj.seq_pair
data/test.wsj.words
data/tgt.dict
output
data/emb
data/targetDict.txt
data/verbDict.txt
data/wordDict.txt
......@@ -43,13 +43,13 @@ def extract_dict_features(pair_file, feature_file):
mark[verb_index] = 1
ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
if verb_index < len(labels_list) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 3:
if verb_index < len(labels_list) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
......
文件模式从 100644 更改为 100755
......@@ -43,7 +43,7 @@ def get_batch_size(yeild_data):
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=False,
can_over_batch_size=True,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
......
......@@ -38,3 +38,4 @@ paddle train \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
......@@ -37,3 +37,4 @@ paddle train --config=$net_conf \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train --config=$config \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
......@@ -24,3 +24,4 @@ paddle train \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
......@@ -25,3 +25,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
......@@ -74,7 +74,8 @@ sum_evaluator(
chunk_evaluator(
name="chunk_f1",
input=[crf_decoding, chunk],
input=crf_decoding,
label=chunk,
chunk_scheme="IOB",
num_chunk_types=11, )
......
......@@ -112,7 +112,8 @@ sum_evaluator(
chunk_evaluator(
name="chunk_f1",
input=[crf_decoding, chunk],
input=crf_decoding,
label=chunk,
chunk_scheme="IOB",
num_chunk_types=11, )
......
......@@ -7,4 +7,6 @@ paddle train \
--dot_period=10 \
--log_period=1000 \
--test_period=0 \
--num_passes=10
--num_passes=10 \
2>&1 | tee 'train.log'
paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
......@@ -7,3 +7,5 @@ paddle train \
--log_period=10000 \
--test_period=0 \
--num_passes=10
2>&1 | tee 'train_linear.log'
paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
run by:
cd ./data
sh get_data.sh
cd ..
sh train.sh
sh predict.sh
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
#download the dataset
echo "Downloading traffic data..."
wget http://paddlepaddle.cdn.bcebos.com/demo/traffic/traffic_data.tar.gz
#extract package
echo "Unzipping..."
tar -zxvf traffic_data.tar.gz
echo "data/speeds.csv" > train.list
echo "data/speeds.csv" > test.list
echo "data/speeds.csv" > pred.list
echo "Done."
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import sys
import numpy as np
TERM_NUM = 24
FORECASTING_NUM = 24
LABEL_VALUE_NUM = 4
def initHook(settings, file_list, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store data meta.
:param settings: global object. It will passed to process routine.
:type obj: object
:param file_list: the meta file object, which passed from trainer_config.py,but unused in this function.
:param kwargs: unused other arguments.
"""
del kwargs #unused
settings.pool_size = sys.maxint
#Use a time seires of the past as feature.
#Dense_vector's expression form is [float,float,...,float]
settings.input_types = [dense_vector(TERM_NUM)]
#There are next FORECASTING_NUM fragments you need predict.
#Every predicted condition at time point has four states.
for i in range(FORECASTING_NUM):
settings.input_types.append(integer_value(LABEL_VALUE_NUM))
@provider(
init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
def process(settings, file_name):
with open(file_name) as f:
#abandon fields name
f.next()
for row_num, line in enumerate(f):
speeds = map(int, line.rstrip('\r\n').split(",")[1:])
# Get the max index.
end_time = len(speeds)
# Scanning and generating samples
for i in range(TERM_NUM, end_time - FORECASTING_NUM):
# For dense slot
pre_spd = map(float, speeds[i - TERM_NUM:i])
# Integer value need predicting, values start from 0, so every one minus 1.
fol_spd = [j - 1 for j in speeds[i:i + FORECASTING_NUM]]
# Predicting label is missing, abandon the sample.
if -1 in fol_spd:
continue
yield [pre_spd] + fol_spd
def predict_initHook(settings, file_list, **kwargs):
settings.pool_size = sys.maxint
settings.input_types = [dense_vector(TERM_NUM)]
@provider(init_hook=predict_initHook, should_shuffle=False)
def process_predict(settings, file_name):
with open(file_name) as f:
#abandon fields name
f.next()
for row_num, line in enumerate(f):
speeds = map(int, line.rstrip('\r\n').split(","))
end_time = len(speeds)
pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
yield pre_spd
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
res = []
with open('./rank-00000') as f:
for line in f:
pred = map(int, line.strip('\r\n;').split(";"))
#raw prediction range from 0 to 3
res.append([i + 1 for i in pred])
file_name = open('./data/pred.list').read().strip('\r\n')
FORECASTING_NUM = 24
header = [
'id',
'201604200805',
'201604200810',
'201604200815',
'201604200820',
'201604200825',
'201604200830',
'201604200835',
'201604200840',
'201604200845',
'201604200850',
'201604200855',
'201604200900',
'201604200905',
'201604200910',
'201604200915',
'201604200920',
'201604200925',
'201604200930',
'201604200935',
'201604200940',
'201604200945',
'201604200950',
'201604200955',
'201604201000',
]
###################
## To CSV format ##
###################
with open(file_name) as f:
f.next()
print ','.join(header)
for row_num, line in enumerate(f):
fields = line.rstrip('\r\n').split(',')
linkid = fields[0]
print linkid + ',' + ','.join(map(str, res[row_num]))
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cfg=trainer_config.py
# pass choice
model="output/pass-00000"
paddle train \
--config=$cfg \
--use_gpu=false \
--job=test \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=.
python gen_result.py > result.txt
rm -rf rank-00000
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cfg=trainer_config.py
paddle train \
--config=$cfg \
--save_dir=./output \
--trainer_count=4 \
--log_period=1000 \
--dot_period=10 \
--num_passes=10 \
--use_gpu=false \
--show_parameter_stats_period=3000 \
2>&1 | tee 'train.log'
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
################################### DATA Configuration #############################################
is_predict = get_config_arg('is_predict', bool, False)
trn = './data/train.list' if not is_predict else None
tst = './data/test.list' if not is_predict else './data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(
train_list=trn, test_list=tst, module="dataprovider", obj=process)
################################### Parameter Configuaration #######################################
TERM_NUM = 24
FORECASTING_NUM = 24
emb_size = 16
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=1e-3,
learning_method=RMSPropOptimizer())
################################### Algorithm Configuration ########################################
output_label = []
link_encode = data_layer(name='link_encode', size=TERM_NUM)
for i in xrange(FORECASTING_NUM):
# Each task share same weight.
link_param = ParamAttr(
name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(score)
output_label.append(maxid)
else:
# Multi-task training.
label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
cls = classification_cost(
input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
output_label.append(cls)
outputs(output_label)
......@@ -7,25 +7,50 @@ if(NOT DEFINED SPHINX_THEME_DIR)
endif()
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
"${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@ONLY)
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
${SPHINX_CACHE_DIR}
${BINARY_BUILD_DIR_EN}
${SPHINX_CACHE_DIR_EN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_docs
gen_proto_py)
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.cn.in"
"${BINARY_BUILD_DIR_CN}/conf.py"
@ONLY)
sphinx_add_target(paddle_docs_cn
html
${BINARY_BUILD_DIR_CN}
${SPHINX_CACHE_DIR_CN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_docs_cn
gen_proto_py)
关于PaddlePaddle
================
PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。
PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。
同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。
致谢
--------
在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)
......@@ -11,4 +11,4 @@ We hope to build an active open source community both by providing feedback and
Credits
--------
We owe many thanks to `all contributors and developers <https://github.com/PaddlePaddle/Paddle/blob/develop/authors>`_ of PaddlePaddle!
We owe many thanks to `all contributors and developers <https://github.com/PaddlePaddle/Paddle/graphs/contributors>`_ of PaddlePaddle!
.. _api_dataprovider:
DataProvider的介绍
==================
DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 `PyDataProvider2 <pydataprovider2.html>`_ ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。
DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 :ref:`api_pydataprovider2` ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。
PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用哪种DataProvider,并且在DataProvider中实现如何访问训练文件列表(train.list)或测试文件列表(test.list)。
- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址:
- 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。
- 地址也可以为hdfs文件路径,或者数据库连接路径等。
- 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。
- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址:
- 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。
- 地址也可以为hdfs文件路径,或者数据库连接路径等。
- 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。
- 如果没有设置test.list,或设置为None,那么在训练过程中不会执行测试操作;否则,会根据命令行参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。
.. _api_pydataprovider2_en:
.. _api_pydataprovider2:
PyDataProvider2
===============
......@@ -24,18 +24,18 @@ of 28 x 28 pixels.
A small part of the original data as an example is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
.. literalinclude:: src/mnist_train.txt
Each line of the data contains two parts, separated by :code:`;`. The first part is
label of an image. The second part contains 28x28 pixel float values.
Just write path of the above data into train.list. It looks like this:
.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list
.. literalinclude:: src/train.list
The corresponding dataprovider is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py
.. literalinclude:: src/mnist_provider.dict.py
The first line imports PyDataProvider2 package.
The main function is the process function, that has two parameters.
......@@ -74,7 +74,7 @@ sample by using keywords :code:`yield`.
Only a few lines of codes need to be added into the training configuration file,
you can take this as an example.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
.. literalinclude:: src/mnist_config.py
Here we specify training data by :code:`train.list`, and no testing data is specified.
The method which actually provide data is :code:`process`.
......@@ -83,7 +83,7 @@ User also can use another style to provide data, which defines the
:code:`data_layer`'s name explicitly when `yield`. For example,
the :code:`dataprovider` is shown as below.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
.. literalinclude:: src/mnist_provider.dict.py
:linenos:
If user did't give the :code:`data_layer`'s name, PaddlePaddle will use
......@@ -104,7 +104,7 @@ And PaddlePadle will do all of the rest things\:
Is this cool?
.. _api_pydataprovider2_en_sequential_model:
.. _api_pydataprovider2_sequential_model:
DataProvider for the sequential model
-------------------------------------
......@@ -121,11 +121,11 @@ negative sentiment (marked by 0 and 1 respectively).
A small part of the original data as an example can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_train.txt
.. literalinclude:: src/sentimental_train.txt
The corresponding data provider can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py
.. literalinclude:: src/sentimental_provider.py
This data provider for sequential model is a little more complex than that
for MINST dataset.
......@@ -143,7 +143,7 @@ initialized. The :code:`on_init` function has the following parameters:
To pass these parameters into DataProvider, the following lines should be added
into trainer configuration file.
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_config.py
.. literalinclude:: src/sentimental_config.py
The definition is basically same as MNIST example, except:
* Load dictionary in this configuration
......
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
......@@ -7,7 +7,7 @@ DataProvider API
.. toctree::
:maxdepth: 1
data_provider/index_en.rst
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册