提交 1d58949a 编写于 作者: L LCY-Seso 提交者: GitHub

Merge branch 'develop' into enable_drop_in_average_and_max_layer

......@@ -9,6 +9,8 @@ build/
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
third_party/
[submodule "warp-ctc"]
path = warp-ctc
url = https://github.com/baidu-research/warp-ctc.git
......@@ -2,7 +2,7 @@
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
files: (?!.*warp-ctc)^.*$
files: (?!.*third_party)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
......@@ -15,7 +15,7 @@
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*warp-ctc)^.*$
files: (?!.*third_party)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
......
language: cpp
cache: ccache
cache:
directories:
- $HOME/third_party
- $HOME/.ccache
- $HOME/.cache/pip
sudo: required
dist: trusty
os:
- linux
- osx
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
packages:
- gcc-4.8
- g++-4.8
- wget
- gfortran-4.8
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev
- m4
- python-numpy
- python-wheel
- libgoogle-glog-dev
- libgflags-dev
- libgtest-dev
- curl
- lcov
- graphviz
- swig
- graphviz
- clang-format-3.8
- automake
- libtool
- ccache
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
......@@ -53,10 +45,10 @@ before_install:
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit requests==2.9.2 LinkChecker
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
script:
- paddle/scripts/travis/main.sh
notifications:
......
cmake_minimum_required(VERSION 2.8)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.0)
project(paddle CXX C)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package)
find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
# Check protobuf library version.
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
OUTPUT_VARIABLE PROTOBUF_VERSION)
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
set(PROTOBUF_3 OFF)
if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
set(PROTOBUF_3 ON)
endif()
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(ZLIB REQUIRED)
find_package(NumPy REQUIRED)
find_package(Threads REQUIRED)
find_package(AVX QUIET)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
find_package(GTest)
find_package(Sphinx)
find_package(Doxygen)
include(cblas)
find_program(M4_EXECUTABLE m4)
###################### Configurations ###########################
option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON)
option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND})
option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
include(cpplint)
include(ccache)
if(WITH_RDMA)
include(rdma)
endif()
include(util)
include(flags)
include(cudnn)
include(FindPythonModule)
include(check_packages)
include(swig)
include(coveralls)
# Set PaddlePaddle version to Git tag name or Git commit ID.
find_package(CUDA QUIET)
find_package(Git REQUIRED)
# version.cmake will get the current PADDLE_VERSION
include(version)
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE)
set(ACCURACY double)
else(WITH_DOUBLE)
set(ACCURACY float)
endif(WITH_DOUBLE)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
if(WITH_PYTHON)
include_directories(${PYTHON_INCLUDE_DIR})
include_directories(${PYTHON_NUMPY_INCLUDE_DIR})
else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON)
if(WITH_RDMA)
include_directories("${RDMA_INC_DIR}")
else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
# glog
include_directories(${LIBGLOG_INCLUDE_DIR})
#gflags
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
find_package(Threads REQUIRED)
if(WITH_TESTING)
enable_testing()
include_directories(${GTEST_INCLUDE_DIRS})
include(system)
include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
include_directories("${CBLAS_INC_DIR}")
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
########################################################################################
include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog
include(external/gtest) # download, build, install gtest
include(external/protobuf) # download, build, install protobuf
include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(rdma) # set rdma libraries
include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories(${PROTOBUF_INCLUDE_DIRS})
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
if(EXISTS "${PROJ_ROOT}/paddle/internals/CMakeLists.txt")
set(PADDLE_WITH_INTERNAL ON)
include(paddle/internals/CMakeLists.txt)
else()
set(PADDLE_WITH_INTERNAL OFF)
set(INTERNAL_PROTO_PATH "")
endif()
set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES}
)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
endif()
......@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
COMMAND cd ${destination} && ln -s ./index_*.html index.html
)
set_property(
......
......@@ -13,9 +13,11 @@
# system paths.
#
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)
......@@ -35,11 +37,12 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_LIBS ${MKL_INTEL_LP64}
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return() # return file.
endif()
......@@ -68,9 +71,10 @@ find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return()
endif()
......@@ -98,8 +102,9 @@ find_library(OPENBLAS_LIB NAMES openblas
if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
return()
endif()
......@@ -130,9 +135,7 @@ find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
set(CBLAS_LIBS ${REFERENCE_CBLAS_LIBRARY})
return()
set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY})
message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
set(CBLAS_FOUND ON)
endif()
message(FATAL_ERROR "CBlas must be set. Paddle support MKL, ATLAS, OpenBlas, reference-cblas."
" Try set MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT or REFERENCE_CBLAS_ROOT.")
# Check package for each cmake option
if(WITH_GPU)
find_package(CUDA REQUIRED) # CUDA is required when use gpu
endif()
if(WITH_PYTHON)
find_package(PythonLibs 2.6 REQUIRED)
find_package(PythonInterp REQUIRED)
find_package(NumPy REQUIRED)
endif()
if(WITH_STYLE_CHECK)
find_package(PythonInterp REQUIRED)
endif()
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
if(WITH_TESTING)
find_package(GTest REQUIRED)
endif()
if(WITH_DOC)
find_package(Sphinx REQUIRED)
find_python_module(recommonmark REQUIRED)
endif()
if(WITH_SWIG_PY)
if(NOT SWIG_FOUND)
message(FATAL_ERROR "SWIG is not found. Please install swig or disable WITH_SWIG_PY")
endif()
find_python_module(wheel REQUIRED) # package wheel
endif()
if(NOT M4_EXECUTABLE)
message(FATAL_ERROR "Paddle need m4 to generate proto file.")
endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(NOT WITH_PYTHON)
if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE)
endif(WITH_DOUBLE)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
......@@ -110,14 +110,13 @@ endmacro()
# Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:")
message("Process GCDA files:")
message("===============================")
# Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in)
# and run gcov on those.
foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH)
#
......@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()
......@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
......
......@@ -53,7 +53,7 @@ macro(add_style_check_target TARGET_NAME)
if(LINT MATCHES ON)
add_custom_command(TARGET ${TARGET_NAME}
PRE_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}" ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
endif()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags)
SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
IF(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ELSE(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
ExternalProject_Add(
gflags
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_TESTING=OFF
)
LIST(APPEND external_project_dependencies gflags)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(GLOG_SOURCES_DIR ${THIRD_PARTY_PATH}/glog)
SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog)
SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE)
IF(WIN32)
SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.lib" CACHE FILEPATH "glog library." FORCE)
ELSE(WIN32)
SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
)
LIST(APPEND external_project_dependencies glog)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(WITH_TESTING)
ENABLE_TESTING()
INCLUDE(ExternalProject)
SET(GTEST_SOURCES_DIR ${THIRD_PARTY_PATH}/gtest)
SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest)
SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE)
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR})
IF(WIN32)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE)
ELSE(WIN32)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
gtest
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/google/googletest.git"
GIT_TAG "release-1.8.0"
PREFIX ${GTEST_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_GMOCK=ON
CMAKE_ARGS -Dgtest_disable_pthreads=ON
CMAKE_ARGS -Dgtest_force_shared_crt=ON
)
LIST(APPEND external_project_dependencies gtest)
ENDIF(WITH_TESTING)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
IF(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE)
ELSE(WIN32)
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
)
LIST(APPEND external_project_dependencies openblas)
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf)
IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf)
ENDIF(NOT PROTOBUF_FOUND)
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
INCLUDE(python_module)
FIND_PACKAGE(PythonInterp 2.7)
FIND_PACKAGE(PythonLibs 2.7)
SET(py_env "")
IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
find_python_module(pip REQUIRED)
find_python_module(numpy REQUIRED)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF()
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
SET(_python_DIR ${PYTHON_INSTALL_DIR})
IF(UNIX)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSEIF(WIN32)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSE()
MESSAGE(FATAL_ERROR "Unknown system !")
ENDIF()
IF(APPLE)
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS
-DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON
)
ENDIF()
SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS)
# Force Python build to "Release".
IF(CMAKE_CONFIGURATION_TYPES)
SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR})
SET(CMAKE_CFG_INTDIR "Release")
ELSE()
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
)
ENDIF()
ExternalProject_Add(python
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git"
PREFIX ${PYTHON_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DPYTHON_VERSION=2.7.12
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR}
-DBUILD_LIBPYTHON_SHARED:BOOL=OFF
-DUSE_SYSTEM_LIBRARIES:BOOL=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR}
-DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES}
-DDOWNLOAD_SOURCES:BOOL=ON
-DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS}
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS}
DEPENDS zlib
)
SET(py_env
PATH=${PYTHON_INSTALL_DIR}/bin
PYTHONHOME=${PYTHON_INSTALL_DIR}
PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH})
####################################################################################
##################################### SETUPTOOLS ###################################
SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools)
ExternalProject_Add(setuptools
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SETUPTOOLS_SOURCES_DIR}
URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz"
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python zlib
)
#####################################################################################
##################################### SIX ###########################################
SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six)
ExternalProject_Add(six
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SIX_SOURCES_DIR}
URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools
)
#####################################################################################
##################################### CYTHON ########################################
SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython)
ExternalProject_Add(cython
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${CYTHON_SOURCES_DIR}
URL https://github.com/cython/cython/archive/0.25.2.tar.gz
GIT_TAG 0.25.2
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
PATCH_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python
)
####################################################################################
##################################### NUMPY ########################################
SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy)
SET(NUMPY_TAG_VERSION "v1.11.3")
SET(NUMPY_VERSION "1.11.3")
SET(EGG_NAME "")
SET(PYTHON_NUMPY_INCLUDE_DIR "")
IF(WIN32)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg")
ELSE(WIN32)
IF(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}")
ELSE(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
ENDIF(APPLE)
FOREACH(suffix x86_64 intel fat64 fat32 universal)
LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include)
ENDFOREACH()
ENDIF(WIN32)
ExternalProject_Add(numpy
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/numpy/numpy.git
GIT_TAG ${NUMPY_TAG_VERSION}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
PREFIX ${NUMPY_SOURCES_DIR}
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools cython
)
####################################################################################
##################################### WHEEL ########################################
SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel)
ExternalProject_Add(wheel
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz
PREFIX ${WHEEL_SOURCES_DIR}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools
)
####################################################################################
################################### PROTOBUF #######################################
SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf)
ExternalProject_Add(python-protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz
URL_MD5 38b5fb160c768d2f8444d0c6d637ff91
PREFIX ${PY_PROTOBUF_SOURCES_DIR}
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools six
)
####################################################################################
LIST(APPEND external_project_dependencies python setuptools six cython wheel python-protobuf numpy)
ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FIND_PACKAGE(SWIG)
IF(NOT SWIG_FOUND)
# build swig as an external project
INCLUDE(ExternalProject)
SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
SET(SWIG_TARGET_VERSION "3.0.2")
SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
IF(WIN32)
# swig.exe available as pre-built binary on Windows:
ExternalProject_Add(swig
URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5}
SOURCE_DIR ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
# From SWIG configure
ExternalProject_Add(swig
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
ENDIF(WIN32)
LIST(APPEND external_project_dependencies swig)
ENDIF(NOT SWIG_FOUND)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR})
SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE)
IF(WIN32)
SET(WARPCTC_LIBRARIES
"${WARPCTC_INSTALL_DIR}/lib/warpctc.dll" CACHE FILEPATH "Warp-ctc Library" FORCE)
ELSE(WIN32)
IF(APPLE)
SET(_warpctc_SHARED_SUFFIX dylib)
ELSE(APPLE)
SET(_warpctc_SHARED_SUFFIX so)
ENDIF(APPLE)
SET(WARPCTC_LIBRARIES
"${WARPCTC_INSTALL_DIR}/lib/libwarpctc.${_warpctc_SHARED_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE)
ENDIF(WIN32)
IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" )
SET(USE_OMP OFF)
ELSE()
SET(USE_OMP ON)
ENDIF()
ExternalProject_Add(
warpctc
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
CMAKE_ARGS -DBUILD_SHARED=ON
)
LIST(APPEND external_project_dependencies warpctc)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(ZLIB_SOURCES_DIR ${THIRD_PARTY_PATH}/zlib)
SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib)
SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE)
SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." FORCE)
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
ExternalProject_Add(
zlib
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/madler/zlib.git"
GIT_TAG "v1.2.8"
PREFIX ${ZLIB_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON
)
LIST(APPEND external_project_dependencies zlib)
......@@ -3,12 +3,6 @@ include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
function(CheckCompilerCXX11Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
......@@ -102,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
......@@ -111,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
......
......@@ -26,5 +26,18 @@ function(find_python_module module)
if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
message(FATAL_ERROR "python module ${module} is not found")
endif()
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import sys, ${module}; sys.stdout.write(${module}.__version__)"
OUTPUT_VARIABLE _${module}_version
RESULT_VARIABLE _${module}_status
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
"Version of Python module ${module}")
endif(NOT _${module}_status)
set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
endfunction(find_python_module)
......@@ -5,72 +5,76 @@
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# we use static output in svn repositories to avoid implict bugs from not standard runtime env.
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
if(WITH_RDMA)
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
return()
endif()
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
include_directories("${RDMA_INC_DIR}")
else()
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
endif()
else(WITH_RDMA)
set(RDMA_LIBS "")
set(RDMA_LD_FLAGS "")
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
${PROJ_ROOT}/paddle/Paddle_wrap.h
${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py)
endfunction(generate_python_api)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only give us a coarse-grained name,
# but the name like centos is necessary in some scenes
# to distinguish system for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
IF(APPLE)
EXEC_PROGRAM (sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" VERSION "${MACOSX_VERSION}")
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ELSEIF(LINUX_ISSUE MATCHES "Debian")
SET(HOST_SYSTEM "debian")
ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
SET(HOST_SYSTEM "ubuntu")
ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
SET(HOST_SYSTEM "redhat")
ELSEIF(LINUX_ISSUE MATCHES "Fedora")
SET(HOST_SYSTEM "fedora")
ENDIF()
ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
IF(NOT HOST_SYSTEM)
SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
ENDIF()
ENDIF(APPLE)
ENDIF(WIN32)
# query number of logical cores
CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 0 # Wrap install in script to log output
)
......@@ -24,7 +24,7 @@ function(target_circle_link_libraries TARGET_NAME)
list(APPEND libsInArgn ${arg})
endif()
endforeach()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
list(APPEND LIBS "-undefined dynamic_lookup")
endif()
list(REVERSE libsInArgn)
......@@ -81,18 +81,6 @@ function(link_paddle_exe TARGET_NAME)
set(METRIC_LIBS "")
endif()
if(PADDLE_WITH_INTERNAL)
set(INTERAL_LIBS paddle_internal_gserver paddle_internal_parameter)
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_internal_gserver
paddle_internal_owlqn
ARCHIVE_END
paddle_internal_parameter)
else()
set(INTERAL_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
......@@ -107,29 +95,20 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
paddle_test_main
${METRIC_LIBS}
${PROTOBUF_LIBRARY}
${LIBGLOG_LIBRARY}
${GFLAGS_LIBRARIES}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${ZLIB_LIBRARIES}
${INTERAL_LIBS}
${CMAKE_DL_LIBS})
if(WITH_RDMA)
target_link_libraries(${TARGET_NAME}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES})
${PYTHON_LIBRARIES} util)
endif()
if(WITH_GPU)
target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY})
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
......@@ -143,10 +122,7 @@ function(link_paddle_exe TARGET_NAME)
endif()
endif()
if(NOT WITH_DSO)
target_link_libraries(${TARGET_NAME}
${WARPCTC_LIBRARY})
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()
# link_paddle_test
......@@ -155,8 +131,10 @@ endfunction()
# Rest Arguemnts: not used.
function(link_paddle_test TARGET_NAME)
link_paddle_exe(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} ${GTEST_MAIN_LIBRARIES}
${GTEST_LIBRARIES})
target_link_libraries(${TARGET_NAME}
paddle_test_main
paddle_test_util
${GTEST_LIBRARIES})
endfunction()
# add_unittest_without_exec
......
......@@ -21,4 +21,5 @@ while ("${PADDLE_VERSION}" STREQUAL "")
endif()
endwhile()
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
message(STATUS "Paddle version is ${PADDLE_VERSION}")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
......@@ -126,7 +126,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
return output[output_layer].mean(0)
return output[output_layer]['value'].mean(0)
def predict(self, image=None, output_layer=None):
assert isinstance(image, basestring)
......
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.regression_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
......@@ -19,3 +19,4 @@ paddle train \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
......@@ -5,3 +5,6 @@ plot.png
train.log
*pyc
.ipynb_checkpoints
params.pkl
params.tar
params.tar.gz
......@@ -6,33 +6,15 @@ passed to C++ side of Paddle.
The user api could be simpler and carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *
def optimizer_config():
settings(
learning_rate=1e-4,
learning_method=AdamOptimizer(),
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
import numpy as np
import paddle.v2 as paddle_v2
import py_paddle.swig_paddle as api
from paddle.trainer_config_helpers import *
from py_paddle import DataProviderConverter
def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)
from mnist_util import read_from_mnist
def init_parameter(network):
......@@ -75,19 +57,35 @@ def input_order_converter(generator):
def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
# get enable_types for each optimizer.
# enable_types = [value, gradient, momentum, etc]
# For each optimizer(SGD, Adam), GradientMachine should enable different
# buffers.
opt_config_proto = parse_optimizer_config(optimizer_config)
opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
enable_types = _temp_optimizer_.getParameterTypes()
optimizer = paddle_v2.optimizer.Adam(
learning_rate=1e-4,
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = optimizer.create_local_updater()
assert isinstance(updater, api.ParameterUpdater)
# define network
images = paddle_v2.layer.data(
name='pixel', type=paddle_v2.data_type.dense_vector(784))
label = paddle_v2.layer.data(
name='label', type=paddle_v2.data_type.integer_value(10))
hidden1 = paddle_v2.layer.fc(input=images, size=200)
hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
inference = paddle_v2.layer.fc(input=hidden2,
size=10,
act=paddle_v2.activation.Softmax())
cost = paddle_v2.layer.classification_cost(input=inference, label=label)
# Create Simple Gradient Machine.
model_config = parse_network_config(network_config)
m = api.GradientMachine.createFromConfigProto(
model_config, api.CREATE_MODE_NORMAL, enable_types)
model_config = paddle_v2.layer.parse_network(cost)
m = api.GradientMachine.createFromConfigProto(model_config,
api.CREATE_MODE_NORMAL,
optimizer.enable_types())
# This type check is not useful. Only enable type hint in IDE.
# Such as PyCharm
......@@ -96,19 +94,12 @@ def main():
# Initialize Parameter by numpy.
init_parameter(network=m)
# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = api.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, api.ParameterUpdater)
# Initialize ParameterUpdater.
updater.init(m)
# DataProvider Converter is a utility convert Python Object to Paddle C++
# Input. The input format is as same as Paddle's DataProvider.
converter = DataProviderConverter(
input_types=[dp.dense_vector(784), dp.integer_value(10)])
converter = DataProviderConverter(input_types=[images.type, label.type])
train_file = './data/raw_data/train'
test_file = './data/raw_data/t10k'
......
import paddle.v2 as paddle
import gzip
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The thrid fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
images = paddle.layer.data(
name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10))
# Here we can build the prediction network in different ways. Please
# choose one by uncomment corresponding line.
predict = softmax_regression(images)
#predict = multilayer_perceptron(images)
#predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
lists = []
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1000 == 0:
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=256))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output=predict,
parameters=parameters,
reader=paddle.batch(
paddle.reader.firstn(
paddle.reader.map_readers(lambda item: (item[0], ),
paddle.dataset.mnist.test()),
n=100),
batch_size=32))
print probs.shape
if __name__ == '__main__':
main()
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=100 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
......@@ -14,9 +14,19 @@
# limitations under the License.
set -e
set -x
BASE_URL='http://paddlepaddle.cdn.bcebos.com/model_zoo/embedding'
# download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file
DOWNLOAD_ITEMS=(baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb)
ITEM_MD5=(fa03a12321eaab6c30a8fcc9442eaea3
f88c8325ee6da6187f1080e8fe66c1cd
927cf70f27f860aff1a5703ebf7f1584
a52e43655cd25d279777ed509a1ae27b
b92c67fe9ff70fea53596080e351ac80)
for ((i=0; i<${#ITEM_MD5[@]}; i++))
do
FILENAME=${DOWNLOAD_ITEMS[${i}]}
REAL_MD5=`wget ${BASE_URL}/${FILENAME} -O - | tee ${FILENAME} | md5sum | cut -d ' ' -f 1`
EXPECTED_MD5=${ITEM_MD5[${i}]}
[ "${EXPECTED_MD5}" = "${REAL_MD5}" ]
done
......@@ -156,7 +156,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
res[name] = output[name].mean(0)
res[name] = output[name]['value'].mean(0)
return res
......
......@@ -25,6 +25,7 @@ log_file="$bin_dir/train.log"
pushd "$home_dir"
cfg=trainer_config.lr.py
paddle train \
--start_pserver=false \
--config=$cfg \
--save_dir=${model_dir} \
--trainer_count=4 \
......
......@@ -26,5 +26,7 @@ paddle train \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=. \
2>&1 | tee 'predict.log'
paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
mv rank-00000 result.txt
......@@ -31,3 +31,4 @@ paddle train \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import math
def get_best_pass(log_filename):
with open(log_filename, 'r') as f:
text = f.read()
pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
sorted_results = sorted(results, key=lambda result: float(result[0]))
return sorted_results[0]
log_filename = sys.argv[1]
log = get_best_pass(log_filename)
predict_error = math.sqrt(float(log[0])) / 2
print 'Best pass is %s, error is %s, which means predict get error as %f' % (
log[1], log[0], predict_error)
evaluate_pass = "output/pass-%s" % log[1]
print "evaluating from pass %s" % evaluate_pass
......@@ -22,3 +22,4 @@ paddle train \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
#8 features
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec])
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
......@@ -38,3 +38,4 @@ paddle train \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
......@@ -32,4 +32,6 @@ def process(settings, file_name):
word_slot = [
settings.word_dict[w] for w in words if w in settings.word_dict
]
if not word_slot:
continue
yield word_slot, label
......@@ -138,7 +138,11 @@ def main():
batch = []
for line in sys.stdin:
batch.append([predict.get_index(line)])
words = predict.get_index(line)
if words:
batch.append([words])
else:
print('All the words in [%s] are not in the dictionary.' % line)
if len(batch) == batch_size:
predict.batch_predict(batch)
batch = []
......
......@@ -37,3 +37,4 @@ paddle train --config=$net_conf \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train --config=$config \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.trainer_config_helpers.attrs as attrs
from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
is_predict: is predicting or not.
Some layers is not needed in network when predicting.
"""
assert stacked_num % 2 == 1
layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3)
lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling())
lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False, trainer_count=4)
# network config
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict),
batch_size=128),
feeding={'word': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100),
event_handler=event_handler,
feeding={'word': 0,
'label': 1},
num_passes=10)
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
def main():
paddle.init(use_gpu=False, trainer_count=1)
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# define network topology
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
feeding = {
'source_language_word': 0,
'target_language_word': 1,
'target_language_next_word': 2
}
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# start to train
trainer.train(
reader=wmt14_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
......@@ -27,3 +27,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
......@@ -24,3 +24,4 @@ paddle train \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
......@@ -25,3 +25,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
......@@ -7,4 +7,6 @@ paddle train \
--dot_period=10 \
--log_period=1000 \
--test_period=0 \
--num_passes=10
--num_passes=10 \
2>&1 | tee 'train.log'
paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
......@@ -7,3 +7,5 @@ paddle train \
--log_period=10000 \
--test_period=0 \
--num_passes=10
2>&1 | tee 'train_linear.log'
paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
run by:
cd ./data
sh get_data.sh
cd ..
sh train.sh
sh predict.sh
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
set -x
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
#download the dataset
echo "Downloading traffic data..."
wget http://paddlepaddle.cdn.bcebos.com/demo/traffic/traffic_data.tar.gz
#extract package
echo "Unzipping..."
tar -zxvf traffic_data.tar.gz
echo "data/speeds.csv" > train.list
echo "data/speeds.csv" > test.list
echo "data/speeds.csv" > pred.list
echo "Done."
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import sys
import numpy as np
TERM_NUM = 24
FORECASTING_NUM = 24
LABEL_VALUE_NUM = 4
def initHook(settings, file_list, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store data meta.
:param settings: global object. It will passed to process routine.
:type obj: object
:param file_list: the meta file object, which passed from trainer_config.py,but unused in this function.
:param kwargs: unused other arguments.
"""
del kwargs #unused
settings.pool_size = sys.maxint
#Use a time seires of the past as feature.
#Dense_vector's expression form is [float,float,...,float]
settings.input_types = [dense_vector(TERM_NUM)]
#There are next FORECASTING_NUM fragments you need predict.
#Every predicted condition at time point has four states.
for i in range(FORECASTING_NUM):
settings.input_types.append(integer_value(LABEL_VALUE_NUM))
@provider(
init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
def process(settings, file_name):
with open(file_name) as f:
#abandon fields name
f.next()
for row_num, line in enumerate(f):
speeds = map(int, line.rstrip('\r\n').split(",")[1:])
# Get the max index.
end_time = len(speeds)
# Scanning and generating samples
for i in range(TERM_NUM, end_time - FORECASTING_NUM):
# For dense slot
pre_spd = map(float, speeds[i - TERM_NUM:i])
# Integer value need predicting, values start from 0, so every one minus 1.
fol_spd = [j - 1 for j in speeds[i:i + FORECASTING_NUM]]
# Predicting label is missing, abandon the sample.
if -1 in fol_spd:
continue
yield [pre_spd] + fol_spd
def predict_initHook(settings, file_list, **kwargs):
settings.pool_size = sys.maxint
settings.input_types = [dense_vector(TERM_NUM)]
@provider(init_hook=predict_initHook, should_shuffle=False)
def process_predict(settings, file_name):
with open(file_name) as f:
#abandon fields name
f.next()
for row_num, line in enumerate(f):
speeds = map(int, line.rstrip('\r\n').split(","))
end_time = len(speeds)
pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
yield pre_spd
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
res = []
with open('./rank-00000') as f:
for line in f:
pred = map(int, line.strip('\r\n;').split(";"))
#raw prediction range from 0 to 3
res.append([i + 1 for i in pred])
file_name = open('./data/pred.list').read().strip('\r\n')
FORECASTING_NUM = 24
header = [
'id',
'201604200805',
'201604200810',
'201604200815',
'201604200820',
'201604200825',
'201604200830',
'201604200835',
'201604200840',
'201604200845',
'201604200850',
'201604200855',
'201604200900',
'201604200905',
'201604200910',
'201604200915',
'201604200920',
'201604200925',
'201604200930',
'201604200935',
'201604200940',
'201604200945',
'201604200950',
'201604200955',
'201604201000',
]
###################
## To CSV format ##
###################
with open(file_name) as f:
f.next()
print ','.join(header)
for row_num, line in enumerate(f):
fields = line.rstrip('\r\n').split(',')
linkid = fields[0]
print linkid + ',' + ','.join(map(str, res[row_num]))
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cfg=trainer_config.py
# pass choice
model="output/pass-00000"
paddle train \
--config=$cfg \
--use_gpu=false \
--job=test \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=.
python gen_result.py > result.csv
rm -rf rank-00000
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
cfg=trainer_config.py
paddle train \
--config=$cfg \
--save_dir=./output \
--trainer_count=4 \
--log_period=1000 \
--dot_period=10 \
--num_passes=10 \
--use_gpu=false \
--show_parameter_stats_period=3000 \
2>&1 | tee 'train.log'
# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
################################### DATA Configuration #############################################
is_predict = get_config_arg('is_predict', bool, False)
trn = './data/train.list' if not is_predict else None
tst = './data/test.list' if not is_predict else './data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(
train_list=trn, test_list=tst, module="dataprovider", obj=process)
################################### Parameter Configuaration #######################################
TERM_NUM = 24
FORECASTING_NUM = 24
emb_size = 16
batch_size = 128 if not is_predict else 1
settings(
batch_size=batch_size,
learning_rate=1e-3,
learning_method=RMSPropOptimizer())
################################### Algorithm Configuration ########################################
output_label = []
link_encode = data_layer(name='link_encode', size=TERM_NUM)
for i in xrange(FORECASTING_NUM):
# Each task share same weight.
link_param = ParamAttr(
name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(score)
output_label.append(maxid)
else:
# Multi-task training.
label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
cls = classification_cost(
input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
output_label.append(cls)
outputs(output_label)
import math
import paddle.v2 as paddle
dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.table_projection(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
API中文手册
============
API
===
DataProvider API
----------------
模型配置 API
------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
v2/model_configs.rst
.. _api_trainer_config:
Model Config API
----------------
数据 API
--------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
v2/data.rst
Applications API
----------------
训练 API
--------
.. toctree::
:maxdepth: 1
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
v2/run_logic.rst
\ No newline at end of file
API
===
DataProvider API
Model Config API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
v2/model_configs.rst
Model Config API
----------------
Data API
--------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
v2/data.rst
Train API
---------
Applications API
----------------
.. toctree::
:maxdepth: 1
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
v2/run_logic.rst
\ No newline at end of file
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
......@@ -139,24 +139,12 @@ lstmemory
:members: lstmemory
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
grumemory
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: grumemory
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
Recurrent Layer Group
=====================
......@@ -172,6 +160,18 @@ recurrent_group
:members: recurrent_group
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
beam_search
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -279,6 +279,12 @@ concat_layer
:members: concat_layer
:noindex:
seq_concat_layer
----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: seq_concat_layer
:noindex:
Reshaping Layers
================
......@@ -302,6 +308,18 @@ repeat_layer
:members: repeat_layer
:noindex:
rotate_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: rotate_layer
:noindex:
seq_reshape_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: seq_reshape_layer
:noindex:
Math Layers
===========
......@@ -382,6 +400,15 @@ sampling_id_layer
:members: sampling_id_layer
:noindex:
Slicing and Joining Layers
==========================
pad_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
:members: pad_layer
:noindex:
.. _api_trainer_config_helpers_layers_cost_layers:
Cost Layers
......@@ -441,6 +468,12 @@ ctc_layer
:members: ctc_layer
:noindex:
warp_ctc_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: warp_ctc_layer
:noindex:
nce_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
......
================
Data Related API
================
#########
DataTypes
#########
.. automodule:: paddle.v2.data_type
:members:
##########
DataFeeder
##########
.. automodule:: paddle.v2.data_feeder
:members:
######
Reader
######
.. automodule:: paddle.v2.reader
:members:
.. automodule:: paddle.v2.reader.creator
:members:
#########
minibatch
#########
.. automodule:: paddle.v2.minibatch
:members:
#######
Dataset
#######
.. automodule:: paddle.v2.dataset
:members:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:
#########################
Configuration Related API
#########################
======
Layers
======
.. automodule:: paddle.v2.layer
:members:
==========
Attributes
==========
.. automodule:: paddle.v2.attr
:members:
===========
Activations
===========
.. automodule:: paddle.v2.activation
:members:
========
Poolings
========
.. automodule:: paddle.v2.pooling
:members:
========
Networks
========
.. automodule:: paddle.v2.networks
:members:
==========
Optimizers
==========
.. automodule:: paddle.v2.optimizer
:members:
###########
Trainer API
###########
==========
Parameters
==========
.. automodule:: paddle.v2.parameters
:members:
=======
Trainer
=======
.. automodule:: paddle.v2.trainer
:members:
=====
Event
=====
.. automodule:: paddle.v2.event
:members:
# PaddlePaddle Design Doc
## Ingredients
As our design principle is starting from the essence: how could we
allow users to express and solve their problems at neural networks.
Some essential concepts that our API have to provide include:
1. A *topology* is an expression of *layers*.
1. A layer could be any kind of computation, including *cost*.
1. Some layers have parameters, some don't. Most costs don't have
parameters.
1. In some topologies, layers share parameters. For
example,
[the network for training a ranking model](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850).
1. At programming time, users specify topologies and possible sharing
of parameters. PaddlePaddle can figure out and create parameters
required (and possibly shared) by one or more topologies.
## Starting from Examples
As a summarization
of
[our disucssion](https://github.com/PaddlePaddle/Paddle/issues/1315),
let us present two examples here:
### Example 1. Sharing Parameters between Layers
We use
the
[3-branch ranking](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850) model
in this example. For your convenience, I copy-a-paste the model's
topology as follows:
```
A -> f -\
Q -> f --> cost
B -> f -/
```
The following program trains the topology including the cost, and then
use the sub-network in the trained topology in inference:
```python
def f(in):
e = paddle.layer.embedding(in, parameter_name="embedding")
o = paddle.layer.softmax(e, parameter_name="semantic")
return o
# Create 3 topologies (subnets), they share parameters because all
# correspoinding layers have the same parameter names.
fA = f(paddle.layer.data(input_name="A"))
fB = f(paddle.layer.data(input_name="B"))
fQ = f(paddle.layer.data(input_name="Q"))
topology = paddle.layer.less_than(
paddle.layer.cross_entropy(fA, fQ),
paddle.layer.corss_entropy(fB, fQ))
# Derive parameters required in topology and create them in model.
parameters = paddle.parameters.create(topology)
# Estimate parameters used in topology from data.
paddle.train(topology, parameters, reader=read_ranking_model_data)
# Inference using fA (or fB or fC, as they share their parameters).
[testA, testB, testQ] = read_ranking_model_data()
print "The sematic-vector of testA: ", paddle.infer(fA, parameters, testA)
```
### Example 2. Sharing Parameters between "Models"
We use [GAN](https://github.com/PaddlePaddle/book/tree/develop/gan) in
this example. In the following example program, `d0` and `d1`
correspond to the two networks in the following figure:
<img src="https://github.com/wangyang59/book/raw/00036f4b0da5225041a6824587c1a01cf20159b1/gan/image/gan_ig.png" width=400 />
```python
def G(in):
# over-simplified example as G has only one layers:
return paddle.layer.fc(in, parameter_name="G")
def D(in);
# again, over-simplified:
return paddle.layer.fc(in, parameter_name="D")
# Construct the first topology, which contains both D and G.
# By learning this topology, we update parameters of G.
d0 = paddle.layer.should_be_false(D(G(paddle.layer.data())))
# Construct a second topology d1, which contains only D. By
# training this topology, we update parameters of D. Note
# that d1 share parameters with d0.
d1 = paddle.layer.should_be_true(D(paddle.layer.data()))
# Create parameters from a list of multiple topologies (models) for
# the chance to share parameters between these topologies.
parameters = paddle.parameters.create([d0, d1])
# Iterative training of GAN.
for ...:
train(d0, parameters, reader=read_from_rng, immutable_parameters={"D"})
train(d1, parameters, reader=read_from_realistic_images)
# Use d1 for inference:
print "D thinks a batch of images are realistic ", infer(d1, parameters, read_mnist_images)
```
### Summarization
Above two programs reveal some important design concerns:
1. Users describe a topology as an expression of layers. Every layer
has a *parameter name*. If the users don't specify it explicitly, it's automatically generated as a unique name. By
specifying the parameter name, users can specify the sharing of
parameters between layers and even between topologies.
1. `paddle.parameters.create` figures out parameters required by one
or more topologies from parameter names of layers. It creates these
parameters and returns a `ParameterSet` object, which is in essence
a map from *parameter names* to *parameters*.
1. At training and inference time, `paddle.train` and `paddle.infer`
requires both a topology and the parameter set that holds the parameters of that topology. There are some reasons:
1. This prevents users from forgetting to call
`paddle.parameters.create`.
1. `paddle.train` needs to know which parameter set to update.
1. Users could load another (pre-trained) parameter set and use it
with a topology in `train.infer`.
1. By specifying the `immutable_parameters` parameter of
`paddle.train`, we can forbid the update of these parameters.
## Reader
Not all programming frameworks allow users to define I/O functions.
An example is Google MapReduce, which can only read from text,
SSTable, and RecordIO files. Hadoop MapReduce allows users to define
readers and writers by deriving from base classes `Reader` and
`Writer`. The former is less flexible but also less error-prone. We
decide to provide the flexibility to users to define their readers.
There are some open questions here:
1. **Should a reader return a Python dictionary?**
1. **How to map multiple outputs from a reader to multiple data layers?**
1. **How to easily compose some existing readers to read more data and
feed a topology with more data layers?**
## Training
The recommended way to training a model is to call `paddle.train`,
which simply calls `paddle.trainer.Default`, a global variable of
type `paddle.trainer.SGD`. Equivalently, we can do
```python
opt = paddle.trainer.SGD(..., paddle.updater.Adam(...))
opt.train(topology, parameters, reader=read, ...)
```
### Updater
Please be aware that a trainer can accept an updater as its data
member, where an updater is a class derived from
`paddle.trainer.Updater`. This is to make it easier to customize
trainers, as discussed
[here](https://github.com/PaddlePaddle/Paddle/issues/1319).
### Event Handler
`paddle.train` and `paddle.trainer.XXX.train` take an optional
parameter `event_handler`, which should be either `None` or a function
that handle some events:
1. BeginTraining
1. EndTraining
1. BeginIteration
1. EndIteration
1. BeginPass
1. EndPass
where EndPass is sent if and only if the reader yields
`end_pass=True`.
An example as follows:
```python
def event_handler(event):
if ininstance(event, paddle.event.EndIteration):
print paddle.test(...)
paddle.train(topology, parameters, reader, event_handler)
```
If we are writing a PaddlePaddle program in and for iPython/Jypyter,
we can use metaplotlib in the event handler to plot a curve of
cost/error versus iterations, as shown
[here](https://blog.dominodatalab.com/interactive-dashboards-in-jupyter/).
### Distributed Training
If users want to do distributed training on a cluster, s/he should
call `paddle.dist_train` and provides access tokens to the cluster as
a parameter.
For example, if the user has a TLS certificate that allows him to
access a Kubernetes cluster, s/he should be able to call
```python
paddle.dist_train(model,
trainer=paddle.trainer.SGD(...,
paddle.updater.Adam(...)),
reader=read,
k8s_user="yi",
k8s_token="kube_cluster_tls.pem",
k8s_job="hello",
num_parameter_servers=15)
```
The pseudo code if `paddle.dist_train` is as follows:
```python
def dist_train(topology, parameters, trainer, reader, ...):
if os.getenv("KUBERNETES_SERVICE_HOST") == None:
image_name = k8s_user + '/' + k8s_job
docker_build(image_name)
docker_push()
kube_ctrl_start_job(image_name, k8s_user, k8s_token)
else:
rank = kube_list_containers_in_job_and_return_current_containers_rank()
if rank == 0:
master()
elif rank < 15:
parameter_server()
else:
trainer.train(model, reader=read)
```
Please be aware that if a process is running on the Kubernetes
cluster, it will have some environment variables pre-defined.
If `dist_train` doesn't see these environment variables, it knows
that it's running on users' personal computer, and it should work as a
*launcher*. Otherwise, it knows that it's running on the cluster and
need to figure out its role as either the master, or a trainer, or a
parameter server.
# Python Data Reader Design Doc
At training and testing time, PaddlePaddle programs need to read data. To ease the users' work to write data reading code, we define that
- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items.
- A *reader creator* is a function that returns a reader function.
- A *reader decorator* is a function, which accepts one or more readers, and returns a reader.
- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items.
and provide function which converts reader to batch reader, frequently used reader creators and reader decorators.
## Data Reader Interface
Indeed, *data reader* doesn't have to be a function that reads and yields data items. It can be any function with no parameter that creates a iterable (anything can be used in `for x in iterable`):
```
iterable = data_reader()
```
Element produced from the iterable should be a **single** entry of data, **not** a mini batch. That entry of data could be a single item, or a tuple of items. Item should be of [supported type](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., numpy 1d array of float32, int, list of int)
An example implementation for single item data reader creator:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
```
An example implementation for multiple item data reader creator:
```python
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
```
## Batch Reader Interface
*batch reader* can be any function with no parameter that creates a iterable (anything can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple.
Here are valid outputs:
```python
# a mini batch of three data items. Each data item consist three columns of data, each of which is 1.
[(1, 1, 1),
(2, 2, 2),
(3, 3, 3)]
# a mini batch of three data items, each data item is a list (single column).
[([1,1,1],),
([2,2,2],),
([3,3,3],),
```
Please note that each item inside the list must be a tuple, below is an invalid output:
```python
# wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],).
# Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1],
# or three column of datas, each of which is 1.
[[1,1,1],
[2,2,2],
[3,3,3]]
```
It's easy to convert from reader to batch reader:
```python
mnist_train = paddle.dataset.mnist.train()
mnist_train_batch_reader = paddle.batch(mnist_train, 128)
```
Also easy to create custom batch reader:
```python
def custom_batch_reader():
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
mnist_random_image_batch_reader = custom_batch_reader
```
## Usage
batch reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`:
```python
# two data layer is created:
image_layer = paddle.layer.data("image", ...)
label_layer = paddle.layer.data("label", ...)
# ...
batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128)
paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...)
```
## Data Reader Decorator
*Data reader decorator* takes a single or multiple data reader, returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use `@` syntax.
Since we have a strict interface for data readers (no parameter, return a single data item). Data reader can be used flexiable via data reader decorators. Following are a few examples:
### Prefetch Data
Since reading data may take time and training can not proceed without data. It is generally a good idea to prefetch data.
Use `paddle.reader.buffered` to prefetch data:
```python
buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100)
```
`buffered_reader` will try to buffer (prefetch) `100` data entries.
### Compose Multiple Data Readers
For example, we want to use a source of real images (reusing mnist dataset), and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661).
We can do:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader_creator_bool(t):
def reader:
while True:
yield t
return reader
true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False)
reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader)
# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry.
# And we don't care second item at this time.
paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...)
```
### Shuffle
Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader that buffers `n` data entries and shuffle them before a data entry is read.
Example:
```python
reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512)
```
## Q & A
### Why reader return only a single entry, but not a mini batch?
Always returning a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2).
We provide function `paddle.batch` to turn (single entry) reader into batch reader.
### Why do we need batch reader, isn't train take reader and batch_size as arguments sufficient?
In most of the case, train taking reader and batch_size as arguments would be sufficent. However sometimes user want to customize order of data entries inside a mini batch. Or even change batch size dynamically.
### Why use a dictionary but not a list to provide mapping?
We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["image", "label"]`) is because that user can easily resue item (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or skip item (e.g., using `{"image_a":0, "label":2}`).
### How to create custom data reader creator
```python
def image_reader_creator(image_path, label_path, n):
def reader():
f = open(image_path)
l = open(label_path)
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time
f.close()
l.close()
return reader
# images_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...)
```
### How is `paddle.train` implemented
An example implementation of paddle.train could be:
```python
def train(batch_reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass):
for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
```
......@@ -286,22 +286,3 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字
.. code-block:: bash
paddle train --use_gpu=true --trainer_count=2 --gpu_id=2
12. 编译源码提示warp-ctc/include/ctc.h 找不到的情况
---------------------------------------------------
目前Paddle使用\ :code:`git submodule`\ 来引用一些第三方模块。简单的\
:code:`git clone`\ 命令不能得到第三方模块的代码。需要使用\:
.. code-block:: bash
git clone --recursive https://github.com/PaddlePaddle/Paddle.git
来获取所有源码。对于已经clone的git版本库,可以在Paddle的源码目录中执行\:
.. code-block:: bash
git submodule init
git submodule update
来获得所有第三方模块。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册