diff --git a/CMakeLists.txt b/CMakeLists.txt index a51552d96a462f297f2711f9aaa70f4fc6c8ba00..9ec632e20690eafdc558e24f160270a89b29ee41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,9 +33,7 @@ if(WIN32) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") endif(WIN32) -if(NOT CMAKE_CROSSCOMPILING) - find_package(CUDA QUIET) -endif(NOT CMAKE_CROSSCOMPILING) +find_package(CUDA QUIET) find_package(Git REQUIRED) find_package(Threads REQUIRED) @@ -49,7 +47,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) -option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) @@ -60,11 +57,9 @@ option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) -option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(GLIDE_INSTALL "Download and install go dependencies " ON) -option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_PSLIB "Compile with pslib support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) @@ -96,37 +91,6 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() -if(ANDROID OR IOS) - if(ANDROID) - if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16") - message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16") - endif() - endif() - - set(WITH_GPU OFF CACHE STRING - "Disable GPU when cross-compiling for Android and iOS" FORCE) - set(WITH_AVX OFF CACHE STRING - "Disable AVX when cross-compiling for Android and iOS" FORCE) - set(WITH_PYTHON OFF CACHE STRING - "Disable PYTHON when cross-compiling for Android and iOS" FORCE) - set(WITH_RDMA OFF CACHE STRING - "Disable RDMA when cross-compiling for Android and iOS" FORCE) - set(WITH_MKL OFF CACHE STRING - "Disable MKL when cross-compiling for Android and iOS" FORCE) - set(WITH_NGRAPH OFF CACHE STRING - "Disable nGraph when cross-compiling for Android and iOS" FORCE) - set(WITH_GOLANG OFF CACHE STRING - "Disable golang when cross-compiling for Android and iOS" FORCE) - - # Compile PaddlePaddle mobile inference library - if (NOT WITH_C_API) - set(WITH_C_API ON CACHE STRING - "Always compile the C_API when cross-compiling for Android and iOS" FORCE) - endif() - set(MOBILE_INFERENCE ON) - add_definitions(-DPADDLE_MOBILE_INFERENCE) -endif() - if (APPLE) set(WITH_MKL OFF CACHE STRING "Disable MKL for building on mac" FORCE) @@ -135,8 +99,6 @@ endif() if (WIN32) set(WITH_DISTRIBUTE OFF CACHE STRING "Disable DISTRIBUTE when compiling for Windows" FORCE) - set(WITH_C_API OFF CACHE STRING - "Disable C_API when compiling for Windows" FORCE) set(WITH_FLUID_ONLY ON CACHE STRING "Enable FLUID_ONLY when compiling for Windows" FORCE) endif() @@ -150,21 +112,7 @@ set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING "A path setting fluid inference shared and static libraries") -if (WITH_C_API AND WITH_PYTHON) - message(WARNING "It is suggest not embedded a python interpreter in Paddle " - "when using C-API. It will give an unpredictable behavior when using a " - "different Python interpreter from compiling.") -endif() - -if (WITH_C_API) - set(WITH_FLUID_ONLY OFF CACHE STRING "Disable install fluid when compile the C_API" FORCE) -endif() - -if(MOBILE_INFERENCE) - set(THIRD_PARTY_BUILD_TYPE MinSizeRel) -else() - set(THIRD_PARTY_BUILD_TYPE Release) -endif() +set(THIRD_PARTY_BUILD_TYPE Release) set(WITH_MKLML ${WITH_MKL}) if (NOT DEFINED WITH_MKLDNN) @@ -193,7 +141,6 @@ include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas include(external/mkldnn) # download, build, install mkldnn include(external/ngraph) # download, build, install nGraph -include(external/swig) # download, build, install swig include(external/boost) # download boost include(external/any) # download libn::any include(external/eigen) # download eigen3 @@ -312,11 +259,6 @@ if(WITH_MKLDNN) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) endif() -if(USE_NNPACK) - include(external/nnpack) - list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) -endif(USE_NNPACK) - set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/Dockerfile.android b/Dockerfile.android deleted file mode 100644 index 48db2efea21a648657e3f490c95429b9a29ede52..0000000000000000000000000000000000000000 --- a/Dockerfile.android +++ /dev/null @@ -1,42 +0,0 @@ -FROM ubuntu:16.04 -MAINTAINER PaddlePaddle Authors - -ARG UBUNTU_MIRROR -RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' - -# ENV variables -ARG ANDROID_ABI -ARG ANDROID_API - -ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"} -ENV ANDROID_API=${ANDROID_API:-21} - -ENV HOME=/root \ - ANDROID_NDK_HOME=/opt/android-ndk-linux \ - ANDROID_TOOLCHAINS_DIR=/opt/toolchains - -RUN apt-get update && \ - apt-get install -y \ - git python-dev python-pip python-numpy \ - wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ - apt-get clean -y - -# git credential to skip password typing -RUN git config --global credential.helper store - -# Fix locales to en_US.UTF-8 -RUN localedef -i en_US -f UTF-8 en_US.UTF-8 - -RUN pip install --upgrade pip==9.0.3 && \ - pip install -U 'protobuf==3.1.0' && \ - pip install -U wheel sphinx && \ - pip install pre-commit - -# Android NDK -RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \ - mkdir -p /opt/android-ndk-tmp && \ - cd /opt/android-ndk-tmp && \ - wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ - unzip -q android-ndk-r14b-linux-x86_64.zip && \ - mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ - rm -rf /opt/android-ndk-tmp diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 24de8d9d7ced5f8111cc5d65f761b7506bde048e..52ac31d1d125afb89fb0ae783fba94ab9a0c5a1a 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -64,24 +64,18 @@ endif() ## Then find the reference-cblas. www.netlib.org/blas/ set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH "Folder contains reference-cblas") -if(NOT CMAKE_CROSSCOMPILING) - set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/include - /usr/include - /usr/include/cblas - ) +set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS + ${REFERENCE_CBLAS_ROOT}/include + /usr/include + /usr/include/cblas +) - set(REFERENCE_CBLAS_LIB_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/lib - /usr/lib - /usr/lib/blas/reference/ - /usr/lib/reference/ - ) -else() - # Disable the finding of reference cblas under host's system path - set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include) - set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib) -endif() +set(REFERENCE_CBLAS_LIB_SEARCH_PATHS + ${REFERENCE_CBLAS_ROOT}/lib + /usr/lib + /usr/lib/blas/reference/ + /usr/lib/reference/ +) if(WITH_SYSTEM_BLAS) find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS @@ -98,10 +92,3 @@ if(WITH_SYSTEM_BLAS) message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() endif() - -if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND) - set(CBLAS_FOUND ON) - set(CBLAS_PROVIDER vecLib) - set(CBLAS_INC_DIR ${VECLIB_INC_DIR}) - add_definitions(-DPADDLE_USE_VECLIB) -endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e3d856fb30d8103f50ebcb6dc16153c8ed2a97a6..076e839120d98d801de4374f2f8338ebd918b88f 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -49,12 +49,10 @@ if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) -if(NOT CMAKE_CROSSCOMPILING) - if(WITH_AVX AND AVX_FOUND) - set(SIMD_FLAG ${AVX_FLAG}) - elseif(SSE3_FOUND) - set(SIMD_FLAG ${SSE3_FLAG}) - endif() +if(WITH_AVX AND AVX_FOUND) + set(SIMD_FLAG ${AVX_FLAG}) +elseif(SSE3_FOUND) + set(SIMD_FLAG ${SSE3_FLAG}) endif() if(WIN32) diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake deleted file mode 100644 index 4cf2be3bdf07ed018c57cd6bc305a3eda9c9a23d..0000000000000000000000000000000000000000 --- a/cmake/cross_compiling/android.cmake +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This is a toolchain file for cross-compiling for Android, and the -# configuration refers to the open-source resposity: -# https://github.com/taka-no-me/android-cmake -# Most of the variables are compatible with that used in -# https://developer.android.com/ndk/guides/cmake.html -# The supported variables are listed belows: -# -# ANDROID_STANDALONE_TOOLCHAIN -# ANDROID_TOOLCHAIN -# ANDROID_ABI -# ANDROID_NATIVE_API_LEVEL -# ANDROID_ARM_MODE -# ANDROID_ARM_NEON -# -# For CMake >= 3.7.0, all the settings will be delivered to CMake system -# variables to let CMake do the cross-compiling configurations itself. -# More detail of cross-compiling settings -# https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html - -IF(NOT ANDROID) - return() -ENDIF() - -# check the exist of android standalone toolchain -IF(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN) - SET(ANDROID_STANDALONE_TOOLCHAIN $ENV{ANDROID_STANDALONE_TOOLCHAIN} - CACHE PATH "Folder holds the standalone toolchain of Android NDK") -ENDIF() -IF(NOT ANDROID_STANDALONE_TOOLCHAIN) - MESSAGE(WARNING "It is recommended to set ANDROID_STANDALONE_TOOLCHAIN to " - "use a standalone toolchain.\n" - "To cross-compile for Android, you need to:\n" - "1. Download an Android NDK from" - " https://developer.android.com/ndk/downloads/index.html\n" - "2. Setup a standalone toolchain" - "https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn\n") -ENDIF() - -IF(NOT DEFINED CMAKE_SYSTEM_VERSION AND ANDROID_NATIVE_API_LEVEL) - IF(ANDROID_NATIVE_API_LEVEL MATCHES "^android-[0-9]+$") - STRING(REPLACE "android-" "" CMAKE_SYSTEM_VERSION "${CMAKE_MATCH_0}") - ELSEIF(ANDROID_NATIVE_API_LEVEL MATCHES "^[0-9]+$") - SET(CMAKE_SYSTEM_VERSION ${ANDROID_NATIVE_API_LEVEL}) - ENDIF() -ENDIF() - -IF(NOT DEFINED ANDROID_TOOLCHAIN) - SET(ANDROID_TOOLCHAIN clang) -ENDIF() - -IF(NOT DEFINED ANDROID_ABI) - SET(ANDROID_ABI "armeabi-v7a") -ENDIF() - -IF(NOT DEFINED ANDROID_ARM_MODE) - SET(ANDROID_ARM_MODE ON) -ENDIF() -IF(ANDROID_ARM_MODE) - SET(ANDROID_ARM_MODE_NAME "arm") -ELSE(ANDROID_ARM_MODE) - SET(ANDROID_ARM_MODE_NAME "thumb") -ENDIF(ANDROID_ARM_MODE) - -IF(NOT DEFINED ANDROID_ARM_NEON) - SET(ANDROID_ARM_NEON ON) -ENDIF() - -IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") - IF("${CMAKE_VERSION}" VERSION_LESS "3.1.0") - SET(CMAKE_SYSTEM_NAME "Linux") - ENDIF() - MESSAGE(WARNING "It is recommended to use CMake >= 3.7.0 (current version: " - "${CMAKE_VERSION}), when cross-compiling for Android.") - - IF(ANDROID_STANDALONE_TOOLCHAIN) - # Use standalone toolchain - SET(CMAKE_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot") - - IF(NOT CMAKE_SYSTEM_VERSION) - SET(ANDROID_STANDALONE_TOOLCHAIN_API "") - SET(ANDROID_API_LEVEL_H_REGEX "^[\t ]*#[\t ]*define[\t ]+__ANDROID_API__[\t ]+([0-9]+)") - FILE(STRINGS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" - ANDROID_API_LEVEL_H_CONTENT REGEX "${ANDROID_API_LEVEL_H_REGEX}") - IF(ANDROID_API_LEVEL_H_CONTENT MATCHES "${ANDROID_API_LEVEL_H_REGEX}") - SET(ANDROID_STANDALONE_TOOLCHAIN_API "${CMAKE_MATCH_1}") - ENDIF() - SET(CMAKE_SYSTEM_VERSION ${ANDROID_STANDALONE_TOOLCHAIN_API}) - ENDIF() - - # Toolchain - SET(ANDROID_TOOLCHAIN_ROOT ${ANDROID_STANDALONE_TOOLCHAIN}) - ELSE(ANDROID_NDK) - # TODO: use android ndk - ENDIF() - - IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi) - IF(ANDROID_ABI STREQUAL "armeabi") - SET(CMAKE_SYSTEM_PROCESSOR armv5te) - SET(ANDROID_CLANG_TRIPLE armv5te-none-linux-androideabi) - ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") - SET(CMAKE_SYSTEM_PROCESSOR armv7-a) - SET(ANDROID_CLANG_TRIPLE armv7-none-linux-androideabi) - ENDIF() - ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") - SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) - SET(CMAKE_SYSTEM_PROCESSOR aarch64) - SET(ANDROID_CLANG_TRIPLE aarch64-none-linux-android) - ELSE() - MESSAGE(FATAL_ERROR "Invalid Android ABI: ${ANDROID_ABI}.") - ENDIF() - SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") - - IF(ANDROID_TOOLCHAIN STREQUAL clang) - SET(ANDROID_C_COMPILER_NAME clang) - SET(ANDROID_CXX_COMPILER_NAME clang++) - SET(CMAKE_C_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) - SET(CMAKE_CXX_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) - ELSEIF(ANDROID_TOOLCHAIN STREQUAL gcc) - SET(ANDROID_C_COMPILER_NAME gcc) - SET(ANDROID_CXX_COMPILER_NAME g++) - ELSE() - MESSAGE(FATAL_ERROR "Invalid Android toolchain: ${ANDROID_TOOLCHAIN}") - ENDIF() - - # C compiler - IF(NOT CMAKE_C_COMPILER) - SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_C_COMPILER_NAME}") - ELSE() - GET_FILENAME_COMPONENT(ANDROID_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) - ENDIF() - IF(NOT EXISTS ${ANDROID_C_COMPILER}) - MESSAGE(FATAL_ERROR "Cannot find C compiler: ${ANDROID_C_COMPILER}") - ENDIF() - - # CXX compiler - IF(NOT CMAKE_CXX_COMPILER) - SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_CXX_COMPILER_NAME}") - ELSE() - GET_FILENAME_COMPONENT(ANDROID_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) - ENDIF() - IF(NOT EXISTS ${ANDROID_CXX_COMPILER}) - MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${ANDROID_CXX_COMPILER}") - ENDIF() - - SET(CMAKE_C_COMPILER ${ANDROID_C_COMPILER} CACHE PATH "C compiler" FORCE) - SET(CMAKE_CXX_COMPILER ${ANDROID_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) - - # Toolchain and ABI specific flags. - SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections") - SET(ANDROID_LINKER_FLAGS "-Wl,--gc-sections") - - IF(ANDROID_ABI STREQUAL "armeabi") - LIST(APPEND ANDROID_COMPILER_FLAGS - -march=armv5te - -mtune=xscale - -msoft-float) - ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") - LIST(APPEND ANDROID_COMPILER_FLAGS - -march=armv7-a - -mfloat-abi=softfp) - IF(ANDROID_ARM_NEON) - LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=neon) - ELSE() - LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=vfpv3-d16) - ENDIF() - LIST(APPEND ANDROID_LINKER_FLAGS -Wl,--fix-cortex-a8) - ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") - LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) - ENDIF() - - IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - IF(ANDROID_ARM_MODE) - LIST(APPEND ANDROID_COMPILER_FLAGS -marm) - ELSE() - LIST(APPEND ANDROID_COMPILER_FLAGS -mthumb) - ENDIF() - IF(ANDROID_TOOLCHAIN STREQUAL clang) - # Disable integrated-as for better compatibility. - LIST(APPEND ANDROID_COMPILER_FLAGS -fno-integrated-as) - ENDIF() - ENDIF() - - IF(ANDROID_TOOLCHAIN STREQUAL clang) - # CMake automatically forwards all compiler flags to the linker, - # and clang doesn't like having -Wa flags being used for linking. - # To prevent CMake from doing this would require meddling with - # the CMAKE__COMPILE_OBJECT rules, which would get quite messy. - LIST(APPEND ANDROID_LINKER_FLAGS -Qunused-arguments) - ENDIF() - - STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") - STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}") - - SET(CMAKE_C_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" - CACHE STRING "C flags") - SET(CMAKE_CXX_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}" - CACHE STRING "CXX flags") - SET(CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" - CACHE STRING "shared linker flags") - - SET(CMAKE_POSITION_INDEPENDENT_CODE TRUE) - SET(CMAKE_EXE_LINKER_FLAGS "-pie -fPIE ${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" - CACHE STRING "executable linker flags") - - MESSAGE(STATUS "Android: Targeting API '${CMAKE_SYSTEM_VERSION}' " - "with architecture '${ANDROID_ARM_MODE_NAME}', " - "ABI '${ANDROID_ABI}', and processor '${CMAKE_SYSTEM_PROCESSOR}'") - MESSAGE(STATUS "System CMAKE_C_FLAGS: " ${CMAKE_C_FLAGS}) - MESSAGE(STATUS "System CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) -ELSE() - IF(ANDROID_STANDALONE_TOOLCHAIN) - SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) - ENDIF() - SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) - IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) - IF(ANDROID_ABI STREQUAL "armeabi-v7a") - SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) - ENDIF() - ENDIF() -ENDIF() diff --git a/cmake/cross_compiling/host.cmake b/cmake/cross_compiling/host.cmake deleted file mode 100644 index f9c6b12136f488a9a6ab77b1ba673b6be75391b5..0000000000000000000000000000000000000000 --- a/cmake/cross_compiling/host.cmake +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# find host C compiler -IF(HOST_C_COMPILER) - SET(HOST_C_COMPILER_NAME ${HOST_C_COMPILER}) -ELSEIF(NOT $ENV{CC} STREQUAL "") - SET(HOST_C_COMPILER_NAME $ENV{CC}) -ELSE() - SET(HOST_C_COMPILER_NAME cc) -ENDIF() - -GET_FILENAME_COMPONENT(HOST_C_COMPILER_PATH ${HOST_C_COMPILER_NAME} PROGRAM) -IF(NOT HOST_C_COMPILER_PATH OR NOT EXISTS ${HOST_C_COMPILER_PATH}) - MESSAGE(FATAL_ERROR "Cannot find host C compiler, set host C compiler:\n" - "\tcmake .. -DHOST_C_COMPILER=...") -ENDIF() - -# find host CXX compiler -IF(HOST_CXX_COMPILER) - SET(HOST_CXX_COMPILER_NAME ${HOST_CXX_COMPILER}) -ELSEIF(NOT $ENV{CXX} STREQUAL "") - SET(HOST_CXX_COMPILER_NAME $ENV{CXX}) -ELSE() - SET(HOST_CXX_COMPILER_NAME c++) -ENDIF() - -GET_FILENAME_COMPONENT(HOST_CXX_COMPILER_PATH ${HOST_CXX_COMPILER_NAME} PROGRAM) -IF(NOT HOST_CXX_COMPILER_PATH OR NOT EXISTS ${HOST_CXX_COMPILER_PATH}) - MESSAGE(FATAL_ERROR "Cannot find host CXX compiler, set host CXX compiler:\n" - "\tcmake .. -DHOST_CXX_COMPILER=...") -ENDIF() - -SET(HOST_C_COMPILER ${HOST_C_COMPILER_PATH} CACHE PATH "Host C compiler") -SET(HOST_CXX_COMPILER ${HOST_CXX_COMPILER_PATH} CACHE PATH "Host CXX compiler") - -MESSAGE(STATUS "Found host C compiler: " ${HOST_C_COMPILER}) -MESSAGE(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER}) diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake deleted file mode 100644 index 10d389ec8ed57ac2b15dd925ef99c8aff4807b05..0000000000000000000000000000000000000000 --- a/cmake/cross_compiling/ios.cmake +++ /dev/null @@ -1,347 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This is a toolchain file for cross-compiling for iOS, and the -# configuration largely refers to public toolchain file: -# https://raw.githubusercontent.com/leetal/ios-cmake/master/ios.toolchain.cmake -# and -# https://github.com/cristeab/ios-cmake -# -# Supports options: -# IOS_PLATFORM = OS (default) or SIMULATOR -# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders -# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. -# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. -# IOS_ARCH -# The archectures wanted to support, such "arm64", "armv7;arm64" -# IOS_DEPLOYMENT_TARGET -# The minimum iOS deployment version, such as "7.0" -# IOS_ENABLE_BITCODE = ON (default) or OFF -# IOS_USE_VECLIB_FOR_BLAS = OFF (default) or ON -# IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder -# By default this location is automatcially chosen based on the IOS_PLATFORM value above. -# If set manually, it will override the default location and force the user of a particular Developer Platform -# IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder -# By default this location is automatcially chosen based on the IOS_DEVELOPER_ROOT value. -# In this case it will always be the most up-to-date SDK found in the IOS_DEVELOPER_ROOT path. -# If set manually, this will force the use of a specific SDK version - -# Macros: -# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) -# A convenience macro for setting xcode specific properties on targets -# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") -# find_host_package (PROGRAM ARGS) -# A macro used to find executable programs on the host system, not within the iOS environment. -# Thanks to the android-cmake project for providing the command - -if(NOT IOS) - return() -endif() - -set(CMAKE_SYSTEM_NAME Darwin) - -# Get the Xcode version being used. -execute_process(COMMAND xcodebuild -version - OUTPUT_VARIABLE XCODE_VERSION - RESULT_VARIABLE XCODE_VERSION_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT ${XCODE_VERSION_RESULT}) - string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}") - string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}") - message(STATUS "Building with Xcode version: ${XCODE_VERSION}") -else() - message(FATAL_ERROR "Cannot execute xcodebuild, please check whether xcode is installed.") -endif() - -# Required as of cmake 2.8.10 -set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) - -# Setup iOS platform unless specified manually with IOS_PLATFORM -if(NOT DEFINED IOS_PLATFORM) - set(IOS_PLATFORM "OS") -endif() -set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") - -# Set the architecture for iOS -if(NOT DEFINED IOS_ARCH) - if(IOS_PLATFORM STREQUAL "OS") - set(IOS_ARCH "armv7;armv7s;arm64") - elseif(IOS_PLATFORM STREQUAL "SIMULATOR") - set(IOS_ARCH "i386;x86_64") - endif() -endif() -set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") - -# Specify minimum iOS deployment version -if(NOT DEFINED IOS_DEPLOYMENT_TARGET) - set(IOS_DEPLOYMENT_TARGET "7.0") -endif() -set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") - -# Whether to enable bitcode -if(NOT DEFINED IOS_ENABLE_BITCODE) - set(IOS_ENABLE_BITCODE ON) -endif() -set(IOS_ENABLE_BITCODE ${IOS_ENABLE_BITCODE} CACHE BOOL "Whether to enable bitcode") - -if(NOT DEFINED IOS_USE_VECLIB_FOR_BLAS) - set(IOS_USE_VECLIB_FOR_BLAS OFF) -endif() -set(IOS_USE_VECLIB_FOR_BLAS ${IOS_UES_VECLIB_FOR_BLAS} CACHE BOOL "Whether to use veclib") - -# Check the platform selection and setup for developer root -if(${IOS_PLATFORM} STREQUAL "OS") - set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") - set(XCODE_IOS_PLATFORM iphoneos) - - # This causes the installers to properly locate the output libraries - set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") -elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR") - set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") - set(XCODE_IOS_PLATFORM iphonesimulator) - - # This causes the installers to properly locate the output libraries - set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") -elseif(${IOS_PLATFORM} STREQUAL "WATCHOS") - set(IOS_PLATFORM_LOCATION "WatchOS.platform") - set(XCODE_IOS_PLATFORM watchos) - - # This causes the installers to properly locate the output libraries - set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") -else(${IOS_PLATFORM} STREQUAL "OS") - message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. Please set to\n" - "\t OS, SIMULATOR, or WATCHOS.") -endif() - -# Check iOS developer toolchain -if(NOT DEFINED IOS_DEVELOPER_ROOT) - # Setup iOS developer location - execute_process(COMMAND xcode-select -print-path - OUTPUT_VARIABLE XCODE_DEVELOPER_DIR - RESULT_VARIABLE XCODE_DEVELOPER_DIR_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Xcode 4.3 changed the installation location, choose the most recent one available - if(${XCODE_VERSION} VERSION_LESS "4.3.0") - set(IOS_DEVELOPER_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") - else() - set(IOS_DEVELOPER_ROOT "${XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") - endif() -endif() -if(EXISTS ${IOS_DEVELOPER_ROOT}) - set(IOS_DEVELOPER_ROOT ${IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") -else() - message(FATAL_ERROR "Invalid IOS_DEVELOPER_ROOT: ${IOS_DEVELOPER_ROOT} does not exist.") -endif() - -# Check iOS SDK -if(NOT DEFINED IOS_SDK_ROOT) - # Find and use the most recent iOS sdk - file(GLOB IOS_SDK_LISTS "${IOS_DEVELOPER_ROOT}/SDKs/*") - if(IOS_SDK_LISTS) - list(SORT IOS_SDK_LISTS) - list(REVERSE IOS_SDK_LISTS) - list(GET IOS_SDK_LISTS 0 IOS_SDK_ROOT) - else(IOS_SDK_LISTS) - message(FATAL_ERROR "No iOS SDK's found in default search path ${IOS_DEVELOPER_ROOT}." - " Please manually set IOS_SDK_ROOT or install the iOS SDK.") - endif(IOS_SDK_LISTS) -endif() -if(EXISTS ${IOS_SDK_ROOT}) - set(IOS_SDK_ROOT ${IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") - message(STATUS "iOS toolchain: ${IOS_SDK_ROOT}") -else() - message(FATAL_ERROR "Invalid IOS_SDK_ROOT: ${IOS_SDK_ROOT} does not exist.") -endif() - -# Set the sysroot default to the most recent SDK -set(CMAKE_OSX_SYSROOT ${IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") - -# Get version of iOS SDK -execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion - OUTPUT_VARIABLE IOS_SDK_VERSION - RESULT_VARIABLE IOS_SDK_VERSION_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -if(${IOS_SDK_VERSION_RESULT}) - string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" IOS_SDK_VERSION "${IOS_SDK_ROOT}") -endif() -if(NOT IOS_SDK_VERSION) - message(WARNING "Cannot get SDK's version.") - set(IOS_SDK_VERSION 1) -endif() -set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION}) - -# Find the C & C++ compilers for the specified SDK. -if(NOT CMAKE_C_COMPILER) - # Default to use clang - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang - OUTPUT_VARIABLE IOS_C_COMPILER - RESULT_VARIABLE IOS_C_COMPILER_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(${IOS_C_COMPILER_RESULT}) - get_filename_component(IOS_C_COMPILER clang PROGRAM) - endif() -else(NOT CMAKE_C_COMPILER) - # User can set it in cmake command - get_filename_component(IOS_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) -endif(NOT CMAKE_C_COMPILER) -if(NOT EXISTS ${IOS_C_COMPILER}) - message(FATAL_ERROR "Cannot find C compiler: ${IOS_C_COMPILER}") -endif() - -if(NOT CMAKE_CXX_COMPILER) - # Default to use clang++ - execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ - OUTPUT_VARIABLE IOS_CXX_COMPILER - RESULT_VARIABLE IOS_CXX_COMPILER_RESULT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(${IOS_CXX_COMPILER_RESULT}) - get_filename_component(IOS_CXX_COMPILER clang++ PROGRAM) - endif() -else(NOT CMAKE_CXX_COMPILER) - # User can set it in cmake command - get_filename_component(IOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) -endif(NOT CMAKE_CXX_COMPILER) -if(NOT EXISTS ${IOS_CXX_COMPILER}) - message(FATAL_ERROR "Cannot find CXX compiler: ${IOS_CXX_COMPILER}") -endif() - -set(CMAKE_C_COMPILER ${IOS_C_COMPILER} CACHE PATH "C compiler" FORCE) -set(CMAKE_CXX_COMPILER ${IOS_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) - -set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") -set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") -set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") -set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") - -# Set iOS specific C/C++ flags -if(IOS_PLATFORM STREQUAL "OS") - if(XCODE_VERSION VERSION_LESS "7.0") - set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-mios-version-min=${IOS_DEPLOYMENT_TARGET}") - else() - # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM. - set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") - endif() -else() - set(XCODE_IOS_FLATFORM_VERSION_FLAGS "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") -endif() - -if(IOS_ENABLE_BITCODE) - set(XCODE_IOS_BITCODE_FLAGS "${IOS_COMPILER_FLAGS} -fembed-bitcode") -else() - set(XCODE_IOS_BITCODE_FLAGS "") -endif() - -set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_FLAGS}") - -# Hidden visibilty is required for cxx on iOS -set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") -set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") - -set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") - -if(IOS_USE_VECLIB_FOR_BLAS) - # Find vecLib for iOS - set(VECLIB_SEARCH_DIRS - ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks - ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks - ) - find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers) - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR) - - if(VECLIB_FOUND) - if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") - set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework vecLib") - message(STATUS "Found standalone vecLib.framework") - else() - set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework Accelerate") - message(STATUS "Found vecLib as part of Accelerate.framework") - endif() - - endif() -endif() - -set(CMAKE_C_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_C_LINK_FLAGS}") -set(CMAKE_CXX_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_CXX_LINK_FLAGS}") - -set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) -if(NOT IOS_ENABLE_BITCODE) - set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") - set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") -else() - set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib") - set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle") -endif() -set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") -set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") -set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") - -# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree -# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache -# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) -# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex -if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) - find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) -endif() - -# Set the find root to the iOS developer roots and to user defined paths -set(CMAKE_FIND_ROOT_PATH ${IOS_DEVELOPER_ROOT} ${IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} - CACHE string "iOS find search path root") - -# default to searching for frameworks first -set(CMAKE_FIND_FRAMEWORK FIRST) - -# set up the default search directories for frameworks -set(CMAKE_SYSTEM_FRAMEWORK_PATH - ${IOS_SDK_ROOT}/System/Library/Frameworks - ${IOS_SDK_ROOT}/System/Library/PrivateFrameworks - ${IOS_SDK_ROOT}/Developer/Library/Frameworks - ) - -# only search the iOS sdks, not the remainder of the host filesystem -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) - -message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', " - "building for '${IOS_PLATFORM}' platform, with architecture '${CMAKE_OSX_ARCHITECTURES}'") -message(STATUS "System CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") -message(STATUS "System CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") - -# Used in ExternalProject command -string(REPLACE ";" "\\$" EXTERNAL_IOS_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") -set(EXTERNAL_OPTIONAL_ARGS - -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} - -DCMAKE_OSX_ARCHITECTURES=${EXTERNAL_IOS_ARCHITECTURES}) - -# This little macro lets you set any XCode specific property -macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) - set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) -endmacro(set_xcode_property) - -# This macro lets you find executable programs on the host system -macro(find_host_package) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) - set(IOS FALSE) - - find_package(${ARGN}) - - set(IOS TRUE) - set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) - set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) - set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -endmacro(find_host_package) diff --git a/cmake/cross_compiling/raspberry_pi.cmake b/cmake/cross_compiling/raspberry_pi.cmake deleted file mode 100644 index 0425b2ae158b265fd6f8423b05190a8002f03f20..0000000000000000000000000000000000000000 --- a/cmake/cross_compiling/raspberry_pi.cmake +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This is a toolchain file for cross-compiling for Raspberry Pi. -# The supported variables are listed belows: -# -# RPI_TOOLCHAIN -# RPI_ARM_NEON -# -# Also you can set CMAKE_C/CXX_COMPILER yourself, through cmake arguments. - -IF(NOT RPI) - return() -ENDIF() - -SET(CMAKE_SYSTEM_NAME Linux) -SET(CMAKE_SYSTEM_VERSION 1) -SET(CMAKE_SYSTEM_PROCESSOR arm) - -# check the exist of raspberry pi toolchain -IF(NOT DEFINED RPI_TOOLCHAIN) - SET(RPI_TOOLCHAIN $ENV{RPI_TOOLCHAIN} - CACHE PATH "Folder holds the toolchain of Raspberr Pi") -ENDIF() -IF(NOT RPI_TOOLCHAIN) - MESSAGE(WARNING "It is recommended to set RPI_TOOLCHAIN to use toolchain.\n" - "To cross-compile for Raspberry Pi, you need to download the tools using:\n" - " git clone https://github.com/raspberrypi/tools\n") -ENDIF() - -IF(NOT DEFINED RPI_ARM_NEON) - SET(RPI_ARM_NEON ON) -ENDIF() - -IF(RPI_TOOLCHAIN) - SET(RPI_TOOLCHAIN_ROOT ${RPI_TOOLCHAIN}) - IF(RPI_TOOLCHAIN_ROOT MATCHES "gcc-linaro-arm-linux-gnueabihf-raspbian(-x64)?$") - # gcc-linaro-arm-linux-gnueabihf-raspbian - # gcc-linaro-arm-linux-gnueabihf-raspbian-x64 - SET(RPI_TOOLCHAIN_NAME arm-linux-gnueabihf) - ENDIF() - SET(RPI_TOOLCHAIN_PREFIX "${RPI_TOOLCHAIN_ROOT}/bin/${RPI_TOOLCHAIN_NAME}-") -ENDIF() - -# C compiler -IF(NOT CMAKE_C_COMPILER) - SET(RPI_C_COMPILER "${RPI_TOOLCHAIN_PREFIX}gcc") -ELSE() - GET_FILENAME_COMPONENT(RPI_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) -ENDIF() -IF(NOT EXISTS ${RPI_C_COMPILER}) - MESSAGE(FATAL_ERROR "Cannot find C compiler: ${RPI_C_COMPILER}") -ENDIF() - -# CXX compiler -IF(NOT CMAKE_CXX_COMPILER) - SET(RPI_CXX_COMPILER "${RPI_TOOLCHAIN_PREFIX}g++") -ELSE() - GET_FILENAME_COMPONENT(RPI_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) -ENDIF() -IF(NOT EXISTS ${RPI_CXX_COMPILER}) - MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${RPI_CXX_COMPILER}") -ENDIF() - -SET(CMAKE_C_COMPILER ${RPI_C_COMPILER} CACHE PATH "C compiler" FORCE) -SET(CMAKE_CXX_COMPILER ${RPI_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) - -IF(RPI_ARM_NEON) - SET(RPI_C_FLAGS "${RPI_C_FLAGS} -mfpu=neon") -ENDIF() - -SET(CMAKE_C_FLAGS "${RPI_C_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") -SET(CMAKE_CXX_FLAGS "${RPI_C_FLAGS} ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 16432ce2b803f6d21bbf47200eda5404269b750f..ea46f6418edf1db70b2a308dd49cf2131cc89d3b 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -63,9 +63,7 @@ function(select_nvcc_arch_flags out_variable) # List of arch names set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual") set(archs_name_default "All") - if(NOT CMAKE_CROSSCOMPILING) - list(APPEND archs_names "Auto") - endif() + list(APPEND archs_names "Auto") # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") diff --git a/cmake/external/cares.cmake b/cmake/external/cares.cmake index a743b572a6c3f6f152d85909500e9dbb35c72a01..52507a6ae4aabe300cf8bf88d0946c45a2c0e79c 100644 --- a/cmake/external/cares.cmake +++ b/cmake/external/cares.cmake @@ -13,7 +13,7 @@ # limitations under the License. # -IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE) +IF(NOT WITH_DISTRIBUTE) return() ENDIF() diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 95ca16f57f2704eaded85aa5f5c0546310fba3a7..f3ca74faea3629ddce053c49ef1e629f230fdc49 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -71,13 +71,3 @@ if (WIN32) set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) endif(HAVE_SHLWAPI) endif (WIN32) - -IF(WITH_C_API) - INSTALL(DIRECTORY ${GFLAGS_INCLUDE_DIR} DESTINATION third_party/gflags) - IF(ANDROID) - INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib/${ANDROID_ABI}) - ELSE() - INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib) - ENDIF() -ENDIF() - diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 8cd0455c16bf84909b735102e7fb1089744c4245..7a6a4523886824a67c82f9ce978de025ddb9c2cd 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -26,14 +26,8 @@ ENDIF(WIN32) INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) -IF(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") - # Using the unofficial glog for Android API < 21 - SET(GLOG_REPOSITORY "https://github.com/Xreki/glog.git") - SET(GLOG_TAG "8a547150548b284382ccb6582408e9140ff2bea8") -ELSE() - SET(GLOG_REPOSITORY "https://github.com/google/glog.git") - SET(GLOG_TAG "v0.3.5") -ENDIF() +SET(GLOG_REPOSITORY "https://github.com/google/glog.git") +SET(GLOG_TAG "v0.3.5") ExternalProject_Add( extern_glog @@ -78,12 +72,3 @@ ADD_DEPENDENCIES(glog extern_glog gflags) LINK_LIBRARIES(glog gflags) LIST(APPEND external_project_dependencies glog) - -IF(WITH_C_API) - INSTALL(DIRECTORY ${GLOG_INCLUDE_DIR} DESTINATION third_party/glog) - IF(ANDROID) - INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib/${ANDROID_ABI}) - ELSE() - INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib) - ENDIF() -ENDIF() diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index fd9835d023c67b76579913f2ec56c2444fea8c15..c5754da59bf2053931be413eb10c481adecbae6b 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -13,7 +13,7 @@ # limitations under the License. # -IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE) +IF(NOT WITH_DISTRIBUTE) return() ENDIF() diff --git a/cmake/external/gzstream.cmake b/cmake/external/gzstream.cmake index 3e36ef7ae205bbf85f345d55456309cc05a58fbd..af7a8bfda6f7db12049203e7c9d54885884d8cf2 100644 --- a/cmake/external/gzstream.cmake +++ b/cmake/external/gzstream.cmake @@ -13,10 +13,6 @@ # limitations under the License. # -IF(MOBILE_INFERENCE) - return() -ENDIF() - include (ExternalProject) # NOTE: gzstream is needed when linking with ctr reader. diff --git a/cmake/external/libxsmm.cmake b/cmake/external/libxsmm.cmake index 530f7ebe2813fb2f00c6b5b4d1f7b2f04fe650b0..39f49d210a20d49a06c120361ecf0a5d07d1af28 100644 --- a/cmake/external/libxsmm.cmake +++ b/cmake/external/libxsmm.cmake @@ -19,8 +19,8 @@ IF(NOT WITH_LIBXSMM) return() ENDIF() -IF(WIN32 OR APPLE OR ANDROID OR IOS) - MESSAGE(WARNING "Windows, Mac or Mobile are not supported with libxsmm in Paddle yet.") +IF(WIN32 OR APPLE) + MESSAGE(WARNING "Windows, Mac are not supported with libxsmm in Paddle yet.") SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM" FORCE) return() ENDIF() diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 03f0dee85911bdaa0312b624114b7f4aef1fb723..6a7be73f09a278ab0fd29c7599a7781df3d29413 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -110,7 +110,3 @@ else(WIN32) endif(WIN32) ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB}) ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn) -IF(WITH_C_API) - INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib) -ENDIF() - diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 43322a257a02c3fd756078db6fe20b582826066a..2caff27357687018f29c1efc55b7b82c9dc3ccf6 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -74,7 +74,3 @@ ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) LIST(APPEND external_project_dependencies mklml) - -IF(WITH_C_API) - INSTALL(FILES ${MKLML_LIB} ${MKLML_IOMP_LIB} DESTINATION lib) -ENDIF() diff --git a/cmake/external/nnpack.cmake b/cmake/external/nnpack.cmake deleted file mode 100644 index d42bcb0f329041462bd8b568052fbb8226d18e4e..0000000000000000000000000000000000000000 --- a/cmake/external/nnpack.cmake +++ /dev/null @@ -1,30 +0,0 @@ -# Find the NNPACK library -# NNPACK_ROOT - where to find NNPACK include and library. -# - -set(NNPACK_FOUND OFF) -set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") -find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) -find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) -find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) -find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) -find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) - -if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) - set(NNPACK_FOUND ON) - INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) - - set(NNPACK_LIBS) - list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) - if (NNPACK_UKERNELS_LIB) - list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) - endif() - if (NNPACK_CPUFEATURES_LIB) - list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) - endif() - if(NOT ANDROID) - list(APPEND NNPACK_LIBS "rt") - endif() -else() - message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") -endif() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index aeb976b840e999a20e8cab11939cbb1f49a27850..b347a592929836a473ac764c0af1153b07d54258 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -40,38 +40,12 @@ IF(NOT ${CBLAS_FOUND}) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") SET(OPENBLAS_COMMIT "v0.2.20") - IF(CMAKE_CROSSCOMPILING) - SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) - GET_FILENAME_COMPONENT(CROSS_SUFFIX ${CMAKE_C_COMPILER} DIRECTORY) - SET(CROSS_SUFFIX ${CROSS_SUFFIX}/) - IF(ANDROID) - IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - # use softfp - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) - ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) - ENDIF() - ELSEIF(IOS) - IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") - SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) - ELSE() - MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. " - "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.") - ENDIF() - ELSEIF(RPI) - # use hardfp - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 USE_THREAD=0) - ENDIF() - ELSE() - IF(APPLE) - SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") - ENDIF() - SET(OPTIONAL_ARGS "") - IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") - SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) - ENDIF() + IF(APPLE) + SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") + ENDIF() + SET(OPTIONAL_ARGS "") + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") + SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) ENDIF() SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) @@ -92,25 +66,6 @@ IF(NOT ${CBLAS_FOUND}) ELSE() ENDIF(NOT WIN32) SET(CBLAS_PROVIDER openblas) - IF(WITH_C_API) - INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas) - # Because libopenblas.a is a symbolic link of another library, thus need to - # install the whole directory. - IF(ANDROID) - SET(TMP_INSTALL_DIR third_party/openblas/lib/${ANDROID_ABI}) - ELSE() - SET(TMP_INSTALL_DIR third_party/openblas/lib) - ENDIF() - INSTALL(CODE "execute_process( - COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib - ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR} - )" - ) - INSTALL(CODE "MESSAGE(STATUS \"Installing: \" - \"${CBLAS_INSTALL_DIR}/lib -> ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}\" - )" - ) - ENDIF() ENDIF(NOT ${CBLAS_FOUND}) MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index e1e619e572b05e83fbe751af2e5391aafc494416..e05b7694ddf1e1652b00f156cde1a2d433c9fc46 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -204,15 +204,6 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") - IF(MOBILE_INFERENCE) - # The reason why the official version is not used is described in - # https://github.com/PaddlePaddle/Paddle/issues/6114 - SET(PROTOBUF_REPO "https://github.com/qingqing01/protobuf.git") - SET(PROTOBUF_TAG "v3.2.0") - IF(NOT BUILD_FOR_HOST) - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-Dprotobuf_BUILD_PROTOC_BINARIES=OFF") - ENDIF() - ENDIF() ExternalProject_Add( ${TARGET_NAME} @@ -240,19 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ) ENDFUNCTION() -IF(NOT MOBILE_INFERENCE) - SET(PROTOBUF_VERSION 3.1) -ELSE() - SET(PROTOBUF_VERSION 3.2) -ENDIF() -IF(CMAKE_CROSSCOMPILING) - build_protobuf(protobuf_host TRUE) - LIST(APPEND external_project_dependencies protobuf_host) - - SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE} - CACHE FILEPATH "protobuf executable." FORCE) -ENDIF() - +SET(PROTOBUF_VERSION 3.1) IF(NOT PROTOBUF_FOUND) build_protobuf(extern_protobuf FALSE) @@ -266,20 +245,7 @@ IF(NOT PROTOBUF_FOUND) SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE) - IF(WITH_C_API) - INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf) - IF(ANDROID) - INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI}) - ELSE() - INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib) - ENDIF() - ENDIF() - - IF(CMAKE_CROSSCOMPILING) - PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) - ELSE() - SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} - CACHE FILEPATH "protobuf executable." FORCE) - PROMPT_PROTOBUF_LIB(extern_protobuf) - ENDIF() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) ENDIF(NOT PROTOBUF_FOUND) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 3b495d78e2c61f90418adbc5746792bc6e49d90b..b4ea268e5a48e29d00b0ec8b957b61a42553ec7e 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -71,7 +71,3 @@ ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) LIST(APPEND external_project_dependencies pslib) - -IF(WITH_C_API) - INSTALL(FILES ${PSLIB_LIB} ${PSLIB_IOMP_LIB} DESTINATION lib) -ENDIF() diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake index 7ff5a8aca187240108164900638f5a376e9fbc93..8b43f2ef5c999fc351543ba958c7cc4b0856625d 100644 --- a/cmake/external/pslib_brpc.cmake +++ b/cmake/external/pslib_brpc.cmake @@ -71,7 +71,3 @@ ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT}) LIST(APPEND external_project_dependencies pslib_brpc) - -IF(WITH_C_API) - INSTALL(FILES ${PSLIB_BRPC_LIB} ${PSLIB_BRPC_IOMP_LIB} DESTINATION lib) -ENDIF() diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake index f9d4cd97400a68e613e3dd5467191a0d42a9942e..27d075336d556528ffaf1929c34753494692f0a0 100644 --- a/cmake/external/snappy.cmake +++ b/cmake/external/snappy.cmake @@ -12,10 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -if(MOBILE_INFERENCE OR RPI) - return() -endif() - include (ExternalProject) # NOTE: snappy is needed when linking with recordio diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake index 1ec79462c14e44f2d0abe6904383ebd91d94d35e..392f186b7ce3821f313ed6fc3dd5a97c2a7adebd 100644 --- a/cmake/external/snappystream.cmake +++ b/cmake/external/snappystream.cmake @@ -12,10 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -IF(MOBILE_INFERENCE OR RPI) - return() -ENDIF() - include (ExternalProject) set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake deleted file mode 100644 index de07703695eb14e76eedd3758d55cb98edd1e02b..0000000000000000000000000000000000000000 --- a/cmake/external/swig.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -IF(NOT WITH_SWIG_PY) - return() -ENDIF() - -FIND_PACKAGE(SWIG) - -IF(NOT SWIG_FOUND) - # build swig as an external project - INCLUDE(ExternalProject) - - SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig) - SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig) - SET(SWIG_TARGET_VERSION "3.0.2") - SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41") - SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f") - - IF(WIN32) - # swig.exe available as pre-built binary on Windows: - ExternalProject_Add(swig - URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip - URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5} - SOURCE_DIR ${SWIG_SOURCES_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - UPDATE_COMMAND "" - ) - SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE) - SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE) - ELSE(WIN32) - # swig uses bison find it by cmake and pass it down - FIND_PACKAGE(BISON) - - # From SWIG configure - ExternalProject_Add(swig - GIT_REPOSITORY https://github.com/swig/swig.git - GIT_TAG rel-3.0.10 - PREFIX ${SWIG_SOURCES_DIR} - CONFIGURE_COMMAND cd && ./autogen.sh && ./configure - --prefix=${SWIG_INSTALL_DIR} --without-pcre - BUILD_COMMAND cd && make - INSTALL_COMMAND cd && make install - UPDATE_COMMAND "" - ) - - SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION}) - SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig) - ENDIF(WIN32) - - LIST(APPEND external_project_dependencies swig) -ENDIF(NOT SWIG_FOUND) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 7b937c93febdfa1d7d5f4c73fc2a5830322688e5..7a25aaf15f2c7f46d99394d82d69bc24e4f5cb2c 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -12,10 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -IF(MOBILE_INFERENCE) - return() -ENDIF() - INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake index c3e1212d8f8358e0148b5e00223414c9696686ee..a0f300c2e8bab9e7402f869eed1b4c2d1c579aab 100644 --- a/cmake/external/xxhash.cmake +++ b/cmake/external/xxhash.cmake @@ -73,12 +73,3 @@ include_directories(${XXHASH_INCLUDE_DIR}) add_dependencies(xxhash extern_xxhash) LIST(APPEND external_project_dependencies xxhash) - -IF(WITH_C_API) - INSTALL(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash) - IF(ANDROID) - INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI}) - ELSE() - INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib) - ENDIF() -ENDIF() diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index d35073753725cd5772de3fc7a23af5ba69a65558..6c8d79c25e6a2655711fe4450e65600c9a584015 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -59,12 +59,3 @@ SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) ADD_DEPENDENCIES(zlib extern_zlib) LIST(APPEND external_project_dependencies zlib) - -IF(WITH_C_API) - INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib) - IF(ANDROID) - INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib/${ANDROID_ABI}) - ELSE() - INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib) - ENDIF() -ENDIF() diff --git a/cmake/flags.cmake b/cmake/flags.cmake index c4472040cef870454c072c1b84a04e1ac592b476..9e6c47f016fe6dfd809c5b2bc88ff59d0a6b2b84 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -156,10 +156,8 @@ set(GPU_COMMON_FLAGS endif(NOT WIN32) if (APPLE) - if(NOT CMAKE_CROSSCOMPILING) - # On Mac OS X build fat binaries with x86_64 architectures by default. - set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) - endif() + # On Mac OS X build fat binaries with x86_64 architectures by default. + set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 set (COMMON_FLAGS -Wno-deprecated-register) endif(APPLE) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 3f1be11d85555671eebb1c2ba3a5642d64d7f2bf..1f4dbe0b49825aef9a236f7ae72c6bea168b2ec5 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -90,11 +90,11 @@ # including binary directory for generated headers. include_directories(${CMAKE_CURRENT_BINARY_DIR}) -if(NOT APPLE AND NOT ANDROID) +if(NOT APPLE) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") -endif(NOT APPLE AND NOT ANDROID) +endif(NOT APPLE) set_property(GLOBAL PROPERTY FLUID_MODULES "") # find all fluid modules is used for paddle fluid static library @@ -655,12 +655,6 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) set(${SRCS}) set(${HDRS}) - if (MOBILE_INFERENCE) - set(EXTRA_FLAG "lite:") - else() - set(EXTRA_FLAG "") - endif() - foreach(FIL ${ARGN}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) get_filename_component(FIL_WE ${FIL} NAME_WE) @@ -677,7 +671,7 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}" COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${CMAKE_CURRENT_SOURCE_DIR} - --cpp_out "${EXTRA_FLAG}${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} DEPENDS ${ABS_FIL} protoc COMMENT "Running C++ protocol buffer compiler on ${FIL}" VERBATIM ) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 3e11d332ff71098adf65e487a39351ae57427e9e..a7dce4dfdb530b13bea9df128694f0946714ccff 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -149,25 +149,23 @@ if (WITH_NGRAPH) ) endif () -if (NOT MOBILE_INFERENCE AND NOT RPI) - set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy") - copy(snappy_lib - SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib - DEPS snappy) +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy") +copy(snappy_lib + SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib + DEPS snappy) - set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream") - copy(snappystream_lib - SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib - DEPS snappystream) +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream") +copy(snappystream_lib + SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib + DEPS snappystream) - set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") - copy(zlib_lib - SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib - DEPS zlib) -endif () +set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") +copy(zlib_lib + SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib + DEPS zlib) # paddle fluid module set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") diff --git a/cmake/system.cmake b/cmake/system.cmake index c91ef91127511da9ac8b9e11349f4a23aaedd613..65db05bebe957d740e391847d980e211b0e9e750 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -74,21 +74,6 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES) MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") -# configuration for cross-compiling -IF(DEFINED CMAKE_SYSTEM_NAME) - INCLUDE(cross_compiling/host) - IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android") - SET(ANDROID TRUE) - INCLUDE(cross_compiling/android) - ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi") - SET(RPI TRUE) - INCLUDE(cross_compiling/raspberry_pi) - ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") - SET(IOS TRUE) - INCLUDE(cross_compiling/ios) - ENDIF() -ENDIF() - # external dependencies log output SET(EXTERNAL_PROJECT_LOG_ARGS LOG_DOWNLOAD 0 # Wrap download in script to log output diff --git a/cmake/util.cmake b/cmake/util.cmake index 0dc33ce385175d1e2dc454d41db467d4b9d9cf9a..02667dbce69ed159193ff88f38069dd08cdcf678 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -53,118 +53,3 @@ function(target_circle_link_libraries TARGET_NAME) "-Wl,--end-group") endif() endfunction() - -# compile_cu_as_cpp -# Make a cu file compiled as C++ -# Arguments: Source files -macro(compile_cu_as_cpp) - foreach(s ${ARGN}) - set_source_files_properties(${s} PROPERTIES LANGUAGE CXX) - set_source_files_properties(${s} PROPERTIES COMPILE_FLAGS "-x c++") - endforeach() -endmacro() - -# link_paddle_exe -# add paddle library for a paddle executable, such as trainer, pserver. -# -# It will handle WITH_PYTHON etc. -function(link_paddle_exe TARGET_NAME) - if(WITH_RDMA) - generate_rdma_links() - endif() - - if(MOBILE_INFERENCE) - target_circle_link_libraries(${TARGET_NAME} - ARCHIVE_START - paddle_gserver - paddle_function - ARCHIVE_END - paddle_math - paddle_utils - paddle_parameter - paddle_proto - paddle_cuda - ${EXTERNAL_LIBS} - ${CMAKE_THREAD_LIBS_INIT} - ${CMAKE_DL_LIBS} - ${RDMA_LD_FLAGS} - ${RDMA_LIBS}) - else() - target_circle_link_libraries(${TARGET_NAME} - ARCHIVE_START - paddle_gserver - paddle_function - ARCHIVE_END - paddle_pserver - paddle_trainer_lib - paddle_network - paddle_math - paddle_utils - paddle_parameter - paddle_proto - paddle_cuda - paddle_optimizer - ${EXTERNAL_LIBS} - ${CMAKE_THREAD_LIBS_INIT} - ${CMAKE_DL_LIBS} - ${RDMA_LD_FLAGS} - ${RDMA_LIBS}) - endif() - - if(ANDROID) - target_link_libraries(${TARGET_NAME} log) - endif(ANDROID) - - if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB) - target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") - endif() - - add_dependencies(${TARGET_NAME} ${external_project_dependencies}) -endfunction() - -# link_paddle_test -# Link a paddle unittest for target -# TARGET_NAME: the unittest target name -# Rest Arguemnts: not used. -function(link_paddle_test TARGET_NAME) - link_paddle_exe(${TARGET_NAME}) - target_link_libraries(${TARGET_NAME} - paddle_test_main - paddle_test_util - ${GTEST_LIBRARIES}) -endfunction() - -# add_unittest_without_exec -# -# create a paddle unittest. not specifically define how to run this unittest. -# TARGET_NAME: the unittest target name, same as executable file name -# Rest Arguments: the source files to compile this unittest. -macro(add_unittest_without_exec TARGET_NAME) - add_executable(${TARGET_NAME} ${ARGN}) - link_paddle_test(${TARGET_NAME}) -endmacro() - -# add_unittest -# create a paddle unittest and just to execute this binary to make unittest. -# -# TARGET_NAME: the unittest target name, same as executable file name -# Rest Arguments: the source files to compile this unittest. -macro(add_unittest TARGET_NAME) - add_unittest_without_exec(${TARGET_NAME} ${ARGN}) - add_test(${TARGET_NAME} ${TARGET_NAME}) -endmacro() - -# add_simple_unittest -# create a paddle unittest with file name. It just compile ${TARGET_NAME}.cpp to -# ${TARGET_NAME} and then execute it. -macro(add_simple_unittest TARGET_NAME) - add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp) -endmacro() - -# Creates C resources file from files in given resource file -function(create_resources res_file output_file) - add_custom_command( - OUTPUT ${output_file} - COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} - DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) -endfunction() diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index 756470c5b0bfb2ce2a90531442211bcb19172dae..ce5731a1f414e8ef6d8af22a3bb17109e82beb87 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/framework/details/sequential_execution_pass.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/framework/ir/graph_to_program_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h" namespace paddle { @@ -243,3 +244,4 @@ USE_PASS(sequential_execution_pass); USE_PASS(all_reduce_deps_pass); USE_PASS(modify_op_lock_and_record_event_pass); USE_PASS(lock_free_optimize_pass); +USE_PASS(graph_to_program_pass); diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 6cf405efe63d2bc284c4650771a747b27bb4a9f6..33ccee6aa0a94b8fd8308214d6144ae832d40bab 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -28,10 +28,14 @@ std::unique_ptr Pass::Apply(std::unique_ptr graph) const { PADDLE_ENFORCE(graph->Has(attr), "Required graph atrribute %s not set.", attr); } + auto* native_graph = graph.get(); auto applied_graph = ApplyImpl(std::move(graph)); // TODO(panyx0718): Add more verifications. PADDLE_ENFORCE(!HasCircle(*applied_graph), "Illegal Pass. Generated graph shouldn't has cycle."); + PADDLE_ENFORCE(applied_graph.get() == native_graph, + "Pass::Apply() cannot delete the passed graph and shouldn't " + "return a new graph.(For the need of pybind11)"); applied_ = true; return applied_graph; } diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc index d8b997cca613f660046106512fc03bf55f9b992d..f97ebecfdd90beade3bef824c04ad7b2763eb036 100644 --- a/paddle/fluid/operators/conv_fusion_op.cu.cc +++ b/paddle/fluid/operators/conv_fusion_op.cu.cc @@ -104,7 +104,9 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { // ------------------- cudnn conv algorithm --------------------- cudnnConvolutionFwdAlgo_t algo; auto handle = dev_ctx.cudnn_handle(); - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); + + Tensor cudnn_workspace; + void* cudnn_workspace_ptr = nullptr; CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType( cudnn_conv_desc, CUDNN_DEFAULT_MATH)); @@ -118,19 +120,24 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { workspace_size_limit, &algo)); VLOG(3) << "cuDNN forward algo " << algo; } else { + cudnn_workspace = + ctx.AllocateTmpTensor( + framework::make_ddim( + {static_cast(workspace_size_limit)}), + dev_ctx); + cudnn_workspace_ptr = static_cast(cudnn_workspace.data()); + auto search_func = [&]() { int returned_algo_count; std::array fwd_perf_stat; - auto cudnn_find_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE( - platform::dynload::cudnnFindConvolutionForwardAlgorithmEx( - handle, cudnn_input_desc, input_data, cudnn_filter_desc, - filter_data, cudnn_conv_desc, cudnn_output_desc, output_data, - kNUM_CUDNN_FWD_ALGS, &returned_algo_count, - fwd_perf_stat.data(), cudnn_workspace, workspace_size_limit)); - }; - workspace_handle.RunFunc(cudnn_find_func, workspace_size_limit); + + CUDNN_ENFORCE(platform::dynload::cudnnFindConvolutionForwardAlgorithmEx( + handle, cudnn_input_desc, input_data, cudnn_filter_desc, + filter_data, cudnn_conv_desc, cudnn_output_desc, output_data, + kNUM_CUDNN_FWD_ALGS, &returned_algo_count, fwd_perf_stat.data(), + cudnn_workspace_ptr, workspace_size_limit)); + VLOG(3) << "Perf result: (algo: stat, time, memory)"; for (int i = 0; i < returned_algo_count; ++i) { const auto& stat = fwd_perf_stat[i]; @@ -181,6 +188,15 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit, "workspace_size to be allocated exceeds the limit"); + if (!cudnn_workspace_ptr) { + cudnn_workspace = + ctx.AllocateTmpTensor( + framework::make_ddim( + {static_cast(workspace_size_in_bytes)}), + dev_ctx); + cudnn_workspace_ptr = static_cast(cudnn_workspace.data()); + } + if ((activation == "identity") && (!residual)) { // Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is // enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib. @@ -188,13 +204,12 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { // cudnnConvolutionForward and cudnnAddTensor // ------------- cudnn conv forward and bias add --------------------- ScalingParamType alpha = 1.0f, beta = 0.0f; - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( - handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc, - filter_data, cudnn_conv_desc, algo, cudnn_workspace, - workspace_size_in_bytes, &beta, cudnn_output_desc, output_data)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); + + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( + handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc, + filter_data, cudnn_conv_desc, algo, cudnn_workspace_ptr, + workspace_size_in_bytes, &beta, cudnn_output_desc, output_data)); + CUDNN_ENFORCE(platform::dynload::cudnnAddTensor( handle, &alpha, cudnn_bias_desc, bias_data, &alpha, cudnn_output_desc, output_data)); @@ -205,15 +220,13 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { // ------------------- cudnn conv+bias+act forward -------------------- ScalingParamType alpha1 = 1.0f; ScalingParamType alpha2 = residual ? 1.0f : 0.0f; - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( - handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc, - filter_data, cudnn_conv_desc, algo, cudnn_workspace, - workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data, - cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc, - output_data)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); + + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( + handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc, + filter_data, cudnn_conv_desc, algo, cudnn_workspace_ptr, + workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data, + cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc, + output_data)); } std::vector channels = ctx.Attr>("split_channels"); if (channels.size()) { diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu.cc b/paddle/fluid/operators/conv_transpose_cudnn_op.cu.cc index f44094ca6b7b7f23f2e7593ad79e4e2a6f0d3070..016cf8448c5e07fdedab8c5e4a7d0ae9e2ded1ee 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu.cc @@ -104,16 +104,18 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel { int output_offset = output->numel() / output->dims()[0] / groups; int filter_offset = filter->numel() / groups; T alpha = 1.0f, beta = 0.0f; - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); + + auto temp_allocation = + platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate( + workspace_size_in_bytes); + void* cudnn_workspace = temp_allocation->ptr(); + for (int g = 0; g < groups; g++) { - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData( - handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g, - cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc, - algo, cudnn_workspace, workspace_size_in_bytes, &beta, - cudnn_output_desc, output_data + output_offset * g)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData( + handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g, + cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc, + algo, cudnn_workspace, workspace_size_in_bytes, &beta, + cudnn_output_desc, output_data + output_offset * g)); } } }; @@ -209,20 +211,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { output_grad->numel() / output_grad->dims()[0] / groups; int filter_offset = filter->numel() / groups; T alpha = 1.0f, beta = 0.0f; - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); + + auto temp_allocation = + platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate( + workspace_size_in_bytes); + void* cudnn_workspace = temp_allocation->ptr(); + if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); // Because beta is zero, it is unnecessary to reset input_grad. for (int g = 0; g < groups; g++) { - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( - handle, &alpha, cudnn_output_desc, - output_grad_data + output_grad_offset * g, cudnn_filter_desc, - filter_data + filter_offset * g, cudnn_conv_desc, data_algo, - cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc, - input_grad_data + input_offset * g)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( + handle, &alpha, cudnn_output_desc, + output_grad_data + output_grad_offset * g, cudnn_filter_desc, + filter_data + filter_offset * g, cudnn_conv_desc, data_algo, + cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc, + input_grad_data + input_offset * g)); } } @@ -232,15 +236,12 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { // Because beta is zero, it is unnecessary to reset filter_grad. // Gradient with respect to the filter for (int g = 0; g < groups; g++) { - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( - handle, &alpha, cudnn_output_desc, - output_grad_data + output_grad_offset * g, cudnn_input_desc, - input_data + input_offset * g, cudnn_conv_desc, filter_algo, - cudnn_workspace, workspace_size_in_bytes, &beta, - cudnn_filter_desc, filter_grad_data + filter_offset * g)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes); + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( + handle, &alpha, cudnn_output_desc, + output_grad_data + output_grad_offset * g, cudnn_input_desc, + input_data + input_offset * g, cudnn_conv_desc, filter_algo, + cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_filter_desc, + filter_grad_data + filter_offset * g)); } } } diff --git a/paddle/fluid/operators/distributed/collective_server_test.cc b/paddle/fluid/operators/distributed/collective_server_test.cc index 5009058422b81d3187f7792bf7bf56db1d03f4d6..90f2f9fd65bf1b8c1edda6a2ebe0ce5288ddcb5d 100644 --- a/paddle/fluid/operators/distributed/collective_server_test.cc +++ b/paddle/fluid/operators/distributed/collective_server_test.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/distributed/collective_client.h" #include "paddle/fluid/operators/distributed/collective_server.h" @@ -57,7 +58,7 @@ std::unique_ptr GenerateVars(platform::Place place) { auto* tensor = slr->mutable_value(); auto* rows = slr->mutable_rows(); - tensor->Resize(framework::make_ddim({20000, 1024})); + tensor->Resize(framework::make_ddim({3, 1024})); tensor->mutable_data(place); paddle::operators::math::set_constant(ctx, tensor, 32.7); @@ -80,6 +81,20 @@ void Gather(const std::vector& vars, std::vector dst; client->Gather(vars, &dst, *dev_ctx, scope); std::cout << "dst:" << distributed::GetSelectedRowsInfo(*dst[0]); + dev_ctx->Wait(); + + ASSERT_EQ(dst[0]->value().dims(), framework::make_ddim({3, 1024})); + ASSERT_EQ(dst[0]->height(), 20000); + ASSERT_EQ(dst[0]->rows().size(), static_cast(3)); + for (int i = 0; i < 3; i++) { + ASSERT_EQ(dst[0]->rows()[i], i); + } + + std::vector vec; + TensorToVector(dst[0]->value(), *dev_ctx, &vec); + for (size_t i = 0; i < 3 * 1024; i++) { + ASSERT_FLOAT_EQ(vec[i], 32.7); + } } TEST(CollectiveServer, GPU) { diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 6e13887866485bd114ebf12f4bdfa8d60fca6d01..c72a966c575d4a63471905b82643e96454f08187 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -216,18 +216,19 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { out_datas.push_back( static_cast(output_data + (oc0 + oc1 + oc2) * h * w)); + auto temp_allocation = + platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate( + workspace_size_in_bytes); + void* cudnn_workspace = temp_allocation->ptr(); + for (int i = 0; i < 4; ++i) { - auto func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( - handle, &alpha, in_desc[i], in_datas[i], filter_desc[i], - static_cast(filters[i]->data()), conv_desc[i], - algo[i], cudnn_workspace, workspace_size_in_bytes, &beta, - out_desc[i], out_datas[i], bias_desc[i], - static_cast(bias[i]->data()), cudnn_act_desc, - out_desc[i], out_datas[i])); - }; - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); - workspace_handle.RunFunc(func, workspace_size_in_bytes); + CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( + handle, &alpha, in_desc[i], in_datas[i], filter_desc[i], + static_cast(filters[i]->data()), conv_desc[i], + algo[i], cudnn_workspace, workspace_size_in_bytes, &beta, out_desc[i], + out_datas[i], bias_desc[i], + static_cast(bias[i]->data()), cudnn_act_desc, + out_desc[i], out_datas[i])); } cudnnTensorDescriptor_t x_desc; diff --git a/paddle/fluid/operators/warpctc_cudnn_op.cu.cc b/paddle/fluid/operators/warpctc_cudnn_op.cu.cc index a764d59410c90535dbda0b3f11e89ae9bf578c04..5e16a209e712a143e1083e171f88002817aef838 100644 --- a/paddle/fluid/operators/warpctc_cudnn_op.cu.cc +++ b/paddle/fluid/operators/warpctc_cudnn_op.cu.cc @@ -144,17 +144,19 @@ class CudnnCTCKernel : public framework::OpKernel { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, &workspace_size)); T* loss_data = loss->mutable_data(loss_dims, ctx.GetPlace()); - - auto workspace_handle = dev_ctx.cudnn_workspace_handle(); - auto cudnn_func = [&](void* cudnn_workspace) { - CUDNN_ENFORCE(platform::dynload::cudnnCTCLoss( - handle, cu_logits_desc, warpctc_logits_data, warpctc_label_data, - warpctc_label_lengths.data(), warpctc_logits_lengths.data(), - loss_data, cu_grad_desc, warpctc_grad_data, - CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, cudnn_workspace, - workspace_size)); - }; - workspace_handle.RunFunc(cudnn_func, workspace_size); + math::SetConstant()( + ctx.template device_context(), loss, static_cast(0)); + + auto temp_allocation = + platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate( + workspace_size); + void* cudnn_workspace = temp_allocation->ptr(); + + CUDNN_ENFORCE(platform::dynload::cudnnCTCLoss( + handle, cu_logits_desc, warpctc_logits_data, warpctc_label_data, + warpctc_label_lengths.data(), warpctc_logits_lengths.data(), loss_data, + cu_grad_desc, warpctc_grad_data, CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, + cu_ctcloss_desc, cudnn_workspace, workspace_size)); } }; diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 67b2386813eeafca5ac627edf79cad7e02445b8c..803ea6b26087884ad79c6bf80238953a012eaddc 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -18,9 +18,9 @@ if(WITH_PYTHON) SRCS ${PYBIND_SRCS} DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) - if(NOT APPLE AND NOT ANDROID AND NOT WIN32) + if(NOT APPLE AND NOT WIN32) target_link_libraries(paddle_pybind rt) - endif(NOT APPLE AND NOT ANDROID AND NOT WIN32) + endif(NOT APPLE AND NOT WIN32) endif(WITH_AMD_GPU) get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) diff --git a/paddle/fluid/pybind/ir.cc b/paddle/fluid/pybind/ir.cc index d32fe58f8695a5c14f276ef038416f5c47f3400f..24059140ab20e24917b93a5f60936b1087797ff9 100644 --- a/paddle/fluid/pybind/ir.cc +++ b/paddle/fluid/pybind/ir.cc @@ -15,7 +15,9 @@ #include "paddle/fluid/pybind/ir.h" #include #include +#include #include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/var_desc.h" @@ -24,6 +26,7 @@ namespace py = pybind11; using paddle::framework::ir::Graph; using paddle::framework::ir::Node; +using paddle::framework::ir::GraphSafeRemoveNodes; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; using paddle::framework::VarDesc; @@ -32,6 +35,7 @@ using pybind11::return_value_policy; namespace paddle { namespace pybind { void BindGraph(py::module *m) { + m->def("graph_safe_remove_nodes", GraphSafeRemoveNodes); py::class_>( *m, "Graph", "The graph is a Directed Acyclic Single Static Assignment Graph, see " @@ -42,6 +46,8 @@ void BindGraph(py::module *m) { .def("get_float", &Graph::Get) .def("get_double", &Graph::Get) .def("get_string", &Graph::Get) + .def("get_program", &Graph::Get) + .def("get_marked_nodes", &Graph::Get>) .def("set", [](Graph &self, const std::string &attr_name, int attr) { return self.Set(attr_name, new int(attr)); }) .def("set", @@ -57,6 +63,17 @@ void BindGraph(py::module *m) { [](Graph &self, const std::string &attr_name, double attr) { return self.Set(attr_name, new double(attr)); }) + .def("set", + [](Graph &self, const std::string &attr_name, + const ProgramDesc &attr) { + return self.Set(attr_name, new ProgramDesc(attr)); + }) + .def("set", + [](Graph &self, const std::string &attr_name, + const std::unordered_set &attr) { + return self.Set(attr_name, + new std::unordered_set(attr)); + }) .def("erase", &Graph::Erase) .def("nodes", &Graph::Nodes, return_value_policy::reference) .def("create_var_node", @@ -85,12 +102,52 @@ void BindNode(py::module *m) { py::class_ node(*m, "Node"); node.def("name", &Node::Name) .def("node_type", &Node::NodeType) - .def("var", &Node::Var) - .def("op", &Node::Op) + .def("var", &Node::Var, return_value_policy::reference) + .def("op", &Node::Op, return_value_policy::reference) .def("id", &Node::id) .def("is_op", &Node::IsOp) .def("is_var", &Node::IsVar) .def("is_ctrl_var", &Node::IsCtrlVar) + .def("inputs_remove", + [](Node &self, int node_id) { + for (auto it = self.inputs.begin(); it != self.inputs.end(); + it++) { + if ((*it)->id() == node_id) { + self.inputs.erase(it); + } + } + }) + .def("inputs_remove", + [](Node &self, Node &node) { + for (auto it = self.inputs.begin(); it != self.inputs.end(); + it++) { + if (*it == &node) { + self.inputs.erase(it); + } + } + }) + .def("inputs_append", + [](Node &self, Node &node) { self.inputs.push_back(&node); }) + .def("outputs_remove", + [](Node &self, int node_id) { + for (auto it = self.outputs.begin(); it != self.outputs.end(); + it++) { + if ((*it)->id() == node_id) { + self.outputs.erase(it); + } + } + }) + .def("outputs_remove", + [](Node &self, Node &node) { + for (auto it = self.outputs.begin(); it != self.outputs.end(); + it++) { + if (*it == &node) { + self.outputs.erase(it); + } + } + }) + .def("outputs_append", + [](Node &self, Node &node) { self.outputs.push_back(&node); }) .def_readwrite("inputs", &Node::inputs) .def_readwrite("outputs", &Node::outputs); diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 4b218fb3a2af0933ea1e87abe20e7e031c32f721..e729be4a95a58510f1e0162af4216feaa400d971 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -228,7 +228,7 @@ void BindBlockDesc(pybind11::module *m) { void BindVarDsec(pybind11::module *m) { pybind11::class_ var_desc(*m, "VarDesc", ""); - var_desc + var_desc.def(pybind11::init()) .def("name", &pd::VarDesc::Name, pybind11::return_value_policy::reference) .def("set_name", &pd::VarDesc::SetName) .def("set_shape", &pd::VarDesc::SetShape) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 3ea38266775ed59b3f3e2a09a6cf5c2a653cdbc0..d84ee06492d52c93e0b774c6e08218aca8a14916 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -788,21 +788,33 @@ All parameter, weight, gradient are variables in Paddle. m.def("disable_profiler", platform::DisableProfiler); m.def("is_profiler_enabled", platform::IsProfileEnabled); m.def("reset_profiler", platform::ResetProfiler); + m.def("get_pass", [](const py::bytes &binary_str) { + std::string pass_type(binary_str); + auto pass = framework::ir::PassRegistry::Instance().Get(pass_type); + return std::shared_ptr(std::move(pass)); + }); py::class_> pass(m, "Pass"); pass.def(py::init()) + .def("has", &ir::Pass::Has) + .def("set", + [](ir::Pass &self, const std::string &attr_name, + const ProgramDesc &attr) { + return self.Set(attr_name, new ProgramDesc(attr)); + }) .def( - "set_str", + "set", [](ir::Pass &self, const std::string &name, const std::string &attr) { self.Set(name, new std::string(attr)); }) - .def("set_int", [](ir::Pass &self, const std::string &name, - int val) { self.Set(name, new int(val)); }) + .def("set", [](ir::Pass &self, const std::string &name, + int val) { self.Set(name, new int(val)); }) + .def("get_program", &ir::Pass::Get) .def("type", &ir::Pass::Type) .def("apply", [](ir::Pass &self, std::shared_ptr graph) { std::unique_ptr origin_graph(graph.get()); auto optim_graph = self.Apply(std::move(origin_graph)); - graph.reset(optim_graph.release()); + optim_graph.release(); }); py::class_> pb( diff --git a/paddle/py_paddle/.gitignore b/paddle/py_paddle/.gitignore deleted file mode 100644 index 80d1f76fbc05627e21e334af55d63a4a534434c6..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -swig_paddle.py -_swig_paddle.so diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py deleted file mode 100644 index 43614b9779d21795f1f274589ea93639e923ce75..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/dataprovider_converter.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.trainer.PyDataProvider2 as dp2 -import collections -import swig_paddle -import numpy -import itertools -from functools import reduce - -__all__ = ['DataProviderConverter'] - - -class IScanner(object): - """ - The scanner will scan Python object two passes, then convert it to Paddle's - argument. - - In the first pass, `pre_scan` will be invoked by every data instance, and - then invoke `finish_pre_scan` to arguments. And the second pass do the same - thing except the functions changed to `scan`, `finish_scan`. - - During the first pass, a scanner may count the shape of input matrix and - allocate memory for this argument. Then fill the data into this argument - in second pass. - """ - - def __init__(self, input_type, pos): - self.input_type = input_type - if not isinstance(self.input_type, dp2.InputType): - raise ValueError("input type should be dataprovider2.InputType") - self.pos = pos - # data_in_gpu is used to indicate whether to create argument on GPU - # or not in GPU mode. Now if using one thread (trainer_count=1), - # trainer uses NeuralNetwork which needs to create argument on GPU - # before calling forward function. So, set data_in_gpu to True. - # Otherwise, trainer uses MultiGradientMachine which will transfer - # data from CPU to GPU in the forward function, set data_in_gpu to - # False in this case. - self.data_in_gpu = swig_paddle.isUsingGpu( - ) and swig_paddle.getTrainerCount() == 1 - - def pre_scan(self, dat): - """ - First pass scan method. During this method, the scanner could count the - data number, and get the total memory size this batch would use. - - :param dat: The python object. - """ - pass - - def finish_pre_scan(self, argument): - """ - Finish first scan pass. Allocate the memory. - - :param argument: Output arguments object. - :type argument: swig_paddle.Arguments - :param dat: Output arguments object. - :type dat: The Python object, numpy.array or List. - :return: - """ - pass - - def scan(self, dat): - """ - Second pass scan method. Copy the data to arguments. - - :param dat: The python object. - """ - pass - - def finish_scan(self, argument): - """ - Finish second pass. Finalize the resources, etc. - - :param argument: Output arguments object. - :type argument: swig_paddle.Arguments - """ - pass - - -class DenseScanner(IScanner): - """ - :type __mat__: numpy.ndarray - """ - - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__mat__ = None - self.__shape__ = None - self.__height__ = 0 - self.__dim__ = 0 - - def pre_scan(self, dat): - self.__height__ += 1 - if self.__shape__ is None: - self.__shape__ = numpy.array(dat).shape - if len(self.__shape__) > 3: - raise ValueError( - "The dimension of input cannot be greater than 3.") - if len(self.__shape__) == 0: - raise ValueError( - "The input should be a vector, please check your input data." - ) - self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) - if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: - raise ValueError( - "The data size must be equal to it in data layer.") - else: - if self.__shape__ != numpy.array(dat).shape: - raise ValueError( - "The data shape must be same in one mini-batch.") - - def finish_pre_scan(self, argument): - self.__mat__ = numpy.ndarray( - shape=(self.__height__, self.__dim__), dtype=numpy.float32) - self.__height__ = 0 - - def scan(self, dat): - # It's better to use NumPy array for speed. - dat = numpy.array(dat) - dat = dat.flatten() - self.__mat__[self.__height__] = dat - self.__height__ += 1 - - def finish_scan(self, argument): - assert isinstance(argument, swig_paddle.Arguments) - if self.__mat__.dtype != numpy.float32: - self.__mat__ = self.__mat__.astype(numpy.float32) - m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, - self.data_in_gpu) - argument.setSlotValue(self.pos, m) - if len(self.__shape__) > 1: - # The last-two dimenstions are the frame height and width. - # For example, the layout is CHW for 3-D feature of image. - # The H and W are the frame height and width. - h, w = self.__shape__[-2:] - argument.setSlotFrameHeight(self.pos, h) - argument.setSlotFrameWidth(self.pos, w) - self.__shape__ = None - - -class SparseBinaryScanner(IScanner): - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__rows__ = [0] - self.__cols__ = [] - self.__height__ = 0 - self.__value__ = [] - - def scan(self, dat): - self.extend_cols(dat) - self.__rows__.append(len(self.__cols__)) - self.__height__ += 1 - - def extend_cols(self, dat): - self.__cols__.extend(dat) - - def finish_scan(self, argument): - assert isinstance(argument, swig_paddle.Arguments) - m = swig_paddle.Matrix.createSparse( - self.__height__, - self.input_type.dim, - len(self.__cols__), - len(self.__value__) == 0, - False, # trans - False) # TODO supoort GPU - assert isinstance(m, swig_paddle.Matrix) - m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__) - argument.setSlotValue(self.pos, m) - - -class SparseFloatScanner(SparseBinaryScanner): - def __init__(self, input_type, pos): - SparseBinaryScanner.__init__(self, input_type, pos) - - def extend_cols(self, dat): - self.__cols__.extend((x[0] for x in dat)) - self.__value__.extend((x[1] for x in dat)) - - -class IndexScanner(IScanner): - def __init__(self, input_type, pos): - IScanner.__init__(self, input_type, pos) - self.__ids__ = None - self.__idx__ = 0 - - def pre_scan(self, dat): - self.__idx__ += 1 - - def finish_pre_scan(self, argument): - self.__ids__ = [0] * self.__idx__ - self.__idx__ = 0 - - def scan(self, dat): - self.__ids__[self.__idx__] = dat - self.__idx__ += 1 - - def finish_scan(self, argument): - ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu) - assert isinstance(argument, swig_paddle.Arguments) - argument.setSlotIds(self.pos, ids) - - -class SequenceScanner(IScanner): - def __init__(self, input_type, pos, inner_scanner, setter): - IScanner.__init__(self, input_type, pos) - self.__seq__ = [0] - self.__inner_scanner__ = inner_scanner - self.__setter__ = setter - - def pre_scan(self, dat): - for each in dat: - self.__inner_scanner__.pre_scan(each) - - def finish_pre_scan(self, argument): - self.__inner_scanner__.finish_pre_scan(argument) - - def scan(self, dat): - self.__seq__.append(self.__seq__[-1] + self.get_size(dat)) - for each in dat: - self.__inner_scanner__.scan(each) - - def finish_scan(self, argument): - seq = swig_paddle.IVector.create(self.__seq__, False) - self.__setter__(argument, self.pos, seq) - self.__inner_scanner__.finish_scan(argument) - - def get_size(self, dat): - if isinstance(self.__inner_scanner__, SequenceScanner): - return sum(self.__inner_scanner__.get_size(item) for item in dat) - else: - return len(dat) - - -class DataProviderConverter(object): - def __init__(self, input_types): - self.input_types = input_types - assert isinstance(self.input_types, collections.Sequence) - for each in self.input_types: - assert isinstance(each, dp2.InputType) - - def convert(self, dat, argument=None): - if argument is None: - argument = swig_paddle.Arguments.createArguments(0) - assert isinstance(argument, swig_paddle.Arguments) - argument.resize(len(self.input_types)) - - scanners = [ - DataProviderConverter.create_scanner(i, each_type) - for i, each_type in enumerate(self.input_types) - ] - - for each_sample in dat: - for each_step, scanner in itertools.izip(each_sample, scanners): - scanner.pre_scan(each_step) - - for scanner in scanners: - scanner.finish_pre_scan(argument) - - for each_sample in dat: - for each_step, scanner in itertools.izip(each_sample, scanners): - scanner.scan(each_step) - - for scanner in scanners: - scanner.finish_scan(argument) - - return argument - - def __call__(self, dat, argument=None): - return self.convert(dat, argument) - - @staticmethod - def create_scanner(i, each): - assert isinstance(each, dp2.InputType) - retv = None - if each.type == dp2.DataType.Dense: - retv = DenseScanner(each, i) - elif each.type == dp2.DataType.Index: - retv = IndexScanner(each, i) - elif each.type == dp2.DataType.SparseNonValue: - retv = SparseBinaryScanner(each, i) - elif each.type == dp2.DataType.SparseValue: - retv = SparseFloatScanner(each, i) - assert retv is not None - - if each.seq_type == dp2.SequenceType.SUB_SEQUENCE: - retv = SequenceScanner( - each, i, retv, - lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq)) - - if each.seq_type in [ - dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE - ]: - retv = SequenceScanner( - each, i, retv, - lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq)) - return retv diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py deleted file mode 100644 index 3ae8dbf964c68c6f01ba30cb3ac69fb6c2f08c30..0000000000000000000000000000000000000000 --- a/paddle/py_paddle/util.py +++ /dev/null @@ -1,578 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Some Useful method for py_paddle. -""" - -import swig_paddle -import os -import paddle.trainer.PyDataProviderWrapper -import paddle.proto.ParameterConfig_pb2 -import paddle.proto.ModelConfig_pb2 -import paddle.proto.TrainerConfig_pb2 -import weakref -import numpy -import struct -import sys -import copy - - -def initializePaddle(*args): - """ - To initialize paddle process. - :param args: Command line options, such as --use_gpu=0, etc. - :return: Nothing. - """ - old_argv = copy.deepcopy(sys.argv) - old_pypath = os.getenv("PYTHONPATH") - pypath = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) - if old_pypath is not None: - pypath = os.pathsep.join([pypath, old_pypath]) - os.putenv("PYTHONPATH", pypath) - args = [""] + list(args) # argv[0] is command name, it is not important. - swig_paddle.__initPaddle__(args) - sys.argv = old_argv - - -def __monkeypatch_init_paddle__(): - swig_paddle.__initPaddle__ = swig_paddle.initPaddle - swig_paddle.initPaddle = initializePaddle - - -class __ParameterCallbackWrapper__(swig_paddle.UpdateCallback): - """ - Wrap the python callable object to paddle.UpdateCallback. - - INTERNAL USE ONLY. - """ - - def __init__(self, callback): - swig_paddle.UpdateCallback.__init__(self) - self.callback = callback - - def apply(self, param): - self.callback(param) - - @staticmethod - def wrap(callback): - """ - Cast the python callable object/paddle.UpdateCallback to - swig_paddle.UpdateCallback.__disown__ - :param callback: callable or swig_paddle.UpdateCallback object. - """ - if isinstance(callback, swig_paddle.UpdateCallback): - return callback.__disown__() - elif isinstance(callback, weakref.ProxyType): - raise RuntimeError("Should not pass __disown__ object") - else: - return __ParameterCallbackWrapper__(callback).__disown__() - - -def __arguments_to_numpy__(i, arg): - assert isinstance(arg, swig_paddle.Arguments) - value = arg.getSlotValue(i) - ids = arg.getSlotIds(i) - prob = arg.getSlotIn(i) - if value is not None: - assert isinstance(value, swig_paddle.Matrix) - value = value.copyToNumpyMat() - if ids is not None: - assert isinstance(ids, swig_paddle.IVector) - ids = ids.copyToNumpyArray() - if prob is not None: - assert isinstance(prob, swig_paddle.Matrix) - prob = prob.copyToNumpyMat() - return {"value": value, "id": ids, "prob": prob} - - -def __monkeypatch_gradient_machine__(): - """ - Add some class methods to GradientMachine. - This method should be only used internally. - """ - swig_paddle.GradientMachine.loadFromConfigFile = \ - staticmethod(loadGradientMachine) - - def __matrix_to_numpy__(m): - if isinstance(m, swig_paddle.Matrix): - return m.copyToNumpyMat() - elif isinstance(m, swig_paddle.IVector): - return m.copyToNumpyArra() - else: - raise RuntimeError("Input arg should be matrix or vecotr.") - - def createFromConfigProto(protoObj, - createMode=swig_paddle.CREATE_MODE_NORMAL, - paramTypes=[ - swig_paddle.PARAMETER_VALUE, - swig_paddle.PARAMETER_GRADIENT, - swig_paddle.PARAMETER_MOMENTUM - ]): - """ - Create Gradient Machine From Proto object. - :param protoObj: Model config - :type protoObj: proto.ModelConfig_pb2.ModelConfig - :param createMode: Create Mode, default is normal. - :type createMode: int - :param paramTypes: the gradient machine parameter type. - :type paramTypes: list of int - :return: paddle.GradientMachine - """ - assert isinstance(protoObj, paddle.proto.ModelConfig) - return swig_paddle.GradientMachine.createByConfigProtoStr( - protoObj.SerializeToString(), createMode, paramTypes) - - swig_paddle.GradientMachine.createFromConfigProto = \ - staticmethod(createFromConfigProto) - - def forwardTest(self, inArgs): - """ - forwardTest. forward gradient machine in test mode, and return a numpy - matrix dict. - - :param inArgs: The input arguments - :type inArgs: paddle.Arguments - :return: A dictionary with keys ['id', 'value'], each value is a - numpy.ndarray. - """ - outArgs = swig_paddle.Arguments.createArguments(0) - self.forward(inArgs, outArgs, swig_paddle.PASS_TEST) - return [ - __arguments_to_numpy__(i, outArgs) - for i in xrange(outArgs.getSlotNum()) - ] - - swig_paddle.GradientMachine.forwardTest = forwardTest - - # Monkey patching backward - swig_paddle.GradientMachine.__backward__ = swig_paddle.GradientMachine.backward - - def backward(self, callback): - """ - GradientMachine Backward - :param callback: a callback which parameter is (paddle.Parameter) or - a paddle.UpdateCallback object. - """ - self.__backward__(__ParameterCallbackWrapper__.wrap(callback)) - - swig_paddle.GradientMachine.backward = backward - - # Monkey patching forwardBackward. - swig_paddle.GradientMachine.__forwardBackward__ = \ - swig_paddle.GradientMachine.forwardBackward - - def forwardBackward(self, - inArgs, - outArgs, - passType, - callback=swig_paddle.UpdateCallback()): - """ - GradientMachine forward backward. - :param inArgs: Input Arguments for GradientMachine. - :type inArgs: paddle.Arguments - :param outArgs: Output Arguments for GradientMachine. - :type outArgs: paddle.Arguments - :param passType: gradient machine's pass type. - :type passType: paddle.PassType - :param callback: a callable object with arguments (paddle.Parameter) or - a paddle.UpdateCallback it will be called when - backward - """ - self.__forwardBackward__(inArgs, outArgs, passType, - __ParameterCallbackWrapper__.wrap(callback)) - - swig_paddle.GradientMachine.forwardBackward = forwardBackward - - def getParameters(self): - return (self.getParameter(i) for i in xrange(self.getParameterSize())) - - swig_paddle.GradientMachine.getParameters = getParameters - - def getNonStaticParameters(self): - return (self.getNonStaticParameter(i) - for i in xrange(self.getNonStaticParameterSize())) - - swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters - - def getLayerOutputs(self, layerNames): - """ - getLayerOutputs. get outputs of layers and return a numpy matrix dict. - :param layerNames: layer names. - :type layerNames: string or list. - """ - if isinstance(layerNames, basestring): - layerNames = [layerNames] - elif not isinstance(layerNames, list): - raise RuntimeError("Input args shuld be string or a sting list.") - - output = dict() - for name in layerNames: - output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name)) - return output - - swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs - - -def loadGradientMachine(config_filename, model_dir=None): - """ - Load a gradient machine from config file name/path. - :param config_filename: The trainer config file name/path - :param model_dir: The model parameter directory. None if same as the - directory of config_filename - :return: GradientMachine with some enhance methods. - :rtype: paddle.GradientMachine - """ - trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile( - config_filename) - assert isinstance(trainer_config, swig_paddle.TrainerConfig) - model_conf = trainer_config.getModelConfig() - network = swig_paddle.GradientMachine.createByModelConfig(model_conf) - assert isinstance(network, swig_paddle.GradientMachine) - if model_dir is None: - model_dir = os.path.dirname(config_filename) - network.loadParameters(model_dir) - return network - - -def loadParameterFile(fn): - """ - Load Paddle Parameter file to numpy.ndarray - :param fn: file name or file like object. - :type fn: str or file like object. - :return: numpy array - :rtype: numpy.ndarray - :raise: paddle.UnsupportError when parameter format is wrong. - """ - if isinstance(fn, str): - with open(fn, 'rb') as f: - return loadParameterFile(f) - elif hasattr(fn, 'read'): # File like object - version, = struct.unpack('i', fn.read(4)) - if version != 0: - raise swig_paddle.UnsupportError() - value_length, = struct.unpack("I", fn.read(4)) - if value_length != 4 and value_length != 8: - raise swig_paddle.UnsupportError() - dtype = 'float32' if value_length == 4 else 'float64' - param_size, = struct.unpack("L", fn.read(8)) - value = numpy.fromfile(fn, dtype) - if len(value) != param_size: - raise swig_paddle.UnsupportError() - return value - else: - raise swig_paddle.UnsupportError() - - -class DataProviderWrapperConverter(object): - """ - A class convert DataFormat from PyDataProvider Wrapper to - py_paddle.paddle.Arguemnts. - """ - - class DenseValueConverter(object): - """ - Internal class - """ - - def __init__(self, header_def): - self.__dim__ = header_def.dim - self.buf = [] - - def append(self, other): - assert len(other) == self.__dim__ - self.buf += other - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createDense(self.buf, - len(self.buf) / self.__dim__, - self.__dim__) - arg.setSlotValue(slot_idx, mat) - - class IdValueConverter(object): - """ - Internal class - """ - - def __init__(self, *args): - self.buf = [] - - def append(self, other): - assert isinstance(other, int) - self.buf.append(other) - - def __call__(self, slot_idx, arg): - arg.setSlotIds(slot_idx, swig_paddle.IVector.create(self.buf)) - - class SparseNonValueConverter(object): - """ - Internal class - """ - - def __init__(self, slot_def): - self.indices = [0] - self.cols = [] - self.dim = slot_def.dim - - def append(self, other): - self.indices.append(self.indices[-1] + len(other)) - self.cols += other - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse( - len(self.indices) - 1, self.dim, len(self.cols), True) - assert isinstance(mat, swig_paddle.Matrix) - mat.sparseCopyFrom(self.indices, self.cols) - self.putIntoArg(slot_idx, arg, mat) - - def putIntoArg(self, slot_idx, arg, mat): - arg.setSlotValue(slot_idx, mat) - - class SparseValueConverter(SparseNonValueConverter): - """ - Internal class - """ - - def __init__(self, slot_def): - super(DataProviderWrapperConverter.SparseValueConverter, - self).__init__(slot_def) - self.values = [] - - def append(self, other): - super(DataProviderWrapperConverter.SparseValueConverter, - self).append(map(lambda x: x[0], other)) - self.values += map(lambda x: x[1], other) - - def __call__(self, slot_idx, arg): - mat = swig_paddle.Matrix.createSparse( - len(self.indices) - 1, self.dim, len(self.cols), False) - assert isinstance(mat, swig_paddle.Matrix) - mat.sparseCopyFrom(self.indices, self.cols, self.values) - self.putIntoArg(slot_idx, arg, mat) - - __SLOT_VALUE_CONVERTER_MAP__ = { - paddle.trainer.PyDataProviderWrapper.DenseSlot: DenseValueConverter, - paddle.trainer.PyDataProviderWrapper.IndexSlot: IdValueConverter, - paddle.trainer.PyDataProviderWrapper.SparseNonValueSlot: - SparseNonValueConverter, - paddle.trainer.PyDataProviderWrapper.SparseValueSlot: - SparseValueConverter - } - - def __init__(self, use_seq, header): - """ - Ctor - :param use_seq: True if use sequence. - :param header: List of slots type, - trainer.PyDataProviderWrapper.SlotType - """ - self.__use_seq__ = use_seq - self.__header__ = header - - def convert(self, wrapper_data, argument=None): - """ - Convert PyDataProviderWrapper format to paddle.Argument - :param wrapper_data: PyDataProviderWrapper yield's data list. - :param argument: The output paddle.Arguments. - If it is not None, it will assign data in this - arguments, else it will create new arguments. - :return: arguments that contains data. - :rtype: paddle.Arguments - """ - if argument is None: - argument = swig_paddle.Arguments.createArguments(0) - assert isinstance(argument, swig_paddle.Arguments) - argument.resize(len(self.__header__)) - - values = map( - lambda x: DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[x.__class__](x), - self.__header__) - - if self.__use_seq__: - seq_dim = [[] for _ in xrange(self.__header__.__len__())] - seq_start_pos = [[0] for _ in xrange(self.__header__.__len__())] - - for each_sample in wrapper_data: - for slot_idx, sequence in enumerate(each_sample): - for raw_data in sequence: - values[slot_idx].append(raw_data) - seq_start_pos[slot_idx].append(seq_start_pos[slot_idx][-1] + - len(sequence)) - seq_dim[slot_idx].append(len(sequence)) - - for slot_idx in xrange(len(self.__header__)): - argument.setSlotSequenceDim( - slot_idx, swig_paddle.IVector.create(seq_dim[slot_idx])) - argument.setSlotSequenceStartPositions( - slot_idx, - swig_paddle.IVector.create(seq_start_pos[slot_idx])) - else: - for each_sample in wrapper_data: - for raw_data, value in zip(each_sample, values): - value.append(raw_data) - - for i, v in enumerate(values): - v(i, argument) - - return argument - - def __call__(self, wrapper_data, argument=None): - """ - Invoke self.convert. See documents in self.convert. - """ - return self.convert(wrapper_data, argument) - - -def __monkey_patch_protobuf_objects__(): - def ParameterConfig_toProto(self): - """ - Convert paddle.ParameterConfig to - proto.ParameterConfig_pb2.ParameterConfig - - :return: proto.ParameterConfig_pb2.ParameterConfig object. - """ - param_conf = paddle.proto.ParameterConfig_pb2.ParameterConfig() - param_conf.ParseFromString(self.toProtoString()) - return param_conf - - swig_paddle.ParameterConfig.toProto = ParameterConfig_toProto - - def OptimizationConfig_toProto(self): - """ - Convert paddle.OptimizationConfig to - proto.TrainerConfig_pb2.OptimizationConfig - - :return: proto.TrainerConfig_pb2.OptimizationConfig - """ - opt_conf = proto.TrainerConfig_pb2.OptimizationConfig() - opt_conf.ParseFromString(self.toProtoString()) - return opt_conf - - swig_paddle.OptimizationConfig.toProto = OptimizationConfig_toProto - - def OptimizationConfig_createFromProto(protoObj): - """ - Create a new paddle.OptimizationConfig from - proto.TrainerConfig_pb2.OptimizationConfig - - :param protoObj: proto.TrainerConfig_pb2.OptimizationConfig - :return: paddle.OptimizationConfig - """ - - assert isinstance(protoObj, paddle.proto.OptimizationConfig) - return swig_paddle.OptimizationConfig.createFromProtoString( - protoObj.SerializeToString()) - - swig_paddle.OptimizationConfig.createFromProto = staticmethod( - OptimizationConfig_createFromProto) - - def TrainerConfig_createFromProto(protoObj): - """ - Create a new paddle.TrainerConfig from - proto.OptimizationConfig - - :param protoObj: proto.TrainerConfig - :return: paddle.TrainerConfig - """ - assert isinstance(protoObj, paddle.proto.TrainerConfig) - return swig_paddle.TrainerConfig.createFromProtoString( - protoObj.SerializeToString()) - - swig_paddle.TrainerConfig.createFromProto = staticmethod( - TrainerConfig_createFromProto) - - -def __monkey_patch_parameter__(): - def getBufs(self): - """ - get all parameter vectors. - NOTE: the return value is a generator. Maybe you need to cast to - list or tuple or something else. - - :return: generator of all parameter vectors. - :rtype: generator - """ - return (self.getBuf(i) for i in xrange(swig_paddle.NUM_PARAMETER_TYPES)) - - swig_paddle.Parameter.getBufs = getBufs - - -def __monkey_patch_trainer__(): - swig_paddle.Trainer.__create__ = staticmethod(swig_paddle.Trainer.create) - - def Trainer_create(config, model=None): - """ - Create a trainer for model with TrainerCOnfig trainer_config - trainer_config.model_config will be ignored when model is supplied. - Trainer.trainOneBatch() and Trainer.forwardOneBatch() can be used only - when trainer_config.data_config is set. - - A typical usage for Trainer is: - .. code-block:: python - trainer = Trainer.create(trainer_config, model) - for p in xrange(num_passes) - while True: - data = get_next_batch(batch_size) - if not data: - break - trainer.trainOneDataBatch(batch_size, data) - trainer.finishTrainPass() - trainer.finishTrain() - - The trainer will take care of logging, model saving, distributed - training, etc. - - :param config: trainer configuration - :type config: paddle.proto.TrainerConfig - :param model: the model to be trained - :type model: swig_paddle.GradientMachine - :return: a trainer - :rtype swig_paddle.Trainer - - """ - assert isinstance(config, paddle.proto.TrainerConfig) - if model is not None: - assert isinstance(model, swig_paddle.GradientMachine) - return swig_paddle.Trainer.__create__( - swig_paddle.TrainerConfig.createFromProto(config), model) - - swig_paddle.Trainer.create = staticmethod(Trainer_create) - - swig_paddle.Trainer.__getForwardOutput__ = \ - swig_paddle.Trainer.getForwardOutput - - def getForwardOutput(self): - """ - Get the netword outputs from the previous trainOneBatch(), - trainOneDataBatch(), testOneDataPatch(), or forwardOneBatch() call. - - :return: list of dictionary with keys ['id', 'value'], each value is a - numpy.ndarray. - """ - outArgs = self.__getForwardOutput__() - return [ - __arguments_to_numpy__(i, outArgs) - for i in xrange(outArgs.getSlotNum()) - ] - - swig_paddle.Trainer.getForwardOutput = getForwardOutput - - -def monkeypatches(): - patches = [ - __monkeypatch_init_paddle__, __monkeypatch_gradient_machine__, - __monkey_patch_protobuf_objects__, __monkey_patch_parameter__, - __monkey_patch_trainer__ - ] - for patch in patches: - patch() diff --git a/paddle/scripts/README.md b/paddle/scripts/README.md index 9e8b135c1bc7fc05d88fe6f3bed17dd3b48e9615..6c608fce3cdad38f3109e563be3ffbe2f73e5390 100644 --- a/paddle/scripts/README.md +++ b/paddle/scripts/README.md @@ -40,7 +40,6 @@ The lastest pre-built build environment images are: | Image | Tag | | ----- | --- | | paddlepaddle/paddle | latest-dev | -| paddlepaddle/paddle | latest-dev-android | ### Start Build @@ -68,8 +67,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_MKL` | ON | Build with [IntelĀ® MKL](https://software.intel.com/en-us/mkl) and [IntelĀ® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. | -| `WITH_SWIG_PY` | ON | Build with SWIG python API support. | -| `WITH_C_API` | OFF | Build capi libraries for inference. | | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | | `WITH_STYLE_CHECK` | ON | Check the code style when building. | | `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu | diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index f58e392684d619e145b07ac61d2adfe175443bb6..cda04451f5e6a9a27c9352626f3f08e886fca04b 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -33,7 +33,6 @@ function print_usage() { ${BLUE}gen_doc_lib${NONE}: generate paddle documents library ${BLUE}html${NONE}: convert C++ source code into HTML ${BLUE}dockerfile${NONE}: generate paddle release dockerfile - ${BLUE}capi${NONE}: generate paddle CAPI package ${BLUE}fluid_inference_lib${NONE}: deploy fluid inference library ${BLUE}check_style${NONE}: run code style check ${BLUE}cicheck${NONE}: run CI tasks @@ -180,9 +179,7 @@ function cmake_gen() { -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} - -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} - -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_TESTING=${WITH_TESTING:-ON} -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake @@ -216,8 +213,6 @@ EOF -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ - -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ - -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ -DWITH_TESTING=${WITH_TESTING:-ON} \ @@ -706,59 +701,43 @@ EOF EOF } -function gen_capi_package() { - if [[ ${WITH_C_API} == "ON" ]]; then - capi_install_prefix=${INSTALL_PREFIX:-/paddle/build}/capi_output - rm -rf $capi_install_prefix - make DESTDIR="$capi_install_prefix" install - cd $capi_install_prefix/ - ls | egrep -v "^Found.*item$" | xargs tar -czf ${PADDLE_ROOT}/build/paddle.tgz - fi -} - function gen_fluid_lib() { mkdir -p ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build - if [[ ${WITH_C_API:-OFF} == "OFF" ]] ; then - cat < 0: + assert self._scope is not None, \ + 'The scope cannot be set None when activation_quantize_type equals to range_abs_max.' + assert self._program_exe is not None, \ + 'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.' + init_program = Program() + for var_desc, initializer in self._need_initialized.iteritems(): + var = Variable(init_program.global_block()) + var._set_desc(var_desc) + initializer(var, init_program.global_block()) + self._program_exe.run(program=init_program, scope=self._scope) + + return graph + + def _create_global_step(self, graph): + if self._weight_quantize_type == 'range_abs_max' or \ + self._activation_quantize_type == 'range_abs_max': + counter_name = '@STEP_COUNTER@' + for node in graph.all_vars(): + if node.name() == counter_name: + self._global_step = node + if self._global_step is None: + global_step_in = graph.create_param_node( + name=counter_name, + var_type=core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + var_dtype=core.VarDesc.VarType.INT64) + self._need_initialized[global_step_in.var()] = \ + Constant(value=0, force_cpu=True) + global_step_out = graph.create_var_node_from_desc( + global_step_in.var()) + increment_op = graph.create_op_node( + op_type='increment', + attrs={'step': 1.0}, + inputs={'X': global_step_in}, + outputs={'Out': global_step_out}) + graph.link_to(global_step_in, increment_op) + graph.link_to(increment_op, global_step_out) + self._global_step = global_step_out + + def _insert_quant_op(self, graph, var_node, quant_bits, quant_type): + """ + Insert fake_quantize_op in the graph. + """ + if quant_type == 'abs_max': + return self._insert_quant_abs_max_op(graph, var_node, quant_bits) + elif quant_type == 'range_abs_max': + return self._insert_quant_range_abs_max_op(graph, var_node, + quant_bits) + + def _insert_quant_abs_max_op(self, graph, var_node, quant_bits): + """ + Insert fake_quantize_abs_max op in the graph. + """ + assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + + quant_var_node = graph.create_var_node( + name=self._quantized_var_name(var_node.name()), + var_type=var_node.var().type(), + shape=var_node.var().shape(), + var_dtype=var_node.var().dtype()) + scale_var_node = graph.create_var_node( + name=self._quantized_scale_name(var_node.name()), + var_type=var_node.var().type(), + shape=var_node.var().shape(), + var_dtype=var_node.var().dtype()) + quant_op_node = graph.create_op_node( + op_type='fake_quantize_abs_max', + attrs={'bit_length': quant_bits}, + inputs={'X': var_node}, + outputs={'Out': quant_var_node, + 'OutScale': scale_var_node}) + graph.link_to(var_node, quant_op_node) + graph.link_to(quant_op_node, quant_var_node) + graph.link_to(quant_op_node, scale_var_node) + return quant_var_node, scale_var_node + + def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits): + """ + Insert fake_quantize_range_abs_max on the graph. + """ + assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + + quant_var_node = graph.create_var_node( + name=self._quantized_var_name(var_node.name()), + var_type=var_node.var().type(), + shape=var_node.var().shape(), + var_dtype=var_node.var().dtype()) + + scale_in_node = graph.create_param_node( + name=self._quantized_scale_name(var_node.name()), + var_type=core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + var_dtype=var_node.var().dtype()) + self._need_initialized[scale_in_node.var()] = Constant(value=0.001) + + scale_out_node = graph.create_var_node_from_desc(scale_in_node.var()) + inputs = {'X': var_node, 'InScale': scale_in_node} + outputs = {'Out': quant_var_node, 'OutScale': scale_out_node} + + if not self._is_test: + # The name of scales_var_node maybe 'scales_0', 'scales_1', etc. + scales_node = graph.create_param_node( + name=unique_name.generate('scales'), + var_type=core.VarDesc.VarType.LOD_TENSOR, + shape=[self._window_size], + var_dtype=var_node.var().dtype()) + self._need_initialized[scales_node.var()] = Constant(value=0) + inputs['Iter'] = self._global_step + outputs['OutScales'] = scales_node + attrs = { + 'window_size': self._window_size, + 'bit_length': quant_bits, + 'is_test': self._is_test + } + quant_op_node = graph.create_op_node( + op_type='fake_quantize_range_abs_max', + attrs=attrs, + inputs=inputs, + outputs=outputs) + + graph.link_to(var_node, quant_op_node) + graph.link_to(scale_in_node, quant_op_node) + graph.link_to(quant_op_node, quant_var_node) + graph.link_to(quant_op_node, scale_out_node) + + if not self._is_test: + graph.link_to(self._global_step, quant_op_node) + graph.link_to(quant_op_node, scales_node) + + return quant_var_node, scale_out_node + + def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits): + """ + Insert fake_dequantize_op in the graph. + """ + assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + + dequant_var_node = graph.create_var_node( + name=self._dequantized_var_name(var_node.name()), + var_type=var_node.var().type(), + shape=var_node.var().shape(), + var_dtype=var_node.var().dtype()) + max_range = (1 << (quant_bits - 1)) - 1 + dequant_op_node = graph.create_op_node( + op_type='fake_dequantize_max_abs', + attrs={'max_range': float(max_range)}, + inputs={'X': var_node, + 'Scale': scale_var_node}, + outputs={'Out': dequant_var_node}) + graph.link_to(var_node, dequant_op_node) + graph.link_to(scale_var_node, dequant_op_node) + graph.link_to(dequant_op_node, dequant_var_node) + return dequant_var_node + + def _quantized_var_name(self, var_name): + """ + Return quantized variable name for the input `var_name`. + """ + return "%s.quantized" % (var_name) + + def _dequantized_var_name(self, var_name): + """ + Return dequantized variable name for the input `var_name`. + """ + return "%s.dequantized" % (var_name) + + def _quantized_scale_name(self, var_name): + """ + Return the scale name of quantized variable for the input `var_name`. + """ + return "%s.scale" % (var_name) diff --git a/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..1bd4b95d6b90b7f16d507061190f0b463f6c4cc5 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py @@ -0,0 +1,175 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +import unittest +import random +import numpy as np +import paddle.fluid as fluid +import six +from paddle.fluid.framework import Program +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.fluid import core + + +def linear_fc(num): + data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = data + for _ in six.moves.xrange(num): + hidden = fluid.layers.fc(hidden, size=128, act='relu') + loss = fluid.layers.cross_entropy(input=hidden, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def residual_block(num): + def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + bias_attr=False): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) + return fluid.layers.batch_norm(input=tmp, act=act) + + data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = data + for _ in six.moves.xrange(num): + conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) + short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) + hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + fc = fluid.layers.fc(input=hidden, size=10) + loss = fluid.layers.cross_entropy(input=fc, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class TestQuantizationTransformPass(unittest.TestCase): + def setUp(self): + self.quantizable_op_and_inputs = { + 'conv2d': ['Input', 'Filter'], + 'depthwise_conv2d': ['Input', 'Filter'], + 'mul': ['X', 'Y'] + } + self.quantizable_grad_op_inputs = { + 'conv2d_grad': ['Input', 'Filter'], + 'depthwise_conv2d_grad': ['Input', 'Filter'], + 'mul_grad': ['X', 'Y'] + } + + def check_program(self, transform_pass, program): + quantized_ops = set() + for block in program.blocks: + for op in block.ops: + # check forward + if op.type in self.quantizable_op_and_inputs: + for arg_name in op.input_arg_names: + self.assertTrue( + arg_name.endswith('.quantized.dequantized')) + quantized_ops.add(arg_name) + + for op in block.ops: + # check backward + if op.type in self.quantizable_grad_op_inputs: + for pname in self.quantizable_grad_op_inputs[op.type]: + arg_name = op.input(pname)[0] + self.assertTrue( + arg_name.endswith('.quantized.dequantized')) + self.assertTrue(arg_name in quantized_ops) + + def linear_fc_quant(self, quant_type): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = linear_fc(3) + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + exe = fluid.Executor(fluid.CPUPlace()) + graph = IrGraph(core.Graph(main.desc), for_test=False) + transform_pass = QuantizationTransformPass( + scope=fluid.global_scope(), + program_exe=exe, + activation_quantize_type=quant_type) + transform_pass.apply(graph) + marked_nodes = set() + for op in graph.all_ops(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes) + program = graph.to_program() + self.check_program(transform_pass, program) + val_graph = IrGraph(core.Graph(program.desc), for_test=False) + val_marked_nodes = set() + for op in val_graph.all_ops(): + if op.name().find('quantize') > -1: + val_marked_nodes.add(op) + val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes) + + def test_linear_fc_quant_abs_max(self): + self.act_quant_op_type = 'fake_quantize_abs_max' + self.linear_fc_quant('abs_max') + + def test_linear_fc_quant_range_abs_max(self): + self.act_quant_op_type = 'fake_quantize_range_abs_max' + self.linear_fc_quant('range_abs_max') + + def residual_block_quant(self, quant_type): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = residual_block(2) + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + exe = fluid.Executor(fluid.CPUPlace()) + graph = IrGraph(core.Graph(main.desc), for_test=False) + transform_pass = QuantizationTransformPass( + scope=fluid.global_scope(), + program_exe=exe, + activation_quantize_type=quant_type) + transform_pass.apply(graph) + marked_nodes = set() + for op in graph.all_ops(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes) + program = graph.to_program() + self.check_program(transform_pass, program) + val_graph = IrGraph(core.Graph(program.desc), for_test=False) + val_marked_nodes = set() + for op in val_graph.all_ops(): + if op.name().find('quantize') > -1: + val_marked_nodes.add(op) + val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes) + + def test_residual_block_abs_max(self): + self.act_quant_op_type = 'fake_quantize_abs_max' + self.residual_block_quant('abs_max') + + def test_residual_block_range_abs_max(self): + self.act_quant_op_type = 'fake_quantize_range_abs_max' + self.residual_block_quant('range_abs_max') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 20aa6054fe4b7d6a1b1454292954237bdfbe045e..d3ff14a17955990bff851e95bd61fbc370ea7aa5 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -305,7 +305,9 @@ class Executor(object): def __init__(self, place): self.place = place self.program_caches = dict() - self.executor = None + p = core.Place() + p.set_place(self.place) + self._default_executor = core.Executor(p) self._closed = False def _get_program_cache(self, program_cache_key): @@ -397,12 +399,13 @@ class Executor(object): >>> ... >>> exe.close() """ - if not self._closed and self.executor: - self.executor.close() + if not self._closed: + self._default_executor.close() self._closed = True def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name, return_numpy): + exe = program._executor if isinstance(feed, dict): feed_tensor_dict = dict() for feed_name in feed: @@ -414,8 +417,7 @@ class Executor(object): feed_tensor.set(feed[feed_name], core.CPUPlace()) feed_tensor_dict[feed_name] = feed_tensor - self.executor.feed_and_split_tensor_into_local_scopes( - feed_tensor_dict) + exe.feed_and_split_tensor_into_local_scopes(feed_tensor_dict) elif isinstance(feed, list) or isinstance(feed, tuple): if len(feed) != len(program._places): raise ValueError( @@ -436,10 +438,10 @@ class Executor(object): tensor = tmp res_dict[feed_name] = tensor res.append(res_dict) - self.executor.feed_tensors_into_local_scopes(res) + exe.feed_tensors_into_local_scopes(res) fetch_var_names = list(map(_to_name_str, fetch_list)) - self.executor.run(fetch_var_names, fetch_var_name) + exe.run(fetch_var_names, fetch_var_name) arr = scope.find_var(fetch_var_name).get_lod_tensor_array() if return_numpy: @@ -511,12 +513,9 @@ class Executor(object): compiled = isinstance(program, compiler.CompiledProgram) # For backward compatibility, run directly. if not compiled: - if not self.executor: - p = core.Place() - p.set_place(self.place) - self.executor = core.Executor(p) return self._run( program, + self._default_executor, feed=feed, fetch_list=fetch_list, feed_var_name=feed_var_name, @@ -526,7 +525,6 @@ class Executor(object): use_program_cache=use_program_cache) program._compile(scope, self.place) - self.executor = program._executor if program._is_data_parallel: return self._run_parallel( program, @@ -536,12 +534,13 @@ class Executor(object): fetch_var_name=fetch_var_name, return_numpy=return_numpy) elif program._is_inference: - return self._run_inference(program, feed) + return self._run_inference(program._executor, feed) else: # TODO(panyx0718): Can compile program to optimize executor # performance. return self._run( program._program, + self._default_executor, feed=feed, fetch_list=fetch_list, feed_var_name=feed_var_name, @@ -550,8 +549,8 @@ class Executor(object): return_numpy=return_numpy, use_program_cache=use_program_cache) - def _run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, - scope, return_numpy, use_program_cache): + def _run(self, program, exe, feed, fetch_list, feed_var_name, + fetch_var_name, scope, return_numpy, use_program_cache): if feed is None: feed = {} @@ -589,11 +588,11 @@ class Executor(object): fetch_var_name=fetch_var_name) self._feed_data(program, feed, feed_var_name, scope) - self.executor.run(program.desc, scope, 0, True, True) + exe.run(program.desc, scope, 0, True, True) outs = self._fetch_data(fetch_list, fetch_var_name, scope) if return_numpy: outs = as_numpy(outs) return outs - def _run_inference(self, program, feed): - return self.executor.run(feed) + def _run_inference(self, exe, feed): + return exe.run(feed) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 77239f2e8ee5243611352d275c0d595670e24185..fc5e471ae3041b0245c873d85efa8b49f3a43678 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -23,6 +23,7 @@ import traceback import six import numpy as np +import subprocess from .. import compat as cpt from .proto import framework_pb2 @@ -1512,6 +1513,154 @@ class Block(object): return ret_var +class IrGraph(object): + """ + IrGraph uses core.Graph as the delegation to accomplish the manipulation. + """ + + def __init__(self, graph, for_test=False): + """ + Construct the IrGraph using core.Graph. + Args: + graph(core.Graph): C++ Graph. + for_test(bool): True for the test graph and false for the train graph. + """ + assert isinstance( + graph, core.Graph), 'graph must be the instance of core.Graph.' + self.graph = graph + self._for_test = for_test + + def is_test(self): + return self._for_test + + def all_parameters(self): + param_nodes = set() + for node in self.graph.nodes(): + if node.is_var() and node.var() is not None and node.var( + ).persistable(): + param_nodes.add(node) + return param_nodes + + def all_vars(self): + return {node for node in self.graph.nodes() if node.is_var()} + + def all_ops(self): + return {node for node in self.graph.nodes() if node.is_op()} + + def create_param_node(self, name, var_type, shape, var_dtype): + var_desc = core.VarDesc(name) + var_desc.set_type(var_type) + var_desc.set_shape(shape) + var_desc.set_dtype(var_dtype) + var_desc.set_persistable(True) + return self.graph.create_var_node(var_desc) + + def create_var_node(self, name, var_type, shape, var_dtype): + var_desc = core.VarDesc(name) + var_desc.set_type(var_type) + var_desc.set_shape(shape) + var_desc.set_dtype(var_dtype) + return self.graph.create_var_node(var_desc) + + def create_var_node_from_desc(self, var_desc): + return self.graph.create_var_node(var_desc) + + def create_op_node(self, op_type, attrs, inputs, outputs): + op_desc = core.OpDesc() + op_desc.set_type(op_type) + for attr, value in attrs.iteritems(): + self._update_desc_attr(op_desc, attr, value) + for input_name, var_nodes in inputs.iteritems(): + if not isinstance(var_nodes, list): + var_nodes = [var_nodes] + op_desc.set_input(input_name, + [var_node.name() for var_node in var_nodes]) + for output_name, var_nodes in outputs.iteritems(): + if not isinstance(var_nodes, list): + var_nodes = [var_nodes] + op_desc.set_output(output_name, + [var_node.name() for var_node in var_nodes]) + return self.graph.create_op_node(op_desc) + + def create_op_node_from_desc(self, op_desc): + return self.graph.create_op_node(op_desc) + + def update_input_link(self, old_input_node, new_input_node, op_node): + assert old_input_node in self.graph.nodes() and new_input_node in self.graph.nodes() and \ + op_node in self.graph.nodes(), 'Th three arguments must be in the graph nodes.' + old_input_node.outputs_remove(op_node) + op_node.inputs_remove(old_input_node) + new_input_node.outputs_append(op_node) + op_node.inputs_append(new_input_node) + op_node.op()._rename_input(old_input_node.name(), new_input_node.name()) + + def link_to(self, node_in, node_out): + assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \ + 'Th two arguments must be in the graph nodes.' + node_in.outputs_append(node_out) + node_out.inputs_append(node_in) + + def safe_remove_nodes(self, remove_nodes): + if not isinstance(remove_nodes, set): + remove_nodes = set(remove_nodes) + core.graph_safe_remove_nodes(self.graph, remove_nodes) + + def draw(self, save_path, name, marked_nodes=None): + def _convert_to_pdf(dot_file_path): + pdf_save_path = os.path.splitext(dot_file_path)[0] + '.pdf' + exited_code = subprocess.call('dot -Tpdf ' + dot_file_path \ + + ' -o ' + pdf_save_path, shell=True) + if exited_code != 0: + print('The dot command is needed for creating pdf files.') + print('The {} is saved as the dot filetype.'.format( + dot_file_path)) + + remove_ctr_vars = set() + ops_num = 0 + for node in self.graph.nodes(): + if node.is_ctrl_var(): + remove_ctr_vars.add(node) + elif node.is_op(): + ops_num += 1 + print('Total ops num = {}.'.format(ops_num)) + self.safe_remove_nodes(remove_ctr_vars) + if marked_nodes is not None: + if not isinstance(marked_nodes, set): + marked_nodes = set(marked_nodes) + marked_nodes = marked_nodes - remove_ctr_vars + if self.graph.has('__graphviz__marked_node__'): + self.graph.erase('__graphviz__marked_node__') + self.graph.set('__graphviz__marked_node__', marked_nodes) + viz_dot_path = os.path.join(save_path, name) + '.dot' + viz_pass = core.get_pass('graph_viz_pass') + viz_pass.set('graph_viz_path', viz_dot_path) + viz_pass.apply(self.graph) + _convert_to_pdf(viz_dot_path) + + def to_program(self): + convert_pass = core.get_pass('graph_to_program_pass') + convert_pass.set('program', Program().desc) + convert_pass.apply(self.graph) + desc = convert_pass.get_program('program') + program = Program._construct_from_desc(desc) + return program + + def _update_desc_attr(self, desc, name, val): + """ + Update the value of desc's attribute by attribute's name. + """ + if isinstance(val, Block): + desc.set_block_attr(name, val.desc) + elif isinstance(val, list) and val and all( + isinstance(v, Block) for v in val): + desc.set_blocks_attr(name, [v.desc for v in val]) + elif isinstance(val, core.BlockDesc) or \ + isinstance(val, core.ProgramDesc): + desc.set_serialized_attr(name, val.serialize_to_string()) + else: + desc._set_attr(name, val) + + class Program(object): """ Python Program. Beneath it is a ProgramDesc, which is used for @@ -1936,6 +2085,23 @@ class Program(object): p._sync_with_cpp() return p + @staticmethod + def _construct_from_desc(desc): + """ + Construct a program from program desc. + + Args: + desc(core.ProgramDesc): The program desc for constructing. + + Returns: + Program: A program. + """ + p = Program() + p.desc = desc + p.blocks = [Block(p, i) for i in six.moves.range(p.desc.num_blocks())] + p._sync_with_cpp() + return p + @property def random_seed(self): """ diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 48cb7789276479d905b1c78f6ae98c5346de14b7..487a29c8391231471737a25d521770ebbca18673 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -220,9 +220,7 @@ def infer(use_cuda, save_dirname=None): np_data = np.array(results[0]) infer_out = infer_outputs[0].data.float_data() for a, b in zip(np_data[0], infer_out): - g_a = float("{:.6g}".format(a)) - g_b = float("{:.6g}".format(b)) - assert g_a == g_b + assert np.isclose(a, b), "a: {}, b: {}".format(a, b) def main(use_cuda, is_sparse, is_parallel): diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 03be3fe84b022a68f358bb9ab9194020aa9f9e26..2789a07beefa02964b78e3e5dccebff9976f64c4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -123,7 +123,7 @@ class TestDistRunnerBase(object): pass_builder = build_stra._finalize_strategy_and_create_passes() mypass = pass_builder.insert_pass( len(pass_builder.all_passes()) - 3, "multi_batch_merge_pass") - mypass.set_int("num_repeats", args.batch_merge_repeat) + mypass.set("num_repeats", args.batch_merge_repeat) if args.update_method == "nccl2": build_stra.num_trainers = len(args.endpoints.split(",")) diff --git a/python/paddle/fluid/tests/unittests/test_pass_builder.py b/python/paddle/fluid/tests/unittests/test_pass_builder.py index 8c9e489e02839e25cfabe14c16bfd91a908bd734..7e1c2572f08598b8b600517e4a82b48ca71cc20d 100644 --- a/python/paddle/fluid/tests/unittests/test_pass_builder.py +++ b/python/paddle/fluid/tests/unittests/test_pass_builder.py @@ -111,7 +111,7 @@ class TestPassBuilder(unittest.TestCase): pass_builder.remove_pass(len(pass_builder.all_passes()) - 1) self.assertEqual(origin_len + 1, len(pass_builder.all_passes())) - viz_pass.set_str("graph_viz_path", "/tmp/test_viz_pass") + viz_pass.set("graph_viz_path", "/tmp/test_viz_pass") self.check_network_convergence( use_cuda=core.is_compiled_with_cuda(), diff --git a/python/setup.py.in b/python/setup.py.in index e00c88b3a6e49bd806bcc6125fa8dc0f69928227..fb4b273a0676fcbcb4402eaf54ddf73d37a2754f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -113,6 +113,7 @@ packages=['paddle', 'paddle.fluid.contrib.slim.core', 'paddle.fluid.contrib.slim.graph', 'paddle.fluid.contrib.slim.prune', + 'paddle.fluid.contrib.slim.quantization', 'paddle.fluid.contrib.utils', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details']