提交 6833ec06 编写于 作者: Q Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into optimize-assign

test=develop
...@@ -33,9 +33,7 @@ if(WIN32) ...@@ -33,9 +33,7 @@ if(WIN32)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
endif(WIN32) endif(WIN32)
if(NOT CMAKE_CROSSCOMPILING) find_package(CUDA QUIET)
find_package(CUDA QUIET)
endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED) find_package(Git REQUIRED)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
...@@ -49,7 +47,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO ...@@ -49,7 +47,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
...@@ -60,11 +57,9 @@ option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) ...@@ -60,11 +57,9 @@ option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF) option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON) option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF) option(WITH_PSLIB "Compile with pslib support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
...@@ -96,37 +91,6 @@ if(NOT CMAKE_BUILD_TYPE) ...@@ -96,37 +91,6 @@ if(NOT CMAKE_BUILD_TYPE)
FORCE) FORCE)
endif() endif()
if(ANDROID OR IOS)
if(ANDROID)
if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16")
message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16")
endif()
endif()
set(WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android and iOS" FORCE)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android and iOS" FORCE)
set(WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS" FORCE)
set(WITH_NGRAPH OFF CACHE STRING
"Disable nGraph when cross-compiling for Android and iOS" FORCE)
set(WITH_GOLANG OFF CACHE STRING
"Disable golang when cross-compiling for Android and iOS" FORCE)
# Compile PaddlePaddle mobile inference library
if (NOT WITH_C_API)
set(WITH_C_API ON CACHE STRING
"Always compile the C_API when cross-compiling for Android and iOS" FORCE)
endif()
set(MOBILE_INFERENCE ON)
add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif()
if (APPLE) if (APPLE)
set(WITH_MKL OFF CACHE STRING set(WITH_MKL OFF CACHE STRING
"Disable MKL for building on mac" FORCE) "Disable MKL for building on mac" FORCE)
...@@ -135,8 +99,6 @@ endif() ...@@ -135,8 +99,6 @@ endif()
if (WIN32) if (WIN32)
set(WITH_DISTRIBUTE OFF CACHE STRING set(WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows" FORCE) "Disable DISTRIBUTE when compiling for Windows" FORCE)
set(WITH_C_API OFF CACHE STRING
"Disable C_API when compiling for Windows" FORCE)
set(WITH_FLUID_ONLY ON CACHE STRING set(WITH_FLUID_ONLY ON CACHE STRING
"Enable FLUID_ONLY when compiling for Windows" FORCE) "Enable FLUID_ONLY when compiling for Windows" FORCE)
endif() endif()
...@@ -150,21 +112,7 @@ set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING ...@@ -150,21 +112,7 @@ set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING
"A path setting fluid inference shared and static libraries") "A path setting fluid inference shared and static libraries")
if (WITH_C_API AND WITH_PYTHON) set(THIRD_PARTY_BUILD_TYPE Release)
message(WARNING "It is suggest not embedded a python interpreter in Paddle "
"when using C-API. It will give an unpredictable behavior when using a "
"different Python interpreter from compiling.")
endif()
if (WITH_C_API)
set(WITH_FLUID_ONLY OFF CACHE STRING "Disable install fluid when compile the C_API" FORCE)
endif()
if(MOBILE_INFERENCE)
set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
else()
set(THIRD_PARTY_BUILD_TYPE Release)
endif()
set(WITH_MKLML ${WITH_MKL}) set(WITH_MKLML ${WITH_MKL})
if (NOT DEFINED WITH_MKLDNN) if (NOT DEFINED WITH_MKLDNN)
...@@ -193,7 +141,6 @@ include(external/python) # download, build, install python ...@@ -193,7 +141,6 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn include(external/mkldnn) # download, build, install mkldnn
include(external/ngraph) # download, build, install nGraph include(external/ngraph) # download, build, install nGraph
include(external/swig) # download, build, install swig
include(external/boost) # download boost include(external/boost) # download boost
include(external/any) # download libn::any include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
...@@ -312,11 +259,6 @@ if(WITH_MKLDNN) ...@@ -312,11 +259,6 @@ if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif() endif()
if(USE_NNPACK)
include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
FROM ubuntu:16.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG ANDROID_ABI
ARG ANDROID_API
ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"}
ENV ANDROID_API=${ANDROID_API:-21}
ENV HOME=/root \
ANDROID_NDK_HOME=/opt/android-ndk-linux \
ANDROID_TOOLCHAINS_DIR=/opt/toolchains
RUN apt-get update && \
apt-get install -y \
git python-dev python-pip python-numpy \
wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
apt-get clean -y
# git credential to skip password typing
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
RUN pip install --upgrade pip==9.0.3 && \
pip install -U 'protobuf==3.1.0' && \
pip install -U wheel sphinx && \
pip install pre-commit
# Android NDK
RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
mkdir -p /opt/android-ndk-tmp && \
cd /opt/android-ndk-tmp && \
wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \
unzip -q android-ndk-r14b-linux-x86_64.zip && \
mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
rm -rf /opt/android-ndk-tmp
...@@ -64,24 +64,18 @@ endif() ...@@ -64,24 +64,18 @@ endif()
## Then find the reference-cblas. www.netlib.org/blas/ ## Then find the reference-cblas. www.netlib.org/blas/
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas") "Folder contains reference-cblas")
if(NOT CMAKE_CROSSCOMPILING) set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include
${REFERENCE_CBLAS_ROOT}/include /usr/include
/usr/include /usr/include/cblas
/usr/include/cblas )
)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/lib ${REFERENCE_CBLAS_ROOT}/lib
/usr/lib /usr/lib
/usr/lib/blas/reference/ /usr/lib/blas/reference/
/usr/lib/reference/ /usr/lib/reference/
) )
else()
# Disable the finding of reference cblas under host's system path
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
endif()
if(WITH_SYSTEM_BLAS) if(WITH_SYSTEM_BLAS)
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
...@@ -98,10 +92,3 @@ if(WITH_SYSTEM_BLAS) ...@@ -98,10 +92,3 @@ if(WITH_SYSTEM_BLAS)
message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
endif() endif()
endif() endif()
if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND)
set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER vecLib)
set(CBLAS_INC_DIR ${VECLIB_INC_DIR})
add_definitions(-DPADDLE_USE_VECLIB)
endif()
...@@ -49,12 +49,10 @@ if(NOT WITH_PROFILER) ...@@ -49,12 +49,10 @@ if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER) endif(NOT WITH_PROFILER)
if(NOT CMAKE_CROSSCOMPILING) if(WITH_AVX AND AVX_FOUND)
if(WITH_AVX AND AVX_FOUND) set(SIMD_FLAG ${AVX_FLAG})
set(SIMD_FLAG ${AVX_FLAG}) elseif(SSE3_FOUND)
elseif(SSE3_FOUND) set(SIMD_FLAG ${SSE3_FLAG})
set(SIMD_FLAG ${SSE3_FLAG})
endif()
endif() endif()
if(WIN32) if(WIN32)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a toolchain file for cross-compiling for Android, and the
# configuration refers to the open-source resposity:
# https://github.com/taka-no-me/android-cmake
# Most of the variables are compatible with that used in
# https://developer.android.com/ndk/guides/cmake.html
# The supported variables are listed belows:
#
# ANDROID_STANDALONE_TOOLCHAIN
# ANDROID_TOOLCHAIN
# ANDROID_ABI
# ANDROID_NATIVE_API_LEVEL
# ANDROID_ARM_MODE
# ANDROID_ARM_NEON
#
# For CMake >= 3.7.0, all the settings will be delivered to CMake system
# variables to let CMake do the cross-compiling configurations itself.
# More detail of cross-compiling settings
# https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html
IF(NOT ANDROID)
return()
ENDIF()
# check the exist of android standalone toolchain
IF(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN)
SET(ANDROID_STANDALONE_TOOLCHAIN $ENV{ANDROID_STANDALONE_TOOLCHAIN}
CACHE PATH "Folder holds the standalone toolchain of Android NDK")
ENDIF()
IF(NOT ANDROID_STANDALONE_TOOLCHAIN)
MESSAGE(WARNING "It is recommended to set ANDROID_STANDALONE_TOOLCHAIN to "
"use a standalone toolchain.\n"
"To cross-compile for Android, you need to:\n"
"1. Download an Android NDK from"
" https://developer.android.com/ndk/downloads/index.html\n"
"2. Setup a standalone toolchain"
"https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn\n")
ENDIF()
IF(NOT DEFINED CMAKE_SYSTEM_VERSION AND ANDROID_NATIVE_API_LEVEL)
IF(ANDROID_NATIVE_API_LEVEL MATCHES "^android-[0-9]+$")
STRING(REPLACE "android-" "" CMAKE_SYSTEM_VERSION "${CMAKE_MATCH_0}")
ELSEIF(ANDROID_NATIVE_API_LEVEL MATCHES "^[0-9]+$")
SET(CMAKE_SYSTEM_VERSION ${ANDROID_NATIVE_API_LEVEL})
ENDIF()
ENDIF()
IF(NOT DEFINED ANDROID_TOOLCHAIN)
SET(ANDROID_TOOLCHAIN clang)
ENDIF()
IF(NOT DEFINED ANDROID_ABI)
SET(ANDROID_ABI "armeabi-v7a")
ENDIF()
IF(NOT DEFINED ANDROID_ARM_MODE)
SET(ANDROID_ARM_MODE ON)
ENDIF()
IF(ANDROID_ARM_MODE)
SET(ANDROID_ARM_MODE_NAME "arm")
ELSE(ANDROID_ARM_MODE)
SET(ANDROID_ARM_MODE_NAME "thumb")
ENDIF(ANDROID_ARM_MODE)
IF(NOT DEFINED ANDROID_ARM_NEON)
SET(ANDROID_ARM_NEON ON)
ENDIF()
IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
IF("${CMAKE_VERSION}" VERSION_LESS "3.1.0")
SET(CMAKE_SYSTEM_NAME "Linux")
ENDIF()
MESSAGE(WARNING "It is recommended to use CMake >= 3.7.0 (current version: "
"${CMAKE_VERSION}), when cross-compiling for Android.")
IF(ANDROID_STANDALONE_TOOLCHAIN)
# Use standalone toolchain
SET(CMAKE_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot")
IF(NOT CMAKE_SYSTEM_VERSION)
SET(ANDROID_STANDALONE_TOOLCHAIN_API "")
SET(ANDROID_API_LEVEL_H_REGEX "^[\t ]*#[\t ]*define[\t ]+__ANDROID_API__[\t ]+([0-9]+)")
FILE(STRINGS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h"
ANDROID_API_LEVEL_H_CONTENT REGEX "${ANDROID_API_LEVEL_H_REGEX}")
IF(ANDROID_API_LEVEL_H_CONTENT MATCHES "${ANDROID_API_LEVEL_H_REGEX}")
SET(ANDROID_STANDALONE_TOOLCHAIN_API "${CMAKE_MATCH_1}")
ENDIF()
SET(CMAKE_SYSTEM_VERSION ${ANDROID_STANDALONE_TOOLCHAIN_API})
ENDIF()
# Toolchain
SET(ANDROID_TOOLCHAIN_ROOT ${ANDROID_STANDALONE_TOOLCHAIN})
ELSE(ANDROID_NDK)
# TODO: use android ndk
ENDIF()
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi)
IF(ANDROID_ABI STREQUAL "armeabi")
SET(CMAKE_SYSTEM_PROCESSOR armv5te)
SET(ANDROID_CLANG_TRIPLE armv5te-none-linux-androideabi)
ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a")
SET(CMAKE_SYSTEM_PROCESSOR armv7-a)
SET(ANDROID_CLANG_TRIPLE armv7-none-linux-androideabi)
ENDIF()
ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a")
SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android)
SET(CMAKE_SYSTEM_PROCESSOR aarch64)
SET(ANDROID_CLANG_TRIPLE aarch64-none-linux-android)
ELSE()
MESSAGE(FATAL_ERROR "Invalid Android ABI: ${ANDROID_ABI}.")
ENDIF()
SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-")
IF(ANDROID_TOOLCHAIN STREQUAL clang)
SET(ANDROID_C_COMPILER_NAME clang)
SET(ANDROID_CXX_COMPILER_NAME clang++)
SET(CMAKE_C_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE})
SET(CMAKE_CXX_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE})
ELSEIF(ANDROID_TOOLCHAIN STREQUAL gcc)
SET(ANDROID_C_COMPILER_NAME gcc)
SET(ANDROID_CXX_COMPILER_NAME g++)
ELSE()
MESSAGE(FATAL_ERROR "Invalid Android toolchain: ${ANDROID_TOOLCHAIN}")
ENDIF()
# C compiler
IF(NOT CMAKE_C_COMPILER)
SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_C_COMPILER_NAME}")
ELSE()
GET_FILENAME_COMPONENT(ANDROID_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM)
ENDIF()
IF(NOT EXISTS ${ANDROID_C_COMPILER})
MESSAGE(FATAL_ERROR "Cannot find C compiler: ${ANDROID_C_COMPILER}")
ENDIF()
# CXX compiler
IF(NOT CMAKE_CXX_COMPILER)
SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_CXX_COMPILER_NAME}")
ELSE()
GET_FILENAME_COMPONENT(ANDROID_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM)
ENDIF()
IF(NOT EXISTS ${ANDROID_CXX_COMPILER})
MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${ANDROID_CXX_COMPILER}")
ENDIF()
SET(CMAKE_C_COMPILER ${ANDROID_C_COMPILER} CACHE PATH "C compiler" FORCE)
SET(CMAKE_CXX_COMPILER ${ANDROID_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE)
# Toolchain and ABI specific flags.
SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections")
SET(ANDROID_LINKER_FLAGS "-Wl,--gc-sections")
IF(ANDROID_ABI STREQUAL "armeabi")
LIST(APPEND ANDROID_COMPILER_FLAGS
-march=armv5te
-mtune=xscale
-msoft-float)
ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a")
LIST(APPEND ANDROID_COMPILER_FLAGS
-march=armv7-a
-mfloat-abi=softfp)
IF(ANDROID_ARM_NEON)
LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=neon)
ELSE()
LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=vfpv3-d16)
ENDIF()
LIST(APPEND ANDROID_LINKER_FLAGS -Wl,--fix-cortex-a8)
ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a")
LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a)
ENDIF()
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
IF(ANDROID_ARM_MODE)
LIST(APPEND ANDROID_COMPILER_FLAGS -marm)
ELSE()
LIST(APPEND ANDROID_COMPILER_FLAGS -mthumb)
ENDIF()
IF(ANDROID_TOOLCHAIN STREQUAL clang)
# Disable integrated-as for better compatibility.
LIST(APPEND ANDROID_COMPILER_FLAGS -fno-integrated-as)
ENDIF()
ENDIF()
IF(ANDROID_TOOLCHAIN STREQUAL clang)
# CMake automatically forwards all compiler flags to the linker,
# and clang doesn't like having -Wa flags being used for linking.
# To prevent CMake from doing this would require meddling with
# the CMAKE_<LANG>_COMPILE_OBJECT rules, which would get quite messy.
LIST(APPEND ANDROID_LINKER_FLAGS -Qunused-arguments)
ENDIF()
STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}")
STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}")
SET(CMAKE_C_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_C_FLAGS}"
CACHE STRING "C flags")
SET(CMAKE_CXX_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}"
CACHE STRING "CXX flags")
SET(CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}"
CACHE STRING "shared linker flags")
SET(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
SET(CMAKE_EXE_LINKER_FLAGS "-pie -fPIE ${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}"
CACHE STRING "executable linker flags")
MESSAGE(STATUS "Android: Targeting API '${CMAKE_SYSTEM_VERSION}' "
"with architecture '${ANDROID_ARM_MODE_NAME}', "
"ABI '${ANDROID_ABI}', and processor '${CMAKE_SYSTEM_PROCESSOR}'")
MESSAGE(STATUS "System CMAKE_C_FLAGS: " ${CMAKE_C_FLAGS})
MESSAGE(STATUS "System CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS})
ELSE()
IF(ANDROID_STANDALONE_TOOLCHAIN)
SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN})
ENDIF()
SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI})
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE})
IF(ANDROID_ABI STREQUAL "armeabi-v7a")
SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON})
ENDIF()
ENDIF()
ENDIF()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# find host C compiler
IF(HOST_C_COMPILER)
SET(HOST_C_COMPILER_NAME ${HOST_C_COMPILER})
ELSEIF(NOT $ENV{CC} STREQUAL "")
SET(HOST_C_COMPILER_NAME $ENV{CC})
ELSE()
SET(HOST_C_COMPILER_NAME cc)
ENDIF()
GET_FILENAME_COMPONENT(HOST_C_COMPILER_PATH ${HOST_C_COMPILER_NAME} PROGRAM)
IF(NOT HOST_C_COMPILER_PATH OR NOT EXISTS ${HOST_C_COMPILER_PATH})
MESSAGE(FATAL_ERROR "Cannot find host C compiler, set host C compiler:\n"
"\tcmake .. -DHOST_C_COMPILER=...")
ENDIF()
# find host CXX compiler
IF(HOST_CXX_COMPILER)
SET(HOST_CXX_COMPILER_NAME ${HOST_CXX_COMPILER})
ELSEIF(NOT $ENV{CXX} STREQUAL "")
SET(HOST_CXX_COMPILER_NAME $ENV{CXX})
ELSE()
SET(HOST_CXX_COMPILER_NAME c++)
ENDIF()
GET_FILENAME_COMPONENT(HOST_CXX_COMPILER_PATH ${HOST_CXX_COMPILER_NAME} PROGRAM)
IF(NOT HOST_CXX_COMPILER_PATH OR NOT EXISTS ${HOST_CXX_COMPILER_PATH})
MESSAGE(FATAL_ERROR "Cannot find host CXX compiler, set host CXX compiler:\n"
"\tcmake .. -DHOST_CXX_COMPILER=...")
ENDIF()
SET(HOST_C_COMPILER ${HOST_C_COMPILER_PATH} CACHE PATH "Host C compiler")
SET(HOST_CXX_COMPILER ${HOST_CXX_COMPILER_PATH} CACHE PATH "Host CXX compiler")
MESSAGE(STATUS "Found host C compiler: " ${HOST_C_COMPILER})
MESSAGE(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a toolchain file for cross-compiling for iOS, and the
# configuration largely refers to public toolchain file:
# https://raw.githubusercontent.com/leetal/ios-cmake/master/ios.toolchain.cmake
# and
# https://github.com/cristeab/ios-cmake
#
# Supports options:
# IOS_PLATFORM = OS (default) or SIMULATOR
# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders
# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch.
# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch.
# IOS_ARCH
# The archectures wanted to support, such "arm64", "armv7;arm64"
# IOS_DEPLOYMENT_TARGET
# The minimum iOS deployment version, such as "7.0"
# IOS_ENABLE_BITCODE = ON (default) or OFF
# IOS_USE_VECLIB_FOR_BLAS = OFF (default) or ON
# IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder
# By default this location is automatcially chosen based on the IOS_PLATFORM value above.
# If set manually, it will override the default location and force the user of a particular Developer Platform
# IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder
# By default this location is automatcially chosen based on the IOS_DEVELOPER_ROOT value.
# In this case it will always be the most up-to-date SDK found in the IOS_DEVELOPER_ROOT path.
# If set manually, this will force the use of a specific SDK version
# Macros:
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE)
# A convenience macro for setting xcode specific properties on targets
# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1")
# find_host_package (PROGRAM ARGS)
# A macro used to find executable programs on the host system, not within the iOS environment.
# Thanks to the android-cmake project for providing the command
if(NOT IOS)
return()
endif()
set(CMAKE_SYSTEM_NAME Darwin)
# Get the Xcode version being used.
execute_process(COMMAND xcodebuild -version
OUTPUT_VARIABLE XCODE_VERSION
RESULT_VARIABLE XCODE_VERSION_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${XCODE_VERSION_RESULT})
string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}")
string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}")
message(STATUS "Building with Xcode version: ${XCODE_VERSION}")
else()
message(FATAL_ERROR "Cannot execute xcodebuild, please check whether xcode is installed.")
endif()
# Required as of cmake 2.8.10
set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE)
# Setup iOS platform unless specified manually with IOS_PLATFORM
if(NOT DEFINED IOS_PLATFORM)
set(IOS_PLATFORM "OS")
endif()
set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if(NOT DEFINED IOS_ARCH)
if(IOS_PLATFORM STREQUAL "OS")
set(IOS_ARCH "armv7;armv7s;arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
set(IOS_ARCH "i386;x86_64")
endif()
endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
# Specify minimum iOS deployment version
if(NOT DEFINED IOS_DEPLOYMENT_TARGET)
set(IOS_DEPLOYMENT_TARGET "7.0")
endif()
set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version")
# Whether to enable bitcode
if(NOT DEFINED IOS_ENABLE_BITCODE)
set(IOS_ENABLE_BITCODE ON)
endif()
set(IOS_ENABLE_BITCODE ${IOS_ENABLE_BITCODE} CACHE BOOL "Whether to enable bitcode")
if(NOT DEFINED IOS_USE_VECLIB_FOR_BLAS)
set(IOS_USE_VECLIB_FOR_BLAS OFF)
endif()
set(IOS_USE_VECLIB_FOR_BLAS ${IOS_UES_VECLIB_FOR_BLAS} CACHE BOOL "Whether to use veclib")
# Check the platform selection and setup for developer root
if(${IOS_PLATFORM} STREQUAL "OS")
set(IOS_PLATFORM_LOCATION "iPhoneOS.platform")
set(XCODE_IOS_PLATFORM iphoneos)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos")
elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR")
set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform")
set(XCODE_IOS_PLATFORM iphonesimulator)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator")
elseif(${IOS_PLATFORM} STREQUAL "WATCHOS")
set(IOS_PLATFORM_LOCATION "WatchOS.platform")
set(XCODE_IOS_PLATFORM watchos)
# This causes the installers to properly locate the output libraries
set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos")
else(${IOS_PLATFORM} STREQUAL "OS")
message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. Please set to\n"
"\t OS, SIMULATOR, or WATCHOS.")
endif()
# Check iOS developer toolchain
if(NOT DEFINED IOS_DEVELOPER_ROOT)
# Setup iOS developer location
execute_process(COMMAND xcode-select -print-path
OUTPUT_VARIABLE XCODE_DEVELOPER_DIR
RESULT_VARIABLE XCODE_DEVELOPER_DIR_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Xcode 4.3 changed the installation location, choose the most recent one available
if(${XCODE_VERSION} VERSION_LESS "4.3.0")
set(IOS_DEVELOPER_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
else()
set(IOS_DEVELOPER_ROOT "${XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer")
endif()
endif()
if(EXISTS ${IOS_DEVELOPER_ROOT})
set(IOS_DEVELOPER_ROOT ${IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform")
else()
message(FATAL_ERROR "Invalid IOS_DEVELOPER_ROOT: ${IOS_DEVELOPER_ROOT} does not exist.")
endif()
# Check iOS SDK
if(NOT DEFINED IOS_SDK_ROOT)
# Find and use the most recent iOS sdk
file(GLOB IOS_SDK_LISTS "${IOS_DEVELOPER_ROOT}/SDKs/*")
if(IOS_SDK_LISTS)
list(SORT IOS_SDK_LISTS)
list(REVERSE IOS_SDK_LISTS)
list(GET IOS_SDK_LISTS 0 IOS_SDK_ROOT)
else(IOS_SDK_LISTS)
message(FATAL_ERROR "No iOS SDK's found in default search path ${IOS_DEVELOPER_ROOT}."
" Please manually set IOS_SDK_ROOT or install the iOS SDK.")
endif(IOS_SDK_LISTS)
endif()
if(EXISTS ${IOS_SDK_ROOT})
set(IOS_SDK_ROOT ${IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK")
message(STATUS "iOS toolchain: ${IOS_SDK_ROOT}")
else()
message(FATAL_ERROR "Invalid IOS_SDK_ROOT: ${IOS_SDK_ROOT} does not exist.")
endif()
# Set the sysroot default to the most recent SDK
set(CMAKE_OSX_SYSROOT ${IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support")
# Get version of iOS SDK
execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion
OUTPUT_VARIABLE IOS_SDK_VERSION
RESULT_VARIABLE IOS_SDK_VERSION_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${IOS_SDK_VERSION_RESULT})
string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" IOS_SDK_VERSION "${IOS_SDK_ROOT}")
endif()
if(NOT IOS_SDK_VERSION)
message(WARNING "Cannot get SDK's version.")
set(IOS_SDK_VERSION 1)
endif()
set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION})
# Find the C & C++ compilers for the specified SDK.
if(NOT CMAKE_C_COMPILER)
# Default to use clang
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang
OUTPUT_VARIABLE IOS_C_COMPILER
RESULT_VARIABLE IOS_C_COMPILER_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${IOS_C_COMPILER_RESULT})
get_filename_component(IOS_C_COMPILER clang PROGRAM)
endif()
else(NOT CMAKE_C_COMPILER)
# User can set it in cmake command
get_filename_component(IOS_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM)
endif(NOT CMAKE_C_COMPILER)
if(NOT EXISTS ${IOS_C_COMPILER})
message(FATAL_ERROR "Cannot find C compiler: ${IOS_C_COMPILER}")
endif()
if(NOT CMAKE_CXX_COMPILER)
# Default to use clang++
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++
OUTPUT_VARIABLE IOS_CXX_COMPILER
RESULT_VARIABLE IOS_CXX_COMPILER_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${IOS_CXX_COMPILER_RESULT})
get_filename_component(IOS_CXX_COMPILER clang++ PROGRAM)
endif()
else(NOT CMAKE_CXX_COMPILER)
# User can set it in cmake command
get_filename_component(IOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM)
endif(NOT CMAKE_CXX_COMPILER)
if(NOT EXISTS ${IOS_CXX_COMPILER})
message(FATAL_ERROR "Cannot find CXX compiler: ${IOS_CXX_COMPILER}")
endif()
set(CMAKE_C_COMPILER ${IOS_C_COMPILER} CACHE PATH "C compiler" FORCE)
set(CMAKE_CXX_COMPILER ${IOS_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE)
set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ")
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
# Set iOS specific C/C++ flags
if(IOS_PLATFORM STREQUAL "OS")
if(XCODE_VERSION VERSION_LESS "7.0")
set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-mios-version-min=${IOS_DEPLOYMENT_TARGET}")
else()
# Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM.
set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
endif()
else()
set(XCODE_IOS_FLATFORM_VERSION_FLAGS "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}")
endif()
if(IOS_ENABLE_BITCODE)
set(XCODE_IOS_BITCODE_FLAGS "${IOS_COMPILER_FLAGS} -fembed-bitcode")
else()
set(XCODE_IOS_BITCODE_FLAGS "")
endif()
set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_FLAGS}")
# Hidden visibilty is required for cxx on iOS
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
if(IOS_USE_VECLIB_FOR_BLAS)
# Find vecLib for iOS
set(VECLIB_SEARCH_DIRS
${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks
${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks
)
find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR)
if(VECLIB_FOUND)
if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*")
set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework vecLib")
message(STATUS "Found standalone vecLib.framework")
else()
set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework Accelerate")
message(STATUS "Found vecLib as part of Accelerate.framework")
endif()
endif()
endif()
set(CMAKE_C_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_C_LINK_FLAGS}")
set(CMAKE_CXX_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_CXX_LINK_FLAGS}")
set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
if(NOT IOS_ENABLE_BITCODE)
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names")
else()
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle")
endif()
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a")
# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree
# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache
# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun)
# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex
if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL)
find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
endif()
# Set the find root to the iOS developer roots and to user defined paths
set(CMAKE_FIND_ROOT_PATH ${IOS_DEVELOPER_ROOT} ${IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH}
CACHE string "iOS find search path root")
# default to searching for frameworks first
set(CMAKE_FIND_FRAMEWORK FIRST)
# set up the default search directories for frameworks
set(CMAKE_SYSTEM_FRAMEWORK_PATH
${IOS_SDK_ROOT}/System/Library/Frameworks
${IOS_SDK_ROOT}/System/Library/PrivateFrameworks
${IOS_SDK_ROOT}/Developer/Library/Frameworks
)
# only search the iOS sdks, not the remainder of the host filesystem
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', "
"building for '${IOS_PLATFORM}' platform, with architecture '${CMAKE_OSX_ARCHITECTURES}'")
message(STATUS "System CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
message(STATUS "System CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
# Used in ExternalProject command
string(REPLACE ";" "\\$<SEMICOLON>" EXTERNAL_IOS_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}")
set(EXTERNAL_OPTIONAL_ARGS
-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}
-DCMAKE_OSX_ARCHITECTURES=${EXTERNAL_IOS_ARCHITECTURES})
# This little macro lets you set any XCode specific property
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE)
set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE})
endmacro(set_xcode_property)
# This macro lets you find executable programs on the host system
macro(find_host_package)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
set(IOS FALSE)
find_package(${ARGN})
set(IOS TRUE)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
endmacro(find_host_package)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a toolchain file for cross-compiling for Raspberry Pi.
# The supported variables are listed belows:
#
# RPI_TOOLCHAIN
# RPI_ARM_NEON
#
# Also you can set CMAKE_C/CXX_COMPILER yourself, through cmake arguments.
IF(NOT RPI)
return()
ENDIF()
SET(CMAKE_SYSTEM_NAME Linux)
SET(CMAKE_SYSTEM_VERSION 1)
SET(CMAKE_SYSTEM_PROCESSOR arm)
# check the exist of raspberry pi toolchain
IF(NOT DEFINED RPI_TOOLCHAIN)
SET(RPI_TOOLCHAIN $ENV{RPI_TOOLCHAIN}
CACHE PATH "Folder holds the toolchain of Raspberr Pi")
ENDIF()
IF(NOT RPI_TOOLCHAIN)
MESSAGE(WARNING "It is recommended to set RPI_TOOLCHAIN to use toolchain.\n"
"To cross-compile for Raspberry Pi, you need to download the tools using:\n"
" git clone https://github.com/raspberrypi/tools\n")
ENDIF()
IF(NOT DEFINED RPI_ARM_NEON)
SET(RPI_ARM_NEON ON)
ENDIF()
IF(RPI_TOOLCHAIN)
SET(RPI_TOOLCHAIN_ROOT ${RPI_TOOLCHAIN})
IF(RPI_TOOLCHAIN_ROOT MATCHES "gcc-linaro-arm-linux-gnueabihf-raspbian(-x64)?$")
# gcc-linaro-arm-linux-gnueabihf-raspbian
# gcc-linaro-arm-linux-gnueabihf-raspbian-x64
SET(RPI_TOOLCHAIN_NAME arm-linux-gnueabihf)
ENDIF()
SET(RPI_TOOLCHAIN_PREFIX "${RPI_TOOLCHAIN_ROOT}/bin/${RPI_TOOLCHAIN_NAME}-")
ENDIF()
# C compiler
IF(NOT CMAKE_C_COMPILER)
SET(RPI_C_COMPILER "${RPI_TOOLCHAIN_PREFIX}gcc")
ELSE()
GET_FILENAME_COMPONENT(RPI_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM)
ENDIF()
IF(NOT EXISTS ${RPI_C_COMPILER})
MESSAGE(FATAL_ERROR "Cannot find C compiler: ${RPI_C_COMPILER}")
ENDIF()
# CXX compiler
IF(NOT CMAKE_CXX_COMPILER)
SET(RPI_CXX_COMPILER "${RPI_TOOLCHAIN_PREFIX}g++")
ELSE()
GET_FILENAME_COMPONENT(RPI_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM)
ENDIF()
IF(NOT EXISTS ${RPI_CXX_COMPILER})
MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${RPI_CXX_COMPILER}")
ENDIF()
SET(CMAKE_C_COMPILER ${RPI_C_COMPILER} CACHE PATH "C compiler" FORCE)
SET(CMAKE_CXX_COMPILER ${RPI_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE)
IF(RPI_ARM_NEON)
SET(RPI_C_FLAGS "${RPI_C_FLAGS} -mfpu=neon")
ENDIF()
SET(CMAKE_C_FLAGS "${RPI_C_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
SET(CMAKE_CXX_FLAGS "${RPI_C_FLAGS} ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
...@@ -63,9 +63,7 @@ function(select_nvcc_arch_flags out_variable) ...@@ -63,9 +63,7 @@ function(select_nvcc_arch_flags out_variable)
# List of arch names # List of arch names
set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual") set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual")
set(archs_name_default "All") set(archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING) list(APPEND archs_names "Auto")
list(APPEND archs_names "Auto")
endif()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# #
IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE) IF(NOT WITH_DISTRIBUTE)
return() return()
ENDIF() ENDIF()
......
...@@ -71,13 +71,3 @@ if (WIN32) ...@@ -71,13 +71,3 @@ if (WIN32)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif(HAVE_SHLWAPI) endif(HAVE_SHLWAPI)
endif (WIN32) endif (WIN32)
IF(WITH_C_API)
INSTALL(DIRECTORY ${GFLAGS_INCLUDE_DIR} DESTINATION third_party/gflags)
IF(ANDROID)
INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib)
ENDIF()
ENDIF()
...@@ -26,14 +26,8 @@ ENDIF(WIN32) ...@@ -26,14 +26,8 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
IF(ANDROID AND ${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
# Using the unofficial glog for Android API < 21 SET(GLOG_TAG "v0.3.5")
SET(GLOG_REPOSITORY "https://github.com/Xreki/glog.git")
SET(GLOG_TAG "8a547150548b284382ccb6582408e9140ff2bea8")
ELSE()
SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
SET(GLOG_TAG "v0.3.5")
ENDIF()
ExternalProject_Add( ExternalProject_Add(
extern_glog extern_glog
...@@ -78,12 +72,3 @@ ADD_DEPENDENCIES(glog extern_glog gflags) ...@@ -78,12 +72,3 @@ ADD_DEPENDENCIES(glog extern_glog gflags)
LINK_LIBRARIES(glog gflags) LINK_LIBRARIES(glog gflags)
LIST(APPEND external_project_dependencies glog) LIST(APPEND external_project_dependencies glog)
IF(WITH_C_API)
INSTALL(DIRECTORY ${GLOG_INCLUDE_DIR} DESTINATION third_party/glog)
IF(ANDROID)
INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib)
ENDIF()
ENDIF()
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# #
IF(MOBILE_INFERENCE OR NOT WITH_DISTRIBUTE) IF(NOT WITH_DISTRIBUTE)
return() return()
ENDIF() ENDIF()
......
...@@ -13,10 +13,6 @@ ...@@ -13,10 +13,6 @@
# limitations under the License. # limitations under the License.
# #
IF(MOBILE_INFERENCE)
return()
ENDIF()
include (ExternalProject) include (ExternalProject)
# NOTE: gzstream is needed when linking with ctr reader. # NOTE: gzstream is needed when linking with ctr reader.
......
...@@ -19,8 +19,8 @@ IF(NOT WITH_LIBXSMM) ...@@ -19,8 +19,8 @@ IF(NOT WITH_LIBXSMM)
return() return()
ENDIF() ENDIF()
IF(WIN32 OR APPLE OR ANDROID OR IOS) IF(WIN32 OR APPLE)
MESSAGE(WARNING "Windows, Mac or Mobile are not supported with libxsmm in Paddle yet.") MESSAGE(WARNING "Windows, Mac are not supported with libxsmm in Paddle yet.")
SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM" FORCE) SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM" FORCE)
return() return()
ENDIF() ENDIF()
......
...@@ -110,7 +110,3 @@ else(WIN32) ...@@ -110,7 +110,3 @@ else(WIN32)
endif(WIN32) endif(WIN32)
ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB}) ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn) ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn)
IF(WITH_C_API)
INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib)
ENDIF()
...@@ -74,7 +74,3 @@ ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) ...@@ -74,7 +74,3 @@ ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
LIST(APPEND external_project_dependencies mklml) LIST(APPEND external_project_dependencies mklml)
IF(WITH_C_API)
INSTALL(FILES ${MKLML_LIB} ${MKLML_IOMP_LIB} DESTINATION lib)
ENDIF()
# Find the NNPACK library
# NNPACK_ROOT - where to find NNPACK include and library.
#
set(NNPACK_FOUND OFF)
set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
...@@ -40,38 +40,12 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -40,38 +40,12 @@ IF(NOT ${CBLAS_FOUND})
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
SET(OPENBLAS_COMMIT "v0.2.20") SET(OPENBLAS_COMMIT "v0.2.20")
IF(CMAKE_CROSSCOMPILING) IF(APPLE)
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
GET_FILENAME_COMPONENT(CROSS_SUFFIX ${CMAKE_C_COMPILER} DIRECTORY) ENDIF()
SET(CROSS_SUFFIX ${CROSS_SUFFIX}/) SET(OPTIONAL_ARGS "")
IF(ANDROID) IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
# use softfp
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0)
ENDIF()
ELSEIF(IOS)
IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX})
ELSE()
MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.")
ENDIF()
ELSEIF(RPI)
# use hardfp
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 USE_THREAD=0)
ENDIF()
ELSE()
IF(APPLE)
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
ENDIF()
SET(OPTIONAL_ARGS "")
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
ENDIF()
ENDIF() ENDIF()
SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs)
...@@ -92,25 +66,6 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -92,25 +66,6 @@ IF(NOT ${CBLAS_FOUND})
ELSE() ELSE()
ENDIF(NOT WIN32) ENDIF(NOT WIN32)
SET(CBLAS_PROVIDER openblas) SET(CBLAS_PROVIDER openblas)
IF(WITH_C_API)
INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas)
# Because libopenblas.a is a symbolic link of another library, thus need to
# install the whole directory.
IF(ANDROID)
SET(TMP_INSTALL_DIR third_party/openblas/lib/${ANDROID_ABI})
ELSE()
SET(TMP_INSTALL_DIR third_party/openblas/lib)
ENDIF()
INSTALL(CODE "execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib
${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}
)"
)
INSTALL(CODE "MESSAGE(STATUS \"Installing: \"
\"${CBLAS_INSTALL_DIR}/lib -> ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}\"
)"
)
ENDIF()
ENDIF(NOT ${CBLAS_FOUND}) ENDIF(NOT ${CBLAS_FOUND})
MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
......
...@@ -204,15 +204,6 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -204,15 +204,6 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET(PROTOBUF_REPO "https://github.com/google/protobuf.git") SET(PROTOBUF_REPO "https://github.com/google/protobuf.git")
SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546") SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
IF(MOBILE_INFERENCE)
# The reason why the official version is not used is described in
# https://github.com/PaddlePaddle/Paddle/issues/6114
SET(PROTOBUF_REPO "https://github.com/qingqing01/protobuf.git")
SET(PROTOBUF_TAG "v3.2.0")
IF(NOT BUILD_FOR_HOST)
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-Dprotobuf_BUILD_PROTOC_BINARIES=OFF")
ENDIF()
ENDIF()
ExternalProject_Add( ExternalProject_Add(
${TARGET_NAME} ${TARGET_NAME}
...@@ -240,19 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -240,19 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
) )
ENDFUNCTION() ENDFUNCTION()
IF(NOT MOBILE_INFERENCE) SET(PROTOBUF_VERSION 3.1)
SET(PROTOBUF_VERSION 3.1)
ELSE()
SET(PROTOBUF_VERSION 3.2)
ENDIF()
IF(CMAKE_CROSSCOMPILING)
build_protobuf(protobuf_host TRUE)
LIST(APPEND external_project_dependencies protobuf_host)
SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE}
CACHE FILEPATH "protobuf executable." FORCE)
ENDIF()
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE) build_protobuf(extern_protobuf FALSE)
...@@ -266,20 +245,7 @@ IF(NOT PROTOBUF_FOUND) ...@@ -266,20 +245,7 @@ IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY}
CACHE FILEPATH "protoc library." FORCE) CACHE FILEPATH "protoc library." FORCE)
IF(WITH_C_API) SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf) CACHE FILEPATH "protobuf executable." FORCE)
IF(ANDROID) PROMPT_PROTOBUF_LIB(extern_protobuf)
INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib)
ENDIF()
ENDIF()
IF(CMAKE_CROSSCOMPILING)
PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf)
ELSE()
SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
CACHE FILEPATH "protobuf executable." FORCE)
PROMPT_PROTOBUF_LIB(extern_protobuf)
ENDIF()
ENDIF(NOT PROTOBUF_FOUND) ENDIF(NOT PROTOBUF_FOUND)
...@@ -71,7 +71,3 @@ ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) ...@@ -71,7 +71,3 @@ ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT})
LIST(APPEND external_project_dependencies pslib) LIST(APPEND external_project_dependencies pslib)
IF(WITH_C_API)
INSTALL(FILES ${PSLIB_LIB} ${PSLIB_IOMP_LIB} DESTINATION lib)
ENDIF()
...@@ -71,7 +71,3 @@ ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) ...@@ -71,7 +71,3 @@ ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB})
ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT}) ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT})
LIST(APPEND external_project_dependencies pslib_brpc) LIST(APPEND external_project_dependencies pslib_brpc)
IF(WITH_C_API)
INSTALL(FILES ${PSLIB_BRPC_LIB} ${PSLIB_BRPC_IOMP_LIB} DESTINATION lib)
ENDIF()
...@@ -12,10 +12,6 @@ ...@@ -12,10 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
if(MOBILE_INFERENCE OR RPI)
return()
endif()
include (ExternalProject) include (ExternalProject)
# NOTE: snappy is needed when linking with recordio # NOTE: snappy is needed when linking with recordio
......
...@@ -12,10 +12,6 @@ ...@@ -12,10 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
IF(MOBILE_INFERENCE OR RPI)
return()
ENDIF()
include (ExternalProject) include (ExternalProject)
set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(NOT WITH_SWIG_PY)
return()
ENDIF()
FIND_PACKAGE(SWIG)
IF(NOT SWIG_FOUND)
# build swig as an external project
INCLUDE(ExternalProject)
SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
SET(SWIG_TARGET_VERSION "3.0.2")
SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
IF(WIN32)
# swig.exe available as pre-built binary on Windows:
ExternalProject_Add(swig
URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5}
SOURCE_DIR ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
# From SWIG configure
ExternalProject_Add(swig
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
ENDIF(WIN32)
LIST(APPEND external_project_dependencies swig)
ENDIF(NOT SWIG_FOUND)
...@@ -12,10 +12,6 @@ ...@@ -12,10 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
IF(MOBILE_INFERENCE)
return()
ENDIF()
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
......
...@@ -73,12 +73,3 @@ include_directories(${XXHASH_INCLUDE_DIR}) ...@@ -73,12 +73,3 @@ include_directories(${XXHASH_INCLUDE_DIR})
add_dependencies(xxhash extern_xxhash) add_dependencies(xxhash extern_xxhash)
LIST(APPEND external_project_dependencies xxhash) LIST(APPEND external_project_dependencies xxhash)
IF(WITH_C_API)
INSTALL(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
IF(ANDROID)
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
ENDIF()
ENDIF()
...@@ -59,12 +59,3 @@ SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) ...@@ -59,12 +59,3 @@ SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
ADD_DEPENDENCIES(zlib extern_zlib) ADD_DEPENDENCIES(zlib extern_zlib)
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)
IF(WITH_C_API)
INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib)
IF(ANDROID)
INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib)
ENDIF()
ENDIF()
...@@ -156,10 +156,8 @@ set(GPU_COMMON_FLAGS ...@@ -156,10 +156,8 @@ set(GPU_COMMON_FLAGS
endif(NOT WIN32) endif(NOT WIN32)
if (APPLE) if (APPLE)
if(NOT CMAKE_CROSSCOMPILING) # On Mac OS X build fat binaries with x86_64 architectures by default.
# On Mac OS X build fat binaries with x86_64 architectures by default. set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
endif()
# On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0
set (COMMON_FLAGS -Wno-deprecated-register) set (COMMON_FLAGS -Wno-deprecated-register)
endif(APPLE) endif(APPLE)
......
...@@ -90,11 +90,11 @@ ...@@ -90,11 +90,11 @@
# including binary directory for generated headers. # including binary directory for generated headers.
include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(NOT APPLE AND NOT ANDROID) if(NOT APPLE)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif(NOT APPLE AND NOT ANDROID) endif(NOT APPLE)
set_property(GLOBAL PROPERTY FLUID_MODULES "") set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library # find all fluid modules is used for paddle fluid static library
...@@ -655,12 +655,6 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) ...@@ -655,12 +655,6 @@ function(paddle_protobuf_generate_cpp SRCS HDRS)
set(${SRCS}) set(${SRCS})
set(${HDRS}) set(${HDRS})
if (MOBILE_INFERENCE)
set(EXTRA_FLAG "lite:")
else()
set(EXTRA_FLAG "")
endif()
foreach(FIL ${ARGN}) foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE) get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE) get_filename_component(FIL_WE ${FIL} NAME_WE)
...@@ -677,7 +671,7 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) ...@@ -677,7 +671,7 @@ function(paddle_protobuf_generate_cpp SRCS HDRS)
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}" COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
-I${CMAKE_CURRENT_SOURCE_DIR} -I${CMAKE_CURRENT_SOURCE_DIR}
--cpp_out "${EXTRA_FLAG}${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL}
DEPENDS ${ABS_FIL} protoc DEPENDS ${ABS_FIL} protoc
COMMENT "Running C++ protocol buffer compiler on ${FIL}" COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM ) VERBATIM )
......
...@@ -149,25 +149,23 @@ if (WITH_NGRAPH) ...@@ -149,25 +149,23 @@ if (WITH_NGRAPH)
) )
endif () endif ()
if (NOT MOBILE_INFERENCE AND NOT RPI) set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy") copy(snappy_lib
copy(snappy_lib SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib
DSTS ${dst_dir} ${dst_dir}/lib DEPS snappy)
DEPS snappy)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream")
copy(snappystream_lib copy(snappystream_lib
SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
DEPS snappystream) DEPS snappystream)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib")
copy(zlib_lib copy(zlib_lib
SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
DEPS zlib) DEPS zlib)
endif ()
# paddle fluid module # paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
......
...@@ -74,21 +74,6 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES) ...@@ -74,21 +74,6 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}")
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# configuration for cross-compiling
IF(DEFINED CMAKE_SYSTEM_NAME)
INCLUDE(cross_compiling/host)
IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
SET(ANDROID TRUE)
INCLUDE(cross_compiling/android)
ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi")
SET(RPI TRUE)
INCLUDE(cross_compiling/raspberry_pi)
ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
SET(IOS TRUE)
INCLUDE(cross_compiling/ios)
ENDIF()
ENDIF()
# external dependencies log output # external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output LOG_DOWNLOAD 0 # Wrap download in script to log output
......
...@@ -53,118 +53,3 @@ function(target_circle_link_libraries TARGET_NAME) ...@@ -53,118 +53,3 @@ function(target_circle_link_libraries TARGET_NAME)
"-Wl,--end-group") "-Wl,--end-group")
endif() endif()
endfunction() endfunction()
# compile_cu_as_cpp
# Make a cu file compiled as C++
# Arguments: Source files
macro(compile_cu_as_cpp)
foreach(s ${ARGN})
set_source_files_properties(${s} PROPERTIES LANGUAGE CXX)
set_source_files_properties(${s} PROPERTIES COMPILE_FLAGS "-x c++")
endforeach()
endmacro()
# link_paddle_exe
# add paddle library for a paddle executable, such as trainer, pserver.
#
# It will handle WITH_PYTHON etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
endif()
if(MOBILE_INFERENCE)
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
else()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(ANDROID)
target_link_libraries(${TARGET_NAME} log)
endif(ANDROID)
if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB)
target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()
# link_paddle_test
# Link a paddle unittest for target
# TARGET_NAME: the unittest target name
# Rest Arguemnts: not used.
function(link_paddle_test TARGET_NAME)
link_paddle_exe(${TARGET_NAME})
target_link_libraries(${TARGET_NAME}
paddle_test_main
paddle_test_util
${GTEST_LIBRARIES})
endfunction()
# add_unittest_without_exec
#
# create a paddle unittest. not specifically define how to run this unittest.
# TARGET_NAME: the unittest target name, same as executable file name
# Rest Arguments: the source files to compile this unittest.
macro(add_unittest_without_exec TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN})
link_paddle_test(${TARGET_NAME})
endmacro()
# add_unittest
# create a paddle unittest and just to execute this binary to make unittest.
#
# TARGET_NAME: the unittest target name, same as executable file name
# Rest Arguments: the source files to compile this unittest.
macro(add_unittest TARGET_NAME)
add_unittest_without_exec(${TARGET_NAME} ${ARGN})
add_test(${TARGET_NAME} ${TARGET_NAME})
endmacro()
# add_simple_unittest
# create a paddle unittest with file name. It just compile ${TARGET_NAME}.cpp to
# ${TARGET_NAME} and then execute it.
macro(add_simple_unittest TARGET_NAME)
add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro()
# Creates C resources file from files in given resource file
function(create_resources res_file output_file)
add_custom_command(
OUTPUT ${output_file}
COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file}
DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py)
endfunction()
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/details/sequential_execution_pass.h" #include "paddle/fluid/framework/details/sequential_execution_pass.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace paddle { namespace paddle {
...@@ -243,3 +244,4 @@ USE_PASS(sequential_execution_pass); ...@@ -243,3 +244,4 @@ USE_PASS(sequential_execution_pass);
USE_PASS(all_reduce_deps_pass); USE_PASS(all_reduce_deps_pass);
USE_PASS(modify_op_lock_and_record_event_pass); USE_PASS(modify_op_lock_and_record_event_pass);
USE_PASS(lock_free_optimize_pass); USE_PASS(lock_free_optimize_pass);
USE_PASS(graph_to_program_pass);
...@@ -28,10 +28,14 @@ std::unique_ptr<Graph> Pass::Apply(std::unique_ptr<Graph> graph) const { ...@@ -28,10 +28,14 @@ std::unique_ptr<Graph> Pass::Apply(std::unique_ptr<Graph> graph) const {
PADDLE_ENFORCE(graph->Has(attr), "Required graph atrribute %s not set.", PADDLE_ENFORCE(graph->Has(attr), "Required graph atrribute %s not set.",
attr); attr);
} }
auto* native_graph = graph.get();
auto applied_graph = ApplyImpl(std::move(graph)); auto applied_graph = ApplyImpl(std::move(graph));
// TODO(panyx0718): Add more verifications. // TODO(panyx0718): Add more verifications.
PADDLE_ENFORCE(!HasCircle(*applied_graph), PADDLE_ENFORCE(!HasCircle(*applied_graph),
"Illegal Pass. Generated graph shouldn't has cycle."); "Illegal Pass. Generated graph shouldn't has cycle.");
PADDLE_ENFORCE(applied_graph.get() == native_graph,
"Pass::Apply() cannot delete the passed graph and shouldn't "
"return a new graph.(For the need of pybind11)");
applied_ = true; applied_ = true;
return applied_graph; return applied_graph;
} }
......
...@@ -104,7 +104,9 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -104,7 +104,9 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv algorithm --------------------- // ------------------- cudnn conv algorithm ---------------------
cudnnConvolutionFwdAlgo_t algo; cudnnConvolutionFwdAlgo_t algo;
auto handle = dev_ctx.cudnn_handle(); auto handle = dev_ctx.cudnn_handle();
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
Tensor cudnn_workspace;
void* cudnn_workspace_ptr = nullptr;
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType( CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH)); cudnn_conv_desc, CUDNN_DEFAULT_MATH));
...@@ -118,19 +120,24 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -118,19 +120,24 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
workspace_size_limit, &algo)); workspace_size_limit, &algo));
VLOG(3) << "cuDNN forward algo " << algo; VLOG(3) << "cuDNN forward algo " << algo;
} else { } else {
cudnn_workspace =
ctx.AllocateTmpTensor<int8_t, platform::CUDADeviceContext>(
framework::make_ddim(
{static_cast<int64_t>(workspace_size_limit)}),
dev_ctx);
cudnn_workspace_ptr = static_cast<void*>(cudnn_workspace.data<int8_t>());
auto search_func = [&]() { auto search_func = [&]() {
int returned_algo_count; int returned_algo_count;
std::array<cudnnConvolutionFwdAlgoPerf_t, kNUM_CUDNN_FWD_ALGS> std::array<cudnnConvolutionFwdAlgoPerf_t, kNUM_CUDNN_FWD_ALGS>
fwd_perf_stat; fwd_perf_stat;
auto cudnn_find_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE( CUDNN_ENFORCE(platform::dynload::cudnnFindConvolutionForwardAlgorithmEx(
platform::dynload::cudnnFindConvolutionForwardAlgorithmEx( handle, cudnn_input_desc, input_data, cudnn_filter_desc,
handle, cudnn_input_desc, input_data, cudnn_filter_desc, filter_data, cudnn_conv_desc, cudnn_output_desc, output_data,
filter_data, cudnn_conv_desc, cudnn_output_desc, output_data, kNUM_CUDNN_FWD_ALGS, &returned_algo_count, fwd_perf_stat.data(),
kNUM_CUDNN_FWD_ALGS, &returned_algo_count, cudnn_workspace_ptr, workspace_size_limit));
fwd_perf_stat.data(), cudnn_workspace, workspace_size_limit));
};
workspace_handle.RunFunc(cudnn_find_func, workspace_size_limit);
VLOG(3) << "Perf result: (algo: stat, time, memory)"; VLOG(3) << "Perf result: (algo: stat, time, memory)";
for (int i = 0; i < returned_algo_count; ++i) { for (int i = 0; i < returned_algo_count; ++i) {
const auto& stat = fwd_perf_stat[i]; const auto& stat = fwd_perf_stat[i];
...@@ -181,6 +188,15 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -181,6 +188,15 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit, PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
"workspace_size to be allocated exceeds the limit"); "workspace_size to be allocated exceeds the limit");
if (!cudnn_workspace_ptr) {
cudnn_workspace =
ctx.AllocateTmpTensor<int8_t, platform::CUDADeviceContext>(
framework::make_ddim(
{static_cast<int64_t>(workspace_size_in_bytes)}),
dev_ctx);
cudnn_workspace_ptr = static_cast<void*>(cudnn_workspace.data<int8_t>());
}
if ((activation == "identity") && (!residual)) { if ((activation == "identity") && (!residual)) {
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is // Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib. // enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
...@@ -188,13 +204,12 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -188,13 +204,12 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
// cudnnConvolutionForward and cudnnAddTensor // cudnnConvolutionForward and cudnnAddTensor
// ------------- cudnn conv forward and bias add --------------------- // ------------- cudnn conv forward and bias add ---------------------
ScalingParamType<T> alpha = 1.0f, beta = 0.0f; ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc, handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc,
filter_data, cudnn_conv_desc, algo, cudnn_workspace, filter_data, cudnn_conv_desc, algo, cudnn_workspace_ptr,
workspace_size_in_bytes, &beta, cudnn_output_desc, output_data)); workspace_size_in_bytes, &beta, cudnn_output_desc, output_data));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
CUDNN_ENFORCE(platform::dynload::cudnnAddTensor( CUDNN_ENFORCE(platform::dynload::cudnnAddTensor(
handle, &alpha, cudnn_bias_desc, bias_data, &alpha, cudnn_output_desc, handle, &alpha, cudnn_bias_desc, bias_data, &alpha, cudnn_output_desc,
output_data)); output_data));
...@@ -205,15 +220,13 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -205,15 +220,13 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv+bias+act forward -------------------- // ------------------- cudnn conv+bias+act forward --------------------
ScalingParamType<T> alpha1 = 1.0f; ScalingParamType<T> alpha1 = 1.0f;
ScalingParamType<T> alpha2 = residual ? 1.0f : 0.0f; ScalingParamType<T> alpha2 = residual ? 1.0f : 0.0f;
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward(
handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc, handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc,
filter_data, cudnn_conv_desc, algo, cudnn_workspace, filter_data, cudnn_conv_desc, algo, cudnn_workspace_ptr,
workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data, workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data,
cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc, cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc,
output_data)); output_data));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
} }
std::vector<int> channels = ctx.Attr<std::vector<int>>("split_channels"); std::vector<int> channels = ctx.Attr<std::vector<int>>("split_channels");
if (channels.size()) { if (channels.size()) {
......
...@@ -104,16 +104,18 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> { ...@@ -104,16 +104,18 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
int output_offset = output->numel() / output->dims()[0] / groups; int output_offset = output->numel() / output->dims()[0] / groups;
int filter_offset = filter->numel() / groups; int filter_offset = filter->numel() / groups;
T alpha = 1.0f, beta = 0.0f; T alpha = 1.0f, beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
auto temp_allocation =
platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
workspace_size_in_bytes);
void* cudnn_workspace = temp_allocation->ptr();
for (int g = 0; g < groups; g++) { for (int g = 0; g < groups; g++) {
auto cudnn_func = [&](void* cudnn_workspace) { CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardData( handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g,
handle, &alpha, cudnn_filter_desc, filter_data + filter_offset * g, cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc,
cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes, &beta,
algo, cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_output_desc, output_data + output_offset * g));
cudnn_output_desc, output_data + output_offset * g));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
} }
} }
}; };
...@@ -209,20 +211,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> { ...@@ -209,20 +211,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
output_grad->numel() / output_grad->dims()[0] / groups; output_grad->numel() / output_grad->dims()[0] / groups;
int filter_offset = filter->numel() / groups; int filter_offset = filter->numel() / groups;
T alpha = 1.0f, beta = 0.0f; T alpha = 1.0f, beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
auto temp_allocation =
platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
workspace_size_in_bytes);
void* cudnn_workspace = temp_allocation->ptr();
if (input_grad) { if (input_grad) {
T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
// Because beta is zero, it is unnecessary to reset input_grad. // Because beta is zero, it is unnecessary to reset input_grad.
for (int g = 0; g < groups; g++) { for (int g = 0; g < groups; g++) {
auto cudnn_func = [&](void* cudnn_workspace) { CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward( handle, &alpha, cudnn_output_desc,
handle, &alpha, cudnn_output_desc, output_grad_data + output_grad_offset * g, cudnn_filter_desc,
output_grad_data + output_grad_offset * g, cudnn_filter_desc, filter_data + filter_offset * g, cudnn_conv_desc, data_algo,
filter_data + filter_offset * g, cudnn_conv_desc, data_algo, cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc,
cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc, input_grad_data + input_offset * g));
input_grad_data + input_offset * g));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
} }
} }
...@@ -232,15 +236,12 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> { ...@@ -232,15 +236,12 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
// Because beta is zero, it is unnecessary to reset filter_grad. // Because beta is zero, it is unnecessary to reset filter_grad.
// Gradient with respect to the filter // Gradient with respect to the filter
for (int g = 0; g < groups; g++) { for (int g = 0; g < groups; g++) {
auto cudnn_func = [&](void* cudnn_workspace) { CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( handle, &alpha, cudnn_output_desc,
handle, &alpha, cudnn_output_desc, output_grad_data + output_grad_offset * g, cudnn_input_desc,
output_grad_data + output_grad_offset * g, cudnn_input_desc, input_data + input_offset * g, cudnn_conv_desc, filter_algo,
input_data + input_offset * g, cudnn_conv_desc, filter_algo, cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_filter_desc,
cudnn_workspace, workspace_size_in_bytes, &beta, filter_grad_data + filter_offset * g));
cudnn_filter_desc, filter_grad_data + filter_offset * g));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
} }
} }
} }
......
...@@ -37,7 +37,7 @@ else() ...@@ -37,7 +37,7 @@ else()
variable_response.cc variable_response.cc
collective_client.cc collective_server.cc collective_client.cc collective_server.cc
${BRPC_SRCS} ${BRPC_SRCS}
PROTO ${CMAKE_CURRENT_BINARY_DIR}/send_recv.proto PROTO send_recv.proto
DEPS lod_tensor selected_rows memory) DEPS lod_tensor selected_rows memory)
set(RPC_DEPS sendrecvop_rpc brpc ssl crypto protobuf leveldb snappystream snappy zlib) set(RPC_DEPS sendrecvop_rpc brpc ssl crypto protobuf leveldb snappystream snappy zlib)
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/distributed/collective_client.h" #include "paddle/fluid/operators/distributed/collective_client.h"
#include "paddle/fluid/operators/distributed/collective_server.h" #include "paddle/fluid/operators/distributed/collective_server.h"
...@@ -57,7 +58,7 @@ std::unique_ptr<framework::Scope> GenerateVars(platform::Place place) { ...@@ -57,7 +58,7 @@ std::unique_ptr<framework::Scope> GenerateVars(platform::Place place) {
auto* tensor = slr->mutable_value(); auto* tensor = slr->mutable_value();
auto* rows = slr->mutable_rows(); auto* rows = slr->mutable_rows();
tensor->Resize(framework::make_ddim({20000, 1024})); tensor->Resize(framework::make_ddim({3, 1024}));
tensor->mutable_data<float>(place); tensor->mutable_data<float>(place);
paddle::operators::math::set_constant(ctx, tensor, 32.7); paddle::operators::math::set_constant(ctx, tensor, 32.7);
...@@ -80,6 +81,20 @@ void Gather(const std::vector<distributed::RemoteVar>& vars, ...@@ -80,6 +81,20 @@ void Gather(const std::vector<distributed::RemoteVar>& vars,
std::vector<const framework::SelectedRows*> dst; std::vector<const framework::SelectedRows*> dst;
client->Gather(vars, &dst, *dev_ctx, scope); client->Gather(vars, &dst, *dev_ctx, scope);
std::cout << "dst:" << distributed::GetSelectedRowsInfo(*dst[0]); std::cout << "dst:" << distributed::GetSelectedRowsInfo(*dst[0]);
dev_ctx->Wait();
ASSERT_EQ(dst[0]->value().dims(), framework::make_ddim({3, 1024}));
ASSERT_EQ(dst[0]->height(), 20000);
ASSERT_EQ(dst[0]->rows().size(), static_cast<size_t>(3));
for (int i = 0; i < 3; i++) {
ASSERT_EQ(dst[0]->rows()[i], i);
}
std::vector<float> vec;
TensorToVector(dst[0]->value(), *dev_ctx, &vec);
for (size_t i = 0; i < 3 * 1024; i++) {
ASSERT_FLOAT_EQ(vec[i], 32.7);
}
} }
TEST(CollectiveServer, GPU) { TEST(CollectiveServer, GPU) {
......
...@@ -216,18 +216,19 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> { ...@@ -216,18 +216,19 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
out_datas.push_back( out_datas.push_back(
static_cast<void*>(output_data + (oc0 + oc1 + oc2) * h * w)); static_cast<void*>(output_data + (oc0 + oc1 + oc2) * h * w));
auto temp_allocation =
platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
workspace_size_in_bytes);
void* cudnn_workspace = temp_allocation->ptr();
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
auto func = [&](void* cudnn_workspace) { CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward(
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward( handle, &alpha, in_desc[i], in_datas[i], filter_desc[i],
handle, &alpha, in_desc[i], in_datas[i], filter_desc[i], static_cast<const void*>(filters[i]->data<T>()), conv_desc[i],
static_cast<const void*>(filters[i]->data<T>()), conv_desc[i], algo[i], cudnn_workspace, workspace_size_in_bytes, &beta, out_desc[i],
algo[i], cudnn_workspace, workspace_size_in_bytes, &beta, out_datas[i], bias_desc[i],
out_desc[i], out_datas[i], bias_desc[i], static_cast<const void*>(bias[i]->data<T>()), cudnn_act_desc,
static_cast<const void*>(bias[i]->data<T>()), cudnn_act_desc, out_desc[i], out_datas[i]));
out_desc[i], out_datas[i]));
};
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
workspace_handle.RunFunc(func, workspace_size_in_bytes);
} }
cudnnTensorDescriptor_t x_desc; cudnnTensorDescriptor_t x_desc;
......
...@@ -21,20 +21,20 @@ namespace operators { ...@@ -21,20 +21,20 @@ namespace operators {
enum GroupNormKernelFlags { kHasScale = 1, kHasBias = 2 }; enum GroupNormKernelFlags { kHasScale = 1, kHasBias = 2 };
#define CHECK_CASE(i, flags, kernel_name, args...) \ #define CHECK_CASE(i, flags, kernel_name, ...) \
if (i == flags) { \ if (i == flags) { \
kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(args); \ kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(__VA_ARGS__); \
} }
// 0 for no scale, no bias // 0 for no scale, no bias
// 1 for has scale, no bias // 1 for has scale, no bias
// 2 for no scale, has bias // 2 for no scale, has bias
// 3 for has scale, has bias // 3 for has scale, has bias
#define UNROLL_ALL_CASES(flags, kernel_name, args...) \ #define UNROLL_ALL_CASES(flags, kernel_name, ...) \
CHECK_CASE(0, flags, kernel_name, args) \ CHECK_CASE(0, flags, kernel_name, __VA_ARGS__) \
CHECK_CASE(1, flags, kernel_name, args) \ CHECK_CASE(1, flags, kernel_name, __VA_ARGS__) \
CHECK_CASE(2, flags, kernel_name, args) \ CHECK_CASE(2, flags, kernel_name, __VA_ARGS__) \
CHECK_CASE(3, flags, kernel_name, args) CHECK_CASE(3, flags, kernel_name, __VA_ARGS__)
template <typename T> template <typename T>
__device__ __inline__ void CudaAtomicAddWithWarp(T* sum, T value) { __device__ __inline__ void CudaAtomicAddWithWarp(T* sum, T value) {
......
...@@ -68,6 +68,11 @@ class SequenceExpandOp : public framework::OperatorWithKernel { ...@@ -68,6 +68,11 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
"Level number of Input(X)'s lod could be 0. Otherwise " "Level number of Input(X)'s lod could be 0. Otherwise "
"size of Input(X)'s first level lod should be equal to " "size of Input(X)'s first level lod should be equal to "
"size of Input(Y)'s referred level lod."); "size of Input(Y)'s referred level lod.");
} else {
PADDLE_ENFORCE_EQ(x_dims[0], y_lod[ref_level].size() - 1,
"When Input(X)'s lod is null, the dims[0] of "
"Input(X) should match the "
"size of Input(Y)'s referred level lod.");
} }
int64_t out_first_dim = 0; int64_t out_first_dim = 0;
......
...@@ -144,17 +144,19 @@ class CudnnCTCKernel : public framework::OpKernel<T> { ...@@ -144,17 +144,19 @@ class CudnnCTCKernel : public framework::OpKernel<T> {
CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, &workspace_size)); CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, &workspace_size));
T* loss_data = loss->mutable_data<T>(loss_dims, ctx.GetPlace()); T* loss_data = loss->mutable_data<T>(loss_dims, ctx.GetPlace());
math::SetConstant<DeviceContext, T>()(
auto workspace_handle = dev_ctx.cudnn_workspace_handle(); ctx.template device_context<DeviceContext>(), loss, static_cast<T>(0));
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnCTCLoss( auto temp_allocation =
handle, cu_logits_desc, warpctc_logits_data, warpctc_label_data, platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
warpctc_label_lengths.data(), warpctc_logits_lengths.data(), workspace_size);
loss_data, cu_grad_desc, warpctc_grad_data, void* cudnn_workspace = temp_allocation->ptr();
CUDNN_CTC_LOSS_ALGO_DETERMINISTIC, cu_ctcloss_desc, cudnn_workspace,
workspace_size)); CUDNN_ENFORCE(platform::dynload::cudnnCTCLoss(
}; handle, cu_logits_desc, warpctc_logits_data, warpctc_label_data,
workspace_handle.RunFunc(cudnn_func, workspace_size); warpctc_label_lengths.data(), warpctc_logits_lengths.data(), loss_data,
cu_grad_desc, warpctc_grad_data, CUDNN_CTC_LOSS_ALGO_DETERMINISTIC,
cu_ctcloss_desc, cudnn_workspace, workspace_size));
} }
}; };
......
...@@ -18,9 +18,9 @@ if(WITH_PYTHON) ...@@ -18,9 +18,9 @@ if(WITH_PYTHON)
SRCS ${PYBIND_SRCS} SRCS ${PYBIND_SRCS}
DEPS ${PYBIND_DEPS} DEPS ${PYBIND_DEPS}
${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
if(NOT APPLE AND NOT ANDROID AND NOT WIN32) if(NOT APPLE AND NOT WIN32)
target_link_libraries(paddle_pybind rt) target_link_libraries(paddle_pybind rt)
endif(NOT APPLE AND NOT ANDROID AND NOT WIN32) endif(NOT APPLE AND NOT WIN32)
endif(WITH_AMD_GPU) endif(WITH_AMD_GPU)
get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
......
...@@ -15,7 +15,9 @@ ...@@ -15,7 +15,9 @@
#include "paddle/fluid/pybind/ir.h" #include "paddle/fluid/pybind/ir.h"
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
...@@ -24,6 +26,7 @@ ...@@ -24,6 +26,7 @@
namespace py = pybind11; namespace py = pybind11;
using paddle::framework::ir::Graph; using paddle::framework::ir::Graph;
using paddle::framework::ir::Node; using paddle::framework::ir::Node;
using paddle::framework::ir::GraphSafeRemoveNodes;
using paddle::framework::OpDesc; using paddle::framework::OpDesc;
using paddle::framework::ProgramDesc; using paddle::framework::ProgramDesc;
using paddle::framework::VarDesc; using paddle::framework::VarDesc;
...@@ -32,6 +35,7 @@ using pybind11::return_value_policy; ...@@ -32,6 +35,7 @@ using pybind11::return_value_policy;
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
void BindGraph(py::module *m) { void BindGraph(py::module *m) {
m->def("graph_safe_remove_nodes", GraphSafeRemoveNodes);
py::class_<Graph, std::shared_ptr<Graph>>( py::class_<Graph, std::shared_ptr<Graph>>(
*m, "Graph", *m, "Graph",
"The graph is a Directed Acyclic Single Static Assignment Graph, see " "The graph is a Directed Acyclic Single Static Assignment Graph, see "
...@@ -42,6 +46,8 @@ void BindGraph(py::module *m) { ...@@ -42,6 +46,8 @@ void BindGraph(py::module *m) {
.def("get_float", &Graph::Get<float>) .def("get_float", &Graph::Get<float>)
.def("get_double", &Graph::Get<double>) .def("get_double", &Graph::Get<double>)
.def("get_string", &Graph::Get<std::string>) .def("get_string", &Graph::Get<std::string>)
.def("get_program", &Graph::Get<ProgramDesc>)
.def("get_marked_nodes", &Graph::Get<std::unordered_set<const Node *>>)
.def("set", [](Graph &self, const std::string &attr_name, .def("set", [](Graph &self, const std::string &attr_name,
int attr) { return self.Set(attr_name, new int(attr)); }) int attr) { return self.Set(attr_name, new int(attr)); })
.def("set", .def("set",
...@@ -57,6 +63,17 @@ void BindGraph(py::module *m) { ...@@ -57,6 +63,17 @@ void BindGraph(py::module *m) {
[](Graph &self, const std::string &attr_name, double attr) { [](Graph &self, const std::string &attr_name, double attr) {
return self.Set(attr_name, new double(attr)); return self.Set(attr_name, new double(attr));
}) })
.def("set",
[](Graph &self, const std::string &attr_name,
const ProgramDesc &attr) {
return self.Set(attr_name, new ProgramDesc(attr));
})
.def("set",
[](Graph &self, const std::string &attr_name,
const std::unordered_set<const Node *> &attr) {
return self.Set(attr_name,
new std::unordered_set<const Node *>(attr));
})
.def("erase", &Graph::Erase) .def("erase", &Graph::Erase)
.def("nodes", &Graph::Nodes, return_value_policy::reference) .def("nodes", &Graph::Nodes, return_value_policy::reference)
.def("create_var_node", .def("create_var_node",
...@@ -85,12 +102,52 @@ void BindNode(py::module *m) { ...@@ -85,12 +102,52 @@ void BindNode(py::module *m) {
py::class_<Node> node(*m, "Node"); py::class_<Node> node(*m, "Node");
node.def("name", &Node::Name) node.def("name", &Node::Name)
.def("node_type", &Node::NodeType) .def("node_type", &Node::NodeType)
.def("var", &Node::Var) .def("var", &Node::Var, return_value_policy::reference)
.def("op", &Node::Op) .def("op", &Node::Op, return_value_policy::reference)
.def("id", &Node::id) .def("id", &Node::id)
.def("is_op", &Node::IsOp) .def("is_op", &Node::IsOp)
.def("is_var", &Node::IsVar) .def("is_var", &Node::IsVar)
.def("is_ctrl_var", &Node::IsCtrlVar) .def("is_ctrl_var", &Node::IsCtrlVar)
.def("inputs_remove",
[](Node &self, int node_id) {
for (auto it = self.inputs.begin(); it != self.inputs.end();
it++) {
if ((*it)->id() == node_id) {
self.inputs.erase(it);
}
}
})
.def("inputs_remove",
[](Node &self, Node &node) {
for (auto it = self.inputs.begin(); it != self.inputs.end();
it++) {
if (*it == &node) {
self.inputs.erase(it);
}
}
})
.def("inputs_append",
[](Node &self, Node &node) { self.inputs.push_back(&node); })
.def("outputs_remove",
[](Node &self, int node_id) {
for (auto it = self.outputs.begin(); it != self.outputs.end();
it++) {
if ((*it)->id() == node_id) {
self.outputs.erase(it);
}
}
})
.def("outputs_remove",
[](Node &self, Node &node) {
for (auto it = self.outputs.begin(); it != self.outputs.end();
it++) {
if (*it == &node) {
self.outputs.erase(it);
}
}
})
.def("outputs_append",
[](Node &self, Node &node) { self.outputs.push_back(&node); })
.def_readwrite("inputs", &Node::inputs) .def_readwrite("inputs", &Node::inputs)
.def_readwrite("outputs", &Node::outputs); .def_readwrite("outputs", &Node::outputs);
......
...@@ -228,7 +228,7 @@ void BindBlockDesc(pybind11::module *m) { ...@@ -228,7 +228,7 @@ void BindBlockDesc(pybind11::module *m) {
void BindVarDsec(pybind11::module *m) { void BindVarDsec(pybind11::module *m) {
pybind11::class_<pd::VarDesc> var_desc(*m, "VarDesc", ""); pybind11::class_<pd::VarDesc> var_desc(*m, "VarDesc", "");
var_desc var_desc.def(pybind11::init<const std::string &>())
.def("name", &pd::VarDesc::Name, pybind11::return_value_policy::reference) .def("name", &pd::VarDesc::Name, pybind11::return_value_policy::reference)
.def("set_name", &pd::VarDesc::SetName) .def("set_name", &pd::VarDesc::SetName)
.def("set_shape", &pd::VarDesc::SetShape) .def("set_shape", &pd::VarDesc::SetShape)
......
...@@ -788,21 +788,33 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -788,21 +788,33 @@ All parameter, weight, gradient are variables in Paddle.
m.def("disable_profiler", platform::DisableProfiler); m.def("disable_profiler", platform::DisableProfiler);
m.def("is_profiler_enabled", platform::IsProfileEnabled); m.def("is_profiler_enabled", platform::IsProfileEnabled);
m.def("reset_profiler", platform::ResetProfiler); m.def("reset_profiler", platform::ResetProfiler);
m.def("get_pass", [](const py::bytes &binary_str) {
std::string pass_type(binary_str);
auto pass = framework::ir::PassRegistry::Instance().Get(pass_type);
return std::shared_ptr<framework::ir::Pass>(std::move(pass));
});
py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass"); py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass");
pass.def(py::init()) pass.def(py::init())
.def("has", &ir::Pass::Has)
.def("set",
[](ir::Pass &self, const std::string &attr_name,
const ProgramDesc &attr) {
return self.Set(attr_name, new ProgramDesc(attr));
})
.def( .def(
"set_str", "set",
[](ir::Pass &self, const std::string &name, const std::string &attr) { [](ir::Pass &self, const std::string &name, const std::string &attr) {
self.Set<std::string>(name, new std::string(attr)); self.Set<std::string>(name, new std::string(attr));
}) })
.def("set_int", [](ir::Pass &self, const std::string &name, .def("set", [](ir::Pass &self, const std::string &name,
int val) { self.Set<const int>(name, new int(val)); }) int val) { self.Set<const int>(name, new int(val)); })
.def("get_program", &ir::Pass::Get<ProgramDesc>)
.def("type", &ir::Pass::Type) .def("type", &ir::Pass::Type)
.def("apply", [](ir::Pass &self, std::shared_ptr<ir::Graph> graph) { .def("apply", [](ir::Pass &self, std::shared_ptr<ir::Graph> graph) {
std::unique_ptr<ir::Graph> origin_graph(graph.get()); std::unique_ptr<ir::Graph> origin_graph(graph.get());
auto optim_graph = self.Apply(std::move(origin_graph)); auto optim_graph = self.Apply(std::move(origin_graph));
graph.reset(optim_graph.release()); optim_graph.release();
}); });
py::class_<ir::PassBuilder, std::shared_ptr<ir::PassBuilder>> pb( py::class_<ir::PassBuilder, std::shared_ptr<ir::PassBuilder>> pb(
......
swig_paddle.py
_swig_paddle.so
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.trainer.PyDataProvider2 as dp2
import collections
import swig_paddle
import numpy
import itertools
from functools import reduce
__all__ = ['DataProviderConverter']
class IScanner(object):
"""
The scanner will scan Python object two passes, then convert it to Paddle's
argument.
In the first pass, `pre_scan` will be invoked by every data instance, and
then invoke `finish_pre_scan` to arguments. And the second pass do the same
thing except the functions changed to `scan`, `finish_scan`.
During the first pass, a scanner may count the shape of input matrix and
allocate memory for this argument. Then fill the data into this argument
in second pass.
"""
def __init__(self, input_type, pos):
self.input_type = input_type
if not isinstance(self.input_type, dp2.InputType):
raise ValueError("input type should be dataprovider2.InputType")
self.pos = pos
# data_in_gpu is used to indicate whether to create argument on GPU
# or not in GPU mode. Now if using one thread (trainer_count=1),
# trainer uses NeuralNetwork which needs to create argument on GPU
# before calling forward function. So, set data_in_gpu to True.
# Otherwise, trainer uses MultiGradientMachine which will transfer
# data from CPU to GPU in the forward function, set data_in_gpu to
# False in this case.
self.data_in_gpu = swig_paddle.isUsingGpu(
) and swig_paddle.getTrainerCount() == 1
def pre_scan(self, dat):
"""
First pass scan method. During this method, the scanner could count the
data number, and get the total memory size this batch would use.
:param dat: The python object.
"""
pass
def finish_pre_scan(self, argument):
"""
Finish first scan pass. Allocate the memory.
:param argument: Output arguments object.
:type argument: swig_paddle.Arguments
:param dat: Output arguments object.
:type dat: The Python object, numpy.array or List.
:return:
"""
pass
def scan(self, dat):
"""
Second pass scan method. Copy the data to arguments.
:param dat: The python object.
"""
pass
def finish_scan(self, argument):
"""
Finish second pass. Finalize the resources, etc.
:param argument: Output arguments object.
:type argument: swig_paddle.Arguments
"""
pass
class DenseScanner(IScanner):
"""
:type __mat__: numpy.ndarray
"""
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__mat__ = None
self.__shape__ = None
self.__height__ = 0
self.__dim__ = 0
def pre_scan(self, dat):
self.__height__ += 1
if self.__shape__ is None:
self.__shape__ = numpy.array(dat).shape
if len(self.__shape__) > 3:
raise ValueError(
"The dimension of input cannot be greater than 3.")
if len(self.__shape__) == 0:
raise ValueError(
"The input should be a vector, please check your input data."
)
self.__dim__ = reduce(lambda x, y: x * y, self.__shape__)
if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim:
raise ValueError(
"The data size must be equal to it in data layer.")
else:
if self.__shape__ != numpy.array(dat).shape:
raise ValueError(
"The data shape must be same in one mini-batch.")
def finish_pre_scan(self, argument):
self.__mat__ = numpy.ndarray(
shape=(self.__height__, self.__dim__), dtype=numpy.float32)
self.__height__ = 0
def scan(self, dat):
# It's better to use NumPy array for speed.
dat = numpy.array(dat)
dat = dat.flatten()
self.__mat__[self.__height__] = dat
self.__height__ += 1
def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments)
if self.__mat__.dtype != numpy.float32:
self.__mat__ = self.__mat__.astype(numpy.float32)
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
self.data_in_gpu)
argument.setSlotValue(self.pos, m)
if len(self.__shape__) > 1:
# The last-two dimenstions are the frame height and width.
# For example, the layout is CHW for 3-D feature of image.
# The H and W are the frame height and width.
h, w = self.__shape__[-2:]
argument.setSlotFrameHeight(self.pos, h)
argument.setSlotFrameWidth(self.pos, w)
self.__shape__ = None
class SparseBinaryScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__rows__ = [0]
self.__cols__ = []
self.__height__ = 0
self.__value__ = []
def scan(self, dat):
self.extend_cols(dat)
self.__rows__.append(len(self.__cols__))
self.__height__ += 1
def extend_cols(self, dat):
self.__cols__.extend(dat)
def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments)
m = swig_paddle.Matrix.createSparse(
self.__height__,
self.input_type.dim,
len(self.__cols__),
len(self.__value__) == 0,
False, # trans
False) # TODO supoort GPU
assert isinstance(m, swig_paddle.Matrix)
m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
argument.setSlotValue(self.pos, m)
class SparseFloatScanner(SparseBinaryScanner):
def __init__(self, input_type, pos):
SparseBinaryScanner.__init__(self, input_type, pos)
def extend_cols(self, dat):
self.__cols__.extend((x[0] for x in dat))
self.__value__.extend((x[1] for x in dat))
class IndexScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__ids__ = None
self.__idx__ = 0
def pre_scan(self, dat):
self.__idx__ += 1
def finish_pre_scan(self, argument):
self.__ids__ = [0] * self.__idx__
self.__idx__ = 0
def scan(self, dat):
self.__ids__[self.__idx__] = dat
self.__idx__ += 1
def finish_scan(self, argument):
ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
assert isinstance(argument, swig_paddle.Arguments)
argument.setSlotIds(self.pos, ids)
class SequenceScanner(IScanner):
def __init__(self, input_type, pos, inner_scanner, setter):
IScanner.__init__(self, input_type, pos)
self.__seq__ = [0]
self.__inner_scanner__ = inner_scanner
self.__setter__ = setter
def pre_scan(self, dat):
for each in dat:
self.__inner_scanner__.pre_scan(each)
def finish_pre_scan(self, argument):
self.__inner_scanner__.finish_pre_scan(argument)
def scan(self, dat):
self.__seq__.append(self.__seq__[-1] + self.get_size(dat))
for each in dat:
self.__inner_scanner__.scan(each)
def finish_scan(self, argument):
seq = swig_paddle.IVector.create(self.__seq__, False)
self.__setter__(argument, self.pos, seq)
self.__inner_scanner__.finish_scan(argument)
def get_size(self, dat):
if isinstance(self.__inner_scanner__, SequenceScanner):
return sum(self.__inner_scanner__.get_size(item) for item in dat)
else:
return len(dat)
class DataProviderConverter(object):
def __init__(self, input_types):
self.input_types = input_types
assert isinstance(self.input_types, collections.Sequence)
for each in self.input_types:
assert isinstance(each, dp2.InputType)
def convert(self, dat, argument=None):
if argument is None:
argument = swig_paddle.Arguments.createArguments(0)
assert isinstance(argument, swig_paddle.Arguments)
argument.resize(len(self.input_types))
scanners = [
DataProviderConverter.create_scanner(i, each_type)
for i, each_type in enumerate(self.input_types)
]
for each_sample in dat:
for each_step, scanner in itertools.izip(each_sample, scanners):
scanner.pre_scan(each_step)
for scanner in scanners:
scanner.finish_pre_scan(argument)
for each_sample in dat:
for each_step, scanner in itertools.izip(each_sample, scanners):
scanner.scan(each_step)
for scanner in scanners:
scanner.finish_scan(argument)
return argument
def __call__(self, dat, argument=None):
return self.convert(dat, argument)
@staticmethod
def create_scanner(i, each):
assert isinstance(each, dp2.InputType)
retv = None
if each.type == dp2.DataType.Dense:
retv = DenseScanner(each, i)
elif each.type == dp2.DataType.Index:
retv = IndexScanner(each, i)
elif each.type == dp2.DataType.SparseNonValue:
retv = SparseBinaryScanner(each, i)
elif each.type == dp2.DataType.SparseValue:
retv = SparseFloatScanner(each, i)
assert retv is not None
if each.seq_type == dp2.SequenceType.SUB_SEQUENCE:
retv = SequenceScanner(
each, i, retv,
lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq))
if each.seq_type in [
dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE
]:
retv = SequenceScanner(
each, i, retv,
lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq))
return retv
此差异已折叠。
...@@ -40,7 +40,6 @@ The lastest pre-built build environment images are: ...@@ -40,7 +40,6 @@ The lastest pre-built build environment images are:
| Image | Tag | | Image | Tag |
| ----- | --- | | ----- | --- |
| paddlepaddle/paddle | latest-dev | | paddlepaddle/paddle | latest-dev |
| paddlepaddle/paddle | latest-dev-android |
### Start Build ### Start Build
...@@ -68,8 +67,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" ...@@ -68,8 +67,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
| `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_TESTING` | OFF | Build unit tests binaries. |
| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. | | `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_C_API` | OFF | Build capi libraries for inference. |
| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
| `WITH_STYLE_CHECK` | ON | Check the code style when building. | | `WITH_STYLE_CHECK` | ON | Check the code style when building. |
| `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu | | `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu |
......
...@@ -33,7 +33,6 @@ function print_usage() { ...@@ -33,7 +33,6 @@ function print_usage() {
${BLUE}gen_doc_lib${NONE}: generate paddle documents library ${BLUE}gen_doc_lib${NONE}: generate paddle documents library
${BLUE}html${NONE}: convert C++ source code into HTML ${BLUE}html${NONE}: convert C++ source code into HTML
${BLUE}dockerfile${NONE}: generate paddle release dockerfile ${BLUE}dockerfile${NONE}: generate paddle release dockerfile
${BLUE}capi${NONE}: generate paddle CAPI package
${BLUE}fluid_inference_lib${NONE}: deploy fluid inference library ${BLUE}fluid_inference_lib${NONE}: deploy fluid inference library
${BLUE}check_style${NONE}: run code style check ${BLUE}check_style${NONE}: run code style check
${BLUE}cicheck${NONE}: run CI tasks ${BLUE}cicheck${NONE}: run CI tasks
...@@ -180,9 +179,7 @@ function cmake_gen() { ...@@ -180,9 +179,7 @@ function cmake_gen() {
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
-DCUDNN_ROOT=/usr/ -DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_TESTING=${WITH_TESTING:-ON}
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
...@@ -216,8 +213,6 @@ EOF ...@@ -216,8 +213,6 @@ EOF
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \ -DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_TESTING=${WITH_TESTING:-ON} \
...@@ -706,59 +701,43 @@ EOF ...@@ -706,59 +701,43 @@ EOF
EOF EOF
} }
function gen_capi_package() {
if [[ ${WITH_C_API} == "ON" ]]; then
capi_install_prefix=${INSTALL_PREFIX:-/paddle/build}/capi_output
rm -rf $capi_install_prefix
make DESTDIR="$capi_install_prefix" install
cd $capi_install_prefix/
ls | egrep -v "^Found.*item$" | xargs tar -czf ${PADDLE_ROOT}/build/paddle.tgz
fi
}
function gen_fluid_lib() { function gen_fluid_lib() {
mkdir -p ${PADDLE_ROOT}/build mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build
if [[ ${WITH_C_API:-OFF} == "OFF" ]] ; then cat <<EOF
cat <<EOF
======================================== ========================================
Generating fluid library for train and inference ... Generating fluid library for train and inference ...
======================================== ========================================
EOF EOF
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON
make -j `nproc` fluid_lib_dist make -j `nproc` fluid_lib_dist
make -j `nproc` inference_lib_dist make -j `nproc` inference_lib_dist
fi
} }
function tar_fluid_lib() { function tar_fluid_lib() {
if [[ ${WITH_C_API:-OFF} == "OFF" ]] ; then cat <<EOF
cat <<EOF
======================================== ========================================
Taring fluid library for train and inference ... Taring fluid library for train and inference ...
======================================== ========================================
EOF EOF
cd ${PADDLE_ROOT}/build cd ${PADDLE_ROOT}/build
cp -r fluid_install_dir fluid cp -r fluid_install_dir fluid
tar -czf fluid.tgz fluid tar -czf fluid.tgz fluid
cp -r fluid_inference_install_dir fluid_inference cp -r fluid_inference_install_dir fluid_inference
tar -czf fluid_inference.tgz fluid_inference tar -czf fluid_inference.tgz fluid_inference
fi
} }
function test_fluid_lib() { function test_fluid_lib() {
if [[ ${WITH_C_API:-OFF} == "OFF" ]] ; then cat <<EOF
cat <<EOF
======================================== ========================================
Testing fluid library for inference ... Testing fluid library for inference ...
======================================== ========================================
EOF EOF
cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \ ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \
${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} \ ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} \
${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib} ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib}
./clean.sh ./clean.sh
fi
} }
function main() { function main() {
...@@ -791,11 +770,6 @@ function main() { ...@@ -791,11 +770,6 @@ function main() {
dockerfile) dockerfile)
gen_dockerfile ${PYTHON_ABI:-""} gen_dockerfile ${PYTHON_ABI:-""}
;; ;;
capi)
cmake_gen ${PYTHON_ABI:-""}
build
gen_capi_package
;;
fluid_inference_lib) fluid_inference_lib)
cmake_gen ${PYTHON_ABI:-""} cmake_gen ${PYTHON_ABI:-""}
gen_fluid_lib gen_fluid_lib
...@@ -810,7 +784,6 @@ function main() { ...@@ -810,7 +784,6 @@ function main() {
build build
assert_api_not_changed ${PYTHON_ABI:-""} assert_api_not_changed ${PYTHON_ABI:-""}
run_test run_test
gen_capi_package
gen_fluid_lib gen_fluid_lib
test_fluid_lib test_fluid_lib
assert_api_spec_approvals assert_api_spec_approvals
...@@ -820,7 +793,6 @@ function main() { ...@@ -820,7 +793,6 @@ function main() {
assert_api_spec_approvals assert_api_spec_approvals
;; ;;
test_inference) test_inference)
gen_capi_package
gen_fluid_lib gen_fluid_lib
test_fluid_lib test_fluid_lib
;; ;;
......
...@@ -28,7 +28,6 @@ function start_build_docker() { ...@@ -28,7 +28,6 @@ function start_build_docker() {
-e WITH_AVX=ON \ -e WITH_AVX=ON \
-e WITH_GOLANG=OFF \ -e WITH_GOLANG=OFF \
-e WITH_TESTING=ON \ -e WITH_TESTING=ON \
-e WITH_C_API=OFF \
-e WITH_COVERAGE=ON \ -e WITH_COVERAGE=ON \
-e COVERALLS_UPLOAD=ON \ -e COVERALLS_UPLOAD=ON \
-e WITH_DEB=OFF \ -e WITH_DEB=OFF \
...@@ -67,9 +66,6 @@ function main() { ...@@ -67,9 +66,6 @@ function main() {
DOCKER_REPO="paddlepaddle/paddle" DOCKER_REPO="paddlepaddle/paddle"
VERSION="latest-dev" VERSION="latest-dev"
PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )"
if [ "$1" == "build_android" ]; then
VERSION="latest-dev-android"
fi
IMG=${DOCKER_REPO}:${VERSION} IMG=${DOCKER_REPO}:${VERSION}
start_build_docker $@ start_build_docker $@
} }
......
...@@ -39,7 +39,6 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE}) ...@@ -39,7 +39,6 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE})
IF(WIN32) IF(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/ COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
...@@ -48,7 +47,6 @@ ELSE(WIN32) ...@@ -48,7 +47,6 @@ ELSE(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND touch stub.cc COMMAND touch stub.cc
COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
......
...@@ -11,8 +11,12 @@ ...@@ -11,8 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function
import os
import subprocess
from ....framework import Program from ....framework import Program
from ....framework import Block
from .... import core
__all__ = ['Graph', 'ImitationGraph', 'IRGraph'] __all__ = ['Graph', 'ImitationGraph', 'IRGraph']
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,13 +12,9 @@ ...@@ -12,13 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from util import DataProviderWrapperConverter from __future__ import print_function
from dataprovider_converter import DataProviderConverter
__all__ = [ from . import quantization_pass
'paddle', from .quantization_pass import *
'DataProviderConverter',
'DataProviderWrapperConverter', # for deprecated usage. __all__ = quantization_pass.__all__
'loadParameterFile'
]
util.monkeypatches()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
from .... import core
from ....framework import IrGraph
from ....framework import Program
from ....framework import Variable
from ....initializer import Constant
from .... import unique_name
__all__ = ['QuantizationTransformPass']
class QuantizationTransformPass(object):
def __init__(self,
scope=None,
program_exe=None,
weight_bits=8,
activation_bits=8,
activation_quantize_type='abs_max',
weight_quantize_type='abs_max',
window_size=10000):
"""
Convert and rewrite the IrGraph according to weight and
activation quantization type.
Args:
weight_bits (int): quantization bit number for weights,
the bias is not quantized.
activation_bits (int): quantization bit number for activation.
activation_quantize_type (str): quantization type for activation,
now support 'abs_max', 'range_abs_max'. If use 'abs_max' mode,
the quantization scale will be calculated dynamically each step
in both training and testing period. If use 'range_abs_max',
a static quantization scale will be calculated during training
and used in inference.
weight_quantize_type (str): quantization type for weights,
support 'abs_max'. The 'range_abs_max' usually is not used for
weight, since weights are fixed once the model is well trained.
window_size (int): the window size for 'range_abs_max' quantization.
Examples:
.. code-block:: python
# The original graph will be rewrite.
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization \
import QuantizationTransformPass
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
exe = fluid.Executor(fluid.CPUPlace())
transform_pass = QuantizationTransformPass(fluid.global_scope(),
exe)
transform_pass.apply(graph)
"""
self._scope = scope
self._program_exe = program_exe
self._weight_bits = weight_bits
self._activation_bits = activation_bits
quant_type = ['abs_max', 'range_abs_max']
if activation_quantize_type not in quant_type:
raise ValueError(
"Unknown activation_quantize_type : '%s'. It can only be ",
"'abs_max' or 'range_abs_max'.", str(activation_quantize_type))
if weight_quantize_type not in quant_type:
raise ValueError(
"Unknown weight_quantize_type: '%s'. It can only be ",
"'abs_max' or 'range_abs_max'.", str(weight_quantize_type))
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
self._window_size = window_size
self._need_initialized = collections.OrderedDict()
self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
self._quantizable_grad_ops = [
'%s_grad' % (op) for op in self._quantizable_ops
]
self._fake_quant_op_types = [
'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
]
self._fake_dequant_op_types = ['fake_dequantize_max_abs']
self._is_test = None
self._global_step = None
def apply(self, graph):
assert isinstance(graph,
IrGraph), 'graph must be the instance of IrGraph.'
self._need_initialized.clear()
self._is_test = graph.is_test()
# marked the variable which has been dequantized.
dequantized_vars = collections.OrderedDict()
params = [p.name() for p in graph.all_parameters()]
def _transform_forward(graph, op):
for var_node in op.inputs:
if var_node.name() in dequantized_vars:
dequant_var_node = dequantized_vars[var_node.name()]
else:
quant_bits = self._weight_bits if var_node.name() in params \
else self._activation_bits
quant_type = self._weight_quantize_type if var_node.name() \
in params else self._activation_quantize_type
quant_var_node, scale_var_node = self._insert_quant_op(
graph, var_node, quant_bits, quant_type)
dequant_var_node = self._insert_dequant_op(
graph, quant_var_node, scale_var_node, quant_bits)
dequantized_vars[var_node.name()] = dequant_var_node
graph.update_input_link(var_node, dequant_var_node, op)
def _transform_backward(graph, op):
no_dequanted_input_vars = True
for var_node in op.inputs:
if var_node.name() in dequantized_vars:
dequant_var_node = dequantized_vars[var_node.name()]
graph.update_input_link(var_node, dequant_var_node, op)
no_dequanted_input_vars = False
if no_dequanted_input_vars:
raise ValueError("There is no dequanted inputs for op %s." %
(op.name()))
if not self._is_test:
self._create_global_step(graph)
ops = graph.all_ops()
# The process of _transform_forward and _transform_backward is needed in two for loops.
# The loop for transforming the forward graph:
for op in ops:
if op.name() in self._quantizable_ops:
_transform_forward(graph, op)
# The loop for renaming the inputs of backward op.
for op in ops:
if op.name() in self._quantizable_grad_ops:
_transform_backward(graph, op)
if len(self._need_initialized) > 0:
assert self._scope is not None, \
'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
assert self._program_exe is not None, \
'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
init_program = Program()
for var_desc, initializer in self._need_initialized.iteritems():
var = Variable(init_program.global_block())
var._set_desc(var_desc)
initializer(var, init_program.global_block())
self._program_exe.run(program=init_program, scope=self._scope)
return graph
def _create_global_step(self, graph):
if self._weight_quantize_type == 'range_abs_max' or \
self._activation_quantize_type == 'range_abs_max':
counter_name = '@STEP_COUNTER@'
for node in graph.all_vars():
if node.name() == counter_name:
self._global_step = node
if self._global_step is None:
global_step_in = graph.create_param_node(
name=counter_name,
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=core.VarDesc.VarType.INT64)
self._need_initialized[global_step_in.var()] = \
Constant(value=0, force_cpu=True)
global_step_out = graph.create_var_node_from_desc(
global_step_in.var())
increment_op = graph.create_op_node(
op_type='increment',
attrs={'step': 1.0},
inputs={'X': global_step_in},
outputs={'Out': global_step_out})
graph.link_to(global_step_in, increment_op)
graph.link_to(increment_op, global_step_out)
self._global_step = global_step_out
def _insert_quant_op(self, graph, var_node, quant_bits, quant_type):
"""
Insert fake_quantize_op in the graph.
"""
if quant_type == 'abs_max':
return self._insert_quant_abs_max_op(graph, var_node, quant_bits)
elif quant_type == 'range_abs_max':
return self._insert_quant_range_abs_max_op(graph, var_node,
quant_bits)
def _insert_quant_abs_max_op(self, graph, var_node, quant_bits):
"""
Insert fake_quantize_abs_max op in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
scale_var_node = graph.create_var_node(
name=self._quantized_scale_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
quant_op_node = graph.create_op_node(
op_type='fake_quantize_abs_max',
attrs={'bit_length': quant_bits},
inputs={'X': var_node},
outputs={'Out': quant_var_node,
'OutScale': scale_var_node})
graph.link_to(var_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_var_node)
return quant_var_node, scale_var_node
def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits):
"""
Insert fake_quantize_range_abs_max on the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
scale_in_node = graph.create_param_node(
name=self._quantized_scale_name(var_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=var_node.var().dtype())
self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
inputs = {'X': var_node, 'InScale': scale_in_node}
outputs = {'Out': quant_var_node, 'OutScale': scale_out_node}
if not self._is_test:
# The name of scales_var_node maybe 'scales_0', 'scales_1', etc.
scales_node = graph.create_param_node(
name=unique_name.generate('scales'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[self._window_size],
var_dtype=var_node.var().dtype())
self._need_initialized[scales_node.var()] = Constant(value=0)
inputs['Iter'] = self._global_step
outputs['OutScales'] = scales_node
attrs = {
'window_size': self._window_size,
'bit_length': quant_bits,
'is_test': self._is_test
}
quant_op_node = graph.create_op_node(
op_type='fake_quantize_range_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs)
graph.link_to(var_node, quant_op_node)
graph.link_to(scale_in_node, quant_op_node)
graph.link_to(quant_op_node, quant_var_node)
graph.link_to(quant_op_node, scale_out_node)
if not self._is_test:
graph.link_to(self._global_step, quant_op_node)
graph.link_to(quant_op_node, scales_node)
return quant_var_node, scale_out_node
def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits):
"""
Insert fake_dequantize_op in the graph.
"""
assert var_node.is_var(), '{} is not a var'.format(var_node.name())
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
max_range = (1 << (quant_bits - 1)) - 1
dequant_op_node = graph.create_op_node(
op_type='fake_dequantize_max_abs',
attrs={'max_range': float(max_range)},
inputs={'X': var_node,
'Scale': scale_var_node},
outputs={'Out': dequant_var_node})
graph.link_to(var_node, dequant_op_node)
graph.link_to(scale_var_node, dequant_op_node)
graph.link_to(dequant_op_node, dequant_var_node)
return dequant_var_node
def _quantized_var_name(self, var_name):
"""
Return quantized variable name for the input `var_name`.
"""
return "%s.quantized" % (var_name)
def _dequantized_var_name(self, var_name):
"""
Return dequantized variable name for the input `var_name`.
"""
return "%s.dequantized" % (var_name)
def _quantized_scale_name(self, var_name):
"""
Return the scale name of quantized variable for the input `var_name`.
"""
return "%s.scale" % (var_name)
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
from paddle.fluid.framework import Program
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid import core
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in six.moves.xrange(num):
hidden = fluid.layers.fc(hidden, size=128, act='relu')
loss = fluid.layers.cross_entropy(input=hidden, label=label)
loss = fluid.layers.mean(loss)
return loss
def residual_block(num):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
fc = fluid.layers.fc(input=hidden, size=10)
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return loss
class TestQuantizationTransformPass(unittest.TestCase):
def setUp(self):
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y']
}
self.quantizable_grad_op_inputs = {
'conv2d_grad': ['Input', 'Filter'],
'depthwise_conv2d_grad': ['Input', 'Filter'],
'mul_grad': ['X', 'Y']
}
def check_program(self, transform_pass, program):
quantized_ops = set()
for block in program.blocks:
for op in block.ops:
# check forward
if op.type in self.quantizable_op_and_inputs:
for arg_name in op.input_arg_names:
self.assertTrue(
arg_name.endswith('.quantized.dequantized'))
quantized_ops.add(arg_name)
for op in block.ops:
# check backward
if op.type in self.quantizable_grad_op_inputs:
for pname in self.quantizable_grad_op_inputs[op.type]:
arg_name = op.input(pname)[0]
self.assertTrue(
arg_name.endswith('.quantized.dequantized'))
self.assertTrue(arg_name in quantized_ops)
def linear_fc_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
program_exe=exe,
activation_quantize_type=quant_type)
transform_pass.apply(graph)
marked_nodes = set()
for op in graph.all_ops():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
program = graph.to_program()
self.check_program(transform_pass, program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
val_marked_nodes = set()
for op in val_graph.all_ops():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)
def test_linear_fc_quant_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.linear_fc_quant('abs_max')
def test_linear_fc_quant_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.linear_fc_quant('range_abs_max')
def residual_block_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
program_exe=exe,
activation_quantize_type=quant_type)
transform_pass.apply(graph)
marked_nodes = set()
for op in graph.all_ops():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
program = graph.to_program()
self.check_program(transform_pass, program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
val_marked_nodes = set()
for op in val_graph.all_ops():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)
def test_residual_block_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.residual_block_quant('abs_max')
def test_residual_block_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.residual_block_quant('range_abs_max')
if __name__ == '__main__':
unittest.main()
...@@ -305,7 +305,9 @@ class Executor(object): ...@@ -305,7 +305,9 @@ class Executor(object):
def __init__(self, place): def __init__(self, place):
self.place = place self.place = place
self.program_caches = dict() self.program_caches = dict()
self.executor = None p = core.Place()
p.set_place(self.place)
self._default_executor = core.Executor(p)
self._closed = False self._closed = False
def _get_program_cache(self, program_cache_key): def _get_program_cache(self, program_cache_key):
...@@ -397,12 +399,13 @@ class Executor(object): ...@@ -397,12 +399,13 @@ class Executor(object):
>>> ... >>> ...
>>> exe.close() >>> exe.close()
""" """
if not self._closed and self.executor: if not self._closed:
self.executor.close() self._default_executor.close()
self._closed = True self._closed = True
def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name, def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name,
return_numpy): return_numpy):
exe = program._executor
if isinstance(feed, dict): if isinstance(feed, dict):
feed_tensor_dict = dict() feed_tensor_dict = dict()
for feed_name in feed: for feed_name in feed:
...@@ -414,8 +417,7 @@ class Executor(object): ...@@ -414,8 +417,7 @@ class Executor(object):
feed_tensor.set(feed[feed_name], core.CPUPlace()) feed_tensor.set(feed[feed_name], core.CPUPlace())
feed_tensor_dict[feed_name] = feed_tensor feed_tensor_dict[feed_name] = feed_tensor
self.executor.feed_and_split_tensor_into_local_scopes( exe.feed_and_split_tensor_into_local_scopes(feed_tensor_dict)
feed_tensor_dict)
elif isinstance(feed, list) or isinstance(feed, tuple): elif isinstance(feed, list) or isinstance(feed, tuple):
if len(feed) != len(program._places): if len(feed) != len(program._places):
raise ValueError( raise ValueError(
...@@ -436,10 +438,10 @@ class Executor(object): ...@@ -436,10 +438,10 @@ class Executor(object):
tensor = tmp tensor = tmp
res_dict[feed_name] = tensor res_dict[feed_name] = tensor
res.append(res_dict) res.append(res_dict)
self.executor.feed_tensors_into_local_scopes(res) exe.feed_tensors_into_local_scopes(res)
fetch_var_names = list(map(_to_name_str, fetch_list)) fetch_var_names = list(map(_to_name_str, fetch_list))
self.executor.run(fetch_var_names, fetch_var_name) exe.run(fetch_var_names, fetch_var_name)
arr = scope.find_var(fetch_var_name).get_lod_tensor_array() arr = scope.find_var(fetch_var_name).get_lod_tensor_array()
if return_numpy: if return_numpy:
...@@ -511,12 +513,9 @@ class Executor(object): ...@@ -511,12 +513,9 @@ class Executor(object):
compiled = isinstance(program, compiler.CompiledProgram) compiled = isinstance(program, compiler.CompiledProgram)
# For backward compatibility, run directly. # For backward compatibility, run directly.
if not compiled: if not compiled:
if not self.executor:
p = core.Place()
p.set_place(self.place)
self.executor = core.Executor(p)
return self._run( return self._run(
program, program,
self._default_executor,
feed=feed, feed=feed,
fetch_list=fetch_list, fetch_list=fetch_list,
feed_var_name=feed_var_name, feed_var_name=feed_var_name,
...@@ -526,7 +525,6 @@ class Executor(object): ...@@ -526,7 +525,6 @@ class Executor(object):
use_program_cache=use_program_cache) use_program_cache=use_program_cache)
program._compile(scope, self.place) program._compile(scope, self.place)
self.executor = program._executor
if program._is_data_parallel: if program._is_data_parallel:
return self._run_parallel( return self._run_parallel(
program, program,
...@@ -536,12 +534,13 @@ class Executor(object): ...@@ -536,12 +534,13 @@ class Executor(object):
fetch_var_name=fetch_var_name, fetch_var_name=fetch_var_name,
return_numpy=return_numpy) return_numpy=return_numpy)
elif program._is_inference: elif program._is_inference:
return self._run_inference(program, feed) return self._run_inference(program._executor, feed)
else: else:
# TODO(panyx0718): Can compile program to optimize executor # TODO(panyx0718): Can compile program to optimize executor
# performance. # performance.
return self._run( return self._run(
program._program, program._program,
self._default_executor,
feed=feed, feed=feed,
fetch_list=fetch_list, fetch_list=fetch_list,
feed_var_name=feed_var_name, feed_var_name=feed_var_name,
...@@ -550,8 +549,8 @@ class Executor(object): ...@@ -550,8 +549,8 @@ class Executor(object):
return_numpy=return_numpy, return_numpy=return_numpy,
use_program_cache=use_program_cache) use_program_cache=use_program_cache)
def _run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, def _run(self, program, exe, feed, fetch_list, feed_var_name,
scope, return_numpy, use_program_cache): fetch_var_name, scope, return_numpy, use_program_cache):
if feed is None: if feed is None:
feed = {} feed = {}
...@@ -589,11 +588,11 @@ class Executor(object): ...@@ -589,11 +588,11 @@ class Executor(object):
fetch_var_name=fetch_var_name) fetch_var_name=fetch_var_name)
self._feed_data(program, feed, feed_var_name, scope) self._feed_data(program, feed, feed_var_name, scope)
self.executor.run(program.desc, scope, 0, True, True) exe.run(program.desc, scope, 0, True, True)
outs = self._fetch_data(fetch_list, fetch_var_name, scope) outs = self._fetch_data(fetch_list, fetch_var_name, scope)
if return_numpy: if return_numpy:
outs = as_numpy(outs) outs = as_numpy(outs)
return outs return outs
def _run_inference(self, program, feed): def _run_inference(self, exe, feed):
return self.executor.run(feed) return exe.run(feed)
...@@ -23,6 +23,7 @@ import traceback ...@@ -23,6 +23,7 @@ import traceback
import six import six
import numpy as np import numpy as np
import subprocess
from .. import compat as cpt from .. import compat as cpt
from .proto import framework_pb2 from .proto import framework_pb2
...@@ -1512,6 +1513,154 @@ class Block(object): ...@@ -1512,6 +1513,154 @@ class Block(object):
return ret_var return ret_var
class IrGraph(object):
"""
IrGraph uses core.Graph as the delegation to accomplish the manipulation.
"""
def __init__(self, graph, for_test=False):
"""
Construct the IrGraph using core.Graph.
Args:
graph(core.Graph): C++ Graph.
for_test(bool): True for the test graph and false for the train graph.
"""
assert isinstance(
graph, core.Graph), 'graph must be the instance of core.Graph.'
self.graph = graph
self._for_test = for_test
def is_test(self):
return self._for_test
def all_parameters(self):
param_nodes = set()
for node in self.graph.nodes():
if node.is_var() and node.var() is not None and node.var(
).persistable():
param_nodes.add(node)
return param_nodes
def all_vars(self):
return {node for node in self.graph.nodes() if node.is_var()}
def all_ops(self):
return {node for node in self.graph.nodes() if node.is_op()}
def create_param_node(self, name, var_type, shape, var_dtype):
var_desc = core.VarDesc(name)
var_desc.set_type(var_type)
var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype)
var_desc.set_persistable(True)
return self.graph.create_var_node(var_desc)
def create_var_node(self, name, var_type, shape, var_dtype):
var_desc = core.VarDesc(name)
var_desc.set_type(var_type)
var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype)
return self.graph.create_var_node(var_desc)
def create_var_node_from_desc(self, var_desc):
return self.graph.create_var_node(var_desc)
def create_op_node(self, op_type, attrs, inputs, outputs):
op_desc = core.OpDesc()
op_desc.set_type(op_type)
for attr, value in attrs.iteritems():
self._update_desc_attr(op_desc, attr, value)
for input_name, var_nodes in inputs.iteritems():
if not isinstance(var_nodes, list):
var_nodes = [var_nodes]
op_desc.set_input(input_name,
[var_node.name() for var_node in var_nodes])
for output_name, var_nodes in outputs.iteritems():
if not isinstance(var_nodes, list):
var_nodes = [var_nodes]
op_desc.set_output(output_name,
[var_node.name() for var_node in var_nodes])
return self.graph.create_op_node(op_desc)
def create_op_node_from_desc(self, op_desc):
return self.graph.create_op_node(op_desc)
def update_input_link(self, old_input_node, new_input_node, op_node):
assert old_input_node in self.graph.nodes() and new_input_node in self.graph.nodes() and \
op_node in self.graph.nodes(), 'Th three arguments must be in the graph nodes.'
old_input_node.outputs_remove(op_node)
op_node.inputs_remove(old_input_node)
new_input_node.outputs_append(op_node)
op_node.inputs_append(new_input_node)
op_node.op()._rename_input(old_input_node.name(), new_input_node.name())
def link_to(self, node_in, node_out):
assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \
'Th two arguments must be in the graph nodes.'
node_in.outputs_append(node_out)
node_out.inputs_append(node_in)
def safe_remove_nodes(self, remove_nodes):
if not isinstance(remove_nodes, set):
remove_nodes = set(remove_nodes)
core.graph_safe_remove_nodes(self.graph, remove_nodes)
def draw(self, save_path, name, marked_nodes=None):
def _convert_to_pdf(dot_file_path):
pdf_save_path = os.path.splitext(dot_file_path)[0] + '.pdf'
exited_code = subprocess.call('dot -Tpdf ' + dot_file_path \
+ ' -o ' + pdf_save_path, shell=True)
if exited_code != 0:
print('The dot command is needed for creating pdf files.')
print('The {} is saved as the dot filetype.'.format(
dot_file_path))
remove_ctr_vars = set()
ops_num = 0
for node in self.graph.nodes():
if node.is_ctrl_var():
remove_ctr_vars.add(node)
elif node.is_op():
ops_num += 1
print('Total ops num = {}.'.format(ops_num))
self.safe_remove_nodes(remove_ctr_vars)
if marked_nodes is not None:
if not isinstance(marked_nodes, set):
marked_nodes = set(marked_nodes)
marked_nodes = marked_nodes - remove_ctr_vars
if self.graph.has('__graphviz__marked_node__'):
self.graph.erase('__graphviz__marked_node__')
self.graph.set('__graphviz__marked_node__', marked_nodes)
viz_dot_path = os.path.join(save_path, name) + '.dot'
viz_pass = core.get_pass('graph_viz_pass')
viz_pass.set('graph_viz_path', viz_dot_path)
viz_pass.apply(self.graph)
_convert_to_pdf(viz_dot_path)
def to_program(self):
convert_pass = core.get_pass('graph_to_program_pass')
convert_pass.set('program', Program().desc)
convert_pass.apply(self.graph)
desc = convert_pass.get_program('program')
program = Program._construct_from_desc(desc)
return program
def _update_desc_attr(self, desc, name, val):
"""
Update the value of desc's attribute by attribute's name.
"""
if isinstance(val, Block):
desc.set_block_attr(name, val.desc)
elif isinstance(val, list) and val and all(
isinstance(v, Block) for v in val):
desc.set_blocks_attr(name, [v.desc for v in val])
elif isinstance(val, core.BlockDesc) or \
isinstance(val, core.ProgramDesc):
desc.set_serialized_attr(name, val.serialize_to_string())
else:
desc._set_attr(name, val)
class Program(object): class Program(object):
""" """
Python Program. Beneath it is a ProgramDesc, which is used for Python Program. Beneath it is a ProgramDesc, which is used for
...@@ -1936,6 +2085,23 @@ class Program(object): ...@@ -1936,6 +2085,23 @@ class Program(object):
p._sync_with_cpp() p._sync_with_cpp()
return p return p
@staticmethod
def _construct_from_desc(desc):
"""
Construct a program from program desc.
Args:
desc(core.ProgramDesc): The program desc for constructing.
Returns:
Program: A program.
"""
p = Program()
p.desc = desc
p.blocks = [Block(p, i) for i in six.moves.range(p.desc.num_blocks())]
p._sync_with_cpp()
return p
@property @property
def random_seed(self): def random_seed(self):
""" """
......
...@@ -220,9 +220,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -220,9 +220,7 @@ def infer(use_cuda, save_dirname=None):
np_data = np.array(results[0]) np_data = np.array(results[0])
infer_out = infer_outputs[0].data.float_data() infer_out = infer_outputs[0].data.float_data()
for a, b in zip(np_data[0], infer_out): for a, b in zip(np_data[0], infer_out):
g_a = float("{:.6g}".format(a)) assert np.isclose(a, b), "a: {}, b: {}".format(a, b)
g_b = float("{:.6g}".format(b))
assert g_a == g_b
def main(use_cuda, is_sparse, is_parallel): def main(use_cuda, is_sparse, is_parallel):
......
...@@ -123,7 +123,7 @@ class TestDistRunnerBase(object): ...@@ -123,7 +123,7 @@ class TestDistRunnerBase(object):
pass_builder = build_stra._finalize_strategy_and_create_passes() pass_builder = build_stra._finalize_strategy_and_create_passes()
mypass = pass_builder.insert_pass( mypass = pass_builder.insert_pass(
len(pass_builder.all_passes()) - 2, "multi_batch_merge_pass") len(pass_builder.all_passes()) - 2, "multi_batch_merge_pass")
mypass.set_int("num_repeats", args.batch_merge_repeat) mypass.set("num_repeats", args.batch_merge_repeat)
if args.update_method == "nccl2": if args.update_method == "nccl2":
build_stra.num_trainers = len(args.endpoints.split(",")) build_stra.num_trainers = len(args.endpoints.split(","))
......
...@@ -111,7 +111,7 @@ class TestPassBuilder(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestPassBuilder(unittest.TestCase):
pass_builder.remove_pass(len(pass_builder.all_passes()) - 1) pass_builder.remove_pass(len(pass_builder.all_passes()) - 1)
self.assertEqual(origin_len + 1, len(pass_builder.all_passes())) self.assertEqual(origin_len + 1, len(pass_builder.all_passes()))
viz_pass.set_str("graph_viz_path", "/tmp/test_viz_pass") viz_pass.set("graph_viz_path", "/tmp/test_viz_pass")
self.check_network_convergence( self.check_network_convergence(
use_cuda=core.is_compiled_with_cuda(), use_cuda=core.is_compiled_with_cuda(),
......
...@@ -81,11 +81,10 @@ class TestSequenceExpand(OpTest): ...@@ -81,11 +81,10 @@ class TestSequenceExpand(OpTest):
class TestSequenceExpandCase1(TestSequenceExpand): class TestSequenceExpandCase1(TestSequenceExpand):
def set_data(self): def set_data(self):
x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32') x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
x_lod = [[2, 3]]
y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
y_lod = [[2, 3], [2, 2, 3, 3, 3]] y_lod = [[2, 3], [2, 2, 3, 3, 3]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
self.attrs = {'ref_level': 0} self.attrs = {'ref_level': 1}
class TestSequenceExpandCase2(TestSequenceExpand): class TestSequenceExpandCase2(TestSequenceExpand):
......
...@@ -113,6 +113,7 @@ packages=['paddle', ...@@ -113,6 +113,7 @@ packages=['paddle',
'paddle.fluid.contrib.slim.core', 'paddle.fluid.contrib.slim.core',
'paddle.fluid.contrib.slim.graph', 'paddle.fluid.contrib.slim.graph',
'paddle.fluid.contrib.slim.prune', 'paddle.fluid.contrib.slim.prune',
'paddle.fluid.contrib.slim.quantization',
'paddle.fluid.contrib.utils', 'paddle.fluid.contrib.utils',
'paddle.fluid.transpiler', 'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details'] 'paddle.fluid.transpiler.details']
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册