Commit 23560d08 authored by lujun

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix-api-doc-20190220

@@ -44,6 +44,7 @@
| qingqing01 | Qing-Qing Dang |
| reyoung | Yang Yu |
| Sand3r- | Michal Gallus |
| sfraczek | Sylwester Fraczek |
| Superjom | Chun-Wei Yan |
| tensor-tang | Jian Tang |
| tianbingsz | Tian-Bing Xu |
@@ -54,6 +55,7 @@
| wangyang59 | Yang Wang |
| wangzhen-nlp | Zhen Wang |
| wen-bo-yang | Wen-Bo Yang |
| wojtuss | Wojciech Uss |
| wwhu | Wei-Wei Hu |
| xinghai-sun | Xing-Hai Sun |
| Xreki | Yi-Qun Liu |
......
@@ -54,23 +54,12 @@ option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_JEMALLOC "Compile PaddlePaddle with jemalloc" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
@@ -105,8 +94,6 @@ endif()
if (WIN32)
set(WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows" FORCE)
set(WITH_FLUID_ONLY ON CACHE STRING
"Enable FLUID_ONLY when compiling for Windows" FORCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
@@ -148,7 +135,6 @@ include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn
include(external/ngraph) # download, build, install nGraph
include(external/boost) # download boost
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/cares)
@@ -225,7 +211,6 @@ include(generic) # simplify cmake module
include(package) # set paddle packages
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(rdma) # set rdma libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(inference_lib) # add paddle fluid inference libraries
@@ -233,38 +218,11 @@ include(inference_lib) # add paddle fluid inference libraries
include_directories("${PADDLE_SOURCE_DIR}")
set(EXTERNAL_LIBS
gflags
glog
${CBLAS_LIBRARIES}
protobuf
zlib
${PYTHON_LIBRARIES}
)
if(WITH_PSLIB)
list(APPEND EXTERNAL_LIBS pslib)
list(APPEND EXTERNAL_LIBS pslib_brpc)
list(APPEND EXTERNAL_LIBS libmct)
endif(WITH_PSLIB)
if(WITH_AMD_GPU)
find_package(HIP)
include(hip)
endif(WITH_AMD_GPU)
if(WITH_MKLML)
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()
if(WITH_LIBXSMM)
list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS})
endif()
if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif()
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
@@ -3,8 +3,8 @@
English | [简体中文](./README_cn.md)
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -18,7 +18,7 @@ learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
### Install Latest Stable Release:
```
# Linux CPU
@@ -26,9 +26,9 @@ pip install paddlepaddle
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
pip install paddlepaddle-gpu==1.2.0.post87
pip install paddlepaddle-gpu==1.3.0.post87
# Linux GPU cuda8cudnn5
pip install paddlepaddle-gpu==1.2.0.post85
pip install paddlepaddle-gpu==1.3.0.post85
# For installation on other platform, refer to http://paddlepaddle.org/
```
@@ -75,26 +75,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
## Installation
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website.
It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) on our website.
## Documentation
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and
[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation.
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) and
[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation.
- [Deep Learning 101](https://github.com/PaddlePaddle/book)
You might want to start from this online interactive book that can run in a Jupyter Notebook.
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
- [Distributed Training](http://paddlepaddle.org/documentation/docs/en/1.3/user_guides/howto/training/multi_node_en.html)
You can run distributed training jobs on MPI clusters.
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
- [Python API](http://paddlepaddle.org/documentation/docs/en/1.3/api/index_en.html)
Our new API enables much shorter programs.
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
- [How to Contribute](http://paddlepaddle.org/documentation/docs/en/1.3/advanced_usage/development/contribute_to_paddle/index_en.html)
We appreciate your contributions!
......
@@ -3,8 +3,8 @@
[English](./README.md) | 简体中文
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -16,7 +16,7 @@ PaddlePaddle (PArallel Distributed Deep LEarning) is a simple, easy-to-use, efficient
To follow the latest PaddlePaddle features, please refer to our [release notes](https://github.com/PaddlePaddle/Paddle/releases)
### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
### Install Latest Stable Release:
```
# Linux CPU
@@ -24,9 +24,9 @@ pip install paddlepaddle
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
pip install paddlepaddle-gpu==1.2.0.post87
pip install paddlepaddle-gpu==1.3.0.post87
# Linux GPU cuda8cudnn5
pip install paddlepaddle-gpu==1.2.0.post85
pip install paddlepaddle-gpu==1.3.0.post85
# For installation instructions on other platforms, refer to http://paddlepaddle.org/
```
@@ -57,26 +57,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
## Installation
We recommend reading the [installation guide](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website
We recommend reading the [installation guide](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html) on our website
## Documentation
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
and [Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation
We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
and [Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation
- [Deep Learning 101](https://github.com/PaddlePaddle/book)
You might want to start with this online interactive book, which can be run in a Jupyter Notebook
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.3/user_guides/howto/training/multi_node.html)
You can run distributed training jobs on MPI clusters
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.3/api_cn/index_cn.html)
The new API enables much shorter and cleaner programs
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.3/advanced_usage/development/contribute_to_paddle/index_cn.html)
We appreciate your contributions!
......
@@ -20,31 +20,10 @@ if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE)
endif(WITH_DOUBLE)
if(WITH_ARM_FP16)
add_definitions(-DPADDLE_ARM_FP16)
add_definitions("-march=armv8.2-a+fp16+simd")
endif(WITH_ARM_FP16)
if(WITH_TESTING)
add_definitions(-DPADDLE_WITH_TESTING)
endif(WITH_TESTING)
if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
if(EIGEN_USE_THREADS)
add_definitions(-DEIGEN_USE_THREADS)
endif(EIGEN_USE_THREADS)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
@@ -78,10 +57,6 @@ if(WIN32)
endif(NOT MSVC)
endif(WIN32)
if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG)
if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()
@@ -171,55 +146,6 @@ if(WITH_DISTRIBUTE)
add_definitions(-DPADDLE_WITH_DISTRIBUTE)
endif()
if(WITH_GOLANG)
# we need to symlink Paddle directory into GOPATH. If we
# don't do it and we have code that depends on Paddle, go
# get ./... will download a new Paddle repo from Github,
# without the changes in our current Paddle repo that we
# want to build.
set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
file(MAKE_DIRECTORY ${GOPATH})
set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle")
file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}")
set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go")
add_custom_target(go_path)
add_custom_command(TARGET go_path
# Symlink Paddle directory into GOPATH
COMMAND mkdir -p ${PADDLE_IN_GOPATH}
COMMAND rm -rf ${PADDLE_IN_GOPATH}
COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH}
# Automatically get all dependencies specified in the source code
# We can't run `go get -d ./...` for every target, because
# multiple `go get` can not run concurrently, but make need to be
# able to run with multiple jobs.
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)
if (GLIDE_INSTALL)
if(EXISTS $ENV{GOPATH}/bin/glide)
set(GLIDE "$ENV{GOPATH}/bin/glide")
else()
message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide")
endif()
# this command will only run when the file it depends is missing
# or has changed, or the output is missing.
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide
COMMAND env GOPATH=${GOPATH} ${GLIDE} install
COMMAND touch ${CMAKE_BINARY_DIR}/glide
DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go"
)
# depends on the custom command which outputs
# ${CMAKE_BINARY_DIR}/glide, the custom command does not need to
# run every time this target is built.
add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path)
endif()
endif(WITH_GOLANG)
if(WITH_GRPC)
add_definitions(-DPADDLE_WITH_GRPC)
endif(WITH_GRPC)
......
@@ -168,10 +168,7 @@ elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x
endif()
include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
# TODO(panyx0718): CUPTI only allows DSO?
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
if(WIN32)
set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(WIN32)
......
@@ -74,5 +74,3 @@ add_dependencies(anakin_shared extern_anakin)
add_library(anakin_saber SHARED IMPORTED GLOBAL)
set_property(TARGET anakin_saber PROPERTY IMPORTED_LOCATION ${ANAKIN_SABER_LIB})
add_dependencies(anakin_saber extern_anakin)
list(APPEND external_project_dependencies anakin_shared anakin_saber)
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any)
ExternalProject_Add(
extern_lib_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git"
GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
add_library(lib_any STATIC ${dummyfile})
else()
add_library(lib_any INTERFACE)
endif()
add_dependencies(lib_any extern_lib_any)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
LIST(APPEND external_project_dependencies lib_any)
@@ -57,5 +57,4 @@ else()
endif()
add_dependencies(boost ${BOOST_PROJECT})
list(APPEND external_project_dependencies boost)
set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})
@@ -69,5 +69,3 @@ SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES})
ADD_DEPENDENCIES(brpc extern_brpc)
add_definitions(-DBRPC_WITH_GLOG)
LIST(APPEND external_project_dependencies brpc)
@@ -31,5 +31,3 @@ else()
endif()
add_dependencies(cub extern_cub)
LIST(APPEND external_project_dependencies cub)
@@ -27,5 +27,3 @@ else()
endif()
add_dependencies(dlpack extern_dlpack)
LIST(APPEND external_project_dependencies dlpack)
@@ -52,5 +52,3 @@ else()
endif()
add_dependencies(eigen3 extern_eigen3)
LIST(APPEND external_project_dependencies eigen3)
@@ -61,8 +61,6 @@ ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND external_project_dependencies gflags)
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
include(CheckIncludeFileCXX)
......
@@ -72,5 +72,3 @@ ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
ADD_DEPENDENCIES(glog extern_glog gflags)
LINK_LIBRARIES(glog gflags)
LIST(APPEND external_project_dependencies glog)
@@ -79,5 +79,4 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
ADD_DEPENDENCIES(gtest_main extern_gtest)
LIST(APPEND external_project_dependencies gtest gtest_main)
ENDIF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
@@ -39,6 +39,3 @@ ADD_DEPENDENCIES(extern_leveldb snappy)
ADD_LIBRARY(leveldb STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET leveldb PROPERTY IMPORTED_LOCATION ${LEVELDB_LIBRARIES})
ADD_DEPENDENCIES(leveldb extern_leveldb)
LIST(APPEND external_project_dependencies leveldb)
@@ -72,7 +72,4 @@ else()
add_library(libmct INTERFACE)
endif()
#ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL)
ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT})
LIST(APPEND external_project_dependencies libmct)
@@ -53,5 +53,3 @@ MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}")
include_directories(${LIBXSMM_INCLUDE_DIR})
ADD_DEFINITIONS(-DPADDLE_WITH_LIBXSMM)
ADD_DEPENDENCIES(libxsmm extern_libxsmm)
LIST(APPEND external_project_dependencies libxsmm)
@@ -89,7 +89,6 @@ SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
add_definitions(-DPADDLE_WITH_MKLDNN)
LIST(APPEND external_project_dependencies shared_mkldnn)
# generate a static dummy target to track mkldnn dependencies
# for cc_library(xxx SRCS xxx.c DEPS mkldnn)
......
@@ -73,4 +73,3 @@ INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
LIST(APPEND external_project_dependencies mklml)
@@ -77,4 +77,3 @@ add_dependencies(ngraph ${NGRAPH_PROJECT})
target_compile_definitions(ngraph INTERFACE -DPADDLE_WITH_NGRAPH)
target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR})
target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB})
LIST(APPEND external_project_dependencies ngraph)
@@ -11,11 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(USE_EIGEN_FOR_BLAS)
return()
ENDIF(USE_EIGEN_FOR_BLAS)
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
@@ -91,7 +86,6 @@ ENDIF()
IF(NOT ${CBLAS_FOUND})
ADD_DEPENDENCIES(cblas extern_openblas)
LIST(APPEND external_project_dependencies cblas)
ELSE()
IF("${CBLAS_PROVIDER}" STREQUAL "MKLML")
ADD_DEPENDENCIES(cblas mklml)
......
@@ -129,7 +129,6 @@ macro(PROMPT_PROTOBUF_LIB)
ADD_DEPENDENCIES(protoc ${dep})
ENDFOREACH()
LIST(APPEND external_project_dependencies protobuf)
RETURN()
endmacro()
macro(SET_PROTOBUF_VERSION)
@@ -203,7 +202,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
ENDIF()
SET(PROTOBUF_REPO "https://github.com/google/protobuf.git")
SET(PROTOBUF_TAG "v3.6.1")
SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
ExternalProject_Add(
${TARGET_NAME}
@@ -231,7 +230,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
)
ENDFUNCTION()
SET(PROTOBUF_VERSION 3.6.1)
SET(PROTOBUF_VERSION 3.1.0)
IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE)
......
@@ -70,4 +70,3 @@ ExternalProject_Add(
ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT})
LIST(APPEND external_project_dependencies pslib)
@@ -70,4 +70,3 @@ ExternalProject_Add(
ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB})
ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT})
LIST(APPEND external_project_dependencies pslib_brpc)
@@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.6.1")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.6.1, "
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF()
ENDIF(PYTHONINTERP_FOUND)
......
@@ -26,5 +26,3 @@ else()
endif()
add_dependencies(simple_threadpool extern_threadpool)
LIST(APPEND external_project_dependencies simple_threadpool)
@@ -83,5 +83,3 @@ INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include wa
ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
ADD_DEPENDENCIES(warpctc extern_warpctc)
LIST(APPEND external_project_dependencies warpctc)
@@ -55,4 +55,3 @@ else()
endif()
add_dependencies(xbyak ${XBYAK_PROJECT})
list(APPEND external_project_dependencies xbyak)
@@ -71,5 +71,3 @@ add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
include_directories(${XXHASH_INCLUDE_DIR})
add_dependencies(xxhash extern_xxhash)
LIST(APPEND external_project_dependencies xxhash)
@@ -57,5 +57,3 @@ ENDIF(WIN32)
ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
ADD_DEPENDENCIES(zlib extern_zlib)
LIST(APPEND external_project_dependencies zlib)
@@ -11,8 +11,6 @@ include_directories("/opt/rocm/rocrand/include")
include_directories("/opt/rocm/rccl/include")
include_directories("/opt/rocm/thrust")
list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc")
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++11" )
if(WITH_DSO)
@@ -31,22 +29,12 @@ if(WITH_GRPC)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_GRPC")
endif(WITH_GRPC)
if(NOT WITH_GOLANG)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITHOUT_GOLANG")
endif(NOT WITH_GOLANG)
if(WITH_MKLDNN)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_MKLDNN")
endif(WITH_MKLDNN)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DANY_IMPL_ANY_CAST_MOVEABLE")
if(NOT WITH_RDMA)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_DISABLE_RDMA")
endif(NOT WITH_RDMA)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
......
# user should download rdma first from subversion repository
# execute following instruction to download svn mannally
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# we use static output in svn repositories to avoid implict bugs from not standard runtime env.
if(WITH_RDMA)
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1
COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
include_directories("${RDMA_INC_DIR}")
else()
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
endif()
else(WITH_RDMA)
set(RDMA_LIBS "")
set(RDMA_LD_FLAGS "")
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
@@ -33,6 +33,5 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()
@@ -14,9 +14,7 @@ cmake .. -DWITH_AVX=OFF \
-DWITH_MKL=OFF \
-DWITH_GPU=ON \
-DWITH_TESTING=ON \
-DWITH_TIMER=ON \
-DWITH_PROFILER=ON \
-DWITH_FLUID_ONLY=ON
make -j `nproc`
pip install -U "$WHEEL_PATH/$(ls $WHEEL_PATH)"
......
@@ -71,7 +71,7 @@ paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'v
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
@@ -121,6 +121,7 @@ paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
@@ -427,7 +428,7 @@ paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learnin
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
......
@@ -30,8 +30,6 @@ namespace paddle {
namespace framework {
namespace details {
static constexpr char kAllOpDescs[] = "all_op_descs";
VarHandle* GetValidInput(const OpHandleBase* a) {
for (auto p : a->Inputs()) {
VarHandle* b = dynamic_cast<VarHandle*>(p);
......
@@ -53,7 +53,7 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
#endif
void AllReduceOpHandle::RunImpl() {
platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second);
platform::RecordEvent record_event(Name());
WaitInputVarGenerated();
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
......
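The RunImpl changes in this and the following handles construct the profiling scope from the event name alone instead of also passing a device context. Below is only an illustrative sketch of that RAII call-site pattern; `ScopedEvent` is a hypothetical class, not Paddle's actual `platform::RecordEvent` implementation.

```
#include <chrono>
#include <iostream>
#include <string>

// Hypothetical RAII profiling scope: like the new call sites above, it is
// constructed from a name only and reports the elapsed time on destruction.
class ScopedEvent {
 public:
  explicit ScopedEvent(std::string name)
      : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}
  ~ScopedEvent() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::cout << name_ << " took " << us << " us\n";
  }

 private:
  std::string name_;
  std::chrono::steady_clock::time_point start_;
};

int main() {
  ScopedEvent record_event("all_reduce");  // mirrors RecordEvent(Name())
  // ... work that would be profiled ...
}
```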
@@ -22,7 +22,7 @@ namespace framework {
namespace details {
void BroadcastOpHandle::RunImpl() {
platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
platform::RecordEvent record_event(Name());
if (places_.size() == 1) return;
@@ -30,7 +30,7 @@ void BroadcastOpHandle::RunImpl() {
VarHandle *in_var_handle;
{
auto in_var_handles = DynamicCast<VarHandle>(inputs_);
PADDLE_ENFORCE_EQ(in_var_handles.size(), 1,
PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL,
"The number of input should be one.");
in_var_handle = in_var_handles[0];
}
......
@@ -34,9 +34,11 @@ namespace details {
static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) {
// Should fix the allreduce op order if scheduling
// them in multiple threads or processes to avoid hang.
// NOTE: ParallelGraph would execute this pass on each graph, so
// don't need to append it here.
return (!strategy.enable_sequential_execution_ &&
strategy.num_trainers_ > 1) ||
strategy.enable_parallel_graph_;
strategy.num_trainers_ > 1) &&
!strategy.enable_parallel_graph_;
}
class ParallelExecutorPassBuilder : public ir::PassBuilder {
@@ -133,12 +135,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
void AppendMultiDevPass(const BuildStrategy &strategy) {
ir::Pass *multi_devices_pass;
if (strategy_.is_distribution_) {
VLOG(3) << "multi device parameter server mode";
multi_devices_pass = AppendPass("dist_multi_devices_pass").get();
} else {
if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
VLOG(3) << "multi devices collective mode with allreduce";
multi_devices_pass =
AppendPass("allreduce_mode_multi_devices_pass").get();
} else if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
VLOG(3) << "multi deivces collective mode with reduce";
multi_devices_pass = AppendPass("reduce_mode_multi_devices_pass").get();
} else {
PADDLE_THROW("Unknown reduce strategy.");
......
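To make the operator change in `SeqOnlyAllReduceOps` concrete, here is a self-contained sketch of the old and new predicates; `Strategy` and the two helper functions are local stand-ins for the `BuildStrategy` fields above, not Paddle code.

```
#include <cassert>

// Local stand-ins for the BuildStrategy fields referenced above.
struct Strategy {
  bool enable_sequential_execution_ = false;
  int num_trainers_ = 1;
  bool enable_parallel_graph_ = false;
};

// Old predicate: any build with parallel-graph enabled forced a fixed allreduce order.
bool SeqOnlyAllReduceOpsOld(const Strategy &s) {
  return (!s.enable_sequential_execution_ && s.num_trainers_ > 1) ||
         s.enable_parallel_graph_;
}

// New predicate: ParallelGraph runs the pass on each sub-graph itself, so it is excluded here.
bool SeqOnlyAllReduceOpsNew(const Strategy &s) {
  return (!s.enable_sequential_execution_ && s.num_trainers_ > 1) &&
         !s.enable_parallel_graph_;
}

int main() {
  Strategy s;
  s.num_trainers_ = 2;
  s.enable_parallel_graph_ = true;
  // The two expressions disagree exactly when enable_parallel_graph_ is true.
  assert(SeqOnlyAllReduceOpsOld(s) && !SeqOnlyAllReduceOpsNew(s));
  return 0;
}
```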
@@ -86,7 +86,7 @@ std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
}
void DataBalanceOpHandle::RunImpl() {
PADDLE_ENFORCE_GT(places_.size(), 1,
PADDLE_ENFORCE_GT(places_.size(), 1UL,
"Data balance can only be enabled when the number of "
"places to run larger than 1.");
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
......
@@ -23,7 +23,7 @@ void FuseVarsOpHandle::RunImpl() {
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
PADDLE_ENFORCE_EQ(in_var_handles.size(), 0);
PADDLE_ENFORCE_EQ(in_var_handles.size(), 0UL);
PADDLE_ENFORCE_EQ(out_var_handles.size() - 1, inputs_numel_.size(), "");
auto scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
......
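Several enforce call sites above (broadcast, data balance, fuse vars) replace plain integer literals with unsigned ones (`1UL`, `0UL`) when compared against `std::vector::size()`. A small stand-alone sketch of the underlying point, assuming a warnings-as-errors build: `size()` returns the unsigned `std::size_t`, and an unsigned literal keeps both operands of the comparison the same signedness, which avoids signed/unsigned mismatch warnings on some toolchains.

```
#include <vector>

// v.size() returns std::size_t (unsigned). Comparing it against a plain int
// literal mixes signed and unsigned operands, which some compilers flag as a
// signed/unsigned mismatch; 1UL keeps both sides unsigned with the same value.
bool has_single_element(const std::vector<int> &v) {
  return v.size() == 1UL;
}

int main() {
  std::vector<int> v{42};
  return has_single_element(v) ? 0 : 1;
}
```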
@@ -22,7 +22,7 @@ namespace framework {
namespace details {
void FusedBroadcastOpHandle::RunImpl() {
platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
platform::RecordEvent record_event(Name());
if (places_.size() == 1UL) return;
......
@@ -129,7 +129,13 @@ size_t NodeSize(const VarDesc& node) {
}
size_t NodeSize(ir::Node* n) {
auto* desc = FindVarDescInBlock(n);
VarDesc* desc = nullptr;
// some op do not have block pointer
if (n->inputs[0]->Op() != nullptr) {
desc = FindVarDescInBlock(n);
} else {
desc = n->Var();
}
return NodeSize(*desc);
}
......
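The `NodeSize` hunk above falls back to the node's own `VarDesc` when the surrounding op was created without a block pointer. Below is a stripped-down, hypothetical sketch of that guard-then-fallback shape; the `Node`/`VarDesc` structs and `LookUpInBlock` are placeholders, not Paddle's `ir::Node` API.

```
#include <cstddef>

// Placeholder types standing in for ir::Node / VarDesc.
struct VarDesc { std::size_t bytes = 0; };
struct OpDesc {};

struct Node {
  OpDesc *op = nullptr;  // may be null for ops created without a block
  VarDesc own_var;       // descriptor carried by the node itself
};

// Placeholder for the block-based lookup used on the normal path.
VarDesc *LookUpInBlock(Node *n) { return &n->own_var; }

std::size_t NodeSizeSketch(Node *n) {
  VarDesc *desc = nullptr;
  if (n->op != nullptr) {
    desc = LookUpInBlock(n);  // normal path: resolve through the block
  } else {
    desc = &n->own_var;       // fallback: the op has no block pointer
  }
  return desc->bytes;
}

int main() {
  Node detached;  // op left null, as for an op_desc created without a block
  return static_cast<int>(NodeSizeSketch(&detached));
}
```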
@@ -29,8 +29,6 @@ namespace paddle {
namespace framework {
namespace details {
constexpr char kAllOpDescs[] = "all_op_descs";
std::vector<ir::Node*> SortOpLikeDescOrder(const ir::Graph& graph);
// NOTE(dzh): A ordered set for node reuse in memory optimize.
......
@@ -194,7 +194,8 @@ void MemoryOptimizePass::SubGraphOptimize(OpDesc* op_desc) const {
// effect. Because it is a single op in graph. No need to
// update the ir nodes.
sub_op_desc->Rename(var->Name(), cache->Name());
if (sub_op_desc->Block()->HasVar(var->Name())) {
if (sub_op_desc->Block() != nullptr &&
sub_op_desc->Block()->HasVar(var->Name())) {
sub_op_desc->Block()->RemoveVar(var->Name());
}
}
@@ -235,7 +236,13 @@ void MemoryOptimizePass::RenameVarInGraphDesc(const std::string& var,
auto* op_desc = op->Op();
op_desc->RenameInput(var, cache_var);
op_desc->RenameOutput(var, cache_var);
if (op_desc->Block()->HasVar(var)) op_desc->Block()->RemoveVar(var);
if (op_desc->Block() != nullptr) {
op_desc->Block()->RemoveVar(var);
} else {
LOG(WARNING) << "op " << op->Name() << " not know its block."
<< "Is the op_desc created without block pointer? "
<< "Can not find " << var << " in Block(0)";
}
op_desc->Flush();
}
}
......
...@@ -392,20 +392,32 @@ void MultiDevSSAGraphBuilderBase::CreateComputationalOp(ir::Graph *result, ...@@ -392,20 +392,32 @@ void MultiDevSSAGraphBuilderBase::CreateComputationalOp(ir::Graph *result,
void MultiDevSSAGraphBuilderBase::CreateAllReduceOp( void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(
ir::Graph *result, const std::string &og) const { ir::Graph *result, const std::string &og) const {
OpHandleBase *op_handle = nullptr;
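// Helper that creates an AllReduceOpHandle over the given scopes and places and returns the new handle.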
auto append_allreduce_op = [&](
const std::vector<Scope *> &scopes,
const std::vector<platform::Place> &places) -> OpHandleBase * {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle( result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_)); scopes, places, nccl_ctxs_));
#else #else
result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle( result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
local_scopes_, places_)); scopes, places));
#endif #endif
auto *op_handle = result->Get<GraphOps>(kGraphOps).back(); return result->Get<GraphOps>(kGraphOps).back();
};
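// Without parallel-graph execution, one all-reduce handle spans all places; otherwise a separate handle is created per place inside the loop below.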
if (!strategy_.enable_parallel_graph_)
op_handle = append_allreduce_op(local_scopes_, places_);
for (size_t i = 0; i < places_.size(); ++i) { for (size_t i = 0; i < places_.size(); ++i) {
auto &p = places_[i]; if (strategy_.enable_parallel_graph_) {
SetCommunicationContext(op_handle, p); op_handle = append_allreduce_op({local_scopes_[i]}, {places_[i]});
}
SetCommunicationContext(op_handle, places_[i]);
auto &vars = result->Get<GraphVars>(kGraphVars)[i][og]; auto &vars = result->Get<GraphVars>(kGraphVars)[i][og];
PADDLE_ENFORCE(!vars.empty()); PADDLE_ENFORCE(!vars.empty());
auto &prev_grad = vars.back(); auto &prev_grad = vars.back();
...@@ -413,7 +425,7 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp( ...@@ -413,7 +425,7 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(
auto var = auto var =
new VarHandle(result->CreateEmptyNode(og, ir::Node::Type::kVariable), new VarHandle(result->CreateEmptyNode(og, ir::Node::Type::kVariable),
vars.size(), i, og, p); vars.size(), i, og, places_[i]);
vars.emplace_back(var); vars.emplace_back(var);
op_handle->AddOutput(var); op_handle->AddOutput(var);
} }
...@@ -925,9 +937,21 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result, ...@@ -925,9 +937,21 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result,
} }
void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const { void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
if (need_broadcast_var_ || // broadcast received parameters when training in parameter server mode.
(UseGPU() && if (need_broadcast_var_) {
strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce)) { // There are 4 conditions:
// 1. GPU && Reduce: Reduce gradient then broadcast gradient to other GPUs.
// Need to broadcast received parameters to other GPUs.
// 2. GPU && AllReduce: AllReduce all gradients to each GPU. Need to
// broadcast received parameters to other GPUs.
// 3. CPU && AllReduce: AllReduce all gradients to each thread. Need to
// broadcast received parameters to other scopes.
// 4. CPU && Reduce: because all parameters share the same memory, do not
// broadcast received parameters.
if (!UseGPU() &&
strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
return;
}
if (strategy_.fuse_broadcast_op_) { if (strategy_.fuse_broadcast_op_) {
CreateFusedBroadcastOp(result, bcast_var_name_set_); CreateFusedBroadcastOp(result, bcast_var_name_set_);
} else { } else {
......
...@@ -36,13 +36,14 @@ namespace details { ...@@ -36,13 +36,14 @@ namespace details {
// map from variable name to variables. The variables, which have the same name, // map from variable name to variables. The variables, which have the same name,
// will have a different version. The offset in the // will have a different version. The offset in the
// `std::vector<VarHandle*>` is the version of variables. // `std::vector<VarHandle*>` is the version of variables.
typedef std::vector<std::unordered_map<std::string, std::vector<VarHandle*>>> typedef std::vector<std::unordered_map<std::string, std::vector<VarHandle *>>>
GraphVars; GraphVars;
const char kGraphVars[] = "vars"; const char kGraphVars[] = "vars";
// aux variables to represent dependency. Useful to resolve data hazard. // aux variables to represent dependency. Useful to resolve data hazard.
typedef std::unordered_set<VarHandleBase*> GraphDepVars; typedef std::unordered_set<VarHandleBase *> GraphDepVars;
const char kGraphDepVars[] = "dep_vars"; const char kGraphDepVars[] = "dep_vars";
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -70,6 +70,9 @@ class OpHandleBase { ...@@ -70,6 +70,9 @@ class OpHandleBase {
auto it = dev_ctxes_.find(place); auto it = dev_ctxes_.find(place);
return it != dev_ctxes_.end() ? it->second : nullptr; return it != dev_ctxes_.end() ? it->second : nullptr;
} }
const std::map<platform::Place, platform::DeviceContext *> &DeviceContext() {
return dev_ctxes_;
}
void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) { void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) {
dev_ctxes_[place] = ctx_; dev_ctxes_[place] = ctx_;
......
...@@ -13,22 +13,92 @@ ...@@ -13,22 +13,92 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" #include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
std::vector<std::unique_ptr<ir::Graph>>
ParallelSSAGraphExecutor::SeparateMultiDevicesGraph(
std::unique_ptr<ir::Graph> &&graph) {
std::vector<std::unique_ptr<ir::Graph>> graphs;
graphs.reserve(places_.size());
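// Build one empty graph per place; ops and vars are then moved into the graph of the device they belong to.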
for (size_t i = 0; i < places_.size(); ++i) {
ProgramDesc empty;
graphs.emplace_back(std::unique_ptr<ir::Graph>(new ir::Graph(empty)));
auto &g = graphs.back();
g->Set(kGraphVars, new GraphVars(1UL));
g->Set(kGraphDepVars, new GraphDepVars);
}
auto op_handles = ir::FilterByNodeWrapper<OpHandleBase>(*graph);
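// Move each op handle, together with its dummy input/output vars, into the graph of its device.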
for (auto &op : op_handles) {
auto &dev_ctx = op->DeviceContext();
auto &p = dev_ctx.begin()->first;
int dev_id = boost::get<platform::CUDAPlace>(p).device;
auto &dev_dummys = graphs[dev_id]->Get<GraphDepVars>(kGraphDepVars);
graphs[dev_id]->AddNode(graph->RemoveNode(op->Node()).release());
for (auto &var : op->Inputs()) {
auto dummy_ptr = dynamic_cast<DummyVarHandle *>(var);
if (dummy_ptr) {
dev_dummys.insert(var);
if (graph->Nodes().count(var->Node()))
graphs[dev_id]->AddNode(graph->RemoveNode(var->Node()).release());
}
}
for (auto &var : op->Outputs()) {
auto dummy_ptr = dynamic_cast<DummyVarHandle *>(var);
if (dummy_ptr) {
dev_dummys.insert(var);
if (graph->Nodes().count(var->Node()))
graphs[dev_id]->AddNode(graph->RemoveNode(var->Node()).release());
}
}
}
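// Copy the per-device variable maps and move the corresponding var nodes into each device graph.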
for (size_t dev_id = 0; dev_id < places_.size(); ++dev_id) {
auto &dev_vars = graphs[dev_id]->Get<GraphVars>(kGraphVars)[0];
auto &origin_vars = graph->Get<GraphVars>(kGraphVars)[dev_id];
for (auto &name_pair : origin_vars) {
dev_vars.emplace(name_pair.first, name_pair.second);
for (auto &version_pair : name_pair.second) {
if (graph->Nodes().count(version_pair->Node())) {
graphs[dev_id]->AddNode(
graph->RemoveNode(version_pair->Node()).release());
}
}
}
}
return graphs;
}
ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( ParallelSSAGraphExecutor::ParallelSSAGraphExecutor(
const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes, const ExecutionStrategy &strategy, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
std::vector<std::unique_ptr<ir::Graph>> &&graphs) const framework::ProgramDesc &main_prog, std::unique_ptr<ir::Graph> &&graph)
: strategy_(std::move(strategy)), : strategy_(std::move(strategy)),
local_scopes_(std::move(local_scopes)), local_scopes_(std::move(local_scopes)),
pool_(places.size() >= 2 ? new ::ThreadPool(places.size()) : nullptr), pool_(places.size() >= 2 ? new ::ThreadPool(places.size()) : nullptr),
places_(std::move(places)), places_(std::move(places)),
graphs_(std::move(graphs)) { main_prog_(main_prog),
// TODO(Yancey1989): Copying graphs is not safe since it deletes the
// attrs.
graphs_(SeparateMultiDevicesGraph(std::move(graph))) {
PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size());
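// Apply all_reduce_deps_pass to every per-device graph, passing the program's op list through the kAllOpDescs attribute.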
auto seq_allreduce_pass =
ir::PassRegistry::Instance().Get("all_reduce_deps_pass");
seq_allreduce_pass->Erase(details::kAllOpDescs);
seq_allreduce_pass->Set<const std::vector<OpDesc *>>(
details::kAllOpDescs,
new std::vector<OpDesc *>(main_prog_.Block(0).AllOps()));
for (size_t i = 0; i < graphs_.size(); ++i) {
graphs_[i] = seq_allreduce_pass->Apply(std::move(graphs_[i]));
}
// set the correct size of thread pool to each device. // set the correct size of thread pool to each device.
strategy_.num_threads_ = strategy_.num_threads_ < places_.size() strategy_.num_threads_ = strategy_.num_threads_ < places_.size()
? 1UL ? 1UL
...@@ -37,7 +107,7 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( ...@@ -37,7 +107,7 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor(
<< " to run the operators of the graph on each device."; << " to run the operators of the graph on each device.";
for (size_t i = 0; i < places.size(); ++i) { for (size_t i = 0; i < places.size(); ++i) {
executors_.emplace_back(new details::ThreadedSSAGraphExecutor( executors_.emplace_back(new details::ThreadedSSAGraphExecutor(
strategy_, {local_scopes_[i]}, {places_[i]}, std::move(graphs_[i]))); strategy_, local_scopes_, {places_[i]}, std::move(graphs_.at(i))));
} }
} }
......
...@@ -18,7 +18,9 @@ ...@@ -18,7 +18,9 @@
#include <vector> #include <vector>
#include "ThreadPool.h" #include "ThreadPool.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/ir/graph.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -29,17 +31,23 @@ class ParallelSSAGraphExecutor : public SSAGraphExecutor { ...@@ -29,17 +31,23 @@ class ParallelSSAGraphExecutor : public SSAGraphExecutor {
ParallelSSAGraphExecutor(const ExecutionStrategy &strategy, ParallelSSAGraphExecutor(const ExecutionStrategy &strategy,
const std::vector<Scope *> &local_scopes, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
std::vector<std::unique_ptr<ir::Graph>> &&graphs); const framework::ProgramDesc &main_prog,
std::unique_ptr<ir::Graph> &&graph);
~ParallelSSAGraphExecutor() final = default; ~ParallelSSAGraphExecutor() final = default;
const ir::Graph &Graph() const override { return *graphs_[0]; } const ir::Graph &Graph() const override { return *graphs_[0]; }
FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override; FeedFetchList Run(const std::vector<std::string> &fetch_tensors) override;
private: private:
std::vector<std::unique_ptr<ir::Graph>> SeparateMultiDevicesGraph(
std::unique_ptr<ir::Graph> &&graph);
ExecutionStrategy strategy_; ExecutionStrategy strategy_;
std::vector<Scope *> local_scopes_; std::vector<Scope *> local_scopes_;
std::unique_ptr<::ThreadPool> pool_{nullptr}; std::unique_ptr<::ThreadPool> pool_{nullptr};
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
framework::ProgramDesc main_prog_;
std::vector<std::unique_ptr<ir::Graph>> graphs_; std::vector<std::unique_ptr<ir::Graph>> graphs_;
std::vector<std::unique_ptr<details::ThreadedSSAGraphExecutor>> executors_; std::vector<std::unique_ptr<details::ThreadedSSAGraphExecutor>> executors_;
......
...@@ -139,7 +139,7 @@ void ReduceOpHandle::GatherSelectedRows( ...@@ -139,7 +139,7 @@ void ReduceOpHandle::GatherSelectedRows(
#endif #endif
void ReduceOpHandle::RunImpl() { void ReduceOpHandle::RunImpl() {
platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second); platform::RecordEvent record_event(Name());
if (places_.size() == 1) return; if (places_.size() == 1) return;
// the input and output may have dummy var. // the input and output may have dummy var.
...@@ -153,7 +153,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -153,7 +153,7 @@ void ReduceOpHandle::RunImpl() {
{ {
auto out_var_handles = DynamicCast<VarHandle>(outputs_); auto out_var_handles = DynamicCast<VarHandle>(outputs_);
PADDLE_ENFORCE_EQ(out_var_handles.size(), 1, PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL,
"The number of output should be one."); "The number of output should be one.");
out_var_handle = out_var_handles.front(); out_var_handle = out_var_handles.front();
} }
......
...@@ -63,7 +63,7 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( ...@@ -63,7 +63,7 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
eptr = std::current_exception(); eptr = std::current_exception();
} }
platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr); platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun");
++drop_scope_counter_; ++drop_scope_counter_;
bool stream_end = false; bool stream_end = false;
......
...@@ -37,7 +37,7 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( ...@@ -37,7 +37,7 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor(
FeedFetchList ThreadedSSAGraphExecutor::Run( FeedFetchList ThreadedSSAGraphExecutor::Run(
const std::vector<std::string> &fetch_tensors) { const std::vector<std::string> &fetch_tensors) {
std::unique_ptr<platform::RecordEvent> event( std::unique_ptr<platform::RecordEvent> event(
new platform::RecordEvent("ThreadedSSAGraphExecutorPrepare", nullptr)); new platform::RecordEvent("ThreadedSSAGraphExecutorPrepare"));
std::unordered_map<OpHandleBase *, size_t> pending_ops; std::unordered_map<OpHandleBase *, size_t> pending_ops;
std::unordered_set<VarHandleBase *> pending_vars; std::unordered_set<VarHandleBase *> pending_vars;
auto ready_vars = std::make_shared<BlockingQueue<VarHandleBase *>>(); auto ready_vars = std::make_shared<BlockingQueue<VarHandleBase *>>();
...@@ -219,7 +219,7 @@ void ThreadedSSAGraphExecutor::RunOp( ...@@ -219,7 +219,7 @@ void ThreadedSSAGraphExecutor::RunOp(
VLOG(10) << op << " " << op->Name() << " Done "; VLOG(10) << op << " " << op->Name() << " Done ";
running_ops_--; running_ops_--;
ready_var_q->Extend(op->Outputs()); ready_var_q->Extend(op->Outputs());
VLOG(10) << op << " " << op->Name() << "Signal posted"; VLOG(10) << op << " " << op->Name() << " Signal posted";
} catch (...) { } catch (...) {
exception_holder_.Catch(std::current_exception()); exception_holder_.Catch(std::current_exception());
} }
......
...@@ -102,6 +102,7 @@ cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DE ...@@ -102,6 +102,7 @@ cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DE
cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass)
if (WITH_MKLDNN) if (WITH_MKLDNN)
cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass)
cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass) cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
endif () endif ()
...@@ -22,7 +22,8 @@ namespace ir { ...@@ -22,7 +22,8 @@ namespace ir {
class AttentionLSTMFusePass : public FusePassBase { class AttentionLSTMFusePass : public FusePassBase {
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -31,7 +31,8 @@ class ConvAffineChannelFusePass : public FusePassBase { ...@@ -31,7 +31,8 @@ class ConvAffineChannelFusePass : public FusePassBase {
virtual ~ConvAffineChannelFusePass() {} virtual ~ConvAffineChannelFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"conv_affine_channel_fuse"}; const std::string name_scope_{"conv_affine_channel_fuse"};
}; };
...@@ -40,7 +41,8 @@ class ConvEltwiseAddAffineChannelFusePass : public FusePassBase { ...@@ -40,7 +41,8 @@ class ConvEltwiseAddAffineChannelFusePass : public FusePassBase {
virtual ~ConvEltwiseAddAffineChannelFusePass() {} virtual ~ConvEltwiseAddAffineChannelFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"conv_eltwiseadd_affine_channel_fuse"}; const std::string name_scope_{"conv_eltwiseadd_affine_channel_fuse"};
}; };
......
...@@ -169,7 +169,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl( ...@@ -169,7 +169,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
if (has_bias && conv->Op()->Input("Bias").size() > 0) { if (has_bias && conv->Op()->Input("Bias").size() > 0) {
// reuse existing conv bias node // reuse existing conv bias node
auto conv_bias_names = conv->Op()->Input("Bias"); auto conv_bias_names = conv->Op()->Input("Bias");
PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1); PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1UL);
auto* conv_bias_var = scope->FindVar(conv_bias_names[0]); auto* conv_bias_var = scope->FindVar(conv_bias_names[0]);
auto* conv_bias_tensor = conv_bias_var->GetMutable<LoDTensor>(); auto* conv_bias_tensor = conv_bias_var->GetMutable<LoDTensor>();
PADDLE_ENFORCE_EQ(conv_bias_tensor->dims(), PADDLE_ENFORCE_EQ(conv_bias_tensor->dims(),
......
...@@ -31,7 +31,8 @@ class ConvBNFusePass : public FusePassBase { ...@@ -31,7 +31,8 @@ class ConvBNFusePass : public FusePassBase {
virtual ~ConvBNFusePass() {} virtual ~ConvBNFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"conv_bn_fuse"}; const std::string name_scope_{"conv_bn_fuse"};
}; };
...@@ -40,7 +41,8 @@ class ConvEltwiseAddBNFusePass : public FusePassBase { ...@@ -40,7 +41,8 @@ class ConvEltwiseAddBNFusePass : public FusePassBase {
virtual ~ConvEltwiseAddBNFusePass() {} virtual ~ConvEltwiseAddBNFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"conv_eltwiseadd_bn_fuse"}; const std::string name_scope_{"conv_eltwiseadd_bn_fuse"};
}; };
......
...@@ -25,7 +25,8 @@ class ConvElementwiseAdd2ActFusePass : public FusePassBase { ...@@ -25,7 +25,8 @@ class ConvElementwiseAdd2ActFusePass : public FusePassBase {
virtual ~ConvElementwiseAdd2ActFusePass() {} virtual ~ConvElementwiseAdd2ActFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -25,7 +25,8 @@ class ConvElementwiseAddActFusePass : public FusePassBase { ...@@ -25,7 +25,8 @@ class ConvElementwiseAddActFusePass : public FusePassBase {
virtual ~ConvElementwiseAddActFusePass() {} virtual ~ConvElementwiseAddActFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -25,7 +25,8 @@ class ConvElementwiseAddFusePass : public FusePassBase { ...@@ -25,7 +25,8 @@ class ConvElementwiseAddFusePass : public FusePassBase {
virtual ~ConvElementwiseAddFusePass() {} virtual ~ConvElementwiseAddFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...@@ -30,7 +32,8 @@ class EmbeddingFCLSTMFusePass : public FusePassBase { ...@@ -30,7 +32,8 @@ class EmbeddingFCLSTMFusePass : public FusePassBase {
virtual ~EmbeddingFCLSTMFusePass() {} virtual ~EmbeddingFCLSTMFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"embedding_fc_lstm_fuse"}; const std::string name_scope_{"embedding_fc_lstm_fuse"};
}; };
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...@@ -29,7 +31,8 @@ class FCFusePass : public FusePassBase { ...@@ -29,7 +31,8 @@ class FCFusePass : public FusePassBase {
virtual ~FCFusePass() {} virtual ~FCFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -30,7 +30,8 @@ class FCGRUFusePass : public FusePassBase { ...@@ -30,7 +30,8 @@ class FCGRUFusePass : public FusePassBase {
virtual ~FCGRUFusePass() {} virtual ~FCGRUFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"fc_gru_fuse"}; const std::string name_scope_{"fc_gru_fuse"};
}; };
...@@ -41,7 +42,8 @@ class MulGRUFusePass : public FusePassBase { ...@@ -41,7 +42,8 @@ class MulGRUFusePass : public FusePassBase {
virtual ~MulGRUFusePass() {} virtual ~MulGRUFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"fc_nobias_gru_fuse"}; const std::string name_scope_{"fc_nobias_gru_fuse"};
}; };
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...@@ -30,7 +32,8 @@ class FCLstmFusePass : public FusePassBase { ...@@ -30,7 +32,8 @@ class FCLstmFusePass : public FusePassBase {
virtual ~FCLstmFusePass() {} virtual ~FCLstmFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"fc_lstm_fuse"}; const std::string name_scope_{"fc_lstm_fuse"};
}; };
...@@ -40,7 +43,8 @@ class MulLstmFusePass : public FusePassBase { ...@@ -40,7 +43,8 @@ class MulLstmFusePass : public FusePassBase {
virtual ~MulLstmFusePass() {} virtual ~MulLstmFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"fc_nobias_lstm_fuse"}; const std::string name_scope_{"fc_nobias_lstm_fuse"};
}; };
......
...@@ -32,7 +32,8 @@ class FuseElewiseAddActPass : public FusePassBase { ...@@ -32,7 +32,8 @@ class FuseElewiseAddActPass : public FusePassBase {
virtual ~FuseElewiseAddActPass() {} virtual ~FuseElewiseAddActPass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
std::unique_ptr<ir::Graph> FuseElewiseAddAct( std::unique_ptr<ir::Graph> FuseElewiseAddAct(
std::unique_ptr<ir::Graph> graph, std::unique_ptr<ir::Graph> graph,
......
...@@ -111,7 +111,7 @@ std::unique_ptr<ir::Graph> FuseReluDepthwiseConvPass::FuseReluDepthwiseConv( ...@@ -111,7 +111,7 @@ std::unique_ptr<ir::Graph> FuseReluDepthwiseConvPass::FuseReluDepthwiseConv(
xg_var = subgraph.at(xg)->Var(); xg_var = subgraph.at(xg)->Var();
} }
PADDLE_ENFORCE_EQ(layer_op->Input("Input").size(), 1); PADDLE_ENFORCE_EQ(layer_op->Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(layer_op->Input("Input")[0], y_var->Name()); PADDLE_ENFORCE_EQ(layer_op->Input("Input")[0], y_var->Name());
layer_op->SetInput("Input", {x_var->Name()}); layer_op->SetInput("Input", {x_var->Name()});
subgraph.at(layer)->inputs.push_back(subgraph.at(x)); subgraph.at(layer)->inputs.push_back(subgraph.at(x));
...@@ -119,13 +119,13 @@ std::unique_ptr<ir::Graph> FuseReluDepthwiseConvPass::FuseReluDepthwiseConv( ...@@ -119,13 +119,13 @@ std::unique_ptr<ir::Graph> FuseReluDepthwiseConvPass::FuseReluDepthwiseConv(
VLOG(4) << "replace " << y_var->Name() << " -> " << x_var->Name(); VLOG(4) << "replace " << y_var->Name() << " -> " << x_var->Name();
if (!only_forward) { if (!only_forward) {
PADDLE_ENFORCE_EQ(layer_g_op->Input("Input").size(), 1); PADDLE_ENFORCE_EQ(layer_g_op->Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(layer_g_op->Input("Input")[0], y_var->Name()); PADDLE_ENFORCE_EQ(layer_g_op->Input("Input")[0], y_var->Name());
layer_g_op->SetInput("Input", {x_var->Name()}); layer_g_op->SetInput("Input", {x_var->Name()});
subgraph.at(layer_g)->inputs.push_back(subgraph.at(x)); subgraph.at(layer_g)->inputs.push_back(subgraph.at(x));
subgraph.at(x)->outputs.push_back(subgraph.at(layer_g)); subgraph.at(x)->outputs.push_back(subgraph.at(layer_g));
PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input")).size(), 1); PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input")).size(), 1UL);
PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input"))[0], PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input"))[0],
yg_var->Name()); yg_var->Name());
layer_g_op->SetOutput(GradVarName("Input"), {xg_var->Name()}); layer_g_op->SetOutput(GradVarName("Input"), {xg_var->Name()});
......
...@@ -32,7 +32,8 @@ class FuseReluDepthwiseConvPass : public FusePassBase { ...@@ -32,7 +32,8 @@ class FuseReluDepthwiseConvPass : public FusePassBase {
virtual ~FuseReluDepthwiseConvPass() {} virtual ~FuseReluDepthwiseConvPass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
std::unique_ptr<ir::Graph> FuseReluDepthwiseConv( std::unique_ptr<ir::Graph> FuseReluDepthwiseConv(
std::unique_ptr<ir::Graph> graph, bool only_forward) const; std::unique_ptr<ir::Graph> graph, bool only_forward) const;
}; };
......
...@@ -26,6 +26,14 @@ limitations under the License. */ ...@@ -26,6 +26,14 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details {
// This attr is not recommended, because the graph should not depend on
// the program once it is built.
constexpr char kAllOpDescs[] = "all_op_descs";
} // namespace details
namespace ir { namespace ir {
/* /*
...@@ -168,10 +176,13 @@ class Graph { ...@@ -168,10 +176,13 @@ class Graph {
return ret; return ret;
} }
void RemoveNode(ir::Node *node) { std::unique_ptr<ir::Node> RemoveNode(ir::Node *node) {
PADDLE_ENFORCE(node_set_.find(node) != node_set_.end()); PADDLE_ENFORCE(node_set_.find(node) != node_set_.end());
node_set_.erase(node); std::unique_ptr<ir::Node> ret;
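// Hand ownership of the removed node back to the caller instead of destroying it.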
ret.reset(nodes_.at(node).release());
nodes_.erase(node); nodes_.erase(node);
node_set_.erase(node);
return ret;
} }
// NOTE low performance, but simple and secure. // NOTE low performance, but simple and secure.
...@@ -184,13 +195,6 @@ class Graph { ...@@ -184,13 +195,6 @@ class Graph {
return nullptr; return nullptr;
} }
void ResolveHazard(
const std::map<std::string, std::vector<ir::Node *>> &var_nodes);
private:
std::map<std::string, std::vector<ir::Node *>> InitFromProgram(
const ProgramDesc &program);
// This method takes ownership of `node`. // This method takes ownership of `node`.
ir::Node *AddNode(ir::Node *node) { ir::Node *AddNode(ir::Node *node) {
PADDLE_ENFORCE(node_set_.find(node) == node_set_.end()); PADDLE_ENFORCE(node_set_.find(node) == node_set_.end());
...@@ -199,6 +203,13 @@ class Graph { ...@@ -199,6 +203,13 @@ class Graph {
return node; return node;
} }
void ResolveHazard(
const std::map<std::string, std::vector<ir::Node *>> &var_nodes);
private:
std::map<std::string, std::vector<ir::Node *>> InitFromProgram(
const ProgramDesc &program);
// NOTE: program_ shouldn't be exposed to user. // NOTE: program_ shouldn't be exposed to user.
const ProgramDesc program_; const ProgramDesc program_;
std::map<std::string, boost::any> attrs_; std::map<std::string, boost::any> attrs_;
......
...@@ -38,7 +38,7 @@ size_t PDPattern::id_ = 0UL; ...@@ -38,7 +38,7 @@ size_t PDPattern::id_ = 0UL;
PDNode *PDPattern::NewNode(const std::string &name) { PDNode *PDPattern::NewNode(const std::string &name) {
if (!name.empty()) { if (!name.empty()) {
PADDLE_ENFORCE_EQ(node_map_.count(name), 0, PADDLE_ENFORCE_EQ(node_map_.count(name), 0UL,
"PDNode's name should be unique, get duplicate [%s]", "PDNode's name should be unique, get duplicate [%s]",
name); name);
} }
...@@ -51,7 +51,7 @@ PDNode *PDPattern::NewNode(const std::string &name) { ...@@ -51,7 +51,7 @@ PDNode *PDPattern::NewNode(const std::string &name) {
PDNode *PDPattern::NewNode(PDNode::teller_t &&teller, const std::string &name) { PDNode *PDPattern::NewNode(PDNode::teller_t &&teller, const std::string &name) {
if (!name.empty()) { if (!name.empty()) {
PADDLE_ENFORCE_EQ(node_map_.count(name), 0, PADDLE_ENFORCE_EQ(node_map_.count(name), 0UL,
"PDNode's name should be unique, get duplicate [%s]", "PDNode's name should be unique, get duplicate [%s]",
name); name);
} }
......
...@@ -22,7 +22,8 @@ namespace ir { ...@@ -22,7 +22,8 @@ namespace ir {
class IdentityScaleOpCleanPass : public FusePassBase { class IdentityScaleOpCleanPass : public FusePassBase {
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
private: private:
virtual ~IdentityScaleOpCleanPass() = default; virtual ~IdentityScaleOpCleanPass() = default;
......
...@@ -60,7 +60,8 @@ class LockFreeOptimizePass : public Pass { ...@@ -60,7 +60,8 @@ class LockFreeOptimizePass : public Pass {
virtual ~LockFreeOptimizePass() {} virtual ~LockFreeOptimizePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
private: private:
// Create a new sgd node via current optimizer node // Create a new sgd node via current optimizer node
......
...@@ -29,7 +29,8 @@ class ConvBiasFusePass : public FusePassBase { ...@@ -29,7 +29,8 @@ class ConvBiasFusePass : public FusePassBase {
virtual bool is_conv3d() const { return false; } virtual bool is_conv3d() const { return false; }
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"conv_bias_mkldnn_fuse"}; const std::string name_scope_{"conv_bias_mkldnn_fuse"};
}; };
/* /*
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/platform/place.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/op_proto_maker.h"
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (type == "conv2d") {
op->SetAttr("use_mkldnn", true);
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
if (inputs.size() > 2)
op->SetInput("Bias", {inputs[2]});
else
op->SetInput("Bias", {});
} else if (type == "elementwise_add") {
op->SetAttr("use_mkldnn", true);
op->SetInput("X", {inputs[0]});
op->SetInput("Y", {inputs[1]});
}
op->SetOutput("Out", outputs);
op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
static_cast<int>(OpRole::kForward));
}
// (c, weights[, conv_bias])->conv->f
// (f, eltwise_bias)->elementwise_add->g
ProgramDesc BuildProgramDesc(bool convWithExistingBias) {
ProgramDesc prog;
std::vector<std::string> nodes{"c", "weights", "f", "eltwise_bias", "g"};
if (convWithExistingBias) nodes.push_back("conv_bias");
for (auto& v : nodes) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::LOD_TENSOR);
if (v == "weights" || v == "conv_bias" || v == "eltwise_bias") {
var->SetPersistable(true);
}
}
// conv+bias, both with MKL-DNN
if (convWithExistingBias) {
SetOp(&prog, "conv2d", "conv",
std::vector<std::string>({"c", "weights", "conv_bias"}),
std::vector<std::string>({"f"}));
} else {
SetOp(&prog, "conv2d", "conv", std::vector<std::string>({"c", "weights"}),
std::vector<std::string>({"f"}));
}
SetOp(&prog, "elementwise_add", "eltwise",
std::vector<std::string>({"f", "eltwise_bias"}),
std::vector<std::string>({"g"}));
return prog;
}
void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32,
::paddle::memory::Allocator::kDefault, 1);
}
void MainTest(bool convWithExistingBias) {
auto prog = BuildProgramDesc(convWithExistingBias);
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto place = paddle::platform::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
// Init scope, as it is used in pass
exe.CreateVariables(prog, 0, true, &scope);
if (convWithExistingBias) {
InitTensorHolder(&scope, place, "conv_bias");
InitTensorHolder(&scope, place, "eltwise_bias");
}
graph->Set(kParamScopeAttr, new framework::Scope*(&scope));
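// The fuse pass reads the persistable bias/weight tensors through this scope attribute.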
auto pass = PassRegistry::Instance().Get("conv_bias_mkldnn_fuse_pass");
int original_nodes_num = graph->Nodes().size();
graph = pass->Apply(std::move(graph));
int current_nodes_num = graph->Nodes().size();
// Remove 3 Nodes: Conv, Bias, conv_out
// Add 1 Node: ConvBias
EXPECT_EQ(original_nodes_num - 2, current_nodes_num);
// Assert conv_bias op in newly generated graph
int conv_bias_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()->Type() == "conv2d") {
auto* op = node->Op();
ASSERT_TRUE(op->HasAttr("use_mkldnn"));
EXPECT_TRUE(boost::get<bool>(op->GetAttr("use_mkldnn")));
// check if "conv" convolution is fused
auto op_name = boost::get<std::string>(op->GetAttr("name"));
if (op_name == "conv") {
auto input_names = op->InputNames();
ASSERT_TRUE(std::find(input_names.begin(), input_names.end(), "Bias") !=
input_names.end());
auto bias = boost::get<std::vector<std::string>>(op->Input("Bias"));
if (bias.size()) {
++conv_bias_count;
}
}
}
}
EXPECT_EQ(conv_bias_count, 1);
}
TEST(ConvBiasFusePass, bias_free_conv) { MainTest(false); }
TEST(ConvBiasFusePass, conv_with_existing_bias) { MainTest(true); }
TEST(ConvBiasFusePass, conv3d) {
Conv3DBiasFusePass pass;
ASSERT_TRUE(pass.is_conv3d());
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(conv_bias_mkldnn_fuse_pass);
...@@ -31,7 +31,8 @@ class RepeatedFCReluFusePass : public FusePassBase { ...@@ -31,7 +31,8 @@ class RepeatedFCReluFusePass : public FusePassBase {
virtual ~RepeatedFCReluFusePass() {} virtual ~RepeatedFCReluFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"repeated_fc_relu_fuse"}; const std::string name_scope_{"repeated_fc_relu_fuse"};
}; };
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/ir/pass.h"
...@@ -25,7 +27,8 @@ class SeqConcatFcFusePass : public FusePassBase { ...@@ -25,7 +27,8 @@ class SeqConcatFcFusePass : public FusePassBase {
virtual ~SeqConcatFcFusePass() {} virtual ~SeqConcatFcFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -28,7 +28,8 @@ class SeqConvEltAddReluFusePass : public FusePassBase { ...@@ -28,7 +28,8 @@ class SeqConvEltAddReluFusePass : public FusePassBase {
virtual ~SeqConvEltAddReluFusePass() {} virtual ~SeqConvEltAddReluFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"seqconv_eltadd_relu_fuse"}; const std::string name_scope_{"seqconv_eltadd_relu_fuse"};
}; };
......
...@@ -42,7 +42,8 @@ class SeqPoolConcatFusePass : public FusePassBase { ...@@ -42,7 +42,8 @@ class SeqPoolConcatFusePass : public FusePassBase {
virtual ~SeqPoolConcatFusePass() {} virtual ~SeqPoolConcatFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"seqpool_concat_fuse"}; const std::string name_scope_{"seqpool_concat_fuse"};
}; };
......
...@@ -31,7 +31,8 @@ class SquaredMatSubFusePass : public FusePassBase { ...@@ -31,7 +31,8 @@ class SquaredMatSubFusePass : public FusePassBase {
virtual ~SquaredMatSubFusePass() {} virtual ~SquaredMatSubFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
const std::string name_scope_{"squared_mat_sub_fuse"}; const std::string name_scope_{"squared_mat_sub_fuse"};
}; };
......
...@@ -30,7 +30,8 @@ class TransposeFlattenConcatFusePass : public FusePassBase { ...@@ -30,7 +30,8 @@ class TransposeFlattenConcatFusePass : public FusePassBase {
virtual ~TransposeFlattenConcatFusePass() {} virtual ~TransposeFlattenConcatFusePass() {}
protected: protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
}; };
} // namespace ir } // namespace ir
......
...@@ -27,7 +27,7 @@ enum class OpRole { ...@@ -27,7 +27,7 @@ enum class OpRole {
kForward = 0x0000, kForward = 0x0000,
kBackward = 0x0001, kBackward = 0x0001,
kOptimize = 0x0002, kOptimize = 0x0002,
// RPC role is for send/recv releated op // RPC role is for send/recv related op
kRPC = 0x0004, kRPC = 0x0004,
// Dist role is for split_byref/split_selected_rows/concat // Dist role is for split_byref/split_selected_rows/concat
// used for distributed training. // used for distributed training.
......
...@@ -177,9 +177,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -177,9 +177,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
// in concurrency scenario. Here use an `if` to fix this issue. // in concurrency scenario. Here use an `if` to fix this issue.
// Please do not remove the `if`; ask @Superjomn if there are any concerns. // Please do not remove the `if`; ask @Superjomn if there are any concerns.
if (platform::IsProfileEnabled()) { if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool = platform::RecordEvent record_event(Type());
platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place); RunImpl(scope, place);
} else { } else {
RunImpl(scope, place); RunImpl(scope, place);
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" #include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h"
...@@ -193,7 +194,6 @@ ParallelExecutor::ParallelExecutor( ...@@ -193,7 +194,6 @@ ParallelExecutor::ParallelExecutor(
member_->use_all_reduce_ = member_->use_all_reduce_ =
build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce; build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
member_->nranks_ = build_strategy.num_trainers_ * places.size(); member_->nranks_ = build_strategy.num_trainers_ * places.size();
if (!member_->use_all_reduce_) { if (!member_->use_all_reduce_) {
PADDLE_ENFORCE(places.size() > 1, PADDLE_ENFORCE(places.size() > 1,
"If you set build_strategy.reduce with 'Reduce'," "If you set build_strategy.reduce with 'Reduce',"
...@@ -221,9 +221,10 @@ ParallelExecutor::ParallelExecutor( ...@@ -221,9 +221,10 @@ ParallelExecutor::ParallelExecutor(
// choose the execution strategy. // choose the execution strategy.
build_strategy.enable_parallel_graph_ = build_strategy.enable_parallel_graph_ =
EnableParallelGraphExecution(main_program, exec_strategy, build_strategy); EnableParallelGraphExecution(main_program, exec_strategy, build_strategy);
if (build_strategy.enable_parallel_graph_)
VLOG(1) << "Enable ParallelGraph Execution: " VLOG(0) << "The Executor would execute the graph by ParallelGraph "
<< build_strategy.enable_parallel_graph_; "Execution which can get better performance,"
<< "you can force it off by env FLAGS_enable_parallel_graph=0";
if (member_->use_cuda_) { if (member_->use_cuda_) {
// Bcast Parameters to all GPUs // Bcast Parameters to all GPUs
...@@ -257,60 +258,44 @@ ParallelExecutor::ParallelExecutor( ...@@ -257,60 +258,44 @@ ParallelExecutor::ParallelExecutor(
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert // Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp // ncclOp
std::vector<std::unique_ptr<ir::Graph>> graphs; std::unique_ptr<ir::Graph> graph;
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (build_strategy.enable_parallel_graph_) { graph = build_strategy.Apply(main_program, member_->places_, loss_var_name,
for (size_t i = 0; i < member_->places_.size(); ++i) { member_->local_scopes_, member_->nranks_,
std::unique_ptr<ir::Graph> graph = build_strategy.Apply( member_->use_cuda_, member_->nccl_ctxs_.get());
main_program, {member_->places_[i]}, loss_var_name,
{member_->local_scopes_[i]}, member_->nranks_, member_->use_cuda_,
member_->nccl_ctxs_.get());
graphs.push_back(std::move(graph));
}
} else {
std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
main_program, member_->places_, loss_var_name, member_->local_scopes_,
member_->nranks_, member_->use_cuda_, member_->nccl_ctxs_.get());
graphs.push_back(std::move(graph));
}
#else #else
std::unique_ptr<ir::Graph> graph = build_strategy.Apply( graph = build_strategy.Apply(main_program, member_->places_, loss_var_name,
main_program, member_->places_, loss_var_name, member_->local_scopes_, member_->local_scopes_, member_->nranks_,
member_->nranks_, member_->use_cuda_); member_->use_cuda_);
graphs.push_back(std::move(graph));
#endif #endif
auto max_memory_size = GetEagerDeletionThreshold(); auto max_memory_size = GetEagerDeletionThreshold();
VLOG(10) << "Eager Deletion Threshold " VLOG(10) << "Eager Deletion Threshold "
<< static_cast<float>(max_memory_size) / (1 << 30); << static_cast<float>(max_memory_size) / (1 << 30);
if (max_memory_size >= 0) { if (max_memory_size >= 0) {
for (size_t i = 0; i < graphs.size(); ++i) { graph = member_->PrepareGCAndRefCnts(std::move(graph),
graphs[i] = member_->PrepareGCAndRefCnts( static_cast<size_t>(max_memory_size));
std::move(graphs[i]), static_cast<size_t>(max_memory_size));
}
} }
// Step 3. Create vars in each scope. Passes may also create new vars. // Step 3. Create vars in each scope. Passes may also create new vars.
// skip control vars and empty vars // skip control vars and empty vars
std::vector<details::VariableInfo> var_infos; std::vector<details::VariableInfo> var_infos;
for (auto &graph : graphs) { for (auto &node : graph->Nodes()) {
for (auto &node : graph->Nodes()) { if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { var_infos.emplace_back();
var_infos.emplace_back(); var_infos.back().name_ = node->Var()->Name();
var_infos.back().name_ = node->Var()->Name(); var_infos.back().type_ = node->Var()->GetType();
var_infos.back().type_ = node->Var()->GetType(); var_infos.back().persistable_ = node->Var()->Persistable();
var_infos.back().persistable_ = node->Var()->Persistable();
}
} }
} }
// If the loss_var_name is given, the number of graphs should be only one. // If the loss_var_name is given, the number of graphs should be only one.
if (loss_var_name.size()) { if (loss_var_name.size()) {
size_t graph_num = ir::GraphNum(*graphs[0]); size_t graph_num = ir::GraphNum(*graph);
if (graph_num > 1) { if (graph_num > 1) {
LOG(WARNING) LOG(WARNING)
<< "The number of graph should be only one, " << "The number of graph should be only one, "
"but the current graph has " "but the current graph has "
<< ir::GraphNum(*graphs[0]) << ir::GraphNum(*graph)
<< " sub_graphs. If you want to see the nodes of the " << " sub_graphs. If you want to see the nodes of the "
"sub_graphs, you should use 'FLAGS_print_sub_graph_dir' " "sub_graphs, you should use 'FLAGS_print_sub_graph_dir' "
"to specify the output dir. NOTES: if you not do training, " "to specify the output dir. NOTES: if you not do training, "
...@@ -319,18 +304,25 @@ ParallelExecutor::ParallelExecutor( ...@@ -319,18 +304,25 @@ ParallelExecutor::ParallelExecutor(
} }
if (build_strategy.enable_parallel_graph_) { if (build_strategy.enable_parallel_graph_) {
#ifdef PADDLE_WITH_CUDA
// TODO(Yancey1989): Remove passing in the main_program when
// allreduce_seq_pass doesn't need it as the attr.
member_->executor_.reset(new details::ParallelSSAGraphExecutor( member_->executor_.reset(new details::ParallelSSAGraphExecutor(
exec_strategy, member_->local_scopes_, member_->places_, exec_strategy, member_->local_scopes_, member_->places_, main_program,
std::move(graphs))); std::move(graph)));
#else
PADDLE_THROW(
"Paddle should be compiled with CUDA for ParallelGraph Execution.");
#endif
} else { } else {
if (exec_strategy.type_ == ExecutionStrategy::kDefault) { if (exec_strategy.type_ == ExecutionStrategy::kDefault) {
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, member_->places_, exec_strategy, member_->local_scopes_, member_->places_,
std::move(graphs[0]))); std::move(graph)));
} else { } else {
member_->executor_.reset(new details::FastThreadedSSAGraphExecutor( member_->executor_.reset(new details::FastThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, member_->places_, exec_strategy, member_->local_scopes_, member_->places_,
std::move(graphs[0]))); std::move(graph)));
} }
} }
...@@ -482,11 +474,10 @@ bool ParallelExecutor::EnableParallelGraphExecution( ...@@ -482,11 +474,10 @@ bool ParallelExecutor::EnableParallelGraphExecution(
} }
if (!member_->use_all_reduce_ || !member_->use_cuda_) if (!member_->use_all_reduce_ || !member_->use_cuda_)
enable_parallel_graph = false;
if (build_strategy.enable_sequential_execution_ || if (build_strategy.enable_sequential_execution_ ||
exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
enable_parallel_graph = false; enable_parallel_graph = false;
return enable_parallel_graph; return enable_parallel_graph;
} }
......
...@@ -89,7 +89,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { ...@@ -89,7 +89,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(params_file_); CP_MEMBER(params_file_);
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields. // params_file_ fields.
// Gpu releated. // Gpu related.
CP_MEMBER(use_gpu_); CP_MEMBER(use_gpu_);
CP_MEMBER(device_id_); CP_MEMBER(device_id_);
CP_MEMBER(memory_pool_init_size_mb_); CP_MEMBER(memory_pool_init_size_mb_);
...@@ -97,13 +97,13 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { ...@@ -97,13 +97,13 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(enable_memory_optim_); CP_MEMBER(enable_memory_optim_);
CP_MEMBER(static_memory_optim_); CP_MEMBER(static_memory_optim_);
CP_MEMBER(static_memory_optim_force_update_); CP_MEMBER(static_memory_optim_force_update_);
// TensorRT releated. // TensorRT related.
CP_MEMBER(use_tensorrt_); CP_MEMBER(use_tensorrt_);
CP_MEMBER(tensorrt_workspace_size_); CP_MEMBER(tensorrt_workspace_size_);
CP_MEMBER(tensorrt_max_batchsize_); CP_MEMBER(tensorrt_max_batchsize_);
CP_MEMBER(tensorrt_min_subgraph_size_); CP_MEMBER(tensorrt_min_subgraph_size_);
CP_MEMBER(tensorrt_precision_mode_); CP_MEMBER(tensorrt_precision_mode_);
// MKLDNN releated. // MKLDNN related.
CP_MEMBER(use_mkldnn_); CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_); CP_MEMBER(mkldnn_enabled_op_types_);
......
...@@ -392,7 +392,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -392,7 +392,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(3) << "create AnalysisConfig"; VLOG(3) << "create AnalysisConfig";
if (config.use_gpu()) { if (config.use_gpu()) {
// 1. GPU memeroy // 1. GPU memory
PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f); PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f);
PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d", PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d",
config.gpu_device_id()); config.gpu_device_id());
...@@ -726,7 +726,7 @@ bool AnalysisPredictor::need_collect_var_shapes_for_memory_optim() { ...@@ -726,7 +726,7 @@ bool AnalysisPredictor::need_collect_var_shapes_for_memory_optim() {
return need; return need;
} }
std::string AnalysisPredictor::GetSeriazlizedProgram() const { std::string AnalysisPredictor::GetSerializedProgram() const {
return inference_program_->Proto()->SerializeAsString(); return inference_program_->Proto()->SerializeAsString();
} }
......
...@@ -74,7 +74,7 @@ class AnalysisPredictor : public PaddlePredictor { ...@@ -74,7 +74,7 @@ class AnalysisPredictor : public PaddlePredictor {
void SetMkldnnThreadID(int tid); void SetMkldnnThreadID(int tid);
std::string GetSeriazlizedProgram() const override; std::string GetSerializedProgram() const override;
protected: protected:
// For memory optimization. // For memory optimization.
......
...@@ -214,8 +214,8 @@ TEST(AnalysisPredictor, memory_optim) { ...@@ -214,8 +214,8 @@ TEST(AnalysisPredictor, memory_optim) {
{ {
// The first predictor helps to cache the memory optimize strategy. // The first predictor helps to cache the memory optimize strategy.
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config); auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
LOG(INFO) << "serialized program: " << predictor->GetSeriazlizedProgram(); LOG(INFO) << "serialized program: " << predictor->GetSerializedProgram();
ASSERT_FALSE(predictor->GetSeriazlizedProgram().empty()); ASSERT_FALSE(predictor->GetSerializedProgram().empty());
// Run several times to check the parameters are not reused by mistake. // Run several times to check the parameters are not reused by mistake.
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
......
...@@ -92,7 +92,7 @@ void PaddleBuf::Reset(void *data, size_t length) { ...@@ -92,7 +92,7 @@ void PaddleBuf::Reset(void *data, size_t length) {
void PaddleBuf::Free() { void PaddleBuf::Free() {
if (memory_owned_ && data_) { if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0); PADDLE_ENFORCE_GT(length_, 0UL);
free(static_cast<char *>(data_)); free(static_cast<char *>(data_));
data_ = nullptr; data_ = nullptr;
length_ = 0; length_ = 0;
......
...@@ -290,7 +290,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -290,7 +290,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) { NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor"; VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memeroy // 1. GPU memory
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
config.fraction_of_gpu_memory, 0.f, config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]"); "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
......
...@@ -212,12 +212,12 @@ struct AnalysisConfig { ...@@ -212,12 +212,12 @@ struct AnalysisConfig {
std::string prog_file_; std::string prog_file_;
std::string params_file_; std::string params_file_;
// GPU releated. // GPU related.
bool use_gpu_{false}; bool use_gpu_{false};
int device_id_{0}; int device_id_{0};
uint64_t memory_pool_init_size_mb_{100}; // initial size is 100MB. uint64_t memory_pool_init_size_mb_{100}; // initial size is 100MB.
// TensorRT releated. // TensorRT related.
bool use_tensorrt_{false}; bool use_tensorrt_{false};
// For workspace_size, refer it from here: // For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
......
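For context, a minimal sketch of how the GPU-related fields above are usually driven from user code. EnableUseGpu and SetModel are the customary AnalysisConfig setters but are not part of this diff, and the model paths are placeholders:

#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> MakeGpuPredictor() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/__model__", "/path/to/__params__");  // placeholder paths
  // 100 MB initial GPU memory pool on device 0, mirroring the defaults above.
  config.EnableUseGpu(100, 0);
  return paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
}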
...@@ -248,7 +248,7 @@ class PaddlePredictor { ...@@ -248,7 +248,7 @@ class PaddlePredictor {
/** \brief Get the serialized model program that executes in inference phase. /** \brief Get the serialized model program that executes in inference phase.
* Its data type is ProgramDesc, which is a protobuf message. * Its data type is ProgramDesc, which is a protobuf message.
*/ */
virtual std::string GetSeriazlizedProgram() const { virtual std::string GetSerializedProgram() const {
assert(false); // Force raise error. assert(false); // Force raise error.
return "NotImplemented"; return "NotImplemented";
} }
......
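A caller-side sketch of the renamed method; only GetSerializedProgram itself comes from this diff, the helper around it is illustrative:

#include <iostream>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void LogProgramSize(const paddle::PaddlePredictor &predictor) {
  // The returned string is a serialized ProgramDesc protobuf message.
  std::string program = predictor.GetSerializedProgram();
  std::cout << "serialized program: " << program.size() << " bytes" << std::endl;
}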
...@@ -60,10 +60,13 @@ set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2") ...@@ -60,10 +60,13 @@ set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2")
download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc) inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc)
# TODO(luotao, Superjom) Disable DAM test, temporarily fix
# https://github.com/PaddlePaddle/Paddle/issues/15032#issuecomment-455990914.
# After inference framework refactor, will reopen it.
# normal DAM # normal DAM
set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam")
download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator SERIAL) #inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator SERIAL)
# small DAM # small DAM
set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam")
......
...@@ -56,14 +56,14 @@ struct DataRecord { ...@@ -56,14 +56,14 @@ struct DataRecord {
std::vector<float> slot_data; std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data); split_to_float(data[1], ' ', &slot_data);
std::string name = data[0]; std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0, PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,
"line %d, %s should be divisible", num_lines, name); "line %d, %s should be divisible", num_lines, name);
datasets[name].emplace_back(std::move(slot_data)); datasets[name].emplace_back(std::move(slot_data));
} }
num_samples = num_lines / num_slots; num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines), PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
"num samples should be divisible"); "num samples should be divisible");
PADDLE_ENFORCE_GT(num_samples, 0); PADDLE_ENFORCE_GT(num_samples, 0UL);
} }
void Prepare(int bs) { void Prepare(int bs) {
......
include(ExternalProject)
set(INFERENCE_URL "http://paddle-inference-dist.cdn.bcebos.com" CACHE STRING "inference download url") set(INFERENCE_URL "http://paddle-inference-dist.cdn.bcebos.com" CACHE STRING "inference download url")
set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
"A path setting inference demo download directories.") "A path setting inference demo download directories.")
Old helpers (removed):
function (inference_download install_dir url filename)
  message(STATUS "Download inference test stuff from ${url}/${filename}")
  file(DOWNLOAD "${url}/${filename}" "${install_dir}/${filename}")
  message(STATUS "finish downloading ${filename}")
endfunction()
function (inference_download_and_uncompress install_dir url filename)
  inference_download(${install_dir} ${url} ${filename})
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E tar xzf ${install_dir}/${filename}
    WORKING_DIRECTORY ${install_dir}
  )
endfunction()
New helpers (added, ExternalProject-based):
function(inference_download INSTALL_DIR URL FILENAME)
  message(STATUS "Download inference test stuff from ${URL}/${FILENAME}")
  string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME})
  ExternalProject_Add(
      extern_inference_download_${FILENAME_EX}
      ${EXTERNAL_PROJECT_LOG_ARGS}
      PREFIX ${INSTALL_DIR}
      URL ${URL}/${FILENAME}
      DOWNLOAD_COMMAND wget -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME}
      DOWNLOAD_DIR ${INSTALL_DIR}
      DOWNLOAD_NO_PROGRESS 1
      CONFIGURE_COMMAND ""
      BUILD_COMMAND ""
      UPDATE_COMMAND ""
      INSTALL_COMMAND ""
  )
endfunction()
function(inference_download_and_uncompress INSTALL_DIR URL FILENAME)
  message(STATUS "Download inference test stuff from ${URL}/${FILENAME}")
  string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME})
  set(EXTERNAL_PROJECT_NAME "extern_inference_download_${FILENAME_EX}")
  set(UNPACK_DIR "${INSTALL_DIR}/src/${EXTERNAL_PROJECT_NAME}")
  ExternalProject_Add(
      ${EXTERNAL_PROJECT_NAME}
      ${EXTERNAL_PROJECT_LOG_ARGS}
      PREFIX ${INSTALL_DIR}
      URL ${URL}/${FILENAME}
      DOWNLOAD_DIR ${INSTALL_DIR}
      DOWNLOAD_NO_PROGRESS 1
      CONFIGURE_COMMAND ""
      BUILD_COMMAND ""
      UPDATE_COMMAND ""
      INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${UNPACK_DIR} ${INSTALL_DIR}
  )
endfunction()
set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec") set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec")
......
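The move to ExternalProject_Add in the helpers above presumably turns each test download into a regular build-time target with stamp files, so an already-fetched archive is not downloaded and unpacked again on every configure, and the copy into ${INSTALL_DIR} happens in the external project's install step instead of an ad-hoc execute_process call.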
...@@ -171,9 +171,7 @@ void TestInference(const std::string& dirname, ...@@ -171,9 +171,7 @@ void TestInference(const std::string& dirname,
// Enable the profiler // Enable the profiler
paddle::platform::EnableProfiler(state); paddle::platform::EnableProfiler(state);
{ {
paddle::platform::RecordEvent record_event( paddle::platform::RecordEvent record_event("init_program");
"init_program",
paddle::platform::DeviceContextPool::Instance().Get(place));
inference_program = InitProgram(&executor, scope, dirname, is_combined); inference_program = InitProgram(&executor, scope, dirname, is_combined);
} }
...@@ -230,9 +228,7 @@ void TestInference(const std::string& dirname, ...@@ -230,9 +228,7 @@ void TestInference(const std::string& dirname,
// Run repeat times to profile the performance // Run repeat times to profile the performance
for (int i = 0; i < repeat; ++i) { for (int i = 0; i < repeat; ++i) {
paddle::platform::RecordEvent record_event( paddle::platform::RecordEvent record_event("run_inference");
"run_inference",
paddle::platform::DeviceContextPool::Instance().Get(place));
if (PrepareContext) { if (PrepareContext) {
// Note: if you change the inference_program, you need to call // Note: if you change the inference_program, you need to call
......
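A sketch of the simplified profiling pattern above; only the one-argument RecordEvent constructor comes from this diff, the surrounding function and workload are illustrative:

#include "paddle/fluid/platform/profiler.h"

void RunProfiledStep() {
  // The event no longer needs a DeviceContext; it is scoped to this block
  // and is closed when record_event is destroyed.
  paddle::platform::RecordEvent record_event("my_step");
  // ... workload to be timed goes here ...
}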
...@@ -356,7 +356,7 @@ void MemInfo::Minus(const size_t &size) { ...@@ -356,7 +356,7 @@ void MemInfo::Minus(const size_t &size) {
usage_ -= size; usage_ -= size;
} }
uint64_t MemInfo::GetPeakUsage() { return peak_usage_; } uint64_t MemInfo::GetPeakUsage() const { return peak_usage_; }
LegacyMemMonitor::~LegacyMemMonitor() { LegacyMemMonitor::~LegacyMemMonitor() {
for (auto &item : gpu_mem_info_) delete item.second; for (auto &item : gpu_mem_info_) delete item.second;
...@@ -380,10 +380,10 @@ void LegacyMemMonitor::Minus(const int &device, const size_t &size) { ...@@ -380,10 +380,10 @@ void LegacyMemMonitor::Minus(const int &device, const size_t &size) {
gpu_mem_info_[device]->Minus(size); gpu_mem_info_[device]->Minus(size);
} }
uint64_t LegacyMemMonitor::GetMemUsage(const int &device) { uint64_t LegacyMemMonitor::GetMemUsage(const int &device) const {
return gpu_mem_info_.find(device) == gpu_mem_info_.end() return gpu_mem_info_.find(device) == gpu_mem_info_.end()
? 0 ? 0
: gpu_mem_info_[device]->GetPeakUsage(); : gpu_mem_info_.at(device)->GetPeakUsage();
} }
void LegacyMemMonitor::PrintMemUsage() { void LegacyMemMonitor::PrintMemUsage() {
......
...@@ -27,20 +27,20 @@ namespace allocation { ...@@ -27,20 +27,20 @@ namespace allocation {
class MemInfo { class MemInfo {
public: public:
MemInfo() : usage_(0), peak_usage_(0) {} MemInfo() : usage_(0), peak_usage_(0) {}
MemInfo(const MemInfo &) = delete;
MemInfo &operator=(const MemInfo &) = delete;
// return a flag to indicate current operation will create a peak point or not // return a flag to indicate current operation will create a peak point or not
bool Add(const size_t &); bool Add(const size_t &);
void Minus(const size_t &); void Minus(const size_t &);
uint64_t GetPeakUsage(); uint64_t GetPeakUsage() const;
private: private:
/* current memory usage*/ /* current memory usage*/
uint64_t usage_; uint64_t usage_;
uint64_t peak_usage_; uint64_t peak_usage_;
std::mutex mutex_; std::mutex mutex_;
DISABLE_COPY_AND_ASSIGN(MemInfo);
}; };
class LegacyMemMonitor { class LegacyMemMonitor {
...@@ -56,11 +56,11 @@ class LegacyMemMonitor { ...@@ -56,11 +56,11 @@ class LegacyMemMonitor {
void Add(const int &, const size_t &); void Add(const int &, const size_t &);
void Minus(const int &, const size_t &); void Minus(const int &, const size_t &);
uint64_t GetMemUsage(const int &); uint64_t GetMemUsage(const int &) const;
void PrintMemUsage(); void PrintMemUsage();
protected: private:
MemUsage gpu_mem_info_; MemUsage gpu_mem_info_;
}; };
......
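Two C++ details behind the changes above, shown on a toy type rather than the Paddle classes: DISABLE_COPY_AND_ASSIGN replaces the hand-written deleted copy operations, and the now-const GetMemUsage has to read the map through at() (or find()) because std::map::operator[] may insert and is therefore non-const:

#include <cstdint>
#include <map>

class Monitor {
 public:
  Monitor() = default;
  Monitor(const Monitor &) = delete;             // roughly what DISABLE_COPY_AND_ASSIGN
  Monitor &operator=(const Monitor &) = delete;  // expands to, modulo the exact macro

  uint64_t Get(int device) const {
    // info_[device] would not compile in a const member: operator[] may insert.
    auto it = info_.find(device);
    return it == info_.end() ? 0 : it->second;
  }

 private:
  std::map<int, uint64_t> info_;
};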
...@@ -66,7 +66,7 @@ set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) ...@@ -66,7 +66,7 @@ set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search)
if (WITH_GPU) if (WITH_GPU)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu)
...@@ -97,3 +97,4 @@ if (WITH_PYTHON) ...@@ -97,3 +97,4 @@ if (WITH_PYTHON)
endif() endif()
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
add_subdirectory(benchmark)
(42 additional file diffs in this commit are collapsed and not shown.)