diff --git a/AUTHORS.md b/AUTHORS.md
index deafa641203ed9d9bd794fe92e4a91e3aaa03f63..da91933f4697f3b7c08feab20d703e22397b3757 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -44,6 +44,7 @@
| qingqing01 | Qing-Qing Dang |
| reyoung | Yang Yu |
| Sand3r- | Michal Gallus |
+| sfraczek | Sylwester Fraczek |
| Superjom | Chun-Wei Yan |
| tensor-tang | Jian Tang |
| tianbingsz | Tian-Bing Xu |
@@ -54,6 +55,7 @@
| wangyang59 | Yang Wang |
| wangzhen-nlp | Zhen Wang |
| wen-bo-yang | Wen-Bo Yang |
+| wojtuss | Wojciech Uss |
| wwhu | Wei-Wei Hu |
| xinghai-sun | Xing-Hai Sun |
| Xreki | Yi-Qun Liu |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61f5e63098c40f140774ba6bfd9a2de8d2d67bfb..8e7ffe72b5fb846fb55ab8dc4809d87a40cfe06c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,23 +54,12 @@ option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
-option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
-option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
-option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_JEMALLOC "Compile PaddlePaddle with jemalloc" OFF)
-option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
-option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
-option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF)
-option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
-option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF)
-option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
-option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
-option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_CONTRIB "Compile the third-party contribution" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debugging." OFF)
option(WITH_ANAKIN "Compile with Anakin library" OFF)
@@ -105,8 +94,6 @@ endif()
if (WIN32)
set(WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows" FORCE)
- set(WITH_FLUID_ONLY ON CACHE STRING
- "Enable FLUID_ONLY when compiling for Windows" FORCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
@@ -148,7 +135,6 @@ include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn
include(external/ngraph) # download, build, install nGraph
include(external/boost) # download boost
-include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/cares)
@@ -225,7 +211,6 @@ include(generic) # simplify cmake module
include(package) # set paddle packages
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
-include(rdma) # set rdma libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(inference_lib) # add paddle fluid inference libraries
@@ -233,38 +218,11 @@ include(inference_lib) # add paddle fluid inference libraries
include_directories("${PADDLE_SOURCE_DIR}")
-set(EXTERNAL_LIBS
- gflags
- glog
- ${CBLAS_LIBRARIES}
- protobuf
- zlib
- ${PYTHON_LIBRARIES}
-)
-
-if(WITH_PSLIB)
- list(APPEND EXTERNAL_LIBS pslib)
- list(APPEND EXTERNAL_LIBS pslib_brpc)
- list(APPEND EXTERNAL_LIBS libmct)
-endif(WITH_PSLIB)
-
if(WITH_AMD_GPU)
find_package(HIP)
include(hip)
endif(WITH_AMD_GPU)
-if(WITH_MKLML)
- list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
-endif()
-
-if(WITH_LIBXSMM)
- list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS})
-endif()
-
-if(WITH_MKLDNN)
- list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
-endif()
-
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
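With roughly a dozen stale options gone, a minimal configure now only needs the flags that remain. A hypothetical out-of-source build using only options still defined in this file (build directory and values are illustrative):

```bash
# Hypothetical configure after the option cleanup; only flags still defined
# in CMakeLists.txt are passed. Removed flags such as WITH_RDMA or
# WITH_FLUID_ONLY would now simply be ignored.
mkdir -p build && cd build
cmake .. -DWITH_TESTING=OFF -DWITH_PROFILER=OFF -DWITH_DISTRIBUTE=OFF
make -j"$(nproc)"
```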
diff --git a/README.md b/README.md
index 68421cf177f4cd15f8f44e8d00a27cafb5a13b91..5c428e9900762a208eebbfd053ce98663f803345 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,8 @@
English | [简体中文](./README_cn.md)
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -18,7 +18,7 @@ learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
-### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
+### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
### Install Latest Stable Release:
```
# Linux CPU
@@ -26,9 +26,9 @@ pip install paddlepaddle
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==1.2.0.post87
+pip install paddlepaddle-gpu==1.3.0.post87
# Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==1.2.0.post85
+pip install paddlepaddle-gpu==1.3.0.post85
# For installation on other platform, refer to http://paddlepaddle.org/
```
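A quick way to verify the result of any of the commands above (a sketch; use the package name of the variant you installed):

```bash
# Hypothetical post-install sanity check: confirm the wheel version and
# that the fluid package imports cleanly.
pip show paddlepaddle-gpu | grep Version   # e.g. Version: 1.3.0.post87
python -c "import paddle.fluid"
```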
@@ -75,26 +75,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
## Installation
-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) on our website.
## Documentation
-We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation.
- [Deep Learning 101](https://github.com/PaddlePaddle/book)
You might want to start from this online interactive book that can run in a Jupyter Notebook.
-- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/en/1.3/user_guides/howto/training/multi_node_en.html)
You can run distributed training jobs on MPI clusters.
-- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
+- [Python API](http://paddlepaddle.org/documentation/docs/en/1.3/api/index_en.html)
Our new API enables much shorter programs.
-- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/en/1.3/advanced_usage/development/contribute_to_paddle/index_en.html)
We appreciate your contributions!
diff --git a/README_cn.md b/README_cn.md
index dfb55b17ca4fd05ce5b7b85b2e26e4f7f7229763..b7b0e75e5524cc483a8c203a382e7f339f91694f 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -3,8 +3,8 @@
[English](./README.md) | 简体中文
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -16,7 +16,7 @@ PaddlePaddle (PArallel Distributed Deep LEarning) 是一个简单易用、高效
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
-### PaddlePaddle最新版本: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
+### PaddlePaddle最新版本: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
### 安装最新稳定版本:
```
# Linux CPU
@@ -24,9 +24,9 @@ pip install paddlepaddle
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==1.2.0.post87
+pip install paddlepaddle-gpu==1.3.0.post87
# Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==1.2.0.post85
+pip install paddlepaddle-gpu==1.3.0.post85
# 其他平台上的安装指引请参考 http://paddlepaddle.org/
```
@@ -57,26 +57,26 @@ pip install paddlepaddle-gpu==1.2.0.post85
## 安装
-推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html)
+推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html)
## 文档
-我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)和
-[中文](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) 文档
+我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)和
+[中文](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) 文档
- [深度学习101](https://github.com/PaddlePaddle/book)
或许您想从这个在线交互式书籍开始,可以在Jupyter Notebook中运行
-- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
+- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.3/user_guides/howto/training/multi_node.html)
可以在MPI集群上运行分布式训练任务
-- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
+- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.3/api_cn/index_cn.html)
新的API支持代码更少更简洁的程序
-- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
+- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.3/advanced_usage/development/contribute_to_paddle/index_cn.html)
欢迎您的贡献!
diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md
deleted file mode 100644
index 8b7dc5b7db800896eb4de2054ab5e584aed93999..0000000000000000000000000000000000000000
--- a/benchmark/IntelOptimizedPaddle.md
+++ /dev/null
@@ -1,112 +0,0 @@
-# Benchmark
-
-Machine:
-
-- Server: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket
-- Laptop: TBD
-
-System: CentOS release 6.3 (Final), Docker 1.12.1.
-
-PaddlePaddle:
-- paddlepaddle/paddle:0.11.0 (for MKLML and MKL-DNN)
- - MKL-DNN tag v0.11
- - MKLML 2018.0.1.20171007
-- paddlepaddle/paddle:0.11.0-openblas (for OpenBLAS)
- - OpenBLAS v0.2.20
-
-On each machine, we test and compare the performance of single-node training using MKL-DNN, MKLML, and OpenBLAS respectively.
-
-## Benchmark Model
-
-### Server
-
-#### Training
-Tested with batch sizes 64, 128, and 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz.
-Note that the speeds below include forward, backward, and parameter update time, so they cannot be compared directly with the Caffe `time` benchmark [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which covers only forward and backward. The parameter update time becomes significant when the weights are large, especially for AlexNet.
-
-Input image size - 3 * 224 * 224, Time: images/second
-
-- VGG-19
-
-| BatchSize | 64 | 128 | 256 |
-|--------------|-------| -----| --------|
-| OpenBLAS | 7.80 | 9.00 | 10.80 |
-| MKLML | 12.12 | 13.70 | 16.18 |
-| MKL-DNN | 28.46 | 29.83 | 30.44 |
-
-
-
-- ResNet-50
-
-| BatchSize | 64 | 128 | 256 |
-|--------------|-------| ------| -------|
-| OpenBLAS | 25.22 | 25.68 | 27.12 |
-| MKLML | 32.52 | 31.89 | 33.12 |
-| MKL-DNN | 81.69 | 82.35 | 84.08 |
-
-
-
-- GoogLeNet
-
-| BatchSize | 64 | 128 | 256 |
-|--------------|-------| ------| -------|
-| OpenBLAS | 89.52 | 96.97 | 108.25 |
-| MKLML | 128.46| 137.89| 158.63 |
-| MKL-DNN | 250.46| 264.83| 269.50 |
-
-
-
-- AlexNet
-
-| BatchSize | 64 | 128 | 256 |
-|--------------|--------| ------ | -------|
-| OpenBLAS | 45.62 | 72.79 | 107.22 |
-| MKLML | 66.37 | 105.60 | 144.04 |
-| MKL-DNN | 399.00 | 498.94 | 626.53 |
-
-
-
-#### Inference
-Tested with batch sizes 1, 2, 4, 8, and 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz.
-- VGG-19
-
-| BatchSize | 1 | 2 | 4 | 8 | 16 |
-|-----------|-------|-------|-------|-------|-------|
-| OpenBLAS | 1.10 | 1.96 | 3.62 | 3.63 | 2.25 |
-| MKLML | 5.58 | 9.80 | 15.15 | 21.21 | 28.67 |
-| MKL-DNN | 75.07 | 88.64 | 82.58 | 92.29 | 96.75 |
-
-
-
-- ResNet-50
-
-| BatchSize | 1 | 2 | 4 | 8 | 16 |
-|-----------|-------|--------|--------|--------|--------|
-| OpenBLAS | 3.31 | 6.72 | 11.59 | 13.17 | 9.27 |
-| MKLML | 6.33 | 12.02 | 22.88 | 40.53 | 63.09 |
-| MKL-DNN | 107.83| 148.84 | 177.78 | 189.35 | 217.69 |
-
-
-
-- GoogLeNet
-
-| BatchSize | 1 | 2 | 4 | 8 | 16 |
-|-----------|--------|--------|--------|--------|--------|
-| OpenBLAS | 12.06 | 23.56 | 34.48 | 36.45 | 23.12 |
-| MKLML | 22.74 | 41.56 | 81.22 | 133.47 | 210.53 |
-| MKL-DNN | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 |
-
-
-
-- AlexNet
-
-| BatchSize | 1 | 2 | 4 | 8 | 16 |
-|-----------|--------|--------|--------|--------|--------|
-| OpenBLAS | 3.53 | 6.23 | 15.04 | 26.06 | 31.62 |
-| MKLML | 21.32 | 36.55 | 73.06 | 131.15 | 192.77 |
-| MKL-DNN | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 |
-
-
-
-### Laptop
-TBD
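The tables above report throughput in images/second, while the GPU benchmark in the next deleted file reports ms/batch. A small sketch of the conversion, using the VGG-19 MKL-DNN figure at batch size 64 from the training table (awk chosen to match the idiom of the run scripts deleted further below):

```bash
# Hypothetical unit conversion between the two benchmark reports:
# images/second -> ms/batch, for VGG-19 with MKL-DNN at batch size 64.
awk 'BEGIN { bs = 64; ips = 28.46; printf "%.1f ms/batch\n", bs / ips * 1000 }'
# prints: 2248.8 ms/batch
```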
diff --git a/benchmark/README.md b/benchmark/README.md
deleted file mode 100644
index 367013f0457f9bbb9ae1335ea63dce181316d444..0000000000000000000000000000000000000000
--- a/benchmark/README.md
+++ /dev/null
@@ -1,168 +0,0 @@
-# Benchmark
-
-Machine:
-
-- CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
-- GPU: Tesla K40m
-- cuDNN: v5.1
-- System: Docker 1.12.1; all platforms are tested in a Docker environment.
-
-Platforms:
-
-- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0
-- Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu
-- Caffe: kaixhin/cuda-caffe
-
-Several convolutional and recurrent neural networks are used for testing.
-
-## Image
-
-### Benchmark Model
-
-AlexNet, GoogLeNet, and a small network used in Caffe.
-
-- [AlexNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet): the same as the linked model, except that the group size is one.
-
-- [GoogLeNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet): the same as the linked model, except that loss1 and loss2 are removed when benchmarking.
-
-- [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)
-
-
-### Single-GPU
-
-- AlexNet: input - 3 * 227 * 227, Time: ms/batch
-
-| BatchSize | 64 | 128 | 256 | 512 |
-|--------------|-----| -----| ------| -----|
-| PaddlePaddle | 195 | 334 | 602 | 1629 |
-| TensorFlow | 223 | 364 | 645 | 1235 |
-| Caffe | 324 | 627 | 1232 | 2513 |
-
-**Notes**
-
-All platforms use cuDNN v5.1. Caffe is slower in this experiment because the workspace limit of its cuDNN convolution interface is 8 * 1024 * 1024, which is smaller than the limits used in PaddlePaddle and TensorFlow. Note that Caffe would be faster if the workspace limit were increased.
-
-- GoogLeNet: input - 3 * 224 * 224, Time: ms/batch
-
-
-| BatchSize | 64 | 128 | 256 |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 613 | 1149 | 2348 |
-| TensorFlow | 644 | 1176 | 2219 |
-| Caffe | 694 | 1364 | out of memory |
-
-- SmallNet: input - 3 * 32 * 32, Time ms/batch
-
-| BatchSize | 64 | 128 | 256 | 512 |
-|--------------|--------| -------- | --------|---------|
-| PaddlePaddle | 10.463 | 18.184 | 33.113 | 63.039 |
-| TensorFlow | 9 | 15 | 28 | 59 |
-| Caffe | 9.373 | 16.6606 | 31.4797 | 59.719 |
-
-**Notes**
-
-All the single-GPU experiments in Caffe use `caffe time` to measure elapsed time, which does not include parameter update time, whereas both the PaddlePaddle and TensorFlow experiments do include it. Compared with the total time, this part is relatively small on a single machine, so we ignore it.
-
-TensorFlow implements its own algorithm-search method instead of using the algorithm-search interface in cuDNN.
-
-### Multi-GPU: 4 GPUs
-
-- AlexNet, ms / batch
-
-| total-BatchSize | 128 * 4 | 256 * 4 |
-|------------------|----------| -----------|
-| PaddlePaddle | 347 | 622 |
-| TensorFlow | 377 | 675 |
-| Caffe | 1229 | 2435 |
-
-For example, if `total-BatchSize = 128 * 4`, the speedup ratio is calculated by
-
-```
- time_at_1gpu_batch_128 * 4 / time_at_4gpu_total_batch_512
-= (334 * 4)/347
-= 3.85
-```
-
-
-
-
-- GoogLeNet, ms / batch
-
-| total-BatchSize | 128 * 4 | 256 * 4 |
-|-------------------|--------------| ----------- |
-| PaddlePaddle | 1178 | 2367 |
-| TensorFlow | 1210 | 2292 |
-| Caffe | 2007 | out of memory |
-
-
-
-
-## RNN
-We use an LSTM network for text classification as the benchmark.
-
-### Dataset
-- [IMDB](http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl)
-- Sequence length is 100. PaddlePaddle supports training with variable-length sequences, but TensorFlow requires padding, so we also pad sequences to length 100 in PaddlePaddle for a fair comparison.
-- Dictionary size=30000
-- Peephole connections are used in `lstmemory` by default in PaddlePaddle; they are also enabled in TensorFlow.
-
-### Single-GPU
-
-#### LSTM in Text Classification
-
-Testing a `2 lstm layer + fc` network with different hidden sizes and batch sizes.
-
-- Batch size = 64, ms / batch
-
-| hidden_size | 256 | 512 | 1280 |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 83 | 184 | 641 |
-| TensorFlow | 175 | 280 | 818 |
-
-- Batch size = 128, ms / batch
-
-| hidden_size | 256 | 512 | 1280 |
-|--------------|------- | -------| --------|
-| PaddlePaddle | 110 | 261 | 1007 |
-| TensorFlow | 181 | 361 | 1237 |
-
-
-- Batch size = 256, ms / batch
-
-| hidden_size | 256 | 512 | 1280 |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 170 | 414 | 1655 |
-| TensorFlow | 238 | 536 | 1905 |
-
-
-
-#### Seq2Seq
-
-A benchmark of the sequence-to-sequence network will be added later.
-
-
-### Multi GPU: 4 GPUs
-
-#### LSTM in Text Classification
-
-- hidden_size = 256, ms / batch
-
-| batch_size | 256 | 512 |
-|--------------| -------| --------|
-| PaddlePaddle | 90 | 118 |
-| TensorFlow | 226 | 118 |
-
-
-- hidden_size = 512, ms / batch
-
-| batch_size | 256 | 512 |
-|--------------| -------| --------|
-| PaddlePaddle | 189 | 268 |
-| TensorFlow | 297 | 383 |
-
-
-
-
-#### Seq2Seq
-
-A benchmark of the sequence-to-sequence network will be added later.
diff --git a/benchmark/fluid/Dockerfile b/benchmark/fluid/Dockerfile
index 2e1e0d376899fd664866621263db62258e7c3869..81ea870050fe5db4a60fee40221991e38de6bd2e 100644
--- a/benchmark/fluid/Dockerfile
+++ b/benchmark/fluid/Dockerfile
@@ -15,9 +15,6 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s
RUN pip install -U pip
RUN pip install -U kubernetes paddlepaddle
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.imikolov.fetch()" | python'
RUN pip uninstall -y paddlepaddle && mkdir /workspace
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
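With the dataset-prefetch layers removed, images built from this Dockerfile no longer ship the datasets. If they are still needed at runtime, a hedged sketch of fetching them manually, mirroring the calls in the deleted RUN lines (requires a paddlepaddle wheel to be installed at that point):

```bash
# Hypothetical manual prefetch, using the same dataset calls the removed
# RUN lines executed at image-build time.
python -c "import paddle.v2 as paddle; paddle.dataset.mnist.train(); paddle.dataset.mnist.test()"
python -c "import paddle.v2 as paddle; paddle.dataset.imdb.fetch()"
```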
diff --git a/benchmark/paddle/image/check_env.sh b/benchmark/fluid/check_env.sh
similarity index 100%
rename from benchmark/paddle/image/check_env.sh
rename to benchmark/fluid/check_env.sh
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
deleted file mode 100644
index 9efc3f0494e4a817a7357f29e684f621bce1921e..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/alexnet.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-height = 227
-width = 227
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
-gp = get_config_arg('layer_num', int, 1)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
- 'height': height,
- 'width': width,
- 'color': True,
- 'num_class': num_class,
- 'is_infer': is_infer,
- 'num_samples': num_samples
-}
-define_py_data_sources2(
- "train.list" if not is_infer else None,
- "test.list" if is_infer else None,
- module="provider",
- obj="process",
- args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=0.01 / batch_size,
- learning_method=MomentumOptimizer(0.9),
- regularization=L2Regularization(0.0005 * batch_size))
-
-# conv1
-net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(
- input=net,
- filter_size=11,
- num_channels=3,
- num_filters=96,
- stride=4,
- padding=1)
-net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-# conv2
-net = img_conv_layer(
- input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp)
-net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-# conv3
-net = img_conv_layer(
- input=net, filter_size=3, num_filters=384, stride=1, padding=1)
-# conv4
-net = img_conv_layer(
- input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp)
-
-# conv5
-net = img_conv_layer(
- input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-net = fc_layer(
- input=net,
- size=4096,
- act=ReluActivation(),
- layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(
- input=net,
- size=4096,
- act=ReluActivation(),
- layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
-
-if is_infer:
- outputs(net)
-else:
- lab = data_layer('label', num_class)
- loss = cross_entropy(input=net, label=lab)
- outputs(loss)
diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py
deleted file mode 100644
index 2a850ccb7f2c75b467554181fc5f4aa8f2b97a09..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/googlenet.py
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/usr/bin/env python
-from paddle.trainer_config_helpers import *
-
-height = 224
-width = 224
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
-use_gpu = get_config_arg('use_gpu', bool, True)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
- 'height': height,
- 'width': width,
- 'color': True,
- 'num_class': num_class,
- 'is_infer': is_infer,
- 'num_samples': num_samples
-}
-define_py_data_sources2(
- "train.list" if not is_infer else None,
- "test.list" if is_infer else None,
- module="provider",
- obj="process",
- args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=0.01 / batch_size,
- learning_method=MomentumOptimizer(0.9),
- regularization=L2Regularization(0.0005 * batch_size))
-
-conv_projection = conv_projection if use_gpu else img_conv_layer
-
-def inception2(name, input, channels, \
- filter1,
- filter3R, filter3,
- filter5R, filter5,
- proj):
-
- conv1 = name + '_1'
- conv3r = name + '_3r'
- conv3 = name + '_3'
- conv5r = name + '_5r'
- conv5 = name + '_5'
- maxpool = name + '_max'
- convproj = name + '_proj'
-
- cov1 = img_conv_layer(
- name=conv1,
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter1,
- stride=1,
- padding=0)
-
- cov3r = img_conv_layer(
- name=conv3r,
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter3R,
- stride=1,
- padding=0)
- cov3 = img_conv_layer(
- name=conv3,
- input=cov3r,
- filter_size=3,
- num_filters=filter3,
- stride=1,
- padding=1)
-
- cov5r = img_conv_layer(
- name=conv5r,
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter5R,
- stride=1,
- padding=0)
- cov5 = img_conv_layer(
- name=conv5,
- input=cov5r,
- filter_size=5,
- num_filters=filter5,
- stride=1,
- padding=2)
-
- pool1 = img_pool_layer(
- name=maxpool,
- input=input,
- pool_size=3,
- num_channels=channels,
- stride=1,
- padding=1)
- covprj = img_conv_layer(
- name=convproj,
- input=pool1,
- filter_size=1,
- num_filters=proj,
- stride=1,
- padding=0)
-
- cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
- return cat
-
-def inception(name, input, channels, \
- filter1,
- filter3R, filter3,
- filter5R, filter5,
- proj):
-
- cov1 = conv_projection(
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter1,
- stride=1,
- padding=0)
-
- cov3r = img_conv_layer(
- name=name + '_3r',
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter3R,
- stride=1,
- padding=0)
- cov3 = conv_projection(
- input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
-
- cov5r = img_conv_layer(
- name=name + '_5r',
- input=input,
- filter_size=1,
- num_channels=channels,
- num_filters=filter5R,
- stride=1,
- padding=0)
- cov5 = conv_projection(
- input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
-
- pool1 = img_pool_layer(
- name=name + '_max',
- input=input,
- pool_size=3,
- num_channels=channels,
- stride=1,
- padding=1)
- covprj = conv_projection(
- input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
-
- cat = concat_layer(
- name=name,
- input=[cov1, cov3, cov5, covprj],
- bias_attr=True if use_gpu else False,
- act=ReluActivation())
- return cat
-
-
-data = data_layer(name="input", size=3 * height * width)
-
-# stage 1
-conv1 = img_conv_layer(
- name="conv1",
- input=data,
- filter_size=7,
- num_channels=3,
- num_filters=64,
- stride=2,
- padding=3)
-pool1 = img_pool_layer(
- name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
-
-# stage 2
-conv2_1 = img_conv_layer(
- name="conv2_1",
- input=pool1,
- filter_size=1,
- num_filters=64,
- stride=1,
- padding=0)
-conv2_2 = img_conv_layer(
- name="conv2_2",
- input=conv2_1,
- filter_size=3,
- num_filters=192,
- stride=1,
- padding=1)
-pool2 = img_pool_layer(
- name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
-
-# stage 3
-ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
-ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
-pool3 = img_pool_layer(
- name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
-
-# stage 4
-ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
-ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
-ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
-ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
-ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
-pool4 = img_pool_layer(
- name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
-
-# stage 5
-ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
-ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
-pool5 = img_pool_layer(
- name="pool5",
- input=ince5b,
- num_channels=1024,
- pool_size=7,
- stride=7,
- pool_type=AvgPooling())
-
-# We remove loss1 and loss2 for all system when testing benchmark
-# output 1
-# pool_o1 = img_pool_layer(name="pool_o1", input=ince4a, num_channels=512, pool_size=5, stride=3, pool_type=AvgPooling())
-# conv_o1 = img_conv_layer(name="conv_o1", input=pool_o1, filter_size=1, num_filters=128, stride=1, padding=0)
-# fc_o1 = fc_layer(name="fc_o1", input=conv_o1, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
-# out1 = fc_layer(name="output1", input=fc_o1, size=1000, act=SoftmaxActivation())
-# loss1 = cross_entropy(name='loss1', input=out1, label=lab, coeff=0.3)
-
-# output 2
-#pool_o2 = img_pool_layer(name="pool_o2", input=ince4d, num_channels=528, pool_size=5, stride=3, pool_type=AvgPooling())
-#conv_o2 = img_conv_layer(name="conv_o2", input=pool_o2, filter_size=1, num_filters=128, stride=1, padding=0)
-#fc_o2 = fc_layer(name="fc_o2", input=conv_o2, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
-#out2 = fc_layer(name="output2", input=fc_o2, size=1000, act=SoftmaxActivation())
-#loss2 = cross_entropy(name='loss2', input=out2, label=lab, coeff=0.3)
-
-# output 3
-dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
-out3 = fc_layer(
- name="output3", input=dropout, size=1000, act=SoftmaxActivation())
-
-if is_infer:
- outputs(out3)
-else:
- lab = data_layer(name="label", size=num_class)
- loss3 = cross_entropy(name='loss3', input=out3, label=lab)
- outputs(loss3)
diff --git a/benchmark/paddle/image/plotlog.py b/benchmark/paddle/image/plotlog.py
deleted file mode 100644
index 8679d4f272d1b7aaf8d5a397f07698a6b70e4fcd..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/plotlog.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import argparse
-import matplotlib.pyplot as plt
-
-
-def parse_args():
- parser = argparse.ArgumentParser('Parse Log')
- parser.add_argument(
- '--file_path', '-f', type=str, help='the path of the log file')
- parser.add_argument(
- '--sample_rate',
- '-s',
- type=float,
- default=1.0,
- help='the rate to take samples from log')
- parser.add_argument(
- '--log_period', '-p', type=int, default=1, help='the period of log')
-
- args = parser.parse_args()
- return args
-
-
-def parse_file(file_name):
- loss = []
- error = []
- with open(file_name) as f:
- for i, line in enumerate(f):
- line = line.strip()
- if not line.startswith('pass'):
- continue
- line_split = line.split(' ')
- if len(line_split) != 5:
- continue
-
- loss_str = line_split[2][:-1]
- cur_loss = float(loss_str.split('=')[-1])
- loss.append(cur_loss)
-
- err_str = line_split[3][:-1]
- cur_err = float(err_str.split('=')[-1])
- error.append(cur_err)
-
- accuracy = [1.0 - err for err in error]
-
- return loss, accuracy
-
-
-def sample(metric, sample_rate):
- interval = int(1.0 / sample_rate)
- if interval > len(metric):
- return metric[:1]
-
- num = len(metric) / interval
- idx = [interval * i for i in range(num)]
- metric_sample = [metric[id] for id in idx]
- return metric_sample
-
-
-def plot_metric(metric,
- batch_id,
- graph_title,
- line_style='b-',
- line_label='y',
- line_num=1):
- plt.figure()
- plt.title(graph_title)
- if line_num == 1:
- plt.plot(batch_id, metric, line_style, label=line_label)
- else:
- for i in range(line_num):
- plt.plot(batch_id, metric[i], line_style[i], label=line_label[i])
- plt.xlabel('batch')
- plt.ylabel(graph_title)
- plt.legend()
- plt.savefig(graph_title + '.jpg')
- plt.close()
-
-
-def main():
- args = parse_args()
-    assert args.sample_rate > 0. and args.sample_rate <= 1.0, "The sample rate should be in the range (0, 1]."
-
- loss, accuracy = parse_file(args.file_path)
- batch = [args.log_period * i for i in range(len(loss))]
-
- batch_sample = sample(batch, args.sample_rate)
- loss_sample = sample(loss, args.sample_rate)
- accuracy_sample = sample(accuracy, args.sample_rate)
-
- plot_metric(loss_sample, batch_sample, 'loss', line_label='loss')
- plot_metric(
- accuracy_sample,
- batch_sample,
- 'accuracy',
- line_style='g-',
- line_label='accuracy')
-
-
-if __name__ == '__main__':
- main()
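For reference, a hypothetical invocation of the deleted script, using the flags defined in its parse_args() (the log file name is illustrative):

```bash
# Plot loss/accuracy curves from a training log, sampling half of the
# points, with one log line per 10 batches.
python plotlog.py --file_path train.log --sample_rate 0.5 --log_period 10
```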
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
deleted file mode 100644
index 6ad817ccefab3e44a8f962e907ba2110a6ed4a45..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/provider.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import io, os
-import random
-import numpy as np
-from paddle.trainer.PyDataProvider2 import *
-
-
-def initHook(settings, height, width, color, num_class, **kwargs):
- settings.height = height
- settings.width = width
- settings.color = color
- settings.num_class = num_class
- if settings.color:
- settings.data_size = settings.height * settings.width * 3
- else:
- settings.data_size = settings.height * settings.width
- settings.is_infer = kwargs.get('is_infer', False)
- settings.num_samples = kwargs.get('num_samples', 2560)
- if settings.is_infer:
- settings.slots = [dense_vector(settings.data_size)]
- else:
- settings.slots = [dense_vector(settings.data_size), integer_value(1)]
-
-
-@provider(
- init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_list):
- for i in xrange(settings.num_samples):
- img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
- if settings.is_infer:
- yield img.astype('float32')
- else:
- lab = random.randint(0, settings.num_class - 1)
- yield img.astype('float32'), int(lab)
diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py
deleted file mode 100644
index 2846e4763f1cda4602f03af5ec649d57ee6cf0d8..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/resnet.py
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/usr/bin/env python
-from paddle.trainer_config_helpers import *
-
-height = 224
-width = 224
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 64)
-layer_num = get_config_arg("layer_num", int, 50)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
- 'height': height,
- 'width': width,
- 'color': True,
- 'num_class': num_class,
- 'is_infer': is_infer,
- 'num_samples': num_samples
-}
-define_py_data_sources2(
- "train.list" if not is_infer else None,
- "test.list" if is_infer else None,
- module="provider",
- obj="process",
- args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=0.01 / batch_size,
- learning_method=MomentumOptimizer(0.9),
- regularization=L2Regularization(0.0005 * batch_size))
-
-
-#######################Network Configuration #############
-def conv_bn_layer(name,
- input,
- filter_size,
- num_filters,
- stride,
- padding,
- channels=None,
- active_type=ReluActivation()):
- """
- A wrapper for conv layer with batch normalization layers.
- Note:
- conv layer has no activation.
- """
-
- tmp = img_conv_layer(
- name=name + "_conv",
- input=input,
- filter_size=filter_size,
- num_channels=channels,
- num_filters=num_filters,
- stride=stride,
- padding=padding,
- act=LinearActivation(),
- bias_attr=False)
- return batch_norm_layer(
- name=name + "_bn",
- input=tmp,
- act=active_type,
- use_global_stats=is_infer)
-
-
-def bottleneck_block(name, input, num_filters1, num_filters2):
- """
-    A wrapper for the bottleneck building block in ResNet.
-    The last conv_bn_layer has no activation.
-    The addto layer uses ReLU activation.
- """
- last_name = conv_bn_layer(
- name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=1,
- padding=0)
- last_name = conv_bn_layer(
- name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
- last_name = conv_bn_layer(
- name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(
- name=name + "_addto", input=[input, last_name], act=ReluActivation())
-
-
-def mid_projection(name, input, num_filters1, num_filters2, stride=2):
- """
-    A wrapper for the middle projection in ResNet.
-    Projection shortcuts are used for increasing dimensions,
-    while the other shortcuts are identity mappings.
-    branch1: a projection shortcut used for increasing
-    dimensions; it has no activation.
-    branch2x: a bottleneck building block whose shortcuts are identity mappings.
- """
- # stride = 2
- branch1 = conv_bn_layer(
- name=name + '_branch1',
- input=input,
- filter_size=1,
- num_filters=num_filters2,
- stride=stride,
- padding=0,
- active_type=LinearActivation())
-
- last_name = conv_bn_layer(
- name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=stride,
- padding=0)
- last_name = conv_bn_layer(
- name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
-
- last_name = conv_bn_layer(
- name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(
- name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
-
-
-img = data_layer(name='image', size=height * width * 3)
-
-
-def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
- """
-    A wrapper for the 50-, 101-, and 152-layer variants of ResNet.
- res2_num: number of blocks stacked in conv2_x
- res3_num: number of blocks stacked in conv3_x
- res4_num: number of blocks stacked in conv4_x
- res5_num: number of blocks stacked in conv5_x
- """
- # For ImageNet
- # conv1: 112x112
- tmp = conv_bn_layer(
- "conv1",
- input=img,
- filter_size=7,
- channels=3,
- num_filters=64,
- stride=2,
- padding=3)
- tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
-
- # conv2_x: 56x56
- tmp = mid_projection(
- name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
- for i in xrange(2, res2_num + 1, 1):
- tmp = bottleneck_block(
- name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
-
- # conv3_x: 28x28
- tmp = mid_projection(
- name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
- for i in xrange(2, res3_num + 1, 1):
- tmp = bottleneck_block(
- name="res3_" + str(i),
- input=tmp,
- num_filters1=128,
- num_filters2=512)
-
- # conv4_x: 14x14
- tmp = mid_projection(
- name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
- for i in xrange(2, res4_num + 1, 1):
- tmp = bottleneck_block(
- name="res4_" + str(i),
- input=tmp,
- num_filters1=256,
- num_filters2=1024)
-
- # conv5_x: 7x7
- tmp = mid_projection(
- name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
- for i in xrange(2, res5_num + 1, 1):
- tmp = bottleneck_block(
- name="res5_" + str(i),
- input=tmp,
- num_filters1=512,
- num_filters2=2048)
-
- tmp = img_pool_layer(
- name='avgpool',
- input=tmp,
- pool_size=7,
- stride=1,
- pool_type=AvgPooling())
-
- return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation())
-
-
-if layer_num == 50:
- resnet = deep_res_net(3, 4, 6, 3)
-elif layer_num == 101:
- resnet = deep_res_net(3, 4, 23, 3)
-elif layer_num == 152:
- resnet = deep_res_net(3, 8, 36, 3)
-else:
- print("Wrong layer number.")
-
-if is_infer:
- outputs(resnet)
-else:
- lbl = data_layer(name="label", size=num_class)
- loss = cross_entropy(name='loss', input=resnet, label=lbl)
- outputs(loss)
diff --git a/benchmark/paddle/image/run.sh b/benchmark/paddle/image/run.sh
deleted file mode 100755
index 5b58a8d773aab795e5439b0f0e5d81bec66b5f56..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/run.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
- cfg=$1
- thread=$2
- bz=$3
- args="batch_size=$3"
- prefix=$4
- paddle train --job=time \
- --config=$cfg \
- --use_gpu=True \
- --trainer_count=$thread \
- --log_period=10 \
- --test_period=100 \
- --config_args=$args \
- > logs/$prefix-${thread}gpu-$bz.log 2>&1
-}
-
-if [ ! -f "train.list" ]; then
- echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-
-#========single-gpu=========#
-# alexnet
-train alexnet.py 1 64 alexnet
-train alexnet.py 1 128 alexnet
-train alexnet.py 1 256 alexnet
-train alexnet.py 1 512 alexnet
-
-# googlenet
-train googlenet.py 1 64 googlenet
-train googlenet.py 1 128 googlenet
-train googlenet.py 1 256 googlenet
-
-# smallnet
-train smallnet_mnist_cifar.py 1 64 smallnet
-train smallnet_mnist_cifar.py 1 128 smallnet
-train smallnet_mnist_cifar.py 1 256 smallnet
-train smallnet_mnist_cifar.py 1 512 smallnet
-
-
-############################
-#========multi-gpus=========#
-train alexnet.py 4 512 alexnet
-train alexnet.py 4 1024 alexnet
-
-train googlenet.py 4 512 googlenet
-train googlenet.py 4 1024 googlenet
diff --git a/benchmark/paddle/image/run_mkl_infer.sh b/benchmark/paddle/image/run_mkl_infer.sh
deleted file mode 100755
index 0fad5e04cc992a3ec97591d3833957bb7517a8f3..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/run_mkl_infer.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function clock_to_seconds() {
- hours=`echo $1 | awk -F ':' '{print $1}'`
- mins=`echo $1 | awk -F ':' '{print $2}'`
- secs=`echo $1 | awk -F ':' '{print $3}'`
- echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
-}
-
-function infer() {
- unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
- topology=$1
- layer_num=$2
- bs=$3
- use_mkldnn=$4
- if [ $4 == "True" ]; then
- thread=1
- log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log"
- elif [ $4 == "False" ]; then
- thread=`nproc`
- if [ $thread -gt $bs ]; then
- thread=$bs
- fi
- log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log"
- else
- echo "Wrong input $4, use True or False."
- exit 0
- fi
-
- models_in="models/${topology}-${layer_num}/pass-00000/"
- if [ ! -d $models_in ]; then
- echo "Training model ${topology}_${layer_num}"
- paddle train --job=train \
- --config="${topology}.py" \
- --use_mkldnn=True \
- --use_gpu=False \
- --trainer_count=1 \
- --num_passes=1 \
- --save_dir="models/${topology}-${layer_num}" \
- --config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \
- > /dev/null 2>&1
- echo "Done"
- fi
- log_period=$((256 / bs))
- paddle train --job=test \
- --config="${topology}.py" \
- --use_mkldnn=$use_mkldnn \
- --use_gpu=False \
- --trainer_count=$thread \
- --log_period=$log_period \
- --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
- --init_model_path=$models_in \
- 2>&1 | tee ${log}
-
-    # Calculate the elapsed time of the last 5 log periods (1280 samples);
-    # the time before that is warm-up.
- start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
- end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
- start_sec=`clock_to_seconds $start`
- end_sec=`clock_to_seconds $end`
- fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
- echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
- echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
- echo " " > train.list
-fi
-if [ ! -f "test.list" ]; then
- echo " " > test.list
-fi
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-if [ ! -d "models" ]; then
- mkdir -p models
-fi
-
-# inference benchmark
-for use_mkldnn in True False; do
- for batchsize in 1 2 4 8 16; do
- infer vgg 19 $batchsize $use_mkldnn
- infer resnet 50 $batchsize $use_mkldnn
- infer googlenet v1 $batchsize $use_mkldnn
- infer alexnet 2 $batchsize $use_mkldnn
- done
-done
diff --git a/benchmark/paddle/image/run_mkl_train.sh b/benchmark/paddle/image/run_mkl_train.sh
deleted file mode 100755
index 1583bf134a276a08aa2f8e84dc63adbb205a83d6..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/run_mkl_train.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
- unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
- topology=$1
- layer_num=$2
- bs=$3
- use_mkldnn=$4
- if [ $4 == "True" ]; then
- thread=1
- log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log"
- elif [ $4 == "False" ]; then
- thread=`nproc`
-        # with trainer_count=nproc, each trainer uses only 1 core to avoid conflicts
- log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log"
- else
- echo "Wrong input $4, use True or False."
- exit 0
- fi
- args="batch_size=${bs},layer_num=${layer_num}"
- config="${topology}.py"
- paddle train --job=time \
- --config=$config \
- --use_mkldnn=$use_mkldnn \
- --use_gpu=False \
- --trainer_count=$thread \
- --log_period=10 \
- --test_period=100 \
- --config_args=$args \
- 2>&1 | tee ${log}
-
- avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
- fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
- echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
- echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-
-# training benchmark
-for use_mkldnn in True False; do
- for batchsize in 64 128 256; do
- train vgg 19 $batchsize $use_mkldnn
- train resnet 50 $batchsize $use_mkldnn
- train googlenet v1 $batchsize $use_mkldnn
- train alexnet 2 $batchsize $use_mkldnn
- done
-done
diff --git a/benchmark/paddle/image/run_openblas_infer.sh b/benchmark/paddle/image/run_openblas_infer.sh
deleted file mode 100755
index 987381cabc2e793886099212660723c122b73bb0..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/run_openblas_infer.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function clock_to_seconds() {
- hours=`echo $1 | awk -F ':' '{print $1}'`
- mins=`echo $1 | awk -F ':' '{print $2}'`
- secs=`echo $1 | awk -F ':' '{print $3}'`
- echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
-}
-
-function infer() {
- export OPENBLAS_MAIN_FREE=1
- topology=$1
- layer_num=$2
- bs=$3
- trainers=`nproc`
- if [ $trainers -gt $bs ]; then
- trainers=$bs
- fi
- log="logs/infer-${topology}-${layer_num}-${trainers}openblas-${bs}.log"
- threads=$((`nproc` / trainers))
- if [ $threads -eq 0 ]; then
- threads=1
- fi
- export OPENBLAS_NUM_THREADS=$threads
-
- models_in="models/${topology}-${layer_num}/pass-00000/"
- if [ ! -d $models_in ]; then
- echo "./run_mkl_infer.sh to save the model first"
- exit 0
- fi
- log_period=$((32 / bs))
- paddle train --job=test \
- --config="${topology}.py" \
- --use_mkldnn=False \
- --use_gpu=False \
- --trainer_count=$trainers \
- --log_period=$log_period \
- --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \
- --init_model_path=$models_in \
- 2>&1 | tee ${log}
-
-    # Calculate the elapsed time of the last 5 log periods (160 = 32*5 samples);
-    # the time before that is warm-up.
- start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
- end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
- start_sec=`clock_to_seconds $start`
- end_sec=`clock_to_seconds $end`
- fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'`
- echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
- echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
- echo " " > train.list
-fi
-if [ ! -f "test.list" ]; then
- echo " " > test.list
-fi
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-
-# inference benchmark
-for batchsize in 1 2 4 8 16; do
- infer vgg 19 $batchsize
- infer resnet 50 $batchsize
- infer googlenet v1 $batchsize
- infer alexnet 2 $batchsize
-done
diff --git a/benchmark/paddle/image/run_openblas_train.sh b/benchmark/paddle/image/run_openblas_train.sh
deleted file mode 100755
index cc64e1d09da02087b1737190a0b75dc7758600a6..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/run_openblas_train.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
- export OPENBLAS_NUM_THREADS=1
- topology=$1
- layer_num=$2
- bs=$3
- thread=`nproc`
-    # with trainer_count=nproc, each trainer uses only 1 core to avoid conflicts
- log="logs/train-${topology}-${layer_num}-${thread}openblas-${bs}.log"
- args="batch_size=${bs},layer_num=${layer_num}"
- config="${topology}.py"
- paddle train --job=time \
- --config=$config \
- --use_mkldnn=False \
- --use_gpu=False \
- --trainer_count=$thread \
- --log_period=3 \
- --test_period=30 \
- --config_args=$args \
- 2>&1 | tee ${log}
-
- avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
- fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
- echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
- echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-
-# training benchmark
-for batchsize in 64 128 256; do
- train vgg 19 $batchsize
- train resnet 50 $batchsize
- train googlenet v1 $batchsize
- train alexnet 2 $batchsize
-done
diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py
deleted file mode 100644
index 58879c454f37991405d83bbb593bb5d1e977ff53..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/smallnet_mnist_cifar.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-
-from paddle.trainer_config_helpers import *
-
-height = 32
-width = 32
-num_class = 10
-
-batch_size = get_config_arg('batch_size', int, 128)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
-define_py_data_sources2(
- "train.list", None, module="provider", obj="process", args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=0.01 / batch_size,
- learning_method=MomentumOptimizer(0.9),
- regularization=L2Regularization(0.0005 * batch_size))
-
-# conv1
-net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(
- input=net,
- filter_size=5,
- num_channels=3,
- num_filters=32,
- stride=1,
- padding=2)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
-
-# conv2
-net = img_conv_layer(
- input=net, filter_size=5, num_filters=32, stride=1, padding=2)
-net = img_pool_layer(
- input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
-
-# conv3
-net = img_conv_layer(
- input=net, filter_size=3, num_filters=64, stride=1, padding=1)
-net = img_pool_layer(
- input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
-
-net = fc_layer(input=net, size=64, act=ReluActivation())
-net = fc_layer(input=net, size=10, act=SoftmaxActivation())
-
-lab = data_layer('label', num_class)
-loss = classification_cost(input=net, label=lab)
-outputs(loss)
diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py
deleted file mode 100644
index ca0a6798fb8c35b68cf84d263855955eb93ba0b0..0000000000000000000000000000000000000000
--- a/benchmark/paddle/image/vgg.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python
-from paddle.trainer_config_helpers import *
-
-height = 224
-width = 224
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 64)
-layer_num = get_config_arg('layer_num', int, 19)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
- 'height': height,
- 'width': width,
- 'color': True,
- 'num_class': num_class,
- 'is_infer': is_infer,
- 'num_samples': num_samples
-}
-define_py_data_sources2(
- "train.list" if not is_infer else None,
- "test.list" if is_infer else None,
- module="provider",
- obj="process",
- args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=0.001 / batch_size,
- learning_method=MomentumOptimizer(0.9),
- regularization=L2Regularization(0.0005 * batch_size))
-
-img = data_layer(name='image', size=height * width * 3)
-
-
-def vgg_network(vgg_num=3):
- tmp = img_conv_group(
- input=img,
- num_channels=3,
- conv_padding=1,
- conv_num_filter=[64, 64],
- conv_filter_size=3,
- conv_act=ReluActivation(),
- pool_size=2,
- pool_stride=2,
- pool_type=MaxPooling())
-
- tmp = img_conv_group(
- input=tmp,
- conv_num_filter=[128, 128],
- conv_padding=1,
- conv_filter_size=3,
- conv_act=ReluActivation(),
- pool_stride=2,
- pool_type=MaxPooling(),
- pool_size=2)
-
- channels = []
- for i in range(vgg_num):
- channels.append(256)
- tmp = img_conv_group(
- input=tmp,
- conv_num_filter=channels,
- conv_padding=1,
- conv_filter_size=3,
- conv_act=ReluActivation(),
- pool_stride=2,
- pool_type=MaxPooling(),
- pool_size=2)
- channels = []
- for i in range(vgg_num):
- channels.append(512)
- tmp = img_conv_group(
- input=tmp,
- conv_num_filter=channels,
- conv_padding=1,
- conv_filter_size=3,
- conv_act=ReluActivation(),
- pool_stride=2,
- pool_type=MaxPooling(),
- pool_size=2)
- tmp = img_conv_group(
- input=tmp,
- conv_num_filter=channels,
- conv_padding=1,
- conv_filter_size=3,
- conv_act=ReluActivation(),
- pool_stride=2,
- pool_type=MaxPooling(),
- pool_size=2)
-
- tmp = fc_layer(
- input=tmp,
- size=4096,
- act=ReluActivation(),
- layer_attr=ExtraAttr(drop_rate=0.5))
-
- tmp = fc_layer(
- input=tmp,
- size=4096,
- act=ReluActivation(),
- layer_attr=ExtraAttr(drop_rate=0.5))
-
- return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation())
-
-
-if layer_num == 16:
- vgg = vgg_network(3)
-elif layer_num == 19:
- vgg = vgg_network(4)
-else:
- print("Wrong layer number.")
-
-if is_infer:
- outputs(vgg)
-else:
- lab = data_layer('label', num_class)
- loss = cross_entropy(input=vgg, label=lab)
- outputs(loss)
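For the record, the deleted image configs were driven by the run scripts removed above; a single `train vgg 19 64 True` step from run_mkl_train.sh expanded to roughly the following command (a sketch reconstructed from that script's flags; batch_size and layer_num are read via get_config_arg() inside the config):

```bash
# Hypothetical direct invocation of the deleted vgg.py config, mirroring
# the flags assembled by run_mkl_train.sh.
paddle train --job=time \
  --config=vgg.py \
  --use_mkldnn=True \
  --use_gpu=False \
  --trainer_count=1 \
  --log_period=10 \
  --config_args="batch_size=64,layer_num=19"
```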
diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py
deleted file mode 100755
index 2a67f9b0cf52484d9d44fe9db0b1e57cdd20fd43..0000000000000000000000000000000000000000
--- a/benchmark/paddle/rnn/imdb.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-import six.moves.cPickle as pickle
-import gzip
-import os
-import numpy
-
-
-def get_dataset_file(dataset, default_dataset, origin):
- data_dir, data_file = os.path.split(dataset)
- if (not os.path.isfile(dataset)) and data_file == default_dataset:
- from six.moves import urllib
- print('Downloading data from %s' % origin)
- urllib.request.urlretrieve(origin, dataset)
-
- return dataset
-
-
-def create_data(path="imdb.pkl"):
-
-    if not os.path.isfile('imdb.train.pkl'):
- path = get_dataset_file(
- path, "imdb.pkl",
- "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
-
- if path.endswith(".gz"):
- f = gzip.open(path, 'rb')
- else:
- f = open(path, 'rb')
-
- train_set = pickle.load(f)
- test_set = pickle.load(f)
- f.close()
-
- pickle.dump(train_set, open('imdb.train.pkl', 'wb'))
- pickle.dump(test_set, open('imdb.test.pkl', 'wb'))
-
-    if not os.path.isfile('train.list'):
-        with open('train.list', 'w') as list_file:
-            list_file.write('imdb.train.pkl\n')
-
-
-def main():
- create_data('imdb.pkl')
-
-
-if __name__ == "__main__":
- main()
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
deleted file mode 100644
index 23cc0c44a98d0ae7f586d1a376a603198f2c6144..0000000000000000000000000000000000000000
--- a/benchmark/paddle/rnn/provider.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import io, os
-import random
-import numpy as np
-import six.moves.cPickle as pickle
-from paddle.trainer.PyDataProvider2 import *
-
-
-def remove_unk(x, n_words):
- return [[1 if w >= n_words else w for w in sen] for sen in x]
-
-
-# ==============================================================
-# TensorFlow uses fixed-length sequences, while PaddlePaddle can
-# process variable-length ones. Padding is used in the benchmark
-# so that results are comparable across platforms.
-# ==============================================================
-def pad_sequences(sequences,
- maxlen=None,
- dtype='int32',
- padding='post',
- truncating='post',
- value=0.):
- lengths = [len(s) for s in sequences]
-
- nb_samples = len(sequences)
- if maxlen is None:
- maxlen = np.max(lengths)
-
- x = (np.ones((nb_samples, maxlen)) * value).astype(dtype)
- for idx, s in enumerate(sequences):
- if len(s) == 0:
- continue # empty list was found
- if truncating == 'pre':
- trunc = s[-maxlen:]
- elif truncating == 'post':
- trunc = s[:maxlen]
- else:
-            raise ValueError("Truncating type '%s' not understood" % truncating)
-
- if padding == 'post':
- x[idx, :len(trunc)] = trunc
- elif padding == 'pre':
- x[idx, -len(trunc):] = trunc
- else:
- raise ValueError("Padding type '%s' not understood" % padding)
- return x
-
-
-def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
- settings.vocab_size = vocab_size
- settings.pad_seq = pad_seq
- settings.maxlen = maxlen
- settings.input_types = [
- integer_value_sequence(vocab_size), integer_value(2)
- ]
-
-
-@provider(
- init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file):
- f = open(file, 'rb')
- train_set = pickle.load(f)
- f.close()
- x, y = train_set
-
-    # map out-of-vocabulary words (id >= vocab size) to the <unk> id
- x = remove_unk(x, settings.vocab_size)
- if settings.pad_seq:
- x = pad_sequences(x, maxlen=settings.maxlen, value=0.)
-
- for i in range(len(y)):
-        yield list(map(int, x[i])), int(y[i])
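
A minimal sketch of the pad_sequences semantics above, on hypothetical toy data:

    seqs = [[3, 7], [5, 1, 4, 2]]
    x = pad_sequences(seqs, maxlen=3, padding='post', truncating='post')
    # x -> [[3, 7, 0],    short sequences are padded with `value` at the end
    #       [5, 1, 4]]    long sequences are truncated to `maxlen`

With padding='pre' the fill values would precede the data instead.
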
diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py
deleted file mode 100755
index 83eb3e565473f7e7e91cddeaa3cd2aafb7e3df2c..0000000000000000000000000000000000000000
--- a/benchmark/paddle/rnn/rnn.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-
-from paddle.trainer_config_helpers import *
-import imdb
-
-num_class = 2
-vocab_size = 30000
-fixedlen = 100
-batch_size = get_config_arg('batch_size', int, 128)
-lstm_num = get_config_arg('lstm_num', int, 1)
-hidden_size = get_config_arg('hidden_size', int, 128)
-# whether to pad sequences to a fixed length
-pad_seq = get_config_arg('pad_seq', bool, True)
-imdb.create_data('imdb.pkl')
-
-args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
-define_py_data_sources2(
- "train.list", None, module="provider", obj="process", args=args)
-
-settings(
- batch_size=batch_size,
- learning_rate=2e-3,
- learning_method=AdamOptimizer(),
- regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25)
-
-net = data_layer('data', size=vocab_size)
-net = embedding_layer(input=net, size=128)
-
-for i in range(lstm_num):
- net = simple_lstm(input=net, size=hidden_size)
-
-net = last_seq(input=net)
-net = fc_layer(input=net, size=2, act=SoftmaxActivation())
-
-lab = data_layer('label', num_class)
-loss = classification_cost(input=net, label=lab)
-outputs(loss)
diff --git a/benchmark/paddle/rnn/run.sh b/benchmark/paddle/rnn/run.sh
deleted file mode 100755
index f99a562b3f88a98560f4bf7aee98ceee9daefe67..0000000000000000000000000000000000000000
--- a/benchmark/paddle/rnn/run.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
- cfg=$1
- thread=$2
-  args="lstm_num=${3},pad_seq=${4},hidden_size=${5},batch_size=${6}"
- paddle train --job=time \
- --config=$cfg \
- --use_gpu=1 \
- --trainer_count=$thread \
- --log_period=10 \
- --test_period=100 \
- --num_passes=1 \
- --feed_data=1 \
- --config_args=$args \
- >logs/rnn-pad${4}-${thread}gpu-lstm${3}-batch${6}-hid${5}.log 2>&1
-}
-
-if [ ! -d "logs" ]; then
- mkdir logs
-fi
-
-## padding, single gpu
-#-----config--gpu--lstm_num--padding--hidden_size--batch_size
-## lstm_num=2, batch_size=64
-train rnn.py 1 2 1 256 64
-train rnn.py 1 2 1 512 64
-train rnn.py 1 2 1 1280 64
-
-## lstm_num=2, batch_size=128
-train rnn.py 1 2 1 256 128
-train rnn.py 1 2 1 512 128
-train rnn.py 1 2 1 1280 128
-
-## lstm_num=2, batch_size=256
-train rnn.py 1 2 1 256 256
-train rnn.py 1 2 1 512 256
-train rnn.py 1 2 1 1280 256
-
-
-#==================multi gpus=====================#
-# hidden_size=256, lstm_num=2, different batch size
-train rnn.py 4 2 1 256 128
-train rnn.py 4 2 1 256 256
-train rnn.py 4 2 1 256 512
-
-# hidden_size=512, lstm_num=2, different batch size
-train rnn.py 4 2 1 512 128
-train rnn.py 4 2 1 512 256
-train rnn.py 4 2 1 512 512
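
To make the positional mapping in run.sh concrete: a call such as `train rnn.py 1 2 1 256 64` expands to `paddle train --job=time --config=rnn.py --use_gpu=1 --trainer_count=1 --config_args=lstm_num=2,pad_seq=1,hidden_size=256,batch_size=64 ...`, logging to logs/rnn-pad1-1gpu-lstm2-batch64-hid256.log.
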
diff --git a/benchmark/tensorflow/machine_translation.py b/benchmark/tensorflow/machine_translation.py
index 8f77dce98353af53803246be8dc61063836b7867..7837669edc7a206c03e5b9fa2989bf45b35f0605 100644
--- a/benchmark/tensorflow/machine_translation.py
+++ b/benchmark/tensorflow/machine_translation.py
@@ -35,8 +35,6 @@ import os
import argparse
import time
-import paddle.v2 as paddle
-
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--embedding_dim",
diff --git a/benchmark/tensorflow/mnist.py b/benchmark/tensorflow/mnist.py
index 7140eed6eaff49b5c65f9ccb2e38f113a4cdbdbf..03d533fecfededddd3956ba83ea600456782cfc9 100644
--- a/benchmark/tensorflow/mnist.py
+++ b/benchmark/tensorflow/mnist.py
@@ -21,7 +21,6 @@ import time
import numpy as np
import tensorflow as tf
-import paddle.v2 as paddle
DTYPE = tf.float32
diff --git a/benchmark/tensorflow/resnet.py b/benchmark/tensorflow/resnet.py
index c432fa8d59571e128b9ff9e3ffa1949b792ef3a4..fdb044195766b847e16a0cc33424a999c1d9166e 100644
--- a/benchmark/tensorflow/resnet.py
+++ b/benchmark/tensorflow/resnet.py
@@ -27,7 +27,6 @@ import argparse
import time
import numpy as np
-import paddle.v2 as paddle
import tensorflow as tf
DTYPE = tf.float32
diff --git a/benchmark/tensorflow/stacked_dynamic_lstm.py b/benchmark/tensorflow/stacked_dynamic_lstm.py
index 5285033005044d907d0b7e91eb66ee7281c4f27a..1f532dc2fa082ea0f6b1da560e1a57b96d2ef1bb 100644
--- a/benchmark/tensorflow/stacked_dynamic_lstm.py
+++ b/benchmark/tensorflow/stacked_dynamic_lstm.py
@@ -21,8 +21,6 @@ import argparse
import time
import tensorflow as tf
-import paddle.v2 as paddle
-
def parse_args():
parser = argparse.ArgumentParser("LSTM model benchmark.")
diff --git a/benchmark/tensorflow/vgg.py b/benchmark/tensorflow/vgg.py
index fba5ec71a46b3ac8b2e1244424c39fd5192e5458..d32c835bd7a7dafaafe0970fb6b422db3c866370 100644
--- a/benchmark/tensorflow/vgg.py
+++ b/benchmark/tensorflow/vgg.py
@@ -13,7 +13,6 @@
# limitations under the License.
"""VGG16 benchmark in TensorFlow"""
import tensorflow as tf
-import paddle.v2 as paddle
import numpy as np
import argparse
import time
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index b0f54bf49aafb65f1a92fa95877de2cc61fc67d3..93d74bb0a8f726ad31685cbfc7831b5441cd5108 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -20,31 +20,10 @@ if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
-if(WITH_DOUBLE)
- add_definitions(-DPADDLE_TYPE_DOUBLE)
-endif(WITH_DOUBLE)
-
-if(WITH_ARM_FP16)
- add_definitions(-DPADDLE_ARM_FP16)
- add_definitions("-march=armv8.2-a+fp16+simd")
-endif(WITH_ARM_FP16)
-
if(WITH_TESTING)
add_definitions(-DPADDLE_WITH_TESTING)
endif(WITH_TESTING)
-if(NOT WITH_TIMER)
- add_definitions(-DPADDLE_DISABLE_TIMER)
-endif(NOT WITH_TIMER)
-
-if(USE_EIGEN_FOR_BLAS)
- add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
-endif(USE_EIGEN_FOR_BLAS)
-
-if(EIGEN_USE_THREADS)
- add_definitions(-DEIGEN_USE_THREADS)
-endif(EIGEN_USE_THREADS)
-
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
@@ -78,10 +57,6 @@ if(WIN32)
endif(NOT MSVC)
endif(WIN32)
-if(NOT WITH_GOLANG)
- add_definitions(-DPADDLE_WITHOUT_GOLANG)
-endif(NOT WITH_GOLANG)
-
if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()
@@ -171,55 +146,6 @@ if(WITH_DISTRIBUTE)
add_definitions(-DPADDLE_WITH_DISTRIBUTE)
endif()
-if(WITH_GOLANG)
-    # We need to symlink the Paddle directory into GOPATH. If we don't,
-    # and we have code that depends on Paddle, `go get ./...` will
-    # download a fresh Paddle repo from GitHub, without the changes in
-    # our current Paddle repo that we want to build.
- set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
- file(MAKE_DIRECTORY ${GOPATH})
- set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle")
- file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}")
- set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go")
-
- add_custom_target(go_path)
- add_custom_command(TARGET go_path
- # Symlink Paddle directory into GOPATH
- COMMAND mkdir -p ${PADDLE_IN_GOPATH}
- COMMAND rm -rf ${PADDLE_IN_GOPATH}
- COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH}
- # Automatically get all dependencies specified in the source code
-        # We can't run `go get -d ./...` for every target, because multiple
-        # `go get` invocations cannot run concurrently, while make needs to
-        # be able to run with multiple jobs.
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- )
-
- if (GLIDE_INSTALL)
- if(EXISTS $ENV{GOPATH}/bin/glide)
- set(GLIDE "$ENV{GOPATH}/bin/glide")
- else()
-      message(FATAL_ERROR "no glide executable found: $ENV{GOPATH}/bin/glide")
- endif()
-
-    # This command will only run when the file it depends on is missing
-    # or has changed, or when the output is missing.
- add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide
- COMMAND env GOPATH=${GOPATH} ${GLIDE} install
- COMMAND touch ${CMAKE_BINARY_DIR}/glide
- DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock
- WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go"
- )
-
-    # Depends on the custom command which outputs ${CMAKE_BINARY_DIR}/glide;
-    # the custom command does not need to run every time this target is built.
- add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path)
- endif()
-
-endif(WITH_GOLANG)
-
if(WITH_GRPC)
add_definitions(-DPADDLE_WITH_GRPC)
endif(WITH_GRPC)
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index ef4192ecc98ea6de0c81c1f33320528d547b818a..735846db1db04e3884d72ec62d911d9a0efec147 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -168,10 +168,7 @@ elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x
endif()
include_directories(${CUDA_INCLUDE_DIRS})
-list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
- # TODO(panyx0718): CUPTI only allows DSO?
- list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
if(WIN32)
set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(WIN32)
diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake
index 06fc6061bc98eec8c4c71860333f7d3456952aeb..77f4b34537577c7c5782675e7af19c73bc9f8e32 100644
--- a/cmake/external/anakin.cmake
+++ b/cmake/external/anakin.cmake
@@ -74,5 +74,3 @@ add_dependencies(anakin_shared extern_anakin)
add_library(anakin_saber SHARED IMPORTED GLOBAL)
set_property(TARGET anakin_saber PROPERTY IMPORTED_LOCATION ${ANAKIN_SABER_LIB})
add_dependencies(anakin_saber extern_anakin)
-
-list(APPEND external_project_dependencies anakin_shared anakin_saber)
diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake
deleted file mode 100644
index 85cce80b70a1fcf57015ac7a264e4950616b2717..0000000000000000000000000000000000000000
--- a/cmake/external/any.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-INCLUDE(ExternalProject)
-
-SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
-
-INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any)
-
-ExternalProject_Add(
- extern_lib_any
- ${EXTERNAL_PROJECT_LOG_ARGS}
- GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git"
- GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d"
- PREFIX ${ANY_SOURCE_DIR}
- UPDATE_COMMAND ""
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND ""
- TEST_COMMAND ""
-)
-
-if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
- set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c)
- file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
- add_library(lib_any STATIC ${dummyfile})
-else()
- add_library(lib_any INTERFACE)
-endif()
-
-add_dependencies(lib_any extern_lib_any)
-
-add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
-LIST(APPEND external_project_dependencies lib_any)
diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 12412a51a0fd1aaa9702bd4547fb935d94012ada..fc204dc9193bb28b654936048dd61a9b461abb2f 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -57,5 +57,4 @@ else()
endif()
add_dependencies(boost ${BOOST_PROJECT})
-list(APPEND external_project_dependencies boost)
set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 6b50cff7a66a33d9413627bfbc663cca06ba86f3..989d1dbd4cf593e779b94f7bb5eda613f000859c 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -69,5 +69,3 @@ SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES})
ADD_DEPENDENCIES(brpc extern_brpc)
add_definitions(-DBRPC_WITH_GLOG)
-
-LIST(APPEND external_project_dependencies brpc)
diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake
index f06728de91e4509be661e56baef641d591928b66..41ad8207743201fbddd1d678fc5122afe68207ae 100644
--- a/cmake/external/cub.cmake
+++ b/cmake/external/cub.cmake
@@ -31,5 +31,3 @@ else()
endif()
add_dependencies(cub extern_cub)
-
-LIST(APPEND external_project_dependencies cub)
diff --git a/cmake/external/dlpack.cmake b/cmake/external/dlpack.cmake
index 4587475d7902a134eecd54bf8241fb96d175d0ba..63dd16b28e40a0c2d5310bec011c721285049952 100644
--- a/cmake/external/dlpack.cmake
+++ b/cmake/external/dlpack.cmake
@@ -27,5 +27,3 @@ else()
endif()
add_dependencies(dlpack extern_dlpack)
-
-LIST(APPEND external_project_dependencies dlpack)
diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake
index 6aef97f21244efd09e22781f703553a19a9e1860..72441160f89d2c188d35fc6b08b5f0b6d746a1ad 100644
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@@ -52,5 +52,3 @@ else()
endif()
add_dependencies(eigen3 extern_eigen3)
-
-LIST(APPEND external_project_dependencies eigen3)
diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake
index f3ca74faea3629ddce053c49ef1e629f230fdc49..911920ed6212b87aa25ba9a1faf7696fbcb22587 100644
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -61,8 +61,6 @@ ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
-LIST(APPEND external_project_dependencies gflags)
-
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
include(CheckIncludeFileCXX)
diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index d3a4d69d3a05515fdf72074083470e19b4ec255c..7fa17ce6b7b106c47c486729d0136748c73176a7 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -72,5 +72,3 @@ ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
ADD_DEPENDENCIES(glog extern_glog gflags)
LINK_LIBRARIES(glog gflags)
-
-LIST(APPEND external_project_dependencies glog)
diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake
index 9be625b620287cd4c644ae6908000fd5eec5d5c7..e459526583bd5ee3c89807657f3c30376e57d971 100644
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@@ -79,5 +79,4 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
ADD_DEPENDENCIES(gtest_main extern_gtest)
- LIST(APPEND external_project_dependencies gtest gtest_main)
ENDIF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
diff --git a/cmake/external/leveldb.cmake b/cmake/external/leveldb.cmake
index 0df61b01ab64c8b751bdc3893dd5294ad39ab928..ac0febd076e659927a6a882ff487c61ac130437a 100644
--- a/cmake/external/leveldb.cmake
+++ b/cmake/external/leveldb.cmake
@@ -39,6 +39,3 @@ ADD_DEPENDENCIES(extern_leveldb snappy)
ADD_LIBRARY(leveldb STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET leveldb PROPERTY IMPORTED_LOCATION ${LEVELDB_LIBRARIES})
ADD_DEPENDENCIES(leveldb extern_leveldb)
-
-LIST(APPEND external_project_dependencies leveldb)
-
diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake
index 27cff8cfb6315c9b4fa5677ad9062bee73a0e5d8..b944f2945b7874ca76bf1a19e0a363f564851a62 100644
--- a/cmake/external/libmct.cmake
+++ b/cmake/external/libmct.cmake
@@ -72,7 +72,4 @@ else()
add_library(libmct INTERFACE)
endif()
-#ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL)
ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT})
-LIST(APPEND external_project_dependencies libmct)
-
diff --git a/cmake/external/libxsmm.cmake b/cmake/external/libxsmm.cmake
index 39f49d210a20d49a06c120361ecf0a5d07d1af28..69cdba7c5921f14a87172d95791332e364045b26 100644
--- a/cmake/external/libxsmm.cmake
+++ b/cmake/external/libxsmm.cmake
@@ -53,5 +53,3 @@ MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}")
include_directories(${LIBXSMM_INCLUDE_DIR})
ADD_DEFINITIONS(-DPADDLE_WITH_LIBXSMM)
ADD_DEPENDENCIES(libxsmm extern_libxsmm)
-LIST(APPEND external_project_dependencies libxsmm)
-
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 92fe76d05c7507c295b784bc37870abfc31a0a29..94a266c50114a94d125467d55a6367a6999e3298 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -89,7 +89,6 @@ SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
add_definitions(-DPADDLE_WITH_MKLDNN)
-LIST(APPEND external_project_dependencies shared_mkldnn)
# generate a static dummy target to track mkldnn dependencies
# for cc_library(xxx SRCS xxx.c DEPS mkldnn)
diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake
index 2caff27357687018f29c1efc55b7b82c9dc3ccf6..54826cedb871690a82b535ae3ed102600277c622 100644
--- a/cmake/external/mklml.cmake
+++ b/cmake/external/mklml.cmake
@@ -73,4 +73,3 @@ INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
-LIST(APPEND external_project_dependencies mklml)
diff --git a/cmake/external/ngraph.cmake b/cmake/external/ngraph.cmake
index 14af98b2d74d4aa955aac27727e05567788a84c9..5812a61f0ddc3a3233ff212710fc1b16aa140724 100644
--- a/cmake/external/ngraph.cmake
+++ b/cmake/external/ngraph.cmake
@@ -77,4 +77,3 @@ add_dependencies(ngraph ${NGRAPH_PROJECT})
target_compile_definitions(ngraph INTERFACE -DPADDLE_WITH_NGRAPH)
target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR})
target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB})
-LIST(APPEND external_project_dependencies ngraph)
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index b347a592929836a473ac764c0af1153b07d54258..d8a4a0be6f5aaa3a1a4977bbc68348743f2fa742 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -11,11 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
-IF(USE_EIGEN_FOR_BLAS)
- return()
-ENDIF(USE_EIGEN_FOR_BLAS)
-
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
@@ -91,7 +86,6 @@ ENDIF()
IF(NOT ${CBLAS_FOUND})
ADD_DEPENDENCIES(cblas extern_openblas)
- LIST(APPEND external_project_dependencies cblas)
ELSE()
IF("${CBLAS_PROVIDER}" STREQUAL "MKLML")
ADD_DEPENDENCIES(cblas mklml)
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index e05b7694ddf1e1652b00f156cde1a2d433c9fc46..bc7fe5454f5883108e43b4ca47920995dc13a1ff 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -129,7 +129,6 @@ macro(PROMPT_PROTOBUF_LIB)
ADD_DEPENDENCIES(protoc ${dep})
ENDFOREACH()
- LIST(APPEND external_project_dependencies protobuf)
RETURN()
endmacro()
macro(SET_PROTOBUF_VERSION)
@@ -231,7 +230,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
)
ENDFUNCTION()
-SET(PROTOBUF_VERSION 3.1)
+SET(PROTOBUF_VERSION 3.1.0)
IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE)
diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake
index b4ea268e5a48e29d00b0ec8b957b61a42553ec7e..0287e5cf2a835ed65c5fc26ff69d2653d5db217e 100644
--- a/cmake/external/pslib.cmake
+++ b/cmake/external/pslib.cmake
@@ -70,4 +70,3 @@ ExternalProject_Add(
ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT})
-LIST(APPEND external_project_dependencies pslib)
diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake
index 8b43f2ef5c999fc351543ba958c7cc4b0856625d..22c8c1b463764b6e107c5f3da25d51b36c2ce59f 100644
--- a/cmake/external/pslib_brpc.cmake
+++ b/cmake/external/pslib_brpc.cmake
@@ -70,4 +70,3 @@ ExternalProject_Add(
ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB})
ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT})
-LIST(APPEND external_project_dependencies pslib_brpc)
diff --git a/cmake/external/threadpool.cmake b/cmake/external/threadpool.cmake
index 0159815fed81bdff6de3e561af569e9edc75f947..1f56bc7ab056ef0dd95d603ebe3461ef044b2a79 100644
--- a/cmake/external/threadpool.cmake
+++ b/cmake/external/threadpool.cmake
@@ -26,5 +26,3 @@ else()
endif()
add_dependencies(simple_threadpool extern_threadpool)
-
-LIST(APPEND external_project_dependencies simple_threadpool)
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 7a25aaf15f2c7f46d99394d82d69bc24e4f5cb2c..6f2af8670f25c00ac0970fe4ae2b0c5b03aa0d9e 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -83,5 +83,3 @@ INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include wa
ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
ADD_DEPENDENCIES(warpctc extern_warpctc)
-
-LIST(APPEND external_project_dependencies warpctc)
diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake
index 384c2f9328296ce6a8a6293be6cc47e5063dd3c4..1d61154c0d45dea795902d6544deb796693db263 100644
--- a/cmake/external/xbyak.cmake
+++ b/cmake/external/xbyak.cmake
@@ -55,4 +55,3 @@ else()
endif()
add_dependencies(xbyak ${XBYAK_PROJECT})
-list(APPEND external_project_dependencies xbyak)
diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake
index a0f300c2e8bab9e7402f869eed1b4c2d1c579aab..23b1e02108642df561948a6faa3152effb7ca932 100644
--- a/cmake/external/xxhash.cmake
+++ b/cmake/external/xxhash.cmake
@@ -71,5 +71,3 @@ add_library(xxhash STATIC IMPORTED GLOBAL)
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
include_directories(${XXHASH_INCLUDE_DIR})
add_dependencies(xxhash extern_xxhash)
-
-LIST(APPEND external_project_dependencies xxhash)
diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake
index 6c8d79c25e6a2655711fe4450e65600c9a584015..5569fefe992d10ad4820e51e677f40271d0214e7 100644
--- a/cmake/external/zlib.cmake
+++ b/cmake/external/zlib.cmake
@@ -57,5 +57,3 @@ ENDIF(WIN32)
ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
ADD_DEPENDENCIES(zlib extern_zlib)
-
-LIST(APPEND external_project_dependencies zlib)
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 81e7868a6ad3fee16911a49ff9d1394a103706c5..36b533aa4f7815896fb48c33fefad892b8d0d29c 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -21,7 +21,7 @@ function(CheckCompilerCXX11Flag)
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3)
message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.")
endif()
- endif()
+ endif()
endif()
endfunction()
@@ -147,6 +147,7 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-function # Warnings in Numpy Header.
-Wno-error=array-bounds # Warnings in Eigen::array
)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif(NOT WIN32)
if (APPLE)
diff --git a/cmake/hip.cmake b/cmake/hip.cmake
index 4276bc5b08cd88a52bb5782bca87fc37deabd830..c3a748db502037f926dc241e4c3bc26a83ad3468 100644
--- a/cmake/hip.cmake
+++ b/cmake/hip.cmake
@@ -11,8 +11,6 @@ include_directories("/opt/rocm/rocrand/include")
include_directories("/opt/rocm/rccl/include")
include_directories("/opt/rocm/thrust")
-list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc")
-
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++11" )
if(WITH_DSO)
@@ -31,22 +29,12 @@ if(WITH_GRPC)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_GRPC")
endif(WITH_GRPC)
-if(NOT WITH_GOLANG)
- set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITHOUT_GOLANG")
-endif(NOT WITH_GOLANG)
-
if(WITH_MKLDNN)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_MKLDNN")
endif(WITH_MKLDNN)
set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DANY_IMPL_ANY_CAST_MOVEABLE")
-if(NOT WITH_RDMA)
- set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_DISABLE_RDMA")
-endif(NOT WITH_RDMA)
-
-
-
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake
deleted file mode 100644
index b698f3bdc3ff586a72badee3e0109e29285b457f..0000000000000000000000000000000000000000
--- a/cmake/rdma.cmake
+++ /dev/null
@@ -1,82 +0,0 @@
-# The user should download RDMA first from the Subversion repository.
-
-# Execute the following instructions to check it out from svn manually:
-# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
-# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
-# We use the static output in the svn repositories to avoid implicit bugs from a non-standard runtime env.
-
-if(WITH_RDMA)
- set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
-
- function(generate_rdma_links)
-    # Redirect to the current dir to isolate pollution from the system runtime
-    # environment; this gives unified control across different gcc environments.
-    # E.g., by default gcc48 does not refer to /usr/lib64, which could contain
-    # low-version runtime libraries that crash the process while loading.
-    # This redirect trick fixes that.
- execute_process(
- COMMAND mkdir -p librdma
- COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
- COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
- COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
- COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
- COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1
- COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- )
- endfunction(generate_rdma_links)
-
- #check and set headers
- find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
- find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
- find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
- find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
-
- #check and set libs
- find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
- find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
- find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
- find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
- find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
- find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
- find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
-
- if(
- RDMA_INC_SXISOCK AND
- RDMA_INC_XIO AND
- RDMA_INC_EVENT AND
- RDMA_INC_NUMA AND
- RDMA_LIB_SXISOCK AND
- RDMA_LIB_XIO AND
- RDMA_LIB_EVENT AND
- RDMA_LIB_EVENT_CORE AND
- RDMA_LIB_EVENT_EXTRA AND
- RDMA_LIB_EVENT_PTHREADS AND
- RDMA_LIB_NUMA
- )
-
- set(RDMA_INC_DIR
- ${RDMA_INC_SXISOCK}
- ${RDMA_INC_XIO}
- ${RDMA_INC_EVENT}
- ${RDMA_INC_NUMA})
- set(RDMA_LIBS
- ${RDMA_LIB_SXISOCK}
- ${RDMA_LIB_XIO}
- ${RDMA_LIB_EVENT}
- ${RDMA_LIB_EVENT_CORE}
- ${RDMA_LIB_EVENT_EXTRA}
- ${RDMA_LIB_EVENT_PTHREADS}
- ${RDMA_LIB_NUMA}
- )
- set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
- include_directories("${RDMA_INC_DIR}")
- else()
-    # If this module is not called, RDMA_INC_DIR and RDMA_LIBS will be null,
-    # so the top module can always refer to these variables.
-    message(FATAL_ERROR "RDMA libraries are not found; try setting RDMA_ROOT or check all related libraries.")
- endif()
-else(WITH_RDMA)
- set(RDMA_LIBS "")
- set(RDMA_LD_FLAGS "")
- add_definitions(-DPADDLE_DISABLE_RDMA)
-endif(WITH_RDMA)
diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
index 3dc7171551bfb7aff8d1e75083c98b00378d247f..891ff222633741f9894c2fdb6c0096a48f8a35e1 100644
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -33,6 +33,5 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
- list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()
diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh
index 031225a85dabb26e5d9ea06f58909c049e7f0c08..34cb7a12db171915f2bc7df8787dd62cd381de68 100755
--- a/paddle/contrib/float16/run_float16_demo.sh
+++ b/paddle/contrib/float16/run_float16_demo.sh
@@ -14,9 +14,7 @@ cmake .. -DWITH_AVX=OFF \
-DWITH_MKL=OFF \
-DWITH_GPU=ON \
-DWITH_TESTING=ON \
- -DWITH_TIMER=ON \
-DWITH_PROFILER=ON \
- -DWITH_FLUID_ONLY=ON
make -j `nproc`
pip install -U "$WHEEL_PATH/$(ls $WHEEL_PATH)"
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f50a38842a21c795c979f859e88a9b16c3e54bd8..2544b7308c20daedd63e5b8866f3ee4fb0b71f36 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -8,13 +8,13 @@ paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None
paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
-paddle.fluid.name_scope ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.program_guard ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.name_scope ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.scope_guard ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
@@ -66,12 +66,12 @@ paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'unifo
paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
-paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
+paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
@@ -121,6 +121,7 @@ paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
+paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
@@ -229,7 +230,7 @@ paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes',
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.layers.Preprocessor.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Preprocessor.outputs ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.load ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,))
@@ -261,7 +262,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
-paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
+paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
@@ -270,7 +271,7 @@ paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywo
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.layers.DynamicRNN.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.memory ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32'))
paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
@@ -303,7 +304,7 @@ paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keyword
paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0))
paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
@@ -346,12 +347,12 @@ paddle.fluid.contrib.StateCell.set_state ArgSpec(args=['self', 'state_name', 'st
paddle.fluid.contrib.StateCell.state_updater ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.update_states ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None))
-paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
@@ -427,7 +428,7 @@ paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learnin
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
+paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
@@ -456,7 +457,7 @@ paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients ArgSpec(args=['self', '
paddle.fluid.optimizer.AdadeltaOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
-paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.optimizer.ModelAverage.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.ModelAverage.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
@@ -473,11 +474,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None
-paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None
-paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None
+paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, lod: List[List[int]]) -> None
+paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None
paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None
-paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, arg0: paddle.fluid.core.LoDTensor) -> None
+paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
@@ -491,14 +492,14 @@ paddle.fluid.clip.ErrorClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'],
paddle.fluid.clip.GradientClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.clip.GradientClipByNorm.__init__ ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None)
paddle.fluid.clip.GradientClipByGlobalNorm.__init__ ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',))
-paddle.fluid.profiler.cuda_profiler ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.profiler.cuda_profiler ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.profiler.reset_profiler ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.profiler.profiler ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.profiler.profiler ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
paddle.fluid.profiler.start_profiler ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None)
paddle.fluid.profiler.stop_profiler ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
paddle.fluid.unique_name.generate ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None)
paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
+paddle.fluid.unique_name.guard ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
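
The ArgSpec changes above (opaque varargs='args', keywords='kwds' entries becoming real parameter lists) are the visible effect of decorators that preserve the wrapped function's signature. A minimal sketch of the general mechanism, with hypothetical names and independent of the helper Paddle actually uses:

    import functools
    import inspect

    def naive(f):
        def wrapper(*args, **kwds):   # signature of f is hidden
            return f(*args, **kwds)
        return wrapper

    def preserving(f):
        @functools.wraps(f)           # records f as wrapper.__wrapped__
        def wrapper(*args, **kwds):
            return f(*args, **kwds)
        return wrapper                # inspect.signature follows __wrapped__

    def scope_guard(scope):
        pass

    print(inspect.signature(naive(scope_guard)))       # (*args, **kwds)
    print(inspect.signature(preserving(scope_guard)))  # (scope)
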
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 910318a49cea50fadd29b1427a4591abfa5d5a23..7ddf1ab44fe096739f4d241994e5cb686970a7c5 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -158,18 +158,19 @@ cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
-if(WITH_DISTRIBUTE)
- cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
- lod_rank_table feed_fetch_method sendrecvop_rpc ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass variable_helper)
+if(WITH_NGRAPH)
+ set(NGRAPH_EXE_DEPS ngraph_engine)
+else()
+ set(NGRAPH_EXE_DEPS)
+endif()
- set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
- set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+if(WITH_DISTRIBUTE)
+ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
+ lod_rank_table feed_fetch_method sendrecvop_rpc ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS})
+ set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
+ set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else()
- if (WITH_NGRAPH)
- cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper ngraph_engine)
- else ()
- cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
- endif()
+ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS})
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif()
diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc
index 1d9678a1ba1409e5c18d3e25b3aa13dfbbf76908..60708bf609d6f8b327d46fe585cbbcf07a62eece 100644
--- a/paddle/fluid/framework/async_executor.cc
+++ b/paddle/fluid/framework/async_executor.cc
@@ -244,6 +244,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
auto& block = main_program.Block(0);
for (auto var_name : fetch_var_names) {
auto var_desc = block.FindVar(var_name);
+ PADDLE_ENFORCE_NOT_NULL(var_desc, "%s is not found.", var_name);
auto shapes = var_desc->GetShape();
PADDLE_ENFORCE(shapes[shapes.size() - 1] == 1,
"var %s: Fetched var has wrong shape, "
diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 72c50518af08b9c1b2f97e6864e5836e806c77fc..10aa7a59422f4508dda8d0bcd960583056e25938 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -134,11 +134,6 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
out_layout =
out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;
- auto& pool = platform::DeviceContextPool::Instance();
- auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(
-     pool.Get(expected_kernel_type.place_));
- auto& cpu_engine = dev_ctx->GetEngine();
-
std::vector<int> in_tz = paddle::framework::vectorize2int(in.dims());
std::vector<int> out_tz = in_tz;
@@ -147,29 +142,25 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
"Input tensor type is not supported: %s", in.type());
memory::data_type out_type = in_type;
- auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
- auto out_format =
- platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
-
// output tensor has the same dims as input. Reorder doesn't change dims
out->Resize(in.dims());
- if (in_format != out_format) {
+  // temporary memory primitive descriptor for out, used to create the reorder
+ auto out_mem_pd = paddle::platform::create_prim_desc_from_dims(
+ paddle::framework::vectorize2int(out->dims()),
+ mkldnn::memory::format::blocked, out_type);
+ if (in.get_mkldnn_prim_desc() != out_mem_pd) {
void* in_data = GetDataFromTensor(in, in_type);
auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
- auto in_memory =
- memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
- auto out_memory =
- memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
+ auto in_memory = memory(in.get_mkldnn_prim_desc(), in_data);
+ auto out_memory = memory(out_mem_pd, out_data);
platform::Reorder(in_memory, out_memory);
} else {
out->ShareDataWith(in);
}
out->set_layout(out_layout);
- // reset format since the out tensor will be feed to non-MKLDNN OPkernel
- out->set_format(memory::format::format_undef);
#endif
}
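The reworked transform above stops comparing raw format enums and instead builds a memory primitive descriptor for the output, running a reorder only when the input tensor's descriptor differs and sharing the buffer otherwise. A library-free sketch of that decision logic (MemDesc below is a stand-in for mkldnn's primitive descriptor, not its real API):

    #include <cstdio>
    #include <vector>

    // Stand-in for a memory primitive descriptor: dims plus a layout tag.
    struct MemDesc {
      std::vector<int> dims;
      int layout;  // e.g. 0 = nchw, 1 = blocked
      bool operator==(const MemDesc& o) const {
        return dims == o.dims && layout == o.layout;
      }
      bool operator!=(const MemDesc& o) const { return !(*this == o); }
    };

    struct Tensor {
      MemDesc desc;
      const float* data;
    };

    // Reorder only when the descriptors differ; otherwise alias the buffer.
    const float* TransformLayout(const Tensor& in, const MemDesc& out_desc,
                                 std::vector<float>* out_buf) {
      if (in.desc != out_desc) {
        // A real implementation would run an mkldnn reorder primitive here.
        out_buf->assign(in.data, in.data + 4);  // placeholder "reorder"
        std::puts("reorder executed");
        return out_buf->data();
      }
      std::puts("descriptors equal: sharing data, no copy");
      return in.data;  // the ShareDataWith branch
    }

    int main() {
      float raw[4] = {1, 2, 3, 4};
      Tensor in{{{1, 4}, 0}, raw};
      std::vector<float> buf;
      TransformLayout(in, MemDesc{{1, 4}, 0}, &buf);  // same desc: no copy
      TransformLayout(in, MemDesc{{1, 4}, 1}, &buf);  // differs: reorder
    }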
diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc
index 82872224501709080ff02a13464d58543a0abda8..f0203edf05635452bf347335066dadc24ecc3138 100644
--- a/paddle/fluid/framework/data_transform.cc
+++ b/paddle/fluid/framework/data_transform.cc
@@ -51,13 +51,31 @@ void TransformData(const OpKernelType &expected_kernel_type,
#ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occurs
-
- auto out_format = platform::MKLDNNFormatForSize(in.dims().size(),
- ToMKLDNNFormat(lin));
-
out.ShareDataWith(input_tensor);
- out.set_layout(DataLayout::kMKLDNN);
- out.set_format(out_format);
+ // TODO(jczaja): Remove that once all mkldnn ops
+ // are modified to work with mkldnn_blocked
+ auto mkldnn_fmt = [&](int rank) {
+ switch (rank) {
+ case 5:
+ return mkldnn::memory::format::ncdhw;
+ case 4:
+ return mkldnn::memory::format::nchw;
+ case 3:
+ return mkldnn::memory::format::ncw;
+ case 2:
+ return mkldnn::memory::format::nc;
+ case 1:
+ return mkldnn::memory::format::x;
+ default:
+ return mkldnn::memory::format::blocked;
+ }
+ };
+
+ auto out_mem_pd = paddle::platform::create_prim_desc_from_dims(
+ paddle::framework::vectorize2int(out.dims()),
+ mkldnn_fmt(out.dims().size()));
+
+ out.set_mkldnn_prim_desc(out_mem_pd);
#endif
} else {
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
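The temporary lambda in the hunk above chooses an mkldnn memory format purely from tensor rank, falling back to a blocked layout for unsupported ranks. A self-contained sketch of the same dispatch, with the formats modeled as a plain enum rather than mkldnn::memory::format:

    #include <cstdio>

    // Modeled after the mkldnn::memory::format values used in the patch.
    enum class Fmt { x, nc, ncw, nchw, ncdhw, blocked };

    Fmt FormatForRank(int rank) {
      switch (rank) {
        case 5: return Fmt::ncdhw;  // 3D conv activations
        case 4: return Fmt::nchw;   // 2D conv activations
        case 3: return Fmt::ncw;
        case 2: return Fmt::nc;
        case 1: return Fmt::x;
        default: return Fmt::blocked;  // let the library pick a layout
      }
    }

    int main() {
      const char* names[] = {"x", "nc", "ncw", "nchw", "ncdhw", "blocked"};
      for (int rank = 1; rank <= 6; ++rank) {
        std::printf("rank %d -> %s\n", rank,
                    names[static_cast<int>(FormatForRank(rank))]);
      }
    }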
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
index 6621a59d37a670f7025507faeab5b9897794a72e..dc308fd2592bb158f46f6eac9dd0df25787559fe 100644
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@@ -50,12 +50,15 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope)
-cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper)
+if(WITH_GPU)
+cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info)
+else()
+cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info)
+endif()
+
cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass)
cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info)
cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper)
-cc_library(memory_early_delete_pass SRCS memory_early_delete_pass.cc DEPS memory_optimize_pass computation_op_handle scale_loss_grad_op_handle rpc_op_handle
- all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle graph graph_helper pass)
cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle)
cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)
cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass)
@@ -67,13 +70,11 @@ cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_he
cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle)
-set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass memory_early_delete_pass inplace_op_pass)
+set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass)
if (WITH_GPU)
list(APPEND SSA_GRAPH_EXECUTOR_DEPS reference_count_pass)
endif()
-cc_test(memory_optimize_helper_test SRCS memory_optimize_helper_test.cc memory_optimize_helper.cc DEPS framework_proto graph)
-cc_test(memory_optimize_pass_test SRCS memory_optimize_pass_test.cc memory_optimize_pass.cc memory_optimize_helper.cc DEPS framework_proto graph graph_helper op_registry pass)
-
+cc_test(memory_optimize_helper_test SRCS memory_optimize_helper_test.cc memory_optimize_helper.cc DEPS framework_proto graph graph_helper op_registry)
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
diff --git a/paddle/fluid/framework/details/all_reduce_deps_pass.cc b/paddle/fluid/framework/details/all_reduce_deps_pass.cc
index b7d6edd389d8e40835dadf56d7c54d53402f6f4d..2e20c436dfdb61fcda78cd044b86848c750cf22c 100644
--- a/paddle/fluid/framework/details/all_reduce_deps_pass.cc
+++ b/paddle/fluid/framework/details/all_reduce_deps_pass.cc
@@ -30,8 +30,6 @@ namespace paddle {
namespace framework {
namespace details {
-static constexpr char kAllOpDescs[] = "all_op_descs";
-
VarHandle* GetValidInput(const OpHandleBase* a) {
for (auto p : a->Inputs()) {
VarHandle* b = dynamic_cast<VarHandle*>(p);
diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
index dd77f7099f581a5b825916c4ea010023f3ad5bcd..c1f9c2b60c915370df7793f26fe83812a7ced96d 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -53,7 +53,7 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
#endif
void AllReduceOpHandle::RunImpl() {
- platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second);
+ platform::RecordEvent record_event(Name());
WaitInputVarGenerated();
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc
index 89d626edddfee3d2c43a3cf2232ad4fc1611e655..fdff83b92819b39974f3b2ce0848710f1ee02a41 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -22,7 +22,7 @@ namespace framework {
namespace details {
void BroadcastOpHandle::RunImpl() {
- platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
+ platform::RecordEvent record_event(Name());
if (places_.size() == 1) return;
@@ -30,7 +30,7 @@ void BroadcastOpHandle::RunImpl() {
VarHandle *in_var_handle;
{
auto in_var_handles = DynamicCast<VarHandle>(inputs_);
- PADDLE_ENFORCE_EQ(in_var_handles.size(), 1,
+ PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL,
"The number of input should be one.");
in_var_handle = in_var_handles[0];
}
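The 1 -> 1UL changes here (and in the data-balance and fuse-vars handles further down) matter because the enforce macro forwards both operands into a template, where the literal is no longer a compile-time constant, so comparing it against the unsigned result of size() trips -Wsign-compare on strict builds. A minimal reproduction of the pattern:

    #include <cstdio>
    #include <vector>

    // Mimics how an enforce macro forwards both sides into a template,
    // where the literal is no longer a constant expression.
    template <typename A, typename B>
    bool EnforceEq(A a, B b) {
      return a == b;  // -Wsign-compare fires here if A is unsigned, B signed
    }

    int main() {
      std::vector<int> in_var_handles = {42};
      // size() is size_t; pass an unsigned literal so A and B agree.
      bool ok = EnforceEq(in_var_handles.size(), 1UL);
      std::printf("%d\n", ok);  // prints: 1
    }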
diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index 48fdb57101e644e60a1e364ce44d81967a33a8cb..a0bd21778359fffe84cbfc0eaf67b95c0498bade 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -34,9 +34,11 @@ namespace details {
static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) {
// The allreduce op order should be fixed when scheduling ops
// in multiple threads or processes, to avoid a hang.
+  // NOTE: ParallelGraph executes this pass on each sub-graph, so
+  // there is no need to append it here.
return (!strategy.enable_sequential_execution_ &&
- strategy.num_trainers_ > 1) ||
- strategy.enable_parallel_graph_;
+ strategy.num_trainers_ > 1) &&
+ !strategy.enable_parallel_graph_;
}
class ParallelExecutorPassBuilder : public ir::PassBuilder {
@@ -133,15 +135,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
void AppendMultiDevPass(const BuildStrategy &strategy) {
ir::Pass *multi_devices_pass;
if (strategy_.is_distribution_) {
- VLOG(3) << "dist train mode";
+ VLOG(3) << "multi device parameter server mode";
multi_devices_pass = AppendPass("dist_multi_devices_pass").get();
} else {
if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
- VLOG(3) << "allreduce mode";
+ VLOG(3) << "multi devices collective mode with allreduce";
multi_devices_pass =
AppendPass("allreduce_mode_multi_devices_pass").get();
} else if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kReduce) {
- VLOG(3) << "reduce mode";
+ VLOG(3) << "multi deivces collective mode with reduce";
multi_devices_pass = AppendPass("reduce_mode_multi_devices_pass").get();
} else {
PADDLE_THROW("Unknown reduce strategy.");
@@ -211,8 +213,6 @@ std::unique_ptr BuildStrategy::Apply(
new std::vector<OpDesc*>(main_program.Block(0).AllOps());
graph->Set<const std::vector<OpDesc*>>(kAllOpDescs,
all_op_descs); // take ownership
- graph->Set(kGraphNodePool,
- new GraphNodePool); // take ownership
pass->Erase(kAllOpDescs);
pass->SetNotOwned<const std::vector<OpDesc*>>(kAllOpDescs, all_op_descs);
@@ -247,7 +247,9 @@ std::unique_ptr BuildStrategy::Apply(
continue;
}
}
+ VLOG(3) << "Start Apply Pass " << pass->Type();
graph = pass->Apply(std::move(graph));
+ VLOG(3) << "Finish Apply Pass " << pass->Type();
}
return graph;
}
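The corrected SeqOnlyAllReduceOps predicate is easiest to sanity-check in isolation: with the old ||, turning on parallel-graph execution forced sequential all-reduce ordering even for a single trainer, while the new && ! opts parallel-graph mode out because each sub-graph applies the pass itself. A self-contained check mirroring the fields from the diff:

    #include <cstdio>

    struct BuildStrategy {
      bool enable_sequential_execution_ = false;
      int num_trainers_ = 1;
      bool enable_parallel_graph_ = false;
    };

    // The corrected predicate from the patch.
    bool SeqOnlyAllReduceOps(const BuildStrategy& s) {
      return (!s.enable_sequential_execution_ && s.num_trainers_ > 1) &&
             !s.enable_parallel_graph_;
    }

    int main() {
      BuildStrategy s;
      s.num_trainers_ = 2;
      std::printf("multi-trainer, no parallel graph: %d\n",
                  SeqOnlyAllReduceOps(s));  // 1: fix the all-reduce order
      s.enable_parallel_graph_ = true;
      std::printf("multi-trainer, parallel graph:    %d\n",
                  SeqOnlyAllReduceOps(s));  // 0: each sub-graph handles it
    }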
diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h
index e3e06a5614ddee0bea342bc3608691b7a32326cc..e62e3edcef710df739c53b5d848f5aceb4f2db4e 100644
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -77,9 +77,6 @@ struct BuildStrategy {
bool fuse_relu_depthwise_conv_{false};
bool memory_optimize_{false};
-
- bool memory_early_delete_{false};
-
// TODO(dzhwinter):
// make enable_inplace, memory_optimize_
// memory_early_delete_ true by default
diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h
index 601ae4f8c6de11b0bf25d4f9a92ef8eada67be3d..1e3dbb1e44ecb16872e3bf4dee31e31cc69c9818 100644
--- a/paddle/fluid/framework/details/computation_op_handle.h
+++ b/paddle/fluid/framework/details/computation_op_handle.h
@@ -26,7 +26,7 @@
namespace paddle {
namespace framework {
namespace details {
-struct ComputationOpHandle : public OpHandleBase {
+class ComputationOpHandle : public OpHandleBase {
public:
ComputationOpHandle(ir::Node *node, Scope *scope, platform::Place place,
size_t scope_idx);
diff --git a/paddle/fluid/framework/details/data_balance_op_handle.cc b/paddle/fluid/framework/details/data_balance_op_handle.cc
index 48dcc52623369f7b0f51cd8c8aeb198b37467d5f..c9b52b68205ade000e21a3d06b80af86cbe01f34 100644
--- a/paddle/fluid/framework/details/data_balance_op_handle.cc
+++ b/paddle/fluid/framework/details/data_balance_op_handle.cc
@@ -86,7 +86,7 @@ std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
}
void DataBalanceOpHandle::RunImpl() {
- PADDLE_ENFORCE_GT(places_.size(), 1,
+ PADDLE_ENFORCE_GT(places_.size(), 1UL,
"Data balance can only be enabled when the number of "
"places to run larger than 1.");
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
diff --git a/paddle/fluid/framework/details/fuse_vars_op_handle.cc b/paddle/fluid/framework/details/fuse_vars_op_handle.cc
index d65b0920698748e8a2ded728d78fbcd69b7bae0e..14292c0a5d06aa3ff12b46b5768b136fa925752d 100644
--- a/paddle/fluid/framework/details/fuse_vars_op_handle.cc
+++ b/paddle/fluid/framework/details/fuse_vars_op_handle.cc
@@ -23,7 +23,7 @@ void FuseVarsOpHandle::RunImpl() {
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
- PADDLE_ENFORCE_EQ(in_var_handles.size(), 0);
+ PADDLE_ENFORCE_EQ(in_var_handles.size(), 0UL);
PADDLE_ENFORCE_EQ(out_var_handles.size() - 1, inputs_numel_.size(), "");
auto scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope*>();
diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc
index 51dfa2d0711f49aaefab0af3549283dbf77eee4a..f48561ea32e6a3bbc7e9f2a8326b080ad21c6d61 100644
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc
@@ -22,7 +22,7 @@ namespace framework {
namespace details {
void FusedBroadcastOpHandle::RunImpl() {
- platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
+ platform::RecordEvent record_event(Name());
if (places_.size() == 1UL) return;
diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
index be0d941c4f9c2fe8fbb1da8ec2c11868112fcf9b..6d53dac5c0a20b4340e71274a00a7f3c0cd08ff6 100644
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@@ -34,8 +34,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
->Var(details::kLocalExecScopeName)
->GetMutable<Scope*>() = &local_scope;
for (size_t j = 0; j < input_scope_idxes.size(); ++j) {
- local_scope.Var("out_var" + j);
- if (i == j) local_scope.Var("in_var" + j);
+ local_scope.Var("out_var" + std::to_string(j));
+ if (i == j) local_scope.Var("in_var" + std::to_string(j));
}
param_scopes_.emplace_back(&local_scope);
}
@@ -62,20 +62,21 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
// add input var handle
- nodes_.emplace_back(
- ir::CreateNodeForTest("in_node" + i, ir::Node::Type::kVariable));
- VarHandle* in_var_handle =
- new VarHandle(nodes_.back().get(), 1, input_scope_idxes[i],
- "in_var" + i, place_list_[input_scope_idxes[i]]);
+ nodes_.emplace_back(ir::CreateNodeForTest("in_node" + std::to_string(i),
+ ir::Node::Type::kVariable));
+ VarHandle* in_var_handle = new VarHandle(
+ nodes_.back().get(), 1, input_scope_idxes[i],
+ "in_var" + std::to_string(i), place_list_[input_scope_idxes[i]]);
vars_.emplace_back(in_var_handle);
op_handle_->AddInput(in_var_handle);
// add output var handle
for (size_t j = 0; j < place_list_.size(); ++j) {
- nodes_.emplace_back(
- ir::CreateNodeForTest("out_node" + i, ir::Node::Type::kVariable));
- VarHandle* out_var_handle = new VarHandle(
- nodes_.back().get(), 2, j, "out_var" + i, place_list_[j]);
+ nodes_.emplace_back(ir::CreateNodeForTest(
+ "out_node" + std::to_string(i), ir::Node::Type::kVariable));
+ VarHandle* out_var_handle =
+ new VarHandle(nodes_.back().get(), 2, j,
+ "out_var" + std::to_string(i), place_list_[j]);
vars_.emplace_back(out_var_handle);
op_handle_->AddOutput(out_var_handle);
}
@@ -86,7 +87,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
std::vector<std::vector<float>> send_vec;
f::LoD lod{{0, 10, 20}};
for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
- const std::string varname("in_var" + i);
+ const std::string varname("in_var" + std::to_string(i));
float val_scalar = static_cast(i);
send_vec.push_back(
InitLoDTensor(varname, input_scope_idxes[i], lod, val_scalar));
@@ -96,7 +97,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
WaitAll();
for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
- const std::string& varname("out_var" + i);
+ const std::string& varname("out_var" + std::to_string(i));
for (size_t j = 0; j < place_list_.size(); ++j) {
LoDTensorEqual(varname, send_vec[i], lod, param_scopes_[j]);
}
@@ -109,7 +110,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
2, 4, 6, 3, 1, 1, 1, 1, 3, 7};
int height = static_cast(kDims[0] * 2);
for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
- const std::string varname("in_var" + i);
+ const std::string varname("in_var" + std::to_string(i));
float val_scalar = static_cast(i);
send_vector.push_back(InitSelectedRows(varname, input_scope_idxes[i],
rows, height, val_scalar));
@@ -119,7 +120,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
WaitAll();
for (size_t i = 0; i < input_scope_idxes.size(); ++i) {
- const std::string& varname("out_var" + i);
+ const std::string& varname("out_var" + std::to_string(i));
for (size_t j = 0; j < place_list_.size(); ++j) {
SelectedRowsEqual(varname, input_scope_idxes[i], send_vector[i], rows,
height);
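The std::to_string fixes in this test repair a classic C++ pitfall: "out_var" + j does not concatenate, it offsets the string literal's pointer by j, silently producing names such as ut_var and t_var that never match the intended variables. A compact demonstration:

    #include <cstdio>
    #include <string>

    int main() {
      for (size_t j = 0; j < 3; ++j) {
        // Pointer arithmetic on the literal: drops the first j characters.
        const char* broken = "out_var" + j;
        // Correct: convert the index to text, then concatenate.
        std::string fixed = "out_var" + std::to_string(j);
        std::printf("j=%zu  broken=%-8s fixed=%s\n", j, broken, fixed.c_str());
      }
      // broken: out_var, ut_var, t_var -- fixed: out_var0, out_var1, out_var2
    }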
diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc
index 78c5d5b50e606daa963e728355dc1bce83cd5484..c91fc81b2defc9fe6b5720ce652a9aa94b27735e 100644
--- a/paddle/fluid/framework/details/inplace_op_pass.cc
+++ b/paddle/fluid/framework/details/inplace_op_pass.cc
@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add"
- "By default, it's turned on");
+ "By default, it's turned off");
DECLARE_string(memory_optimize_debug);
@@ -171,16 +171,15 @@ void InplacePass::InplaceModifyDesc(const std::string& var,
}
}
-const SSANodePair InplacePass::TryInplaceModifyVar(const std::string& var,
- const std::string& cache_var,
- const size_t& idx,
- ir::Graph* graph) const {
+const NodeSwapQueue InplacePass::TryInplaceModifyVar(
+ const std::string& var, const std::string& cache_var, const size_t& idx,
+ ir::Graph* graph) const {
PADDLE_ENFORCE(var_nodes_[var].size() >= 1 &&
var_nodes_[var].at(0)->Var() != nullptr);
std::unique_ptr<VarDesc> var_desc(new VarDesc(*var_nodes_[var].at(0)->Var()));
var_desc->SetName(cache_var);
- SSANodePair swap_nodes;
+ NodeSwapQueue swap_nodes;
for (size_t i = idx; i < view_.AllOps().size(); ++i) {
auto* op = view_.AllOps()[i];
@@ -230,7 +229,7 @@ const SSANodePair InplacePass::TryInplaceModifyVar(const std::string& var,
return swap_nodes;
}
-void InplacePass::CommitModify(const SSANodePair& swap_nodes,
+void InplacePass::CommitModify(const NodeSwapQueue& swap_nodes,
ir::Graph* graph) const {
for (auto& pair : swap_nodes) {
auto *node = pair.first, *cache_node = pair.second;
@@ -245,7 +244,7 @@ void InplacePass::CommitModify(const SSANodePair& swap_nodes,
}
}
-void InplacePass::WithdrawModify(const SSANodePair& nodes,
+void InplacePass::WithdrawModify(const NodeSwapQueue& nodes,
ir::Graph* graph) const {
for (auto& pair : nodes) {
auto *node = pair.first, *cache_node = pair.second;
diff --git a/paddle/fluid/framework/details/inplace_op_pass.h b/paddle/fluid/framework/details/inplace_op_pass.h
index 1abcf1f279e225839d440ff9c6840ce9b8a6547f..7be7f311852d2b64ce95e1a939371760d03d296b 100644
--- a/paddle/fluid/framework/details/inplace_op_pass.h
+++ b/paddle/fluid/framework/details/inplace_op_pass.h
@@ -56,7 +56,8 @@ class GraphView {
std::map<ir::Node*, std::unordered_set<ir::Node*>> adj_list_;
};
-typedef std::vector<std::pair<ir::Node*, ir::Node*>> SSANodePair;
+// swap pairs in sequence
+typedef std::vector<std::pair<ir::Node*, ir::Node*>> NodeSwapQueue;
class InplacePass : public ir::Pass {
public:
InplacePass();
@@ -68,14 +69,14 @@ class InplacePass : public ir::Pass {
void InitSSAGraphNodes() const;
private:
- const SSANodePair TryInplaceModifyVar(const std::string& var,
- const std::string& cache_var,
- const size_t& idx,
- ir::Graph* graph) const;
+ const NodeSwapQueue TryInplaceModifyVar(const std::string& var,
+ const std::string& cache_var,
+ const size_t& idx,
+ ir::Graph* graph) const;
- void CommitModify(const SSANodePair&, ir::Graph* graph) const;
+ void CommitModify(const NodeSwapQueue&, ir::Graph* graph) const;
- void WithdrawModify(const SSANodePair& nodes, ir::Graph* graph) const;
+ void WithdrawModify(const NodeSwapQueue& nodes, ir::Graph* graph) const;
void InplaceModifyDesc(const std::string& in_var, const std::string& out_var,
const size_t& idx) const;
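The renamed NodeSwapQueue records attempted (node, cache_node) swaps in order, so the pass can either commit them all once every op along the way accepts the inplace rewrite, or withdraw them all if any step rejects it. A simplified transactional sketch of that idea, with placeholder node types rather than the pass's real ir::Node API:

    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    struct Node { std::string name; };

    // Swap pairs recorded in sequence, as in the renamed typedef.
    typedef std::vector<std::pair<Node*, Node*>> NodeSwapQueue;

    void Commit(const NodeSwapQueue& q) {
      for (const auto& pair : q)  // make each tentative swap permanent
        std::printf("commit: reuse %s for %s\n", pair.second->name.c_str(),
                    pair.first->name.c_str());
    }

    void Withdraw(const NodeSwapQueue& q) {
      // Undo in reverse order so earlier swaps see their original state.
      for (auto it = q.rbegin(); it != q.rend(); ++it)
        std::printf("withdraw: restore %s\n", it->first->name.c_str());
    }

    int main() {
      Node a{"relu_out"}, cache{"conv_out"};
      NodeSwapQueue q{{&a, &cache}};
      bool all_ops_accept_inplace = false;  // e.g. a later op rejects reuse
      if (all_ops_accept_inplace) Commit(q); else Withdraw(q);
    }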
diff --git a/paddle/fluid/framework/details/memory_early_delete_pass.cc b/paddle/fluid/framework/details/memory_early_delete_pass.cc
deleted file mode 100644
index 69f8f705484450b0544291b19027eb174d7eeb8f..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/details/memory_early_delete_pass.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/details/memory_early_delete_pass.h"
-#include
-#include
-#include
-#include "paddle/fluid/framework/details/memory_optimize_helper.h"
-#include "paddle/fluid/framework/details/multi_devices_helper.h"
-#include "paddle/fluid/framework/details/reference_count_pass_helper.h"
-#include "paddle/fluid/framework/ir/graph_helper.h"
-
-namespace paddle {
-namespace framework {
-namespace details {
-
-static ComputationOpHandle* FindNextComputationOpHandle(VarHandle* var_in) {
-  std::queue<VarHandle*> queue;
- queue.push(var_in);
- do {
- auto* var = queue.front();
- queue.pop();
- for (auto* op : var->PendingOps()) {
-      auto* compute_op = dynamic_cast<ComputationOpHandle*>(op);
- if (compute_op != nullptr && compute_op->GetPlace() == var_in->place()) {
- return compute_op;
- }
- for (auto* out_var : op->Outputs()) {
- queue.push(out_var);
- }
- }
- } while (!queue.empty());
- return nullptr;
-}
-
-std::unique_ptr<ir::Graph> MemoryEarlyDeletePass::ApplyImpl(
-    std::unique_ptr<ir::Graph> graph) const {
- auto& graph_pool = Get(kGraphNodePool);
- auto& gcs = Get(kGarbageCollector);
-
- std::unordered_map> unlived_vars;
- unlived_vars.reserve(graph_pool.size());
- for (auto& pair : graph_pool) {
- unlived_vars.insert(std::make_pair(pair.first, pair.second));
- }
-
- auto compare_and_insert_early_delete_op = [&](
- OpHandleBase* op, const std::vector& vars) {
- if (unlived_vars.empty()) return;
- // unlived vars can be deleted after the last used op has finished.
- auto* compute_op = dynamic_cast(op);
- const auto& places = Get>(kAllPlaces);
- for (auto& var : vars) {
-      auto* var_handle = dynamic_cast<VarHandle*>(var);
- auto var_name = var->Node()->Name();
- auto& var_place = var_handle->place();
- if (unlived_vars.count(var_name) == 0) continue;
- if (!unlived_vars[var_name].empty()) {
- if (compute_op != nullptr &&
- unlived_vars[var_name].count(compute_op->Node()->Op()) != 0) {
- unlived_vars[var_name].erase(compute_op->Node()->Op());
- }
- continue;
- }
-
- if (var_handle == nullptr || !var_handle->Node()->IsVar() ||
- var_handle->Node()->IsCtrlVar())
- continue;
-
-    // shamelessly copied from the reference count pass.
- if (compute_op == nullptr) {
- // use next computation op scope
- compute_op = FindNextComputationOpHandle(var_handle);
- }
- auto* early_delete_node =
- graph->CreateEmptyNode("early_delete", ir::Node::Type::kOperation);
- GarbageCollector* gc = gcs.at(places[compute_op->GetScopeIdx()]).get();
- auto* early_delete_handle = new EarlyDeleteOpHandle(
- early_delete_node, compute_op->GetScope(), var_place, {var_name}, gc);
- if (compute_op->Outputs().empty()) {
- auto* dep_var = new DummyVarHandle(graph->CreateControlDepVar());
- compute_op->AddOutput(dep_var);
-      graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
- }
- early_delete_handle->AddInput(compute_op->Outputs().front());
- VLOG(5) << "Add early delete op " << var_name << " to Operator"
- << compute_op->Name();
- }
- };
-
-  auto all_ops = ir::FilterByNodeWrapper<OpHandleBase>(*graph);
- for (auto& op : all_ops) {
- compare_and_insert_early_delete_op(op, op->Inputs());
- compare_and_insert_early_delete_op(op, op->Outputs());
- }
- return graph;
-}
-
-} // namespace details
-} // namespace framework
-} // namespace paddle
-
-REGISTER_PASS(memory_early_delete_pass,
- paddle::framework::details::MemoryEarlyDeletePass)
- .RequireGraphAttr(paddle::framework::details::kGraphNodePool)
- .RequireGraphAttr(paddle::framework::details::kGarbageCollector);
diff --git a/paddle/fluid/framework/details/memory_early_delete_pass.h b/paddle/fluid/framework/details/memory_early_delete_pass.h
deleted file mode 100644
index 8215aa1b2baa223a111f9050d5488c5fc8ac0e6e..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/details/memory_early_delete_pass.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "paddle/fluid/framework/details/early_delete_op_handle.h"
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/pass.h"
-
-namespace paddle {
-namespace framework {
-namespace details {
-
-class MemoryEarlyDeletePass : public ir::Pass {
- protected:
-  std::unique_ptr<ir::Graph> ApplyImpl(
-      std::unique_ptr<ir::Graph> graph) const override;
-};
-
-} // namespace details
-} // namespace framework
-} // namespace paddle
diff --git a/paddle/fluid/framework/details/memory_optimize_helper.cc b/paddle/fluid/framework/details/memory_optimize_helper.cc
index b56ef021ef508a43aac082acbcfa6f543635203e..db4e805bb692ee44ac50337fae54f8dbfe389e6f 100644
--- a/paddle/fluid/framework/details/memory_optimize_helper.cc
+++ b/paddle/fluid/framework/details/memory_optimize_helper.cc
@@ -13,17 +13,114 @@
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
+#include
+#include
#include
-#include
+#include
#include
#include
#include
+#include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/platform/cpu_info.h"
+
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/gpu_info.h"
+#endif // PADDLE_WITH_CUDA
namespace paddle {
namespace framework {
namespace details {
+using paddle::framework::VarDesc;
+
+std::vector SortOpLikeDescOrder(const ir::Graph& graph) {
+ PADDLE_ENFORCE(graph.Has(kAllOpDescs),
+ "Graph has no attribute of kAllOpDescs.");
+ // 1. get op desc order
+  auto& op_descs = graph.Get<const std::vector<OpDesc*>>(kAllOpDescs);
+
+  // 2. topological sort order
+  auto nodes = graph.Nodes();
+  std::deque<ir::Node*> ops;
+ FilterVariables(nodes, [&](ir::Node* op) {
+ if (op->IsOp() && op->Op() != nullptr) {
+ ops.emplace_back(op);
+ }
+ });
+  std::unordered_map<ir::Node*, size_t> op_deps;
+  std::list<ir::Node*> ready_ops;
+  std::unordered_map<ir::Node*, std::unordered_set<ir::Node*>> pending_ops;
+
+ for (auto* op : ops) {
+    std::unordered_set<ir::Node*> preceding_op;
+ for (auto* in : op->inputs) {
+ if (in->inputs.empty()) continue;
+ PADDLE_ENFORCE(in->inputs.size() == 1 && in->inputs[0]->IsOp());
+ preceding_op.emplace(in->inputs[0]);
+ pending_ops[in->inputs[0]].emplace(op);
+ }
+ op_deps[op] = preceding_op.size();
+ if (preceding_op.empty()) {
+ ready_ops.emplace_back(op);
+ }
+ }
+
+  // 3. generate the op list based on the desc order and the topological order
+  std::vector<ir::Node*> ret;
+  std::list<OpDesc*> op_descs_list(op_descs.begin(), op_descs.end());
+
+ auto update_by_found_node = [&](ir::Node* found_node) {
+ for (auto* pending_op : pending_ops[found_node]) {
+ if (--op_deps[pending_op] == 0) {
+ ready_ops.emplace_back(pending_op);
+ }
+ }
+ ready_ops.remove(found_node);
+ ret.emplace_back(found_node);
+ };
+
+ while (!ready_ops.empty()) {
+ bool all_of_ready_op_unmatched = true;
+ for (auto it = op_descs_list.begin(); it != op_descs_list.end();) {
+ auto op_desc = *it;
+ ir::Node* found_node = nullptr;
+ for (auto* op : ready_ops) {
+ if (IsSameDesc(op->Op(), op_desc)) {
+ found_node = op;
+ break;
+ }
+ }
+
+      // 3.1 the op desc was deleted by another pass
+ if (found_node == nullptr) {
+ ++it;
+ continue;
+ } else {
+ all_of_ready_op_unmatched = false;
+ it = op_descs_list.erase(it);
+ }
+ update_by_found_node(found_node);
+ }
+
+    // 3.2 op descs were added by another pass
+    // a non-empty preceding-op set means some new op descs were
+    // created but are not contained in the returned node list.
+    // these new op descs may depend on each other.
+    std::list<ir::Node*> prev_ready_ops(ready_ops);
+ if (all_of_ready_op_unmatched) {
+ for (auto op : prev_ready_ops) {
+ update_by_found_node(op);
+ }
+ }
+ }
+
+  PADDLE_ENFORCE(std::all_of(
+      op_deps.begin(), op_deps.end(),
+      [&](const std::pair<ir::Node*, size_t>& p) { return p.second == 0; }));
+
+ return ret;
+}
-size_t NodeSizeInBytes(const VarDesc& node) {
+size_t NodeSize(const VarDesc& node) {
auto shape = node.GetShape();
int size =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
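SortOpLikeDescOrder above is essentially a topological traversal with ties broken by program-desc order: a ready op is emitted only when it matches the next unconsumed desc, and if no ready op matches any remaining desc (descs deleted or added by other passes), the ready list is flushed as-is. A compact sketch of just the tie-breaking loop, with integers standing in for op nodes and descs:

    #include <algorithm>
    #include <cstdio>
    #include <list>
    #include <vector>

    int main() {
      // Ops 3,1,0,2 are all "ready" (no unmet deps); emit them in the order
      // the program desc lists them, flushing leftovers if nothing matches.
      std::list<int> ready_ops = {3, 1, 0, 2};
      std::list<int> desc_order = {0, 1, 2, 3};
      std::vector<int> ret;

      while (!ready_ops.empty()) {
        bool matched = false;
        for (auto it = desc_order.begin(); it != desc_order.end();) {
          auto found = std::find(ready_ops.begin(), ready_ops.end(), *it);
          if (found == ready_ops.end()) { ++it; continue; }  // not ready yet
          ret.push_back(*found);
          ready_ops.erase(found);
          it = desc_order.erase(it);
          matched = true;
        }
        if (!matched) {  // descs out of sync with the graph: flush ready ops
          ret.insert(ret.end(), ready_ops.begin(), ready_ops.end());
          ready_ops.clear();
        }
      }
      for (int op : ret) std::printf("%d ", op);  // prints: 0 1 2 3
    }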
@@ -31,9 +128,15 @@ size_t NodeSizeInBytes(const VarDesc& node) {
return type_size * std::abs(size);
}
-size_t NodeSizeInBytes(ir::Node* n) {
- auto* desc = FindVarDescInBlock(n);
- return NodeSizeInBytes(*desc);
+size_t NodeSize(ir::Node* n) {
+ VarDesc* desc = nullptr;
+  // some ops do not have a block pointer
+ if (n->inputs[0]->Op() != nullptr) {
+ desc = FindVarDescInBlock(n);
+ } else {
+ desc = n->Var();
+ }
+ return NodeSize(*desc);
}
std::string DebugStringImpl(VarDesc* var) {
@@ -59,7 +162,6 @@ std::string DebugStringImpl(VarDesc* var) {
std::string DebugString(ir::Node* var) {
return DebugStringImpl(FindVarDescInBlock(var));
}
-// return DebugString(var->Var()); }
// NOTE(dzh): based on the ir node: if a large node has been reused
// by a smaller node, then the next time it appears in the pool it will
@@ -76,22 +178,26 @@ struct NodeComparator {
bool operator()(ir::Node* lhs, ir::Node* rhs) const {
auto* lhs_desc = FindVarDescInBlock(lhs);
auto* rhs_desc = FindVarDescInBlock(rhs);
+ // match data type
+ if (lhs_desc->GetDataType() != rhs_desc->GetDataType()) {
+ return false;
+ }
+ // match shape
auto lhs_shape = lhs_desc->GetShape();
auto rhs_shape = rhs_desc->GetShape();
if ((lhs_shape[0] == -1 && rhs_shape[0] == -1) ||
(lhs_shape[0] != -1 && rhs_shape[0] != -1)) {
- return NodeSizeInBytes(lhs) <= NodeSizeInBytes(rhs);
+ return NodeSize(lhs) <= NodeSize(rhs);
} else {
return false;
}
}
};
-void OrderedNodeList::Insert(ir::Node* var, ir::Node* op) {
+void OrderedSet::Insert(ir::Node* var) {
PADDLE_ENFORCE(var->IsVar() && !var->IsCtrlVar());
- PADDLE_ENFORCE(op->IsOp());
if (mark_table_.count(var->Name()) != 0) {
- mark_table_[var->Name()]->second.insert(op);
+ mark_table_[var->Name()]->emplace_back(var);
return;
}
@@ -99,14 +205,15 @@ void OrderedNodeList::Insert(ir::Node* var, ir::Node* op) {
auto var_shape = var_desc->GetShape();
int batch_size = static_cast(var_shape[0]);
- NodeComparator compare_node;
+ NodeComparator functor;
Iter it = nodes_.begin();
while (it != nodes_.end()) {
- auto* cache_desc = FindVarDescInBlock(it->first);
+ auto& prev = it->front();
+ auto* cache_desc = FindVarDescInBlock(prev);
int cache_batch_size = cache_desc->GetShape()[0];
if ((cache_batch_size == -1 && batch_size == -1) ||
(cache_batch_size != -1 && batch_size != -1)) {
- if (compare_node(it->first, var)) {
+ if (functor(prev, var)) {
++it;
} else {
break;
@@ -118,62 +225,127 @@ void OrderedNodeList::Insert(ir::Node* var, ir::Node* op) {
}
}
- it =
-      nodes_.insert(it, std::make_pair(var, std::unordered_set<ir::Node*>{op}));
+ it = nodes_.insert(it, {var});
mark_table_[var->Name()] = it;
}
-int OrderedNodeList::GetIndex(ir::Node* var) {
+int OrderedSet::GetNodeIndexInPool(ir::Node* var) {
return std::distance(nodes_.begin(), mark_table_[var->Name()]);
}
-ir::Node* OrderedNodeList::NodeMatch(ir::Node* var) const {
+ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
ir::Node* found_node = nullptr;
- NodeComparator compare_node;
+ NodeComparator functor;
for (auto it = nodes_.begin(); it != nodes_.end(); ++it) {
- if (compare_node(var, it->first)) {
- found_node = it->first;
+ auto& candidate = it->front();
+ if (functor(var, candidate)) {
+ found_node = candidate;
break;
}
}
return found_node;
}
-void OrderedNodeList::Erase(ir::Node* var) { Erase(var->Name()); }
+ir::Node* OrderedSet::FindNextBestFitNode(ir::Node* var, ir::Node* prev) const {
+ ir::Node* found_node = nullptr;
+ NodeComparator functor;
+ auto it =
+ std::find_if(nodes_.begin(), nodes_.end(), [&](const NodeVector& v) {
+ if (v.front() == prev)
+ return true;
+ else
+ return false;
+ });
+ PADDLE_ENFORCE(it != nodes_.end(), "Not found previous in node list!");
+ for (it = std::next(it); it != nodes_.end(); ++it) {
+ auto& candidate = it->front();
+ if (functor(var, candidate)) {
+ found_node = candidate;
+ break;
+ }
+ }
+ return found_node;
+}
-void OrderedNodeList::Erase(const std::string& var) {
+bool OrderedSet::Has(ir::Node* var) const {
+ if (mark_table_.count(var->Name())) {
+ auto& node_in_samename = mark_table_.at(var->Name());
+ auto iter =
+ std::find_if(node_in_samename->begin(), node_in_samename->end(),
+ [&](ir::Node* n) { return n->Name() == var->Name(); });
+ return iter != node_in_samename->end();
+ }
+ return false;
+}
+
+void OrderedSet::Erase(const std::string& var) {
PADDLE_ENFORCE(mark_table_.count(var));
nodes_.erase(mark_table_[var]);
mark_table_.erase(var);
}
-std::string OrderedNodeList::ToString() const {
+void OrderedSet::Erase(ir::Node* var) {
+ PADDLE_ENFORCE(var != nullptr);
+ Erase(var->Name());
+}
+
+std::string OrderedSet::ToString() const {
std::stringstream ss;
for (auto it = nodes_.begin(); it != nodes_.end(); ++it) {
- ss << DebugString(it->first) << " ";
+ for (auto& node : *it) {
+ ss << DebugString(node) << " ";
+ }
}
return ss.str();
}
bool NodeCanReused(ir::Node* node) {
+  // validate that the node is a var node
if (node == nullptr || !node->IsVar() || node->IsCtrlVar()) return false;
- // auto* desc = node->Var();
- bool flag = NodeCanReused(*node->Var());
+
+ bool flag = true;
+  // op outputs forced to be generated on cpu cannot be reused.
for (auto* op : node->inputs) {
if (op->Op()->HasAttr("force_cpu")) {
- // op output force generated in cpu, can not be reused.
flag &= framework::AttrReader(op->Op()->GetAttrMap())
.Get("force_cpu") == 0;
}
}
+ // var desc validation.
+ flag &= NodeCanReused(*node->Var());
return flag;
}
+int MinChunkSize() {
+ int size{0};
+#ifdef PADDLE_WITH_CUDA
+ size = platform::GpuMinChunkSize();
+#else
+ size = platform::CpuMinChunkSize();
+#endif // PADDLE_WITH_CUDA
+ return size;
+}
+
bool NodeCanReused(const VarDesc& node) {
auto type = node.GetType();
- if (node.Persistable() || type != proto::VarType::LOD_TENSOR ||
- node.GetShape().empty()) {
+  // only these types hold the bulk of gpu memory
+ if (!(type == proto::VarType::LOD_TENSOR ||
+ type == proto::VarType::SELECTED_ROWS ||
+ type == proto::VarType::LOD_TENSOR_ARRAY)) {
+ return false;
+ }
+  // a persistable variable is a parameter
+ if (node.Persistable()) {
+ return false;
+ }
+  // a shape smaller than min_chunk_size is meaningless.
+  // furthermore, the fetched loss always has size = 1
+  // and should not be reused.
+  auto shape = node.GetShape();
+  int size = std::abs(
+      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()));
+ if (shape.empty() || size < MinChunkSize()) {
return false;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
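NodeCanReused now also rejects tensors smaller than the allocator's minimum chunk size: reusing a buffer below that granularity saves nothing, and the size-1 fetched loss in particular must not be reused. A standalone sketch of the size gate, using an illustrative 256-byte stand-in for platform::CpuMinChunkSize():

    #include <cstdio>
    #include <cstdlib>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Illustrative stand-in for platform::CpuMinChunkSize().
    static int MinChunkSize() { return 256; }

    bool SizeAllowsReuse(const std::vector<int>& shape) {
      if (shape.empty()) return false;
      // -1 marks a dynamic (batch) dimension; take the absolute product.
      int size = std::abs(std::accumulate(shape.begin(), shape.end(), 1,
                                          std::multiplies<int>()));
      return size >= MinChunkSize();
    }

    int main() {
      std::printf("loss   {1}:        %d\n", SizeAllowsReuse({1}));         // 0
      std::printf("fc_out {-1, 1024}: %d\n", SizeAllowsReuse({-1, 1024}));  // 1
    }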
@@ -193,6 +365,176 @@ bool OpHasSubBlock(OpDesc* desc) {
return false;
}
+ControlFlowGraph::ControlFlowGraph(const ir::Graph& graph) {
+ ops_ = SortOpLikeDescOrder(graph);
+ ConnectNodes();
+}
+
+void ControlFlowGraph::BuildCFGGraph() {
+  // FIXME(dzh): same effect as ConnectNodes, but uses the control
+  // links to build the dependency graph; it goes wrong in transformer.
+ for (ir::Node* op : ops_) {
+ for (auto& input_var : op->inputs) {
+ if (!input_var->inputs.empty()) {
+ PADDLE_ENFORCE(
+ input_var->inputs.size() == 1 && input_var->inputs[0]->IsOp(),
+ "Preceding Op Node of Var Node must be unique");
+ auto* pred_op = input_var->inputs[0];
+ if (pred_op->Op() != nullptr) {
+ predecessors_[op].insert(pred_op);
+ successors_[pred_op].insert(op);
+ }
+ }
+ if (input_var->IsVar() && !input_var->IsCtrlVar()) {
+ uses_[op].insert(input_var->Name());
+ }
+ }
+ for (auto& output_var : op->outputs) {
+      // an output var may be used by many ops
+ for (auto* succ_op : output_var->outputs) {
+ if (succ_op->Op() != nullptr) {
+ successors_[op].insert(succ_op);
+ predecessors_[succ_op].insert(op);
+ }
+ }
+ if (output_var->IsVar() && !output_var->IsCtrlVar()) {
+ defs_[op].insert(output_var->Name());
+ }
+ }
+ }
+}
+
+void ControlFlowGraph::ConnectNodes() {
+ for (size_t i = 0; i < ops_.size(); ++i) {
+ auto& op = ops_[i];
+ try {
+ auto& next_op = ops_.at(i + 1);
+ successors_[op].insert(next_op);
+ predecessors_[next_op].insert(op);
+ } catch (...) {
+ // do nothing
+ }
+
+ FilterVariables(op->inputs,
+ [&](ir::Node* var) { uses_[op].emplace(var->Name()); });
+
+ FilterVariables(op->outputs,
+ [&](ir::Node* var) { defs_[op].emplace(var->Name()); });
+ }
+}
+
+void ControlFlowGraph::LiveVariableAnalysis() {
+  // NOTE(dzh): variable liveness analysis (a.k.a. the reversed_ops algorithm)
+  // computes the liveness of each variable through the reversed_ops algorithm.
+  // It iterates the operators from end to begin, computing the live-in/live-out
+  // variable sets for each op; the diff between in and out is then used for
+  // variable reuse. For details refer to
+ // http://www.cs.cornell.edu/courses/cs4120/2013fa/lectures/lec26-fa13.pdf
+  std::list<ir::Node*> work_list(ops_.rbegin(), ops_.rend());
+ while (!work_list.empty()) {
+ ir::Node* op = work_list.front();
+ work_list.pop_front();
+    // get the previously calculated live_in; empty on the first visit.
+ auto prev_live_in = std::move(live_in_[op]);
+ for (auto& s : successors_[op]) {
+ for (auto& var : live_in_[s]) {
+ live_out_[op].insert(var);
+ }
+ }
+ for (auto& var : uses_[op]) {
+ live_in_[op].insert(var);
+ }
+ for (auto& var : live_out_[op]) {
+ live_in_[op].insert(var);
+ }
+ for (auto& var : defs_[op]) {
+ live_in_[op].erase(var);
+ }
+
+ // If the live_in is not changed, then the liveness analysis of
+ // predecessors is completed.
+ //
+ // Otherwise, recalculate the predecessors liveness
+ if (live_in_[op] != prev_live_in) {
+ for (auto& pre : predecessors_[op]) {
+ work_list.push_back(pre);
+ }
+ }
+ }
+}
+
+void ControlFlowGraph::RenameVarInCFGGraph(const std::string& old_node,
+ const std::string& new_node,
+ int begin_idx) {
+ // update graph from begin idx to the end
+ for (size_t i = begin_idx; i != ops_.size(); ++i) {
+ auto* op = ops_[i];
+ if (uses_[op].find(old_node) != uses_[op].end()) {
+ uses_[op].erase(old_node);
+ uses_[op].insert(new_node);
+ }
+ if (defs_[op].find(old_node) != defs_[op].end()) {
+ defs_[op].erase(old_node);
+ defs_[op].insert(new_node);
+ }
+ if (live_in_[op].find(old_node) != live_in_[op].end()) {
+ live_in_[op].erase(old_node);
+ live_in_[op].insert(new_node);
+ }
+ if (live_out_[op].find(old_node) != live_out_[op].end()) {
+ live_out_[op].erase(old_node);
+ live_out_[op].insert(new_node);
+ }
+ }
+}
+
+const std::set<std::string> ControlFlowGraph::LiveIn(ir::Node* op) const {
+ auto it = live_in_.find(op);
+ PADDLE_ENFORCE(
+ it != live_in_.end(),
+ string::Sprintf("Expect %s in live_in, but Not Found.", op->Name()));
+ return it->second;
+}
+
+const std::set<std::string> ControlFlowGraph::LiveOut(ir::Node* op) const {
+ auto it = live_out_.find(op);
+ PADDLE_ENFORCE(
+ it != live_out_.end(),
+ string::Sprintf("Expect %s in live_out, but Not Found.", op->Name()));
+ return it->second;
+}
+
+const std::set<std::string> ControlFlowGraph::Use(ir::Node* op) const {
+  auto it = uses_.find(op);
+  PADDLE_ENFORCE(
+      it != uses_.end(),
+      string::Sprintf("Expect %s in uses, but Not Found.", op->Name()));
+ return it->second;
+}
+
+const std::vector<ir::Node*> ControlFlowGraph::Ops() const { return ops_; }
+
+std::vector<ir::Node*>& ControlFlowGraph::Ops() { return ops_; }
+
+ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
+ ir::Node* op) const {
+  // in an ssa-graph, different-version nodes share the same name;
+  // this function gets the latest-version var defined before the target op.
+  // It may return nullptr, e.g. for a data node.
+ ir::Node* found_node = nullptr;
+ for (auto* node : ops_) {
+ if (node == op) break;
+ for (auto& output : node->outputs) {
+ PADDLE_ENFORCE((output != nullptr && output->IsVar()),
+ "Output is empty!");
+ if (output->Var() && output->Name() == name) {
+ found_node = output;
+ }
+ }
+ }
+ return found_node;
+}
+
} // namespace details
} // namespace framework
} // namespace paddle
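To make the reversed_ops liveness pass in this file concrete, here is a minimal backward dataflow computation over a straight-line three-op program, applying the same live_in = uses + (live_out - defs) update; the op and variable names are invented for illustration:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    struct Op {
      std::string name;
      std::set<std::string> uses, defs;
    };

    int main() {
      // c = a + b; d = relu(c); e = scale(d)
      std::vector<Op> ops = {{"sum", {"a", "b"}, {"c"}},
                             {"relu", {"c"}, {"d"}},
                             {"scale", {"d"}, {"e"}}};
      // Straight-line code, so each op's live_out is the successor's live_in;
      // updating the set in place walks it backwards through the program.
      std::set<std::string> live;  // live_in of the op after the current one
      for (auto it = ops.rbegin(); it != ops.rend(); ++it) {
        for (const auto& d : it->defs) live.erase(d);   // defs kill liveness
        for (const auto& u : it->uses) live.insert(u);  // uses generate it
        std::printf("%-5s live_in:", it->name.c_str());
        for (const auto& v : live) std::printf(" %s", v.c_str());
        std::printf("\n");
      }
      // "c" leaves the live set right after relu consumes it, which is the
      // signal the memory-optimize pass uses to reuse its buffer.
    }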
diff --git a/paddle/fluid/framework/details/memory_optimize_helper.h b/paddle/fluid/framework/details/memory_optimize_helper.h
index 064183d61ea7386b6b45034c90fd7569a8647f60..377367faf3c529496b00004f23159750cc2e4bc4 100644
--- a/paddle/fluid/framework/details/memory_optimize_helper.h
+++ b/paddle/fluid/framework/details/memory_optimize_helper.h
@@ -17,6 +17,8 @@
#include
#include
#include
+#include