diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e718b32cb6c48d11e73600509a17db107f438708..d8112837dc9627bc2e501940b8e97c89e97c45ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,12 +42,6 @@ repos: entry: bash ./tools/codestyle/pylint_pre_commit.hook language: system files: \.(py)$ -- repo: https://github.com/PaddlePaddle/pre-commit-golang - sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 - hooks: - - id: go-fmt - types: - - go - repo: local hooks: - id: copyright_checker diff --git a/AUTHORS.md b/AUTHORS.md index deafa641203ed9d9bd794fe92e4a91e3aaa03f63..da91933f4697f3b7c08feab20d703e22397b3757 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -44,6 +44,7 @@ | qingqing01 | Qing-Qing Dang | | reyoung | Yang Yu | | Sand3r- | Michal Gallus | +| sfraczek | Sylwester Fraczek | | Superjom | Chun-Wei Yan | | tensor-tang | Jian Tang | | tianbingsz | Tian-Bing Xu | @@ -54,6 +55,7 @@ | wangyang59 | Yang Wang | | wangzhen-nlp | Zhen Wang | | wen-bo-yang | Wen-Bo Yang | +| wojtuss | Wojciech Uss | | wwhu | Wei-Wei Hu | | xinghai-sun | Xing-Hai Sun | | Xreki | Yi-Qun Liu | diff --git a/CMakeLists.txt b/CMakeLists.txt index 61f5e63098c40f140774ba6bfd9a2de8d2d67bfb..8e7ffe72b5fb846fb55ab8dc4809d87a40cfe06c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,23 +54,12 @@ option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) -option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) -option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) -option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_JEMALLOC "Compile PaddlePaddle with jemalloc" OFF) -option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) -option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) -option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF) -option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) -option(GLIDE_INSTALL "Download and install go dependencies " ON) option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_PSLIB "Compile with pslib support" OFF) -option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) -option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) -option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF) @@ -105,8 +94,6 @@ endif() if (WIN32) set(WITH_DISTRIBUTE OFF CACHE STRING "Disable DISTRIBUTE when compiling for Windows" FORCE) - set(WITH_FLUID_ONLY ON CACHE STRING - "Enable FLUID_ONLY when compiling for Windows" FORCE) endif() set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING @@ -148,7 +135,6 @@ include(external/openblas) # download, build, install openblas include(external/mkldnn) # download, build, install mkldnn include(external/ngraph) # download, build, install nGraph include(external/boost) # download boost -include(external/any) # download libn::any include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 include(external/cares) @@ -225,7 +211,6 @@ include(generic) # simplify cmake module include(package) # set paddle packages include(ccache) # set ccache for compilation include(util) # set unittest and link libs -include(rdma) # set rdma libraries include(version) # set PADDLE_VERSION include(coveralls) # set code coverage include(inference_lib) # add paddle fluid inference libraries @@ -233,38 +218,11 @@ include(inference_lib) # add paddle fluid inference libraries include_directories("${PADDLE_SOURCE_DIR}") -set(EXTERNAL_LIBS - gflags - glog - ${CBLAS_LIBRARIES} - protobuf - zlib - ${PYTHON_LIBRARIES} -) - -if(WITH_PSLIB) - list(APPEND EXTERNAL_LIBS pslib) - list(APPEND EXTERNAL_LIBS pslib_brpc) - list(APPEND EXTERNAL_LIBS libmct) -endif(WITH_PSLIB) - if(WITH_AMD_GPU) find_package(HIP) include(hip) endif(WITH_AMD_GPU) -if(WITH_MKLML) - list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) -endif() - -if(WITH_LIBXSMM) - list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS}) -endif() - -if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) -endif() - set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") diff --git a/README.md b/README.md index 68421cf177f4cd15f8f44e8d00a27cafb5a13b91..5c428e9900762a208eebbfd053ce98663f803345 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ English | [简体中文](./README_cn.md) [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) -[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) -[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) +[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -18,7 +18,7 @@ learning to many products at Baidu. Our vision is to enable deep learning for everyone via PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. -### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2) +### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3) ### Install Latest Stable Release: ``` # Linux CPU @@ -26,9 +26,9 @@ pip install paddlepaddle # Linux GPU cuda9cudnn7 pip install paddlepaddle-gpu # Linux GPU cuda8cudnn7 -pip install paddlepaddle-gpu==1.2.0.post87 +pip install paddlepaddle-gpu==1.3.0.post87 # Linux GPU cuda8cudnn5 -pip install paddlepaddle-gpu==1.2.0.post85 +pip install paddlepaddle-gpu==1.3.0.post85 # For installation on other platform, refer to http://paddlepaddle.org/ ``` @@ -75,26 +75,26 @@ pip install paddlepaddle-gpu==1.2.0.post85 ## Installation -It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website. +It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) on our website. ## Documentation -We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and -[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation. +We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) and +[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation. - [Deep Learning 101](https://github.com/PaddlePaddle/book) You might want to start from this online interactive book that can run in a Jupyter Notebook. -- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html) +- [Distributed Training](http://paddlepaddle.org/documentation/docs/en/1.3/user_guides/howto/training/multi_node_en.html) You can run distributed training jobs on MPI clusters. -- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html) +- [Python API](http://paddlepaddle.org/documentation/docs/en/1.3/api/index_en.html) Our new API enables much shorter programs. -- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html) +- [How to Contribute](http://paddlepaddle.org/documentation/docs/en/1.3/advanced_usage/development/contribute_to_paddle/index_en.html) We appreciate your contributions! diff --git a/README_cn.md b/README_cn.md index dfb55b17ca4fd05ce5b7b85b2e26e4f7f7229763..b7b0e75e5524cc483a8c203a382e7f339f91694f 100644 --- a/README_cn.md +++ b/README_cn.md @@ -3,8 +3,8 @@ [English](./README.md) | 简体中文 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) -[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) -[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) +[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -16,7 +16,7 @@ PaddlePaddle (PArallel Distributed Deep LEarning) 是一个简单易用、高效 跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases) -### PaddlePaddle最新版本: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2) +### PaddlePaddle最新版本: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3) ### 安装最新稳定版本: ``` # Linux CPU @@ -24,9 +24,9 @@ pip install paddlepaddle # Linux GPU cuda9cudnn7 pip install paddlepaddle-gpu # Linux GPU cuda8cudnn7 -pip install paddlepaddle-gpu==1.2.0.post87 +pip install paddlepaddle-gpu==1.3.0.post87 # Linux GPU cuda8cudnn5 -pip install paddlepaddle-gpu==1.2.0.post85 +pip install paddlepaddle-gpu==1.3.0.post85 # 其他平台上的安装指引请参考 http://paddlepaddle.org/ ``` @@ -57,26 +57,26 @@ pip install paddlepaddle-gpu==1.2.0.post85 ## 安装 -推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) +推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html) ## 文档 -我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)和 -[中文](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) 文档 +我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)和 +[中文](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) 文档 - [深度学习101](https://github.com/PaddlePaddle/book) 或许您想从这个在线交互式书籍开始,可以在Jupyter Notebook中运行 -- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html) +- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.3/user_guides/howto/training/multi_node.html) 可以在MPI集群上运行分布式训练任务 -- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html) +- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.3/api_cn/index_cn.html) 新的API支持代码更少更简洁的程序 -- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html) +- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.3/advanced_usage/development/contribute_to_paddle/index_cn.html) 欢迎您的贡献! diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md deleted file mode 100644 index 8b7dc5b7db800896eb4de2054ab5e584aed93999..0000000000000000000000000000000000000000 --- a/benchmark/IntelOptimizedPaddle.md +++ /dev/null @@ -1,112 +0,0 @@ -# Benchmark - -Machine: - -- Server: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket -- Laptop: TBD - -System: CentOS release 6.3 (Final), Docker 1.12.1. - -PaddlePaddle: -- paddlepaddle/paddle:0.11.0 (for MKLML and MKL-DNN) - - MKL-DNN tag v0.11 - - MKLML 2018.0.1.20171007 -- paddlepaddle/paddle:0.11.0-openblas (for OpenBLAS) - - OpenBLAS v0.2.20 - -On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. - -## Benchmark Model - -### Server - -#### Training -Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz -Pay attetion that the speed below includes forward, backward and parameter update time. So we can not directly compare the data with the benchmark of caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only contain forward and backward. The updating time of parameter would become very heavy when the weight size are large, especially on alexnet. - -Input image size - 3 * 224 * 224, Time: images/second - -- VGG-19 - -| BatchSize | 64 | 128 | 256 | -|--------------|-------| -----| --------| -| OpenBLAS | 7.80 | 9.00 | 10.80 | -| MKLML | 12.12 | 13.70 | 16.18 | -| MKL-DNN | 28.46 | 29.83 | 30.44 | - - - - - ResNet-50 - -| BatchSize | 64 | 128 | 256 | -|--------------|-------| ------| -------| -| OpenBLAS | 25.22 | 25.68 | 27.12 | -| MKLML | 32.52 | 31.89 | 33.12 | -| MKL-DNN | 81.69 | 82.35 | 84.08 | - - - - - GoogLeNet - -| BatchSize | 64 | 128 | 256 | -|--------------|-------| ------| -------| -| OpenBLAS | 89.52 | 96.97 | 108.25 | -| MKLML | 128.46| 137.89| 158.63 | -| MKL-DNN     | 250.46| 264.83| 269.50 | - - - -- AlexNet - -| BatchSize | 64 | 128 | 256 | -|--------------|--------| ------ | -------| -| OpenBLAS | 45.62 | 72.79 | 107.22 | -| MKLML | 66.37 | 105.60 | 144.04 | -| MKL-DNN | 399.00 | 498.94 | 626.53 | - - - -#### Inference -Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz -- VGG-19 - -| BatchSize | 1 | 2 | 4 | 8 | 16 | -|-----------|-------|-------|-------|-------|-------| -| OpenBLAS | 1.10 | 1.96 | 3.62 | 3.63 | 2.25 | -| MKLML | 5.58 | 9.80 | 15.15 | 21.21 | 28.67 | -| MKL-DNN | 75.07 | 88.64 | 82.58 | 92.29 | 96.75 | - - - -- ResNet-50 - -| BatchSize | 1 | 2 | 4 | 8 | 16 | -|-----------|-------|--------|--------|--------|--------| -| OpenBLAS | 3.31 | 6.72 | 11.59 | 13.17 | 9.27 | -| MKLML | 6.33 | 12.02 | 22.88 | 40.53 | 63.09 | -| MKL-DNN | 107.83| 148.84 | 177.78 | 189.35 | 217.69 | - - - -- GoogLeNet - -| BatchSize | 1 | 2 | 4 | 8 | 16 | -|-----------|--------|--------|--------|--------|--------| -| OpenBLAS | 12.06 | 23.56 | 34.48 | 36.45 | 23.12 | -| MKLML | 22.74 | 41.56 | 81.22 | 133.47 | 210.53 | -| MKL-DNN | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 | - - - -- AlexNet - -| BatchSize | 1 | 2 | 4 | 8 | 16 | -|-----------|--------|--------|--------|--------|--------| -| OpenBLAS | 3.53 | 6.23 | 15.04 | 26.06 | 31.62 | -| MKLML | 21.32 | 36.55 | 73.06 | 131.15 | 192.77 | -| MKL-DNN | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 | - - - -### Laptop -TBD diff --git a/benchmark/README.md b/benchmark/README.md deleted file mode 100644 index 367013f0457f9bbb9ae1335ea63dce181316d444..0000000000000000000000000000000000000000 --- a/benchmark/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# Benchmark - -Machine: - -- CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz -- GPU: Tesla K40m -- cuDNN: v5.1 -- system: Docker 1.12.1, all platforms are tested in docker environment. - -Platforms: - -- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0 -- Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu -- Caffe: kaixhin/cuda-caffe - -Several convolutional neural networks and recurrent neural networks are used to test. - -## Image - -### Benchmark Model - -AlexNet, GoogleNet and a small network used in Caffe. - -- [AlexNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet): but the group size is one. - -- [GoogleNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet): but remove loss1 and loss2 when testing benchmark. - -- [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt) - - -### Single-GPU - -- AlexNet: input - 3 * 227 * 227, Time: ms/batch - -| BatchSize | 64 | 128 | 256 | 512 | -|--------------|-----| -----| ------| -----| -| PaddlePaddle | 195 | 334 | 602 | 1629 | -| TensorFlow | 223 | 364 | 645 | 1235 | -| Caffe | 324 | 627 | 1232 | 2513 | - -**Notation** - -All platforms use cuDNN-v5.1. We see that caffe is slower in this experiment, because its workspace limit size of cuDNN-conv interface is 8 * 1024 * 1024, which is smaller in PaddlePaddle and TensorFlow. Note that Caffe will be faster if increasing the workspace limit size. - -- GoogletNet: input - 3 * 224 * 224, Time: ms/batch - - -| BatchSize | 64 | 128 | 256 | -|--------------|-------| -------| --------| -| PaddlePaddle | 613 | 1149 | 2348 | -| TensorFlow | 644 | 1176 | 2219 | -| Caffe | 694 | 1364 | out of memory | - -- SmallNet: input - 3 * 32 * 32, Time ms/batch - -| BatchSize | 64 | 128 | 256 | 512 | -|--------------|--------| -------- | --------|---------| -| PaddlePaddle | 10.463 | 18.184 | 33.113 | 63.039 | -| TensorFlow | 9 | 15 | 28 | 59 | -| Caffe | 9.373 | 16.6606 | 31.4797 | 59.719 | - -**Notation** - -All the single-GPU experiments in caffe use `caffe time` to calculate elapsed time, which does not include parameter updating time. However, both PaddlePaddle and TensorFlow experiments contain the parameter updating time. As compared with the total time, this part is relatively little on single machine, we can ignore it. - -In Tensorflow, they implement algorithm searching method instead of using the algorithm searching interface in cuDNN. - -### Multi-GPU: 4 GPUs - -- AlexNet, ms / batch - -| total-BatchSize | 128 * 4 | 256 * 4 | -|------------------|----------| -----------| -| PaddlePaddle | 347 | 622 | -| TensorFlow | 377 | 675 | -| Caffe | 1229 | 2435 | - -For example, if `total-BatchSize = 128 * 4`, the speedup ratio is calculated by - -``` - time_at_1gpu_batch_128 * 4 / time_at_4gpu_total_batch_512 -= (334 * 4)/347 -= 3.85 -``` - - - - -- GoogleNet, ms / batch - -| total-BatchSize | 128 * 4 | 256 * 4 | -|-------------------|--------------| ----------- | -| PaddlePaddle | 1178 | 2367 | -| TensorFlow | 1210 | 2292 | -| Caffe | 2007 | out of memory | - - - - -## RNN -We use lstm network for text classfication to test benchmark. - -### Dataset -- [IMDB](http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl) -- Sequence length is 100. In fact, PaddlePaddle supports training with variable-length sequence, but TensorFlow needs to pad. Thus, we also pad sequence length to 100 in PaddlePaddle in order to compare. -- Dictionary size=30000 -- Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow. - -### Single-GPU - -#### LSTM in Text Classification - -Testing `2 lstm layer + fc` network with different hidden size and batch size. - -- Batch size = 64, ms / batch - -| hidden_size | 256 | 512 | 1280 | -|--------------|-------| -------| --------| -| PaddlePaddle | 83 | 184 | 641 | -| TensorFlow | 175 | 280 | 818 | - -- Batch size = 128, ms / batch - -| hidden_size | 256 | 512 | 1280 | -|--------------|------- | -------| --------| -| PaddlePaddle | 110 | 261 | 1007 | -| TensorFlow | 181 | 361 | 1237 | - - -- Batch size = 256, ms / batch - -| hidden_size | 256 | 512 | 1280 | -|--------------|-------| -------| --------| -| PaddlePaddle | 170 | 414 | 1655 | -| TensorFlow | 238 | 536 | 1905 | - - - -#### Seq2Seq - -The benchmark of sequence-to-sequence network will be added later. - - -### Multi GPU: 4 GPUs - -#### LSTM in Text Classification - -- hidden_size = 256, ms / batch - -| batch_size | 256 | 512 | -|--------------| -------| --------| -| PaddlePaddle | 90 | 118 | -| TensorFlow | 226 | 118 | - - -- hidden_size = 512, ms / batch - -| batch_size | 256 | 512 | -|--------------| -------| --------| -| PaddlePaddle | 189 | 268 | -| TensorFlow | 297 | 383 | - - - - -#### Seq2Seq - -The benchmark of sequence-to-sequence network will be added later. diff --git a/benchmark/fluid/Dockerfile b/benchmark/fluid/Dockerfile index 2e1e0d376899fd664866621263db62258e7c3869..81ea870050fe5db4a60fee40221991e38de6bd2e 100644 --- a/benchmark/fluid/Dockerfile +++ b/benchmark/fluid/Dockerfile @@ -15,9 +15,6 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s RUN pip install -U pip RUN pip install -U kubernetes paddlepaddle -RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python' -RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python' -RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.imikolov.fetch()" | python' RUN pip uninstall -y paddlepaddle && mkdir /workspace ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin diff --git a/benchmark/paddle/image/check_env.sh b/benchmark/fluid/check_env.sh similarity index 100% rename from benchmark/paddle/image/check_env.sh rename to benchmark/fluid/check_env.sh diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py deleted file mode 100644 index 9efc3f0494e4a817a7357f29e684f621bce1921e..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/alexnet.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -height = 227 -width = 227 -num_class = 1000 -batch_size = get_config_arg('batch_size', int, 128) -gp = get_config_arg('layer_num', int, 1) -is_infer = get_config_arg("is_infer", bool, False) -num_samples = get_config_arg('num_samples', int, 2560) - -args = { - 'height': height, - 'width': width, - 'color': True, - 'num_class': num_class, - 'is_infer': is_infer, - 'num_samples': num_samples -} -define_py_data_sources2( - "train.list" if not is_infer else None, - "test.list" if is_infer else None, - module="provider", - obj="process", - args=args) - -settings( - batch_size=batch_size, - learning_rate=0.01 / batch_size, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * batch_size)) - -# conv1 -net = data_layer('data', size=height * width * 3) -net = img_conv_layer( - input=net, - filter_size=11, - num_channels=3, - num_filters=96, - stride=4, - padding=1) -net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) -net = img_pool_layer(input=net, pool_size=3, stride=2) - -# conv2 -net = img_conv_layer( - input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp) -net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) -net = img_pool_layer(input=net, pool_size=3, stride=2) - -# conv3 -net = img_conv_layer( - input=net, filter_size=3, num_filters=384, stride=1, padding=1) -# conv4 -net = img_conv_layer( - input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp) - -# conv5 -net = img_conv_layer( - input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp) -net = img_pool_layer(input=net, pool_size=3, stride=2) - -net = fc_layer( - input=net, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) -net = fc_layer( - input=net, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) -net = fc_layer(input=net, size=1000, act=SoftmaxActivation()) - -if is_infer: - outputs(net) -else: - lab = data_layer('label', num_class) - loss = cross_entropy(input=net, label=lab) - outputs(loss) diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py deleted file mode 100644 index 2a850ccb7f2c75b467554181fc5f4aa8f2b97a09..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/googlenet.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python -from paddle.trainer_config_helpers import * - -height = 224 -width = 224 -num_class = 1000 -batch_size = get_config_arg('batch_size', int, 128) -use_gpu = get_config_arg('use_gpu', bool, True) -is_infer = get_config_arg("is_infer", bool, False) -num_samples = get_config_arg('num_samples', int, 2560) - -args = { - 'height': height, - 'width': width, - 'color': True, - 'num_class': num_class, - 'is_infer': is_infer, - 'num_samples': num_samples -} -define_py_data_sources2( - "train.list" if not is_infer else None, - "test.list" if is_infer else None, - module="provider", - obj="process", - args=args) - -settings( - batch_size=batch_size, - learning_rate=0.01 / batch_size, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * batch_size)) - -conv_projection = conv_projection if use_gpu else img_conv_layer - -def inception2(name, input, channels, \ - filter1, - filter3R, filter3, - filter5R, filter5, - proj): - - conv1 = name + '_1' - conv3r = name + '_3r' - conv3 = name + '_3' - conv5r = name + '_5r' - conv5 = name + '_5' - maxpool = name + '_max' - convproj = name + '_proj' - - cov1 = img_conv_layer( - name=conv1, - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter1, - stride=1, - padding=0) - - cov3r = img_conv_layer( - name=conv3r, - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter3R, - stride=1, - padding=0) - cov3 = img_conv_layer( - name=conv3, - input=cov3r, - filter_size=3, - num_filters=filter3, - stride=1, - padding=1) - - cov5r = img_conv_layer( - name=conv5r, - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter5R, - stride=1, - padding=0) - cov5 = img_conv_layer( - name=conv5, - input=cov5r, - filter_size=5, - num_filters=filter5, - stride=1, - padding=2) - - pool1 = img_pool_layer( - name=maxpool, - input=input, - pool_size=3, - num_channels=channels, - stride=1, - padding=1) - covprj = img_conv_layer( - name=convproj, - input=pool1, - filter_size=1, - num_filters=proj, - stride=1, - padding=0) - - cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj]) - return cat - -def inception(name, input, channels, \ - filter1, - filter3R, filter3, - filter5R, filter5, - proj): - - cov1 = conv_projection( - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter1, - stride=1, - padding=0) - - cov3r = img_conv_layer( - name=name + '_3r', - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter3R, - stride=1, - padding=0) - cov3 = conv_projection( - input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1) - - cov5r = img_conv_layer( - name=name + '_5r', - input=input, - filter_size=1, - num_channels=channels, - num_filters=filter5R, - stride=1, - padding=0) - cov5 = conv_projection( - input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2) - - pool1 = img_pool_layer( - name=name + '_max', - input=input, - pool_size=3, - num_channels=channels, - stride=1, - padding=1) - covprj = conv_projection( - input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0) - - cat = concat_layer( - name=name, - input=[cov1, cov3, cov5, covprj], - bias_attr=True if use_gpu else False, - act=ReluActivation()) - return cat - - -data = data_layer(name="input", size=3 * height * width) - -# stage 1 -conv1 = img_conv_layer( - name="conv1", - input=data, - filter_size=7, - num_channels=3, - num_filters=64, - stride=2, - padding=3) -pool1 = img_pool_layer( - name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2) - -# stage 2 -conv2_1 = img_conv_layer( - name="conv2_1", - input=pool1, - filter_size=1, - num_filters=64, - stride=1, - padding=0) -conv2_2 = img_conv_layer( - name="conv2_2", - input=conv2_1, - filter_size=3, - num_filters=192, - stride=1, - padding=1) -pool2 = img_pool_layer( - name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2) - -# stage 3 -ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) -ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) -pool3 = img_pool_layer( - name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) - -# stage 4 -ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) -ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) -ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) -ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) -ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) -pool4 = img_pool_layer( - name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) - -# stage 5 -ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) -ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) -pool5 = img_pool_layer( - name="pool5", - input=ince5b, - num_channels=1024, - pool_size=7, - stride=7, - pool_type=AvgPooling()) - -# We remove loss1 and loss2 for all system when testing benchmark -# output 1 -# pool_o1 = img_pool_layer(name="pool_o1", input=ince4a, num_channels=512, pool_size=5, stride=3, pool_type=AvgPooling()) -# conv_o1 = img_conv_layer(name="conv_o1", input=pool_o1, filter_size=1, num_filters=128, stride=1, padding=0) -# fc_o1 = fc_layer(name="fc_o1", input=conv_o1, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation()) -# out1 = fc_layer(name="output1", input=fc_o1, size=1000, act=SoftmaxActivation()) -# loss1 = cross_entropy(name='loss1', input=out1, label=lab, coeff=0.3) - -# output 2 -#pool_o2 = img_pool_layer(name="pool_o2", input=ince4d, num_channels=528, pool_size=5, stride=3, pool_type=AvgPooling()) -#conv_o2 = img_conv_layer(name="conv_o2", input=pool_o2, filter_size=1, num_filters=128, stride=1, padding=0) -#fc_o2 = fc_layer(name="fc_o2", input=conv_o2, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation()) -#out2 = fc_layer(name="output2", input=fc_o2, size=1000, act=SoftmaxActivation()) -#loss2 = cross_entropy(name='loss2', input=out2, label=lab, coeff=0.3) - -# output 3 -dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4) -out3 = fc_layer( - name="output3", input=dropout, size=1000, act=SoftmaxActivation()) - -if is_infer: - outputs(out3) -else: - lab = data_layer(name="label", size=num_class) - loss3 = cross_entropy(name='loss3', input=out3, label=lab) - outputs(loss3) diff --git a/benchmark/paddle/image/plotlog.py b/benchmark/paddle/image/plotlog.py deleted file mode 100644 index 8679d4f272d1b7aaf8d5a397f07698a6b70e4fcd..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/plotlog.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import argparse -import matplotlib.pyplot as plt - - -def parse_args(): - parser = argparse.ArgumentParser('Parse Log') - parser.add_argument( - '--file_path', '-f', type=str, help='the path of the log file') - parser.add_argument( - '--sample_rate', - '-s', - type=float, - default=1.0, - help='the rate to take samples from log') - parser.add_argument( - '--log_period', '-p', type=int, default=1, help='the period of log') - - args = parser.parse_args() - return args - - -def parse_file(file_name): - loss = [] - error = [] - with open(file_name) as f: - for i, line in enumerate(f): - line = line.strip() - if not line.startswith('pass'): - continue - line_split = line.split(' ') - if len(line_split) != 5: - continue - - loss_str = line_split[2][:-1] - cur_loss = float(loss_str.split('=')[-1]) - loss.append(cur_loss) - - err_str = line_split[3][:-1] - cur_err = float(err_str.split('=')[-1]) - error.append(cur_err) - - accuracy = [1.0 - err for err in error] - - return loss, accuracy - - -def sample(metric, sample_rate): - interval = int(1.0 / sample_rate) - if interval > len(metric): - return metric[:1] - - num = len(metric) / interval - idx = [interval * i for i in range(num)] - metric_sample = [metric[id] for id in idx] - return metric_sample - - -def plot_metric(metric, - batch_id, - graph_title, - line_style='b-', - line_label='y', - line_num=1): - plt.figure() - plt.title(graph_title) - if line_num == 1: - plt.plot(batch_id, metric, line_style, label=line_label) - else: - for i in range(line_num): - plt.plot(batch_id, metric[i], line_style[i], label=line_label[i]) - plt.xlabel('batch') - plt.ylabel(graph_title) - plt.legend() - plt.savefig(graph_title + '.jpg') - plt.close() - - -def main(): - args = parse_args() - assert args.sample_rate > 0. and args.sample_rate <= 1.0, "The sample rate should in the range (0, 1]." - - loss, accuracy = parse_file(args.file_path) - batch = [args.log_period * i for i in range(len(loss))] - - batch_sample = sample(batch, args.sample_rate) - loss_sample = sample(loss, args.sample_rate) - accuracy_sample = sample(accuracy, args.sample_rate) - - plot_metric(loss_sample, batch_sample, 'loss', line_label='loss') - plot_metric( - accuracy_sample, - batch_sample, - 'accuracy', - line_style='g-', - line_label='accuracy') - - -if __name__ == '__main__': - main() diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py deleted file mode 100644 index 6ad817ccefab3e44a8f962e907ba2110a6ed4a45..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/provider.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import io, os -import random -import numpy as np -from paddle.trainer.PyDataProvider2 import * - - -def initHook(settings, height, width, color, num_class, **kwargs): - settings.height = height - settings.width = width - settings.color = color - settings.num_class = num_class - if settings.color: - settings.data_size = settings.height * settings.width * 3 - else: - settings.data_size = settings.height * settings.width - settings.is_infer = kwargs.get('is_infer', False) - settings.num_samples = kwargs.get('num_samples', 2560) - if settings.is_infer: - settings.slots = [dense_vector(settings.data_size)] - else: - settings.slots = [dense_vector(settings.data_size), integer_value(1)] - - -@provider( - init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_list): - for i in xrange(settings.num_samples): - img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten() - if settings.is_infer: - yield img.astype('float32') - else: - lab = random.randint(0, settings.num_class - 1) - yield img.astype('float32'), int(lab) diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py deleted file mode 100644 index 2846e4763f1cda4602f03af5ec649d57ee6cf0d8..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/resnet.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python -from paddle.trainer_config_helpers import * - -height = 224 -width = 224 -num_class = 1000 -batch_size = get_config_arg('batch_size', int, 64) -layer_num = get_config_arg("layer_num", int, 50) -is_infer = get_config_arg("is_infer", bool, False) -num_samples = get_config_arg('num_samples', int, 2560) - -args = { - 'height': height, - 'width': width, - 'color': True, - 'num_class': num_class, - 'is_infer': is_infer, - 'num_samples': num_samples -} -define_py_data_sources2( - "train.list" if not is_infer else None, - "test.list" if is_infer else None, - module="provider", - obj="process", - args=args) - -settings( - batch_size=batch_size, - learning_rate=0.01 / batch_size, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * batch_size)) - - -#######################Network Configuration ############# -def conv_bn_layer(name, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - active_type=ReluActivation()): - """ - A wrapper for conv layer with batch normalization layers. - Note: - conv layer has no activation. - """ - - tmp = img_conv_layer( - name=name + "_conv", - input=input, - filter_size=filter_size, - num_channels=channels, - num_filters=num_filters, - stride=stride, - padding=padding, - act=LinearActivation(), - bias_attr=False) - return batch_norm_layer( - name=name + "_bn", - input=tmp, - act=active_type, - use_global_stats=is_infer) - - -def bottleneck_block(name, input, num_filters1, num_filters2): - """ - A wrapper for bottlenect building block in ResNet. - Last conv_bn_layer has no activation. - Addto layer has activation of relu. - """ - last_name = conv_bn_layer( - name=name + '_branch2a', - input=input, - filter_size=1, - num_filters=num_filters1, - stride=1, - padding=0) - last_name = conv_bn_layer( - name=name + '_branch2b', - input=last_name, - filter_size=3, - num_filters=num_filters1, - stride=1, - padding=1) - last_name = conv_bn_layer( - name=name + '_branch2c', - input=last_name, - filter_size=1, - num_filters=num_filters2, - stride=1, - padding=0, - active_type=LinearActivation()) - - return addto_layer( - name=name + "_addto", input=[input, last_name], act=ReluActivation()) - - -def mid_projection(name, input, num_filters1, num_filters2, stride=2): - """ - A wrapper for middile projection in ResNet. - projection shortcuts are used for increasing dimensions, - and other shortcuts are identity - branch1: projection shortcuts are used for increasing - dimensions, has no activation. - branch2x: bottleneck building block, shortcuts are identity. - """ - # stride = 2 - branch1 = conv_bn_layer( - name=name + '_branch1', - input=input, - filter_size=1, - num_filters=num_filters2, - stride=stride, - padding=0, - active_type=LinearActivation()) - - last_name = conv_bn_layer( - name=name + '_branch2a', - input=input, - filter_size=1, - num_filters=num_filters1, - stride=stride, - padding=0) - last_name = conv_bn_layer( - name=name + '_branch2b', - input=last_name, - filter_size=3, - num_filters=num_filters1, - stride=1, - padding=1) - - last_name = conv_bn_layer( - name=name + '_branch2c', - input=last_name, - filter_size=1, - num_filters=num_filters2, - stride=1, - padding=0, - active_type=LinearActivation()) - - return addto_layer( - name=name + "_addto", input=[branch1, last_name], act=ReluActivation()) - - -img = data_layer(name='image', size=height * width * 3) - - -def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3): - """ - A wrapper for 50,101,152 layers of ResNet. - res2_num: number of blocks stacked in conv2_x - res3_num: number of blocks stacked in conv3_x - res4_num: number of blocks stacked in conv4_x - res5_num: number of blocks stacked in conv5_x - """ - # For ImageNet - # conv1: 112x112 - tmp = conv_bn_layer( - "conv1", - input=img, - filter_size=7, - channels=3, - num_filters=64, - stride=2, - padding=3) - tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2) - - # conv2_x: 56x56 - tmp = mid_projection( - name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1) - for i in xrange(2, res2_num + 1, 1): - tmp = bottleneck_block( - name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256) - - # conv3_x: 28x28 - tmp = mid_projection( - name="res3_1", input=tmp, num_filters1=128, num_filters2=512) - for i in xrange(2, res3_num + 1, 1): - tmp = bottleneck_block( - name="res3_" + str(i), - input=tmp, - num_filters1=128, - num_filters2=512) - - # conv4_x: 14x14 - tmp = mid_projection( - name="res4_1", input=tmp, num_filters1=256, num_filters2=1024) - for i in xrange(2, res4_num + 1, 1): - tmp = bottleneck_block( - name="res4_" + str(i), - input=tmp, - num_filters1=256, - num_filters2=1024) - - # conv5_x: 7x7 - tmp = mid_projection( - name="res5_1", input=tmp, num_filters1=512, num_filters2=2048) - for i in xrange(2, res5_num + 1, 1): - tmp = bottleneck_block( - name="res5_" + str(i), - input=tmp, - num_filters1=512, - num_filters2=2048) - - tmp = img_pool_layer( - name='avgpool', - input=tmp, - pool_size=7, - stride=1, - pool_type=AvgPooling()) - - return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation()) - - -if layer_num == 50: - resnet = deep_res_net(3, 4, 6, 3) -elif layer_num == 101: - resnet = deep_res_net(3, 4, 23, 3) -elif layer_num == 152: - resnet = deep_res_net(3, 8, 36, 3) -else: - print("Wrong layer number.") - -if is_infer: - outputs(resnet) -else: - lbl = data_layer(name="label", size=num_class) - loss = cross_entropy(name='loss', input=resnet, label=lbl) - outputs(loss) diff --git a/benchmark/paddle/image/run.sh b/benchmark/paddle/image/run.sh deleted file mode 100755 index 5b58a8d773aab795e5439b0f0e5d81bec66b5f56..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/run.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -set -e - -function train() { - cfg=$1 - thread=$2 - bz=$3 - args="batch_size=$3" - prefix=$4 - paddle train --job=time \ - --config=$cfg \ - --use_gpu=True \ - --trainer_count=$thread \ - --log_period=10 \ - --test_period=100 \ - --config_args=$args \ - > logs/$prefix-${thread}gpu-$bz.log 2>&1 -} - -if [ ! -d "train.list" ]; then - echo " " > train.list -fi -if [ ! -d "logs" ]; then - mkdir logs -fi - -#========single-gpu=========# -# alexnet -train alexnet.py 1 64 alexnet -train alexnet.py 1 128 alexnet -train alexnet.py 1 256 alexnet -train alexnet.py 1 512 alexnet - -# googlenet -train googlenet.py 1 64 googlenet -train googlenet.py 1 128 googlenet -train googlenet.py 1 256 googlenet - -# smallnet -train smallnet_mnist_cifar.py 1 64 smallnet -train smallnet_mnist_cifar.py 1 128 smallnet -train smallnet_mnist_cifar.py 1 256 smallnet -train smallnet_mnist_cifar.py 1 512 smallnet - - -############################ -#========multi-gpus=========# -train alexnet.py 4 512 alexnet -train alexnet.py 4 1024 alexnet - -train googlenet.py 4 512 googlenet -train googlenet.py 4 1024 googlenet diff --git a/benchmark/paddle/image/run_mkl_infer.sh b/benchmark/paddle/image/run_mkl_infer.sh deleted file mode 100755 index 0fad5e04cc992a3ec97591d3833957bb7517a8f3..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/run_mkl_infer.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash - -set -e - -function clock_to_seconds() { - hours=`echo $1 | awk -F ':' '{print $1}'` - mins=`echo $1 | awk -F ':' '{print $2}'` - secs=`echo $1 | awk -F ':' '{print $3}'` - echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'` -} - -function infer() { - unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY - topology=$1 - layer_num=$2 - bs=$3 - use_mkldnn=$4 - if [ $4 == "True" ]; then - thread=1 - log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log" - elif [ $4 == "False" ]; then - thread=`nproc` - if [ $thread -gt $bs ]; then - thread=$bs - fi - log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log" - else - echo "Wrong input $4, use True or False." - exit 0 - fi - - models_in="models/${topology}-${layer_num}/pass-00000/" - if [ ! -d $models_in ]; then - echo "Training model ${topology}_${layer_num}" - paddle train --job=train \ - --config="${topology}.py" \ - --use_mkldnn=True \ - --use_gpu=False \ - --trainer_count=1 \ - --num_passes=1 \ - --save_dir="models/${topology}-${layer_num}" \ - --config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \ - > /dev/null 2>&1 - echo "Done" - fi - log_period=$((256 / bs)) - paddle train --job=test \ - --config="${topology}.py" \ - --use_mkldnn=$use_mkldnn \ - --use_gpu=False \ - --trainer_count=$thread \ - --log_period=$log_period \ - --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \ - --init_model_path=$models_in \ - 2>&1 | tee ${log} - - # calculate the last 5 logs period time of 1280 samples, - # the time before are burning time. - start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs` - end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs` - start_sec=`clock_to_seconds $start` - end_sec=`clock_to_seconds $end` - fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'` - echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} - echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} -} - -if [ ! -f "train.list" ]; then - echo " " > train.list -fi -if [ ! -f "test.list" ]; then - echo " " > test.list -fi -if [ ! -d "logs" ]; then - mkdir logs -fi -if [ ! -d "models" ]; then - mkdir -p models -fi - -# inference benchmark -for use_mkldnn in True False; do - for batchsize in 1 2 4 8 16; do - infer vgg 19 $batchsize $use_mkldnn - infer resnet 50 $batchsize $use_mkldnn - infer googlenet v1 $batchsize $use_mkldnn - infer alexnet 2 $batchsize $use_mkldnn - done -done diff --git a/benchmark/paddle/image/run_mkl_train.sh b/benchmark/paddle/image/run_mkl_train.sh deleted file mode 100755 index 1583bf134a276a08aa2f8e84dc63adbb205a83d6..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/run_mkl_train.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -set -e - -function train() { - unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY - topology=$1 - layer_num=$2 - bs=$3 - use_mkldnn=$4 - if [ $4 == "True" ]; then - thread=1 - log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log" - elif [ $4 == "False" ]; then - thread=`nproc` - # each trainer_count use only 1 core to avoid conflict - log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log" - else - echo "Wrong input $4, use True or False." - exit 0 - fi - args="batch_size=${bs},layer_num=${layer_num}" - config="${topology}.py" - paddle train --job=time \ - --config=$config \ - --use_mkldnn=$use_mkldnn \ - --use_gpu=False \ - --trainer_count=$thread \ - --log_period=10 \ - --test_period=100 \ - --config_args=$args \ - 2>&1 | tee ${log} - - avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'` - fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'` - echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} -} - -if [ ! -f "train.list" ]; then - echo " " > train.list -fi -if [ ! -d "logs" ]; then - mkdir logs -fi - -# training benchmark -for use_mkldnn in True False; do - for batchsize in 64 128 256; do - train vgg 19 $batchsize $use_mkldnn - train resnet 50 $batchsize $use_mkldnn - train googlenet v1 $batchsize $use_mkldnn - train alexnet 2 $batchsize $use_mkldnn - done -done diff --git a/benchmark/paddle/image/run_openblas_infer.sh b/benchmark/paddle/image/run_openblas_infer.sh deleted file mode 100755 index 987381cabc2e793886099212660723c122b73bb0..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/run_openblas_infer.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -set -e - -function clock_to_seconds() { - hours=`echo $1 | awk -F ':' '{print $1}'` - mins=`echo $1 | awk -F ':' '{print $2}'` - secs=`echo $1 | awk -F ':' '{print $3}'` - echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'` -} - -function infer() { - export OPENBLAS_MAIN_FREE=1 - topology=$1 - layer_num=$2 - bs=$3 - trainers=`nproc` - if [ $trainers -gt $bs ]; then - trainers=$bs - fi - log="logs/infer-${topology}-${layer_num}-${trainers}openblas-${bs}.log" - threads=$((`nproc` / trainers)) - if [ $threads -eq 0 ]; then - threads=1 - fi - export OPENBLAS_NUM_THREADS=$threads - - models_in="models/${topology}-${layer_num}/pass-00000/" - if [ ! -d $models_in ]; then - echo "./run_mkl_infer.sh to save the model first" - exit 0 - fi - log_period=$((32 / bs)) - paddle train --job=test \ - --config="${topology}.py" \ - --use_mkldnn=False \ - --use_gpu=False \ - --trainer_count=$trainers \ - --log_period=$log_period \ - --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \ - --init_model_path=$models_in \ - 2>&1 | tee ${log} - - # calculate the last 5 logs period time of 160(=32*5) samples, - # the time before are burning time. - start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs` - end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs` - start_sec=`clock_to_seconds $start` - end_sec=`clock_to_seconds $end` - fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'` - echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} - echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} -} - -if [ ! -f "train.list" ]; then - echo " " > train.list -fi -if [ ! -f "test.list" ]; then - echo " " > test.list -fi -if [ ! -d "logs" ]; then - mkdir logs -fi - -# inference benchmark -for batchsize in 1 2 4 8 16; do - infer vgg 19 $batchsize - infer resnet 50 $batchsize - infer googlenet v1 $batchsize - infer alexnet 2 $batchsize -done diff --git a/benchmark/paddle/image/run_openblas_train.sh b/benchmark/paddle/image/run_openblas_train.sh deleted file mode 100755 index cc64e1d09da02087b1737190a0b75dc7758600a6..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/run_openblas_train.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -e - -function train() { - export OPENBLAS_NUM_THREADS=1 - topology=$1 - layer_num=$2 - bs=$3 - thread=`nproc` - # each trainer_count use only 1 core to avoid conflict - log="logs/train-${topology}-${layer_num}-${thread}openblas-${bs}.log" - args="batch_size=${bs},layer_num=${layer_num}" - config="${topology}.py" - paddle train --job=time \ - --config=$config \ - --use_mkldnn=False \ - --use_gpu=False \ - --trainer_count=$thread \ - --log_period=3 \ - --test_period=30 \ - --config_args=$args \ - 2>&1 | tee ${log} - - avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'` - fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'` - echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} -} - -if [ ! -f "train.list" ]; then - echo " " > train.list -fi -if [ ! -d "logs" ]; then - mkdir logs -fi - -# training benchmark -for batchsize in 64 128 256; do - train vgg 19 $batchsize - train resnet 50 $batchsize - train googlenet v1 $batchsize - train alexnet 2 $batchsize -done diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py deleted file mode 100644 index 58879c454f37991405d83bbb593bb5d1e977ff53..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/smallnet_mnist_cifar.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -from paddle.trainer_config_helpers import * - -height = 32 -width = 32 -num_class = 10 - -batch_size = get_config_arg('batch_size', int, 128) - -args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} -define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) - -settings( - batch_size=batch_size, - learning_rate=0.01 / batch_size, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * batch_size)) - -# conv1 -net = data_layer('data', size=height * width * 3) -net = img_conv_layer( - input=net, - filter_size=5, - num_channels=3, - num_filters=32, - stride=1, - padding=2) -net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1) - -# conv2 -net = img_conv_layer( - input=net, filter_size=5, num_filters=32, stride=1, padding=2) -net = img_pool_layer( - input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling()) - -# conv3 -net = img_conv_layer( - input=net, filter_size=3, num_filters=64, stride=1, padding=1) -net = img_pool_layer( - input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling()) - -net = fc_layer(input=net, size=64, act=ReluActivation()) -net = fc_layer(input=net, size=10, act=SoftmaxActivation()) - -lab = data_layer('label', num_class) -loss = classification_cost(input=net, label=lab) -outputs(loss) diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py deleted file mode 100644 index ca0a6798fb8c35b68cf84d263855955eb93ba0b0..0000000000000000000000000000000000000000 --- a/benchmark/paddle/image/vgg.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -from paddle.trainer_config_helpers import * - -height = 224 -width = 224 -num_class = 1000 -batch_size = get_config_arg('batch_size', int, 64) -layer_num = get_config_arg('layer_num', int, 19) -is_infer = get_config_arg("is_infer", bool, False) -num_samples = get_config_arg('num_samples', int, 2560) - -args = { - 'height': height, - 'width': width, - 'color': True, - 'num_class': num_class, - 'is_infer': is_infer, - 'num_samples': num_samples -} -define_py_data_sources2( - "train.list" if not is_infer else None, - "test.list" if is_infer else None, - module="provider", - obj="process", - args=args) - -settings( - batch_size=batch_size, - learning_rate=0.001 / batch_size, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * batch_size)) - -img = data_layer(name='image', size=height * width * 3) - - -def vgg_network(vgg_num=3): - tmp = img_conv_group( - input=img, - num_channels=3, - conv_padding=1, - conv_num_filter=[64, 64], - conv_filter_size=3, - conv_act=ReluActivation(), - pool_size=2, - pool_stride=2, - pool_type=MaxPooling()) - - tmp = img_conv_group( - input=tmp, - conv_num_filter=[128, 128], - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - - channels = [] - for i in range(vgg_num): - channels.append(256) - tmp = img_conv_group( - input=tmp, - conv_num_filter=channels, - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - channels = [] - for i in range(vgg_num): - channels.append(512) - tmp = img_conv_group( - input=tmp, - conv_num_filter=channels, - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - tmp = img_conv_group( - input=tmp, - conv_num_filter=channels, - conv_padding=1, - conv_filter_size=3, - conv_act=ReluActivation(), - pool_stride=2, - pool_type=MaxPooling(), - pool_size=2) - - tmp = fc_layer( - input=tmp, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) - - tmp = fc_layer( - input=tmp, - size=4096, - act=ReluActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) - - return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation()) - - -if layer_num == 16: - vgg = vgg_network(3) -elif layer_num == 19: - vgg = vgg_network(4) -else: - print("Wrong layer number.") - -if is_infer: - outputs(vgg) -else: - lab = data_layer('label', num_class) - loss = cross_entropy(input=vgg, label=lab) - outputs(loss) diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py deleted file mode 100755 index 2a67f9b0cf52484d9d44fe9db0b1e57cdd20fd43..0000000000000000000000000000000000000000 --- a/benchmark/paddle/rnn/imdb.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import six.moves.cPickle as pickle -import gzip -import os -import numpy - - -def get_dataset_file(dataset, default_dataset, origin): - data_dir, data_file = os.path.split(dataset) - if (not os.path.isfile(dataset)) and data_file == default_dataset: - from six.moves import urllib - print('Downloading data from %s' % origin) - urllib.request.urlretrieve(origin, dataset) - - return dataset - - -def create_data(path="imdb.pkl"): - - if (not os.path.isfile('imdb.train.pkl')): - path = get_dataset_file( - path, "imdb.pkl", - "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl") - - if path.endswith(".gz"): - f = gzip.open(path, 'rb') - else: - f = open(path, 'rb') - - train_set = pickle.load(f) - test_set = pickle.load(f) - f.close() - - pickle.dump(train_set, open('imdb.train.pkl', 'wb')) - pickle.dump(test_set, open('imdb.test.pkl', 'wb')) - - if (not os.path.isfile('train.list')): - file('train.list', 'w').write('imdb.train.pkl\n') - - -def main(): - create_data('imdb.pkl') - - -if __name__ == "__main__": - main() diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py deleted file mode 100644 index 23cc0c44a98d0ae7f586d1a376a603198f2c6144..0000000000000000000000000000000000000000 --- a/benchmark/paddle/rnn/provider.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import io, os -import random -import numpy as np -import six.moves.cPickle as pickle -from paddle.trainer.PyDataProvider2 import * - - -def remove_unk(x, n_words): - return [[1 if w >= n_words else w for w in sen] for sen in x] - - -# ============================================================== -# tensorflow uses fixed length, but PaddlePaddle can process -# variable-length. Padding is used in benchmark in order to -# compare with other platform. -# ============================================================== -def pad_sequences(sequences, - maxlen=None, - dtype='int32', - padding='post', - truncating='post', - value=0.): - lengths = [len(s) for s in sequences] - - nb_samples = len(sequences) - if maxlen is None: - maxlen = np.max(lengths) - - x = (np.ones((nb_samples, maxlen)) * value).astype(dtype) - for idx, s in enumerate(sequences): - if len(s) == 0: - continue # empty list was found - if truncating == 'pre': - trunc = s[-maxlen:] - elif truncating == 'post': - trunc = s[:maxlen] - else: - raise ValueError("Truncating type '%s' not understood" % padding) - - if padding == 'post': - x[idx, :len(trunc)] = trunc - elif padding == 'pre': - x[idx, -len(trunc):] = trunc - else: - raise ValueError("Padding type '%s' not understood" % padding) - return x - - -def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs): - settings.vocab_size = vocab_size - settings.pad_seq = pad_seq - settings.maxlen = maxlen - settings.input_types = [ - integer_value_sequence(vocab_size), integer_value(2) - ] - - -@provider( - init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file): - f = open(file, 'rb') - train_set = pickle.load(f) - f.close() - x, y = train_set - - # remove unk, namely remove the words out of dictionary - x = remove_unk(x, settings.vocab_size) - if settings.pad_seq: - x = pad_sequences(x, maxlen=settings.maxlen, value=0.) - - for i in range(len(y)): - yield map(int, x[i]), int(y[i]) diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py deleted file mode 100755 index 83eb3e565473f7e7e91cddeaa3cd2aafb7e3df2c..0000000000000000000000000000000000000000 --- a/benchmark/paddle/rnn/rnn.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -from paddle.trainer_config_helpers import * -import imdb - -num_class = 2 -vocab_size = 30000 -fixedlen = 100 -batch_size = get_config_arg('batch_size', int, 128) -lstm_num = get_config_arg('lstm_num', int, 1) -hidden_size = get_config_arg('hidden_size', int, 128) -# whether to pad sequence into fixed length -pad_seq = get_config_arg('pad_seq', bool, True) -imdb.create_data('imdb.pkl') - -args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen} -define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) - -settings( - batch_size=batch_size, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) - -net = data_layer('data', size=vocab_size) -net = embedding_layer(input=net, size=128) - -for i in xrange(lstm_num): - net = simple_lstm(input=net, size=hidden_size) - -net = last_seq(input=net) -net = fc_layer(input=net, size=2, act=SoftmaxActivation()) - -lab = data_layer('label', num_class) -loss = classification_cost(input=net, label=lab) -outputs(loss) diff --git a/benchmark/paddle/rnn/run.sh b/benchmark/paddle/rnn/run.sh deleted file mode 100755 index f99a562b3f88a98560f4bf7aee98ceee9daefe67..0000000000000000000000000000000000000000 --- a/benchmark/paddle/rnn/run.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -set -e - -function train() { - cfg=$1 - thread=$2 - args="lstm_num=${3},seq_pad=${4},hidden_size=${5},batch_size=${6}" - paddle train --job=time \ - --config=$cfg \ - --use_gpu=1 \ - --trainer_count=$thread \ - --log_period=10 \ - --test_period=100 \ - --num_passes=1 \ - --feed_data=1 \ - --config_args=$args \ - >logs/rnn-pad${4}-${thread}gpu-lstm${3}-batch${6}-hid${5}.log 2>&1 -} - -if [ ! -d "logs" ]; then - mkdir logs -fi - -## padding, single gpu -#-----config--gpu--lstm_num--padding--hidden_size--batch_size -## lstm_num=2, batch_size=64 -train rnn.py 1 2 1 256 64 -train rnn.py 1 2 1 512 64 -train rnn.py 1 2 1 1280 64 - -## lstm_num=2, batch_size=128 -train rnn.py 1 2 1 256 128 -train rnn.py 1 2 1 512 128 -train rnn.py 1 2 1 1280 128 - -## lstm_num=4, batch_size=256 -train rnn.py 1 2 1 256 256 -train rnn.py 1 2 1 512 256 -train rnn.py 1 2 1 1280 256 - - -#==================multi gpus=====================# -# hidden_size=256, lstm_num=2, different batch size -train rnn.py 4 2 1 256 128 -train rnn.py 4 2 1 256 256 -train rnn.py 4 2 1 256 512 - -# hidden_size=512, lstm_num=4, different batch size -train rnn.py 4 2 1 512 128 -train rnn.py 4 2 1 512 256 -train rnn.py 4 2 1 512 512 diff --git a/benchmark/tensorflow/machine_translation.py b/benchmark/tensorflow/machine_translation.py index 8f77dce98353af53803246be8dc61063836b7867..7837669edc7a206c03e5b9fa2989bf45b35f0605 100644 --- a/benchmark/tensorflow/machine_translation.py +++ b/benchmark/tensorflow/machine_translation.py @@ -35,8 +35,6 @@ import os import argparse import time -import paddle.v2 as paddle - parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--embedding_dim", diff --git a/benchmark/tensorflow/mnist.py b/benchmark/tensorflow/mnist.py index 7140eed6eaff49b5c65f9ccb2e38f113a4cdbdbf..03d533fecfededddd3956ba83ea600456782cfc9 100644 --- a/benchmark/tensorflow/mnist.py +++ b/benchmark/tensorflow/mnist.py @@ -21,7 +21,6 @@ import time import numpy as np import tensorflow as tf -import paddle.v2 as paddle DTYPE = tf.float32 diff --git a/benchmark/tensorflow/resnet.py b/benchmark/tensorflow/resnet.py index c432fa8d59571e128b9ff9e3ffa1949b792ef3a4..fdb044195766b847e16a0cc33424a999c1d9166e 100644 --- a/benchmark/tensorflow/resnet.py +++ b/benchmark/tensorflow/resnet.py @@ -27,7 +27,6 @@ import argparse import time import numpy as np -import paddle.v2 as paddle import tensorflow as tf DTYPE = tf.float32 diff --git a/benchmark/tensorflow/stacked_dynamic_lstm.py b/benchmark/tensorflow/stacked_dynamic_lstm.py index 5285033005044d907d0b7e91eb66ee7281c4f27a..1f532dc2fa082ea0f6b1da560e1a57b96d2ef1bb 100644 --- a/benchmark/tensorflow/stacked_dynamic_lstm.py +++ b/benchmark/tensorflow/stacked_dynamic_lstm.py @@ -21,8 +21,6 @@ import argparse import time import tensorflow as tf -import paddle.v2 as paddle - def parse_args(): parser = argparse.ArgumentParser("LSTM model benchmark.") diff --git a/benchmark/tensorflow/vgg.py b/benchmark/tensorflow/vgg.py index fba5ec71a46b3ac8b2e1244424c39fd5192e5458..d32c835bd7a7dafaafe0970fb6b422db3c866370 100644 --- a/benchmark/tensorflow/vgg.py +++ b/benchmark/tensorflow/vgg.py @@ -13,7 +13,6 @@ # limitations under the License. """VGG16 benchmark in TensorFlow""" import tensorflow as tf -import paddle.v2 as paddle import numpy as np import argparse import time diff --git a/cmake/configure.cmake b/cmake/configure.cmake index b0f54bf49aafb65f1a92fa95877de2cc61fc67d3..93d74bb0a8f726ad31685cbfc7831b5441cd5108 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -20,31 +20,10 @@ if(WITH_DSO) add_definitions(-DPADDLE_USE_DSO) endif(WITH_DSO) -if(WITH_DOUBLE) - add_definitions(-DPADDLE_TYPE_DOUBLE) -endif(WITH_DOUBLE) - -if(WITH_ARM_FP16) - add_definitions(-DPADDLE_ARM_FP16) - add_definitions("-march=armv8.2-a+fp16+simd") -endif(WITH_ARM_FP16) - if(WITH_TESTING) add_definitions(-DPADDLE_WITH_TESTING) endif(WITH_TESTING) -if(NOT WITH_TIMER) - add_definitions(-DPADDLE_DISABLE_TIMER) -endif(NOT WITH_TIMER) - -if(USE_EIGEN_FOR_BLAS) - add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) -endif(USE_EIGEN_FOR_BLAS) - -if(EIGEN_USE_THREADS) - add_definitions(-DEIGEN_USE_THREADS) -endif(EIGEN_USE_THREADS) - if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) @@ -78,10 +57,6 @@ if(WIN32) endif(NOT MSVC) endif(WIN32) -if(NOT WITH_GOLANG) - add_definitions(-DPADDLE_WITHOUT_GOLANG) -endif(NOT WITH_GOLANG) - if(WITH_PSLIB) add_definitions(-DPADDLE_WITH_PSLIB) endif() @@ -171,55 +146,6 @@ if(WITH_DISTRIBUTE) add_definitions(-DPADDLE_WITH_DISTRIBUTE) endif() -if(WITH_GOLANG) - # we need to symlink Paddle directory into GOPATH. If we - # don't do it and we have code that depends on Paddle, go - # get ./... will download a new Paddle repo from Github, - # without the changes in our current Paddle repo that we - # want to build. - set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") - file(MAKE_DIRECTORY ${GOPATH}) - set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") - file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}") - set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go") - - add_custom_target(go_path) - add_custom_command(TARGET go_path - # Symlink Paddle directory into GOPATH - COMMAND mkdir -p ${PADDLE_IN_GOPATH} - COMMAND rm -rf ${PADDLE_IN_GOPATH} - COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} - # Automatically get all dependencies specified in the source code - # We can't run `go get -d ./...` for every target, because - # multiple `go get` can not run concurrently, but make need to be - # able to run with multiple jobs. - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) - - if (GLIDE_INSTALL) - if(EXISTS $ENV{GOPATH}/bin/glide) - set(GLIDE "$ENV{GOPATH}/bin/glide") - else() - message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") - endif() - - # this command will only run when the file it depends is missing - # or has changed, or the output is missing. - add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide - COMMAND env GOPATH=${GOPATH} ${GLIDE} install - COMMAND touch ${CMAKE_BINARY_DIR}/glide - DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock - WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" - ) - - # depends on the custom command which outputs - # ${CMAKE_BINARY_DIR}/glide, the custom command does not need to - # run every time this target is built. - add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path) - endif() - -endif(WITH_GOLANG) - if(WITH_GRPC) add_definitions(-DPADDLE_WITH_GRPC) endif(WITH_GRPC) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index ef4192ecc98ea6de0c81c1f33320528d547b818a..735846db1db04e3884d72ec62d911d9a0efec147 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -168,10 +168,7 @@ elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x endif() include_directories(${CUDA_INCLUDE_DIRS}) -list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) if(NOT WITH_DSO) - # TODO(panyx0718): CUPTI only allows DSO? - list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY}) if(WIN32) set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) endif(WIN32) diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake index 06fc6061bc98eec8c4c71860333f7d3456952aeb..77f4b34537577c7c5782675e7af19c73bc9f8e32 100644 --- a/cmake/external/anakin.cmake +++ b/cmake/external/anakin.cmake @@ -74,5 +74,3 @@ add_dependencies(anakin_shared extern_anakin) add_library(anakin_saber SHARED IMPORTED GLOBAL) set_property(TARGET anakin_saber PROPERTY IMPORTED_LOCATION ${ANAKIN_SABER_LIB}) add_dependencies(anakin_saber extern_anakin) - -list(APPEND external_project_dependencies anakin_shared anakin_saber) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake deleted file mode 100644 index 85cce80b70a1fcf57015ac7a264e4950616b2717..0000000000000000000000000000000000000000 --- a/cmake/external/any.cmake +++ /dev/null @@ -1,31 +0,0 @@ -INCLUDE(ExternalProject) - -SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) - -INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any) - -ExternalProject_Add( - extern_lib_any - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" - GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d" - PREFIX ${ANY_SOURCE_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) - -if (${CMAKE_VERSION} VERSION_LESS "3.3.0") - set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c) - file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") - add_library(lib_any STATIC ${dummyfile}) -else() - add_library(lib_any INTERFACE) -endif() - -add_dependencies(lib_any extern_lib_any) - -add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) -LIST(APPEND external_project_dependencies lib_any) diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake index 12412a51a0fd1aaa9702bd4547fb935d94012ada..fc204dc9193bb28b654936048dd61a9b461abb2f 100644 --- a/cmake/external/boost.cmake +++ b/cmake/external/boost.cmake @@ -57,5 +57,4 @@ else() endif() add_dependencies(boost ${BOOST_PROJECT}) -list(APPEND external_project_dependencies boost) set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR}) diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index 6b50cff7a66a33d9413627bfbc663cca06ba86f3..989d1dbd4cf593e779b94f7bb5eda613f000859c 100644 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -69,5 +69,3 @@ SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES}) ADD_DEPENDENCIES(brpc extern_brpc) add_definitions(-DBRPC_WITH_GLOG) - -LIST(APPEND external_project_dependencies brpc) diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake index f06728de91e4509be661e56baef641d591928b66..41ad8207743201fbddd1d678fc5122afe68207ae 100644 --- a/cmake/external/cub.cmake +++ b/cmake/external/cub.cmake @@ -31,5 +31,3 @@ else() endif() add_dependencies(cub extern_cub) - -LIST(APPEND external_project_dependencies cub) diff --git a/cmake/external/dlpack.cmake b/cmake/external/dlpack.cmake index 4587475d7902a134eecd54bf8241fb96d175d0ba..63dd16b28e40a0c2d5310bec011c721285049952 100644 --- a/cmake/external/dlpack.cmake +++ b/cmake/external/dlpack.cmake @@ -27,5 +27,3 @@ else() endif() add_dependencies(dlpack extern_dlpack) - -LIST(APPEND external_project_dependencies dlpack) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 6aef97f21244efd09e22781f703553a19a9e1860..72441160f89d2c188d35fc6b08b5f0b6d746a1ad 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -52,5 +52,3 @@ else() endif() add_dependencies(eigen3 extern_eigen3) - -LIST(APPEND external_project_dependencies eigen3) diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index f3ca74faea3629ddce053c49ef1e629f230fdc49..911920ed6212b87aa25ba9a1faf7696fbcb22587 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -61,8 +61,6 @@ ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) ADD_DEPENDENCIES(gflags extern_gflags) -LIST(APPEND external_project_dependencies gflags) - # On Windows (including MinGW), the Shlwapi library is used by gflags if available. if (WIN32) include(CheckIncludeFileCXX) diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index d3a4d69d3a05515fdf72074083470e19b4ec255c..7fa17ce6b7b106c47c486729d0136748c73176a7 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -72,5 +72,3 @@ ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) ADD_DEPENDENCIES(glog extern_glog gflags) LINK_LIBRARIES(glog gflags) - -LIST(APPEND external_project_dependencies glog) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 9be625b620287cd4c644ae6908000fd5eec5d5c7..e459526583bd5ee3c89807657f3c30376e57d971 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -79,5 +79,4 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC)) SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) ADD_DEPENDENCIES(gtest_main extern_gtest) - LIST(APPEND external_project_dependencies gtest gtest_main) ENDIF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC)) diff --git a/cmake/external/leveldb.cmake b/cmake/external/leveldb.cmake index 0df61b01ab64c8b751bdc3893dd5294ad39ab928..ac0febd076e659927a6a882ff487c61ac130437a 100644 --- a/cmake/external/leveldb.cmake +++ b/cmake/external/leveldb.cmake @@ -39,6 +39,3 @@ ADD_DEPENDENCIES(extern_leveldb snappy) ADD_LIBRARY(leveldb STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET leveldb PROPERTY IMPORTED_LOCATION ${LEVELDB_LIBRARIES}) ADD_DEPENDENCIES(leveldb extern_leveldb) - -LIST(APPEND external_project_dependencies leveldb) - diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake index 27cff8cfb6315c9b4fa5677ad9062bee73a0e5d8..b944f2945b7874ca76bf1a19e0a363f564851a62 100644 --- a/cmake/external/libmct.cmake +++ b/cmake/external/libmct.cmake @@ -72,7 +72,4 @@ else() add_library(libmct INTERFACE) endif() -#ADD_LIBRARY(libmct SHARED IMPORTED GLOBAL) ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT}) -LIST(APPEND external_project_dependencies libmct) - diff --git a/cmake/external/libxsmm.cmake b/cmake/external/libxsmm.cmake index 39f49d210a20d49a06c120361ecf0a5d07d1af28..69cdba7c5921f14a87172d95791332e364045b26 100644 --- a/cmake/external/libxsmm.cmake +++ b/cmake/external/libxsmm.cmake @@ -53,5 +53,3 @@ MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}") include_directories(${LIBXSMM_INCLUDE_DIR}) ADD_DEFINITIONS(-DPADDLE_WITH_LIBXSMM) ADD_DEPENDENCIES(libxsmm extern_libxsmm) -LIST(APPEND external_project_dependencies libxsmm) - diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 92fe76d05c7507c295b784bc37870abfc31a0a29..94a266c50114a94d125467d55a6367a6999e3298 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -89,7 +89,6 @@ SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT}) MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_MKLDNN) -LIST(APPEND external_project_dependencies shared_mkldnn) # generate a static dummy target to track mkldnn dependencies # for cc_library(xxx SRCS xxx.c DEPS mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 2caff27357687018f29c1efc55b7b82c9dc3ccf6..ae2679db4aed7a77ad407f881c4482fd3914ac27 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -39,8 +39,10 @@ IF(WIN32) SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib) SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll) SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll) -ELSE() - SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE) +ELSE() + #TODO(intel-huying): + # Now enable Erf function in mklml library temporarily, it will be updated as offical version later. + SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE) SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) @@ -73,4 +75,3 @@ INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) -LIST(APPEND external_project_dependencies mklml) diff --git a/cmake/external/ngraph.cmake b/cmake/external/ngraph.cmake index 14af98b2d74d4aa955aac27727e05567788a84c9..e7fb69dbbc872c813b2eba16a5b1098eebfeedd8 100644 --- a/cmake/external/ngraph.cmake +++ b/cmake/external/ngraph.cmake @@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs) INCLUDE(ExternalProject) SET(NGRAPH_PROJECT "extern_ngraph") -SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab") +SET(NGRAPH_GIT_TAG "a444f7a959b7d87f2c117c9b57a4c387759e481e") SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph) SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph) SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include) @@ -69,7 +69,7 @@ ExternalProject_Add( CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR} - CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib + CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR} CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib ) @@ -77,4 +77,3 @@ add_dependencies(ngraph ${NGRAPH_PROJECT}) target_compile_definitions(ngraph INTERFACE -DPADDLE_WITH_NGRAPH) target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR}) target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB}) -LIST(APPEND external_project_dependencies ngraph) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index b347a592929836a473ac764c0af1153b07d54258..d8a4a0be6f5aaa3a1a4977bbc68348743f2fa742 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -11,11 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -IF(USE_EIGEN_FOR_BLAS) - return() -ENDIF(USE_EIGEN_FOR_BLAS) - INCLUDE(cblas) IF(NOT ${CBLAS_FOUND}) @@ -91,7 +86,6 @@ ENDIF() IF(NOT ${CBLAS_FOUND}) ADD_DEPENDENCIES(cblas extern_openblas) - LIST(APPEND external_project_dependencies cblas) ELSE() IF("${CBLAS_PROVIDER}" STREQUAL "MKLML") ADD_DEPENDENCIES(cblas mklml) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index e05b7694ddf1e1652b00f156cde1a2d433c9fc46..bc7fe5454f5883108e43b4ca47920995dc13a1ff 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -129,7 +129,6 @@ macro(PROMPT_PROTOBUF_LIB) ADD_DEPENDENCIES(protoc ${dep}) ENDFOREACH() - LIST(APPEND external_project_dependencies protobuf) RETURN() endmacro() macro(SET_PROTOBUF_VERSION) @@ -231,7 +230,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ) ENDFUNCTION() -SET(PROTOBUF_VERSION 3.1) +SET(PROTOBUF_VERSION 3.1.0) IF(NOT PROTOBUF_FOUND) build_protobuf(extern_protobuf FALSE) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index b4ea268e5a48e29d00b0ec8b957b61a42553ec7e..0287e5cf2a835ed65c5fc26ff69d2653d5db217e 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -70,4 +70,3 @@ ExternalProject_Add( ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) -LIST(APPEND external_project_dependencies pslib) diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake index 8b43f2ef5c999fc351543ba958c7cc4b0856625d..22c8c1b463764b6e107c5f3da25d51b36c2ce59f 100644 --- a/cmake/external/pslib_brpc.cmake +++ b/cmake/external/pslib_brpc.cmake @@ -70,4 +70,3 @@ ExternalProject_Add( ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT}) -LIST(APPEND external_project_dependencies pslib_brpc) diff --git a/cmake/external/threadpool.cmake b/cmake/external/threadpool.cmake index 0159815fed81bdff6de3e561af569e9edc75f947..1f56bc7ab056ef0dd95d603ebe3461ef044b2a79 100644 --- a/cmake/external/threadpool.cmake +++ b/cmake/external/threadpool.cmake @@ -26,5 +26,3 @@ else() endif() add_dependencies(simple_threadpool extern_threadpool) - -LIST(APPEND external_project_dependencies simple_threadpool) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 7a25aaf15f2c7f46d99394d82d69bc24e4f5cb2c..6f2af8670f25c00ac0970fe4ae2b0c5b03aa0d9e 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -83,5 +83,3 @@ INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include wa ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) ADD_DEPENDENCIES(warpctc extern_warpctc) - -LIST(APPEND external_project_dependencies warpctc) diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake index 384c2f9328296ce6a8a6293be6cc47e5063dd3c4..1d61154c0d45dea795902d6544deb796693db263 100644 --- a/cmake/external/xbyak.cmake +++ b/cmake/external/xbyak.cmake @@ -55,4 +55,3 @@ else() endif() add_dependencies(xbyak ${XBYAK_PROJECT}) -list(APPEND external_project_dependencies xbyak) diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake index a0f300c2e8bab9e7402f869eed1b4c2d1c579aab..23b1e02108642df561948a6faa3152effb7ca932 100644 --- a/cmake/external/xxhash.cmake +++ b/cmake/external/xxhash.cmake @@ -71,5 +71,3 @@ add_library(xxhash STATIC IMPORTED GLOBAL) set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES}) include_directories(${XXHASH_INCLUDE_DIR}) add_dependencies(xxhash extern_xxhash) - -LIST(APPEND external_project_dependencies xxhash) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 6c8d79c25e6a2655711fe4450e65600c9a584015..5569fefe992d10ad4820e51e677f40271d0214e7 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -57,5 +57,3 @@ ENDIF(WIN32) ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) ADD_DEPENDENCIES(zlib extern_zlib) - -LIST(APPEND external_project_dependencies zlib) diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 4276bc5b08cd88a52bb5782bca87fc37deabd830..c3a748db502037f926dc241e4c3bc26a83ad3468 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -11,8 +11,6 @@ include_directories("/opt/rocm/rocrand/include") include_directories("/opt/rocm/rccl/include") include_directories("/opt/rocm/thrust") -list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc") - set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++11" ) if(WITH_DSO) @@ -31,22 +29,12 @@ if(WITH_GRPC) set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_GRPC") endif(WITH_GRPC) -if(NOT WITH_GOLANG) - set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITHOUT_GOLANG") -endif(NOT WITH_GOLANG) - if(WITH_MKLDNN) set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_MKLDNN") endif(WITH_MKLDNN) set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DANY_IMPL_ANY_CAST_MOVEABLE") -if(NOT WITH_RDMA) - set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_DISABLE_RDMA") -endif(NOT WITH_RDMA) - - - if(CMAKE_BUILD_TYPE STREQUAL "Debug") list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") diff --git a/cmake/operators.cmake b/cmake/operators.cmake index c2d04828564e69d7ac965881057f185194aa0475..11a5b1b4554e7899c3ee7092a9295234743750d7 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -153,7 +153,11 @@ function(op_library TARGET) # pybind USE_OP_DEVICE_KERNEL for CUDNN list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len) if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0) + if(${TARGET} STREQUAL "activation") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n") + else() file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n") + endif() endif() # pybind USE_OP_DEVICE_KERNEL for MIOPEN @@ -168,6 +172,9 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n") elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n") + else() file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n") endif() diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake deleted file mode 100644 index b698f3bdc3ff586a72badee3e0109e29285b457f..0000000000000000000000000000000000000000 --- a/cmake/rdma.cmake +++ /dev/null @@ -1,82 +0,0 @@ -# user should download rdma first from subversion repository - -# execute following instruction to download svn mannally -# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/ -# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/ -# we use static output in svn repositories to avoid implict bugs from not standard runtime env. - -if(WITH_RDMA) - set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library") - - function(generate_rdma_links) - #redirect to current DIR to isolate the pollution from system runtime environment - #it can benifits unified control for different gcc environment. - #e.g, by default gcc48 did not refer /usr/lib64 which could contain low version - #runtime libraries that will crash process while loading it. That redirect trick - #can fix it. - execute_process( - COMMAND mkdir -p librdma - COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1 - COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so - COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1 - COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so - COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1 - COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ) - endfunction(generate_rdma_links) - - #check and set headers - find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include) - find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio) - find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent) - find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma) - - #check and set libs - find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output) - find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio) - find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent) - find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent) - find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent) - find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent) - find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma) - - if( - RDMA_INC_SXISOCK AND - RDMA_INC_XIO AND - RDMA_INC_EVENT AND - RDMA_INC_NUMA AND - RDMA_LIB_SXISOCK AND - RDMA_LIB_XIO AND - RDMA_LIB_EVENT AND - RDMA_LIB_EVENT_CORE AND - RDMA_LIB_EVENT_EXTRA AND - RDMA_LIB_EVENT_PTHREADS AND - RDMA_LIB_NUMA - ) - - set(RDMA_INC_DIR - ${RDMA_INC_SXISOCK} - ${RDMA_INC_XIO} - ${RDMA_INC_EVENT} - ${RDMA_INC_NUMA}) - set(RDMA_LIBS - ${RDMA_LIB_SXISOCK} - ${RDMA_LIB_XIO} - ${RDMA_LIB_EVENT} - ${RDMA_LIB_EVENT_CORE} - ${RDMA_LIB_EVENT_EXTRA} - ${RDMA_LIB_EVENT_PTHREADS} - ${RDMA_LIB_NUMA} - ) - set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma") - include_directories("${RDMA_INC_DIR}") - else() - #if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable - message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.") - endif() -else(WITH_RDMA) - set(RDMA_LIBS "") - set(RDMA_LD_FLAGS "") - add_definitions(-DPADDLE_DISABLE_RDMA) -endif(WITH_RDMA) diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index 3dc7171551bfb7aff8d1e75083c98b00378d247f..891ff222633741f9894c2fdb6c0096a48f8a35e1 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -33,6 +33,5 @@ if(TENSORRT_FOUND) message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") include_directories(${TENSORRT_INCLUDE_DIR}) - list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY}) add_definitions(-DPADDLE_WITH_TENSORRT) endif() diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh index 031225a85dabb26e5d9ea06f58909c049e7f0c08..34cb7a12db171915f2bc7df8787dd62cd381de68 100755 --- a/paddle/contrib/float16/run_float16_demo.sh +++ b/paddle/contrib/float16/run_float16_demo.sh @@ -14,9 +14,7 @@ cmake .. -DWITH_AVX=OFF \ -DWITH_MKL=OFF \ -DWITH_GPU=ON \ -DWITH_TESTING=ON \ - -DWITH_TIMER=ON \ -DWITH_PROFILER=ON \ - -DWITH_FLUID_ONLY=ON make -j `nproc` pip install -U "$WHEEL_PATH/$(ls $WHEEL_PATH)" diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index df961be911537582fb60dd258fcdd4a5dd41a38e..7eec0b31556a93ec2ee390772a260a0a5d90c87f 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -1,518 +1,522 @@ -paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.program_guard ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.name_scope ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)) -paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.scope_guard ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) -paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)) -paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', 'af5346376065ff4cf6832a8ac0ae0945')) +paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ebb7765b2962bd2be041d19720e49d0f')) +paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5e162d3bf8dd625703463d9e4be36adb')) +paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'cfb7e05a002b2e64650778cabde7301c')) +paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1c8647b14fe57c7824b1c9562394dd3c')) +paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5')) +paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15')) +paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd')) +paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659')) +paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6')) +paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2')) +paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03')) +paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45')) +paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0')) +paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2')) +paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680')) +paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8')) +paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c')) +paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff')) +paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4')) +paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4')) +paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f')) paddle.fluid.DistributeTranspilerConfig.__init__ -paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)) -paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)) -paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None) -paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')) -paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)) -paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)) -paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.CompiledProgram.with_inference_optimize ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None) +paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '2cb4bd74481861345c70228a0f57620c')) +paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '8e7bb21e83ff4604f5b379672e285b94')) +paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '368f638b99f1dfe59e9b02aa6f077752')) +paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f')) +paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21')) +paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766')) +paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690')) +paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4810dbe1870452f16b3c60b6c5fd1459')) +paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '799a2066cc26819f1ed31f47c15ad083')) +paddle.fluid.AsyncExecutor.get_instance (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f8688f76a2db1243c7097a60c507b182')) +paddle.fluid.AsyncExecutor.init_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '504f39be2007404a17e5cabea1256c7d')) +paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', 'c403ab46c5d3ef25c0f7e94ae75dcb68')) +paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'dcf08f4bf2f3282acf11391f5d39c536')) +paddle.fluid.AsyncExecutor.run (ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)), ('document', '848fc53484e8326f6325feea87fe955c')) +paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', 'c8ac0dfcb3b187aba25d03af7fea56b2')) +paddle.fluid.AsyncExecutor.stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5f23d043607bb5d55e466ec3f578e093')) +paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'e1af7fd53cf868554f312779fc803864')) +paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8')) paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None -paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)) -paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)) -paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)) -paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)) -paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)) -paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0)) -paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0)) -paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)) -paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')) -paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)) -paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None)) -paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)) -paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)) -paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)) -paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)) -paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)) -paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)) -paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)) -paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None)) -paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) -paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) -paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)) -paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)) -paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)) -paddle.fluid.layers.data_norm ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, False, None, None, None, False)) -paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) -paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) -paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)) -paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)) -paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) -paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) -paddle.fluid.layers.reduce_max ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) -paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) -paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)) -paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')) -paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)) -paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)) -paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)) -paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)) -paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)) -paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)) -paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)) -paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)) -paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)) -paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) -paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)) -paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False)) -paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)) -paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)) -paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.lrn ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)) -paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)) -paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)) -paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)) -paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)) -paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)) -paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)) -paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)) -paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)) -paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)) -paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)) -paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)) -paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) -paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)) -paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) -paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)) -paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)) -paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) -paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)) -paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)) -paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)) -paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) -paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)) -paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) -paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)) -paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)) -paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) -paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) -paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) -paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)) -paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')) -paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')) -paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')) -paddle.fluid.layers.sum ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)) -paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)) -paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)) -paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)) -paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)) -paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)) -paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)) -paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.tree_conv ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)) -paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) -paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) -paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.shuffle ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)) -paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)) -paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.Preprocessor.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.Preprocessor.outputs ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None) -paddle.fluid.layers.load ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)) -paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)) -paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)) -paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) -paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)) -paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.fill_constant_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)) -paddle.fluid.layers.fill_constant ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)) -paddle.fluid.layers.argmin ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) -paddle.fluid.layers.argmax ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) -paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)) -paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)) -paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)) -paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)) -paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) -paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None) -paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.DynamicRNN.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.DynamicRNN.memory ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')) -paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) -paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)) -paddle.fluid.layers.StaticRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) -paddle.fluid.layers.StaticRNN.step ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.StaticRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.StaticRNN.step_output ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')) -paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) -paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)) -paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)) -paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)) -paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)) -paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)) -paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')) -paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)) -paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)) -paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)) -paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)) -paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)) -paddle.fluid.layers.generate_mask_labels ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)) -paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)) -paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) -paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) -paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)) -paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None) -paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')) -paddle.fluid.contrib.StateCell.__init__ ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.StateCell.compute_state ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.get_input ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.get_state ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.out_state ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.set_state ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.state_updater ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.StateCell.update_states ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.TrainingDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.TrainingDecoder.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) -paddle.fluid.contrib.TrainingDecoder.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.TrainingDecoder.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.BeamSearchDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)) -paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)) -paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)) -paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)) -paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.contrib.Calibrator.__init__ ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.contrib.Calibrator.sample_data ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.Calibrator.save_int8_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.reader.ctr_reader.ctr_reader ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)) -paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)) -paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.CompressPass.apply ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.ImitationGraph.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.ImitationGraph.all_parameters ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.__init__ ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)) -paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.MagnitudePruner.__init__ ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.MagnitudePruner.prune ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.RatioPruner.__init__ ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.RatioPruner.prune ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)) -paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)) -paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,)) -paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)) -paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)) -paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)) -paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) -paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)) -paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b55d6193a1d4198d45b013fc5779e1f2')) +paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '3a7a99abac3e1bf898871fe609354218')) +paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9141bb5f32caf7975eb3fd88c8a1b2da')) +paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95')) +paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2')) +paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2')) +paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c')) +paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb')) +paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.initializer.TruncatedNormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.initializer.XavierInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.initializer.BilinearInitializer.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd389912dc079cbef432335a00017cec0')) +paddle.fluid.initializer.MSRAInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0)), ('document', '53c757bed9345f2ad3361902531e7cf5')) +paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6d0f3e22c90d9d500d36ff57daf056ee')) +paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'a6d7011ca3d8c0d454dac3a56eae0c29')) +paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '1929058262994f212620599c63aea6bd')) +paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a')) +paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6')) +paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d')) +paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '4ec4845fd7d991bcac822f8b0dfc101f')) +paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', 'e0e2439f7af069b57badca18a6ba60b8')) +paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '7c49ef4bbf0adfd4b9a1d98e2e5f3fea')) +paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '7642373ab65d3fc3b96d16d10fef1538')) +paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', 'd740824aa7316b807c4b4a3c6c8c0bbe')) +paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '025b364dafb4b7975c801eb33e7831a1')) +paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '30add751a0f99347a6257634c03ff254')) +paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', '44b6eef4a0f2bc15f7d9745782406736')) +paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ee152a7ba3036e7b9ede9184545179b4')) +paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)), ('document', 'b6543768e1afaa2ecb869709d6e9c7e2')) +paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '8ca6121acd6d23cd8806a93f493c2e17')) +paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b')) +paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8')) +paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4')) +paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b')) +paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa')) +paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625')) +paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95')) +paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '120f4323a3d7ed9c0916f15a59f0e497')) +paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'c527b71b8a4c60dca8df8a745c2b598d')) +paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', 'e45e09e65a2658e07cad987222f0d9ab')) +paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb')) +paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9')) +paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ec113c6a3686ac94f8fccd1a7953d445')) +paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '79c375214fa427faac504043d162dae9')) +paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d2611f84ab364c5da545e6a82f1770a')) +paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6a1adf3067b20f6e4bcb354d71c19184')) +paddle.fluid.layers.sequence_unpad (ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd12803c903c99aa36ec03aaac5f0cc5b')) +paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '027723966f3ef0d7bc598f22287a96cc')) +paddle.fluid.layers.reduce_sum (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'b69998ce3ff4980fb21da0df05565f1b')) +paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd4d80dd98a1a5839f41eeb3a0f85f370')) +paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4')) +paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c')) +paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca')) +paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3')) +paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce')) +paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6')) +paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f')) +paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6')) +paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2')) +paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571')) +paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990')) +paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9')) +paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32')) +paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab')) +paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'a10ab9bf88d4a7e328882d411abb6fd1')) +paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1feac48b843d679db82312dc85885f4')) +paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', '3ce01160ede80b1c26f776f8fef9340f')) +paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', 'fddad4896dee5193e1cdf70882c2a347')) +paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', '5db30b8a74e8c93687943a3e8d221da0')) +paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d')) +paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996')) +paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e')) +paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee')) +paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e')) +paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b')) +paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters', 'eps', 'name'], varargs=None, keywords=None, defaults=(0, 1, 1e-12, None)), ('document', '3f536aafba30d793287b52d231baff1b')) +paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)), ('document', 'bce1b75e3d95b75cacd1099655cbb3c3')) +paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c6b175d253c55baf4b9c0eca9b1dda88')) +paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '6148b6a555cbfb62fdcd030d8982c18c')) +paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '3f6c828594720c9b2da89c464be94478')) +paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '323c019f257e55ddea4a824a362de62f')) +paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3229d06517f794e86ca3da14c38b1465')) +paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbd62da391b1df984a1909d069a759b2')) +paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f122194c562bd674f6ecdccf33785f99')) +paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '0795e9940e42dcd62953514ff7e09f77')) +paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f28153bdd2d5ea6f7bad5867bd03eeb')) +paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', 'd2e1f45fef51b2c214e3f2aa8976c46c')) +paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '70c113658102a11cc5d8e3d45145737a')) +paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97')) +paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1')) +paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d')) +paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'b3ecb819454832885c1f0f3ab9a5b938')) +paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7')) +paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7')) +paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d')) +paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2')) +paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342')) +paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b')) +paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c9ab9e460ef0a1823249935a30e82c66')) +paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', '35cbbdfa585d027bb490707c95a176b9')) +paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '866ffa1cc93f29e23662b526a7596537')) +paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '9044c7fe667b76cb2d9264f2db11f417')) +paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '98247c59d1c9b40af6730001b2aea73d')) +paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '883104791204d3127e24234bb630b2e7')) +paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c542e39ac6add24a6bef6e79bf5617e2')) +paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '6d19dcc19917080b7ff3e03bde451bc8')) +paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '463258ee9f8b60760eb1e26357cc9bfa')) +paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '6f367339caf6c7124bc262fe1475df70')) +paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'a5117c1eb84aca2ac0b0abab337a4799')) +paddle.fluid.layers.stanh (ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)), ('document', '959936a477efc6c1447a9c8bf8ce94bb')) +paddle.fluid.layers.hard_sigmoid (ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)), ('document', 'c82059b6fea1aa730f9aac911807b756')) +paddle.fluid.layers.swish (ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'ef745e55a48763ee7b46b21a81dc7e84')) +paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f6acef7ff7d887e49ff499fbb1dad4a9')) +paddle.fluid.layers.brelu (ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)), ('document', '3db337c195e156e6ef2b8b4a57113600')) +paddle.fluid.layers.leaky_relu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)), ('document', 'f878486c82b576938151daad0de995a0')) +paddle.fluid.layers.soft_relu (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)), ('document', '869adce548c342d6cc1bd88a948d83c9')) +paddle.fluid.layers.flatten (ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'cb295c13cb957db85cd9609269d7784d')) +paddle.fluid.layers.sequence_mask (ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)), ('document', 'f0dd6eddd3bff015a3c05269d82fcbd8')) +paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '367cfbb642839beacb5d117e2d2b4041')) +paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '7f4d46320cc077ca2e8db600c35f4030')) +paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', '98eb9d633116efcfc6f90c114bd44fd6')) +paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'f6028537085dc296103bbbd85fa7763d')) +paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '117d3607d1ffa0571835bbaebc7857ff')) +paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3a1d155dd1bf6e72a0a3e3e1519591d1')) +paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '30190413b2fa442e7466d6cf2ce5ea07')) +paddle.fluid.layers.elementwise_add (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '6bfbe72cbadc95ac7ab88c05ed5bf9f0')) +paddle.fluid.layers.elementwise_div (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'cc6e6cc1cb942a152dde3ef08d5f165c')) +paddle.fluid.layers.elementwise_sub (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'a12abdab09c3e57af5a6e1e9f138684a')) +paddle.fluid.layers.elementwise_mul (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '422c77dbfcff355a57b5fdd4ec876daa')) +paddle.fluid.layers.elementwise_max (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'f0bb0b2c454541cfafa761021a5cc776')) +paddle.fluid.layers.elementwise_min (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '8a9cdefefbccbf9f6b0991c0946a21e9')) +paddle.fluid.layers.elementwise_pow (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '1aea4e197c552a284f83888a3c67a32e')) +paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', '129e0a3257f1d532a948eedf9d5bf671')) +paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '389dafe36e099841b6a7fb18d11f1b4c')) +paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '840fdac643d1341c1cae218d4511dbb9')) +paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '840026b4766613c5705e06563cd103b6')) +paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860')) +paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a')) +paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8')) +paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42')) +paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012')) +paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25')) +paddle.fluid.layers.logical_not (ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cd1c8cf31e040427d4e05711044caeb6')) +paddle.fluid.layers.clip (ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b020b7aab59719be98a4ae229a76deba')) +paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1ea0bc5a926f427458c4254ca022749')) +paddle.fluid.layers.mean (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd638d915195ce86a8d7963b81110d4c8')) +paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)), ('document', 'ccd37fa6b53f074adbfb732d738c4c2d')) +paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '180c284317ea45ef89a460d8d79c0b72')) +paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '891870d069a6aea746d34cc53b61690c')) +paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e')) +paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed')) +paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f')) +paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7')) +paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35')) +paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007')) +paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d')) +paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', '4b5a2341023afe63157a066c14254f98')) +paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '4b9c2e8af5817937d831820874b5aa77')) +paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'aa7540a0fa73ff69a02e11b4091aab75')) +paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dc63315b84f591ac79ecca0c3632027a')) +paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6')) +paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932')) +paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949')) +paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb')) +paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d')) +paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99')) +paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7')) +paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607')) +paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139')) +paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc')) +paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e')) +paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca')) +paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0')) +paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3')) +paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff')) +paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '13dabc57863f62ab3141586784ee356b')) +paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '350f74d93fab9adb2ac4950f1c26416b')) +paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.Preprocessor.outputs (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.load (ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d1a4bc97bbce9fa1d4f7a4200a771ff')) +paddle.fluid.layers.create_tensor (ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)), ('document', 'c0c3d0194f83fff8ea99ce0820657dae')) +paddle.fluid.layers.create_parameter (ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd62b866c899bc1fedb5385f95b88e1f8')) +paddle.fluid.layers.create_global_var (ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)), ('document', 'ab914fac893607e29ac6e52bbdbea1a4')) +paddle.fluid.layers.cast (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '60cb8f843d625abf33f8bf12455b8f99')) +paddle.fluid.layers.tensor_array_to_tensor (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'b12717d3d4567e6119589f7f655b0cbb')) +paddle.fluid.layers.concat (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b19b79be4f05e85d1d6cec642c9fb535')) +paddle.fluid.layers.sums (ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', '42912092418620b4be07f36af31e7816')) +paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b690184f3537df5501e4d9d8f31152a5')) +paddle.fluid.layers.fill_constant_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)), ('document', 'd4059a2f5763036b07018d76429f9acb')) +paddle.fluid.layers.fill_constant (ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)), ('document', '1d8b14729639fa38509c79b9784740fa')) +paddle.fluid.layers.argmin (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '2778a1d34be49263a51211885599ea37')) +paddle.fluid.layers.argmax (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '04114996cfb98994ba222804a1a6109f')) +paddle.fluid.layers.argsort (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '68ec45c6fb6b93e47de9c9a0945fb98e')) +paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'b402489c62e668df42e7daceb63c142b')) +paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'c155e2efc56ffa5ed4658cca0272e491')) +paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', '8ee7cb6ca639e7460e825f953b65d94d')) +paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '8f8c0306117ea441f20dcbbdba1f0ecc')) +paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8')) +paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292')) +paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.Switch.case (ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None), ('document', 'f7c7160014c1b46cfeda9dd5808d1789')) +paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '50853ae884df03d9c36703bb46d9ef07')) +paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', '73bb96ec4783ec1a11e760e8851b0e77')) +paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713')) +paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a')) +paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords=None, defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f')) +paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77')) +paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823')) +paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2')) +paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.IfElse.input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.IfElse.output (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.IfElse.true_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.DynamicRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6d3e0a5d9aa519a9773a36e1620ea9b7')) +paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d')) +paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a')) +paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0')) +paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95')) +paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f')) +paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7')) +paddle.fluid.layers.StaticRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.StaticRNN.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '3545f529ef04e8f6ecb76b47fa3df01a')) +paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', '5fef91b0e21c93610785f2b1f7161732')) +paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519')) +paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '29a25ba78de79152076cacfc5443137d')) +paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '81ccb7acafd06c7728e11581f5d342e3')) +paddle.fluid.layers.exp (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e6b3e769413d96aab4176f96db25984b')) +paddle.fluid.layers.tanh (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9d586a0b5bd05f67ee78048f9d503b6')) +paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7')) +paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13')) +paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '072a8541e0f632366bba10f67cb0db27')) +paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd')) +paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad')) +paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973')) +paddle.fluid.layers.cos (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '485f2686bcc2fe37a4bd893769c8a3e2')) +paddle.fluid.layers.sin (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '01f1766aa76eff1df30147505b59f7c4')) +paddle.fluid.layers.round (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b47f5da13913d3e56bdb1e612a73f3f2')) +paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cc6ac2f14f03c52aaa83a59bf83b8d26')) +paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee')) +paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a')) +paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148')) +paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641')) +paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e')) +paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926')) +paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a')) +paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8')) +paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9')) +paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd')) +paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1')) +paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c')) +paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1')) +paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '6d5028fd09d01ab82d296adc0ea95aee')) +paddle.fluid.layers.detection_map (ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')), ('document', '1467d91b50c22cd52103b4aa1ee9d0a1')) +paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '1dddef3eb4b3cbd4df8e03ac480dbf97')) +paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '23337cc57bbf5be73884b6bd0f849603')) +paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', '5761f9ed83654314416e24372b33bb84')) +paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)), ('document', '87863717edeb7fe87a1268976cbc015d')) +paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '57ab49f3f324f310b7eed322e7c1057a')) +paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'f73706a65468e9ca3e0bee4a31521b0a')) +paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1')) +paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e')) +paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b')) +paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691')) +paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e')) +paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0')) +paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '005a5ae47d6c8fff721931d69d072b9f')) +paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', '9808534c12c5e739a10f73ebb0b4eafd')) +paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', 'e0e95334fce92d16c2d9db6e7caffc47')) +paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '98a5050bee8522fcea81aa795adaba51')) +paddle.fluid.layers.natural_exp_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '676a7bc2a218691db50bca233903d21e')) +paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', 'd07e767d59c4a5e6c930f3e6756d3f82')) +paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40')) +paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae')) +paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28')) +paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8')) +paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b')) +paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99')) +paddle.fluid.contrib.StateCell.get_input (ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None), ('document', '6f24a007cfa184e32f01a960703bfd70')) +paddle.fluid.contrib.StateCell.get_state (ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None), ('document', '630a4945cfe659ea4f307598fbbce5d2')) +paddle.fluid.contrib.StateCell.out_state (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7ad681dff0393ddf13a724194e720f28')) +paddle.fluid.contrib.StateCell.set_state (ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None), ('document', 'd4e0e08cd5d9d9a571cbc52d114f5ae9')) +paddle.fluid.contrib.StateCell.state_updater (ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None), ('document', 'd5afe1b7665d94fb023b15cf913ca510')) +paddle.fluid.contrib.StateCell.update_states (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'fe0b0f1338723516a35a30247899c81b')) +paddle.fluid.contrib.TrainingDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.TrainingDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf')) +paddle.fluid.contrib.TrainingDecoder.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'f0a457dee586559036202087ce2eff69')) +paddle.fluid.contrib.TrainingDecoder.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'a024c72664fe815068423ba630b7658a')) +paddle.fluid.contrib.TrainingDecoder.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '4659db7a888a2495e71c1838a0483909')) +paddle.fluid.contrib.BeamSearchDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.BeamSearchDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf')) +paddle.fluid.contrib.BeamSearchDecoder.decode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1e47c60f080c1343ebb6ceaef89656b2')) +paddle.fluid.contrib.BeamSearchDecoder.early_stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a84a7454ed6707f79b9e954d92a7575')) +paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'aa89eb8fd5e4cabaf5cc1bcae14665a4')) +paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7')) +paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47')) +paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa')) +paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d')) +paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd')) +paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884')) +paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958')) +paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab')) +paddle.fluid.contrib.build_compressor (ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.CompressPass.__init__ (ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.CompressPass.add_strategy (ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None), ('document', '3bf6010b6f47d3c86df0ec8957be95e0')) +paddle.fluid.contrib.CompressPass.apply (ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None), ('document', 'a92bf85d4b59bd4f2ac1706d7c4899a6')) +paddle.fluid.contrib.ImitationGraph.__init__ (ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.ImitationGraph.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.__init__ (ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d')) +paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645')) +paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7')) +paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196')) +paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8')) +paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e')) +paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634')) +paddle.fluid.contrib.HDFSClient.is_dir (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', '45bde1bae02605a205c8245b58b9156d')) +paddle.fluid.contrib.HDFSClient.is_exist (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', 'be9c94bccff7ba0c1d95883ac62b5864')) +paddle.fluid.contrib.HDFSClient.ls (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '808acac504870c7e46594b95674f8a86')) +paddle.fluid.contrib.HDFSClient.lsr (ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)), ('document', 'fae835aa3354eb6a0434c0f9ba3c2747')) +paddle.fluid.contrib.HDFSClient.make_local_dirs (ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None), ('document', 'e76b89c8e7f019b5da576c0026fcf689')) +paddle.fluid.contrib.HDFSClient.makedirs (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '44d9972aae390aedf40aaea731a37e4b')) +paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,)), ('document', '0eb133644d9a9f4da45bb39261ff0955')) +paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665')) +paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a')) +paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a')) +paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680')) +paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8')) +paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c')) +paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff')) +paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4')) +paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4')) +paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f')) +paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.RoundRobin.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.RoundRobin.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.transpiler.RoundRobin.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ -paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)) -paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)) -paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)) -paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)) -paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)) -paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.optimizer.SGDOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.SGDOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)) -paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None)) -paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)) -paddle.fluid.optimizer.AdamOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.AdamOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)) -paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.AdamaxOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)) -paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.DecayedAdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)) -paddle.fluid.optimizer.FtrlOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.FtrlOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)) -paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.RMSPropOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)) -paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.AdadeltaOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)) -paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.optimizer.ModelAverage.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.ModelAverage.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)) -paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None) -paddle.fluid.optimizer.LarsMomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)) -paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)) -paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)) +paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', 'e0f67f35abf27f666f81003113b90244')) +paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', '48c434dd7bb827f69d90e5135d77470f')) +paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '08c1c57e1db6b20bf87b264cb7cf3ca8')) +paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', '921714c9bfb351b41403418265393203')) +paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970')) +paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715')) +paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe')) +paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871')) +paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f')) +paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea')) +paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24')) +paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core.LoDTensor) -> None paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None -paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None -paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None +paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, lod: List[List[int]]) -> None +paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int] paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None -paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, arg0: paddle.fluid.core.LoDTensor) -> None +paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) -> None paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None -paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)) -paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)) -paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)) -paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None) -paddle.fluid.DataFeeder.feed_parallel ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.clip.ErrorClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.clip.GradientClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.clip.GradientClipByNorm.__init__ ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None) -paddle.fluid.clip.GradientClipByGlobalNorm.__init__ ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)) -paddle.fluid.profiler.cuda_profiler ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.profiler.reset_profiler ArgSpec(args=[], varargs=None, keywords=None, defaults=None) -paddle.fluid.profiler.profiler ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')) -paddle.fluid.profiler.start_profiler ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None) -paddle.fluid.profiler.stop_profiler ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')) -paddle.fluid.unique_name.generate ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None) -paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.unique_name.guard ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)) -paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)) +paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753')) +paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca')) +paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85')) +paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.clip.GradientClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '2e2fb1cfc469a67f19fb578a2ed6be79')) +paddle.fluid.profiler.reset_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '397ce757fabbe5c622e0c3458c41fcd0')) +paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'bd3a07eeb68e384f4d2d416cb2e28d86')) +paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '88da8fb6dbebaee2f7520188a09574f9')) +paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'a7500e39dd033f1e64f562e909333a8a')) +paddle.fluid.unique_name.generate (ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310')) +paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7')) paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope -paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None) -paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None) -paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None) -paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None) -paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None) -paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None) -paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)) -paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')) -paddle.reader.PipeReader.get_line ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')) -paddle.reader.multiprocess_reader ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)) -paddle.reader.Fake.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.reader.creator.np_array ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) -paddle.reader.creator.text_file ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None) -paddle.reader.creator.recordio ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)) +paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d')) +paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb')) +paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d')) +paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4')) +paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d')) +paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad')) +paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560')) +paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69')) +paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0')) +paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada')) +paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a')) +paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8')) diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index 1d9678a1ba1409e5c18d3e25b3aa13dfbbf76908..60708bf609d6f8b327d46fe585cbbcf07a62eece 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -244,6 +244,7 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program, auto& block = main_program.Block(0); for (auto var_name : fetch_var_names) { auto var_desc = block.FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(var_desc, "%s is not found.", var_name); auto shapes = var_desc->GetShape(); PADDLE_ENFORCE(shapes[shapes.size() - 1] == 1, "var %s: Fetched var has wrong shape, " diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc index f537e4b9e569dd4c513ac0efde7240833bcf04b6..0b7aaf11746d1931e10ad7e5368d9e053092500e 100644 --- a/paddle/fluid/framework/block_desc.cc +++ b/paddle/fluid/framework/block_desc.cc @@ -13,7 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/block_desc.h" + #include +#include +#include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" @@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) { ops_.erase(ops_.begin() + s, ops_.begin() + e); } +void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) { + // TODO(minqiyang): make this faster + for (auto it = ops_.begin(); it != ops_.end(); ++it) { + if (it->get() == op_desc) { + ops_.erase(it); + break; + } + } +} + std::vector BlockDesc::AllOps() const { std::vector res; for (const auto &op : ops_) { diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h index 960ca39e1eadd3c064beb0e2c1342a406c4f0b6a..5c6e421516269a9b9865605400efa772f944a96f 100644 --- a/paddle/fluid/framework/block_desc.h +++ b/paddle/fluid/framework/block_desc.h @@ -93,6 +93,8 @@ class BlockDesc { */ void RemoveOp(size_t s, size_t e); + void RemoveOpInternal(const OpDesc *op_desc); + void RemoveVar(const std::string &name) { vars_.erase(name); } std::vector AllOps() const; diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 72c50518af08b9c1b2f97e6864e5836e806c77fc..10aa7a59422f4508dda8d0bcd960583056e25938 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -134,11 +134,6 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, out_layout = out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout; - auto& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = dynamic_cast( - pool.Get(expected_kernel_type.place_)); - auto& cpu_engine = dev_ctx->GetEngine(); - std::vector in_tz = paddle::framework::vectorize2int(in.dims()); std::vector out_tz = in_tz; @@ -147,29 +142,25 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, "Input tensor type is not supported: %s", in.type()); memory::data_type out_type = in_type; - auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); - auto out_format = - platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); - // output tensor has the same dims as input. Reorder don't change dims out->Resize(in.dims()); - if (in_format != out_format) { + // tempory mem pd fr out , to make reorder + auto out_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(out->dims()), + mkldnn::memory::format::blocked, out_type); + if (in.get_mkldnn_prim_desc() != out_mem_pd) { void* in_data = GetDataFromTensor(in, in_type); auto out_data = out->mutable_data(expected_kernel_type.place_, in.type()); - auto in_memory = - memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data); - auto out_memory = - memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data); + auto in_memory = memory(in.get_mkldnn_prim_desc(), in_data); + auto out_memory = memory(out_mem_pd, out_data); platform::Reorder(in_memory, out_memory); } else { out->ShareDataWith(in); } out->set_layout(out_layout); - // reset format since the out tensor will be feed to non-MKLDNN OPkernel - out->set_format(memory::format::format_undef); #endif } diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 82872224501709080ff02a13464d58543a0abda8..f0203edf05635452bf347335066dadc24ecc3138 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -51,13 +51,31 @@ void TransformData(const OpKernelType &expected_kernel_type, #ifdef PADDLE_WITH_MKLDNN // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Just set layout/format. No real transform occur - - auto out_format = platform::MKLDNNFormatForSize(in.dims().size(), - ToMKLDNNFormat(lin)); - out.ShareDataWith(input_tensor); - out.set_layout(DataLayout::kMKLDNN); - out.set_format(out_format); + // TODO(jczaja): Remove that once all mkldnn ops + // are modified to work with mkldnn_blocked + auto mkldnn_fmt = [&](int rank) { + switch (rank) { + case 5: + return mkldnn::memory::format::ncdhw; + case 4: + return mkldnn::memory::format::nchw; + case 3: + return mkldnn::memory::format::ncw; + case 2: + return mkldnn::memory::format::nc; + case 1: + return mkldnn::memory::format::x; + default: + return mkldnn::memory::format::blocked; + } + }; + + auto out_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(out.dims()), + mkldnn_fmt(out.dims().size())); + + out.set_mkldnn_prim_desc(out_mem_pd); #endif } else { // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index e88084424baf7eb5cd1d67ea19966866d71ec3eb..dc308fd2592bb158f46f6eac9dd0df25787559fe 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope) -cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper) +if(WITH_GPU) +cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info) +else() +cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info) +endif() + cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass) cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info) cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper) diff --git a/paddle/fluid/framework/details/all_reduce_deps_pass.cc b/paddle/fluid/framework/details/all_reduce_deps_pass.cc index b7d6edd389d8e40835dadf56d7c54d53402f6f4d..ff223e616f7ef0c794e72a0028c7e5bb3f234ec0 100644 --- a/paddle/fluid/framework/details/all_reduce_deps_pass.cc +++ b/paddle/fluid/framework/details/all_reduce_deps_pass.cc @@ -30,8 +30,6 @@ namespace paddle { namespace framework { namespace details { -static constexpr char kAllOpDescs[] = "all_op_descs"; - VarHandle* GetValidInput(const OpHandleBase* a) { for (auto p : a->Inputs()) { VarHandle* b = dynamic_cast(p); @@ -52,7 +50,7 @@ std::unique_ptr AllReduceDepsPass::ApplyImpl( std::unordered_map vars; // TODO(gongwb): use graph topology sort to find the order of operators. // Note that must assert topology sort is stable - auto& ops = Get>(kAllOpDescs); + auto& ops = graph->Get>(kStaleProgramOpDescs); for (auto* op_desc : ops) { auto outputs = op_desc->Outputs(); for (auto& o_it : outputs) { @@ -122,4 +120,4 @@ std::unique_ptr AllReduceDepsPass::ApplyImpl( REGISTER_PASS(all_reduce_deps_pass, paddle::framework::details::AllReduceDepsPass) - .RequirePassAttr(paddle::framework::details::kAllOpDescs); + .RequireGraphAttr(paddle::framework::details::kStaleProgramOpDescs); diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index dd77f7099f581a5b825916c4ea010023f3ad5bcd..c1f9c2b60c915370df7793f26fe83812a7ced96d 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -53,7 +53,7 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, #endif void AllReduceOpHandle::RunImpl() { - platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second); + platform::RecordEvent record_event(Name()); WaitInputVarGenerated(); auto in_var_handles = DynamicCast(this->Inputs()); diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index 89d626edddfee3d2c43a3cf2232ad4fc1611e655..fdff83b92819b39974f3b2ce0848710f1ee02a41 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -22,7 +22,7 @@ namespace framework { namespace details { void BroadcastOpHandle::RunImpl() { - platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second); + platform::RecordEvent record_event(Name()); if (places_.size() == 1) return; @@ -30,7 +30,7 @@ void BroadcastOpHandle::RunImpl() { VarHandle *in_var_handle; { auto in_var_handles = DynamicCast(inputs_); - PADDLE_ENFORCE_EQ(in_var_handles.size(), 1, + PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL, "The number of input should be one."); in_var_handle = in_var_handles[0]; } diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index f8030c53f72bc8a6f007c1eb6a3072abd8037de2..2cfc76e47f41862731fb2de5d1d03287acd4d9d7 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -34,9 +34,11 @@ namespace details { static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) { // Should fix the allreduce op order if scheduling // them in multiple threads or processes to avoid hang. + // NOTE: ParallelGraph would execute this pass on each graph, so + // don't need to append it here. return (!strategy.enable_sequential_execution_ && - strategy.num_trainers_ > 1) || - strategy.enable_parallel_graph_; + strategy.num_trainers_ > 1) && + !strategy.enable_parallel_graph_; } class ParallelExecutorPassBuilder : public ir::PassBuilder { @@ -133,12 +135,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { void AppendMultiDevPass(const BuildStrategy &strategy) { ir::Pass *multi_devices_pass; if (strategy_.is_distribution_) { + VLOG(3) << "multi device parameter server mode"; multi_devices_pass = AppendPass("dist_multi_devices_pass").get(); } else { if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) { + VLOG(3) << "multi devices collective mode with allreduce"; multi_devices_pass = AppendPass("allreduce_mode_multi_devices_pass").get(); } else if (strategy.reduce_ == BuildStrategy::ReduceStrategy::kReduce) { + VLOG(3) << "multi deivces collective mode with reduce"; multi_devices_pass = AppendPass("reduce_mode_multi_devices_pass").get(); } else { PADDLE_THROW("Unknown reduce strategy."); @@ -169,7 +174,8 @@ bool BuildStrategy::IsMultiDevPass(const std::string &pass_name) const { } std::unique_ptr BuildStrategy::Apply( - const ProgramDesc &main_program, const std::vector &places, + std::unique_ptr graph, + const std::vector &places, const std::string &loss_var_name, const std::vector &local_scopes, const size_t &nranks, #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) @@ -180,7 +186,6 @@ std::unique_ptr BuildStrategy::Apply( // Create a default one if not finalized by user. CreatePassesFromStrategy(false); - std::unique_ptr graph(new ir::Graph(main_program)); for (std::shared_ptr &pass : pass_builder_->AllPasses()) { if (IsMultiDevPass(pass->Type())) { pass->Erase(kPlaces); @@ -198,41 +203,12 @@ std::unique_ptr BuildStrategy::Apply( pass->Erase("nccl_ctxs"); pass->SetNotOwned("nccl_ctxs", nctx); #endif - } else if (pass->Type() == "memory_optimize_pass") { - if (graph->Has(kAllOpDescs)) { - graph->Erase(kAllOpDescs); - } - const std::vector *all_op_descs = - new std::vector(main_program.Block(0).AllOps()); - graph->Set>(kAllOpDescs, - all_op_descs); // take ownership - - pass->Erase(kAllOpDescs); - pass->SetNotOwned>(kAllOpDescs, all_op_descs); - } else if (pass->Type() == "sequential_execution_pass") { LOG(INFO) << "set enable_sequential_execution:" << enable_sequential_execution_; - - pass->Erase(kAllOpDescs); - pass->Set>( - kAllOpDescs, - new std::vector(main_program.Block(0).AllOps())); } else if (pass->Type() == "all_reduce_deps_pass") { LOG(INFO) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this) << ", num_trainers:" << num_trainers_; - - pass->Erase(kAllOpDescs); - pass->Set>( - kAllOpDescs, - new std::vector(main_program.Block(0).AllOps())); - } else if (pass->Type() == "inplace_pass") { - if (graph->Has(kAllOpDescs)) { - graph->Erase(kAllOpDescs); - } - graph->Set>( - kAllOpDescs, - new std::vector(main_program.Block(0).AllOps())); } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") { if (!use_cuda) { LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on " @@ -240,7 +216,9 @@ std::unique_ptr BuildStrategy::Apply( continue; } } + VLOG(3) << "Start Apply Pass " << pass->Type(); graph = pass->Apply(std::move(graph)); + VLOG(3) << "Finish Apply Pass " << pass->Type(); } return graph; } diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index e62e3edcef710df739c53b5d848f5aceb4f2db4e..d755a2505aead37538bef2b01a193dba87dc1567 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -76,11 +77,11 @@ struct BuildStrategy { bool fuse_relu_depthwise_conv_{false}; - bool memory_optimize_{false}; + bool memory_optimize_{true}; // TODO(dzhwinter): // make enable_inplace, memory_optimize_ // memory_early_delete_ true by default - bool enable_inplace_{false}; + bool enable_inplace_{true}; bool enable_sequential_execution_{false}; @@ -114,7 +115,7 @@ struct BuildStrategy { // Apply the passes built by the pass_builder_. The passes will be // applied to the Program and output an ir::Graph. - std::unique_ptr Apply(const ProgramDesc &main_program, + std::unique_ptr Apply(std::unique_ptr graph, const std::vector &places, const std::string &loss_var_name, const std::vector &local_scopes, diff --git a/paddle/fluid/framework/details/data_balance_op_handle.cc b/paddle/fluid/framework/details/data_balance_op_handle.cc index 48dcc52623369f7b0f51cd8c8aeb198b37467d5f..c9b52b68205ade000e21a3d06b80af86cbe01f34 100644 --- a/paddle/fluid/framework/details/data_balance_op_handle.cc +++ b/paddle/fluid/framework/details/data_balance_op_handle.cc @@ -86,7 +86,7 @@ std::vector> DataBalanceOpHandle::GetBalancePlan( } void DataBalanceOpHandle::RunImpl() { - PADDLE_ENFORCE_GT(places_.size(), 1, + PADDLE_ENFORCE_GT(places_.size(), 1UL, "Data balance can only be enabled when the number of " "places to run larger than 1."); auto in_var_handles = DynamicCast(this->Inputs()); diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index 872bc5d654cd66db821e56031d878815b653645c..d4fbea9d95118666ababde811867e95c657c07de 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" +#include #include +#include #include #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -24,12 +26,11 @@ namespace details { FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( const ExecutionStrategy &strategy, const std::vector &local_scopes, - const std::vector &places, - std::unique_ptr &&graph) + const std::vector &places, ir::Graph *graph) : strategy_(strategy), local_scopes_(local_scopes), places_(places), - graph_(std::move(graph)), + graph_(graph), pool_(strategy.num_threads_), prepare_pool_(1), // add one more thread for generate op_deps fetch_ctxs_(places) { @@ -56,7 +57,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run( std::vector fetch_ops; for (auto &fetch_var_name : fetch_tensors) { - for (auto &var_map : graph_->Get("vars")) { + for (auto &var_map : graph_->Get(details::kGraphVars)) { auto it = var_map.find(fetch_var_name); if (it != var_map.end()) { fetched_vars[fetch_var_name].push_back(*it->second.rbegin()); @@ -110,14 +111,14 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run( } } if (exception_.IsCaught()) { - ClearFetchOp(graph_.get(), &fetch_ops); + ClearFetchOp(graph_, &fetch_ops); exception_.ReThrow(); } } num_complete += num_comp; } // Wait FetchOps. - ClearFetchOp(graph_.get(), &fetch_ops); + ClearFetchOp(graph_, &fetch_ops); return fetches; } diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h index c3a8b85423403992e3a12ceb0a1acbae82d25dfa..970298950cc8089bc5861fcbf8dc2544934b181f 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h @@ -32,7 +32,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor { FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, - std::unique_ptr &&graph); + ir::Graph *graph); FeedFetchList Run(const std::vector &fetch_tensors) override; const ir::Graph &Graph() const override; @@ -40,7 +40,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor { ExecutionStrategy strategy_; std::vector local_scopes_; std::vector places_; - std::unique_ptr graph_; + ir::Graph *graph_; std::unordered_map op_deps_; std::vector bootstrap_ops_; diff --git a/paddle/fluid/framework/details/fuse_vars_op_handle.cc b/paddle/fluid/framework/details/fuse_vars_op_handle.cc index d65b0920698748e8a2ded728d78fbcd69b7bae0e..14292c0a5d06aa3ff12b46b5768b136fa925752d 100644 --- a/paddle/fluid/framework/details/fuse_vars_op_handle.cc +++ b/paddle/fluid/framework/details/fuse_vars_op_handle.cc @@ -23,7 +23,7 @@ void FuseVarsOpHandle::RunImpl() { auto in_var_handles = DynamicCast(this->Inputs()); auto out_var_handles = DynamicCast(this->Outputs()); - PADDLE_ENFORCE_EQ(in_var_handles.size(), 0); + PADDLE_ENFORCE_EQ(in_var_handles.size(), 0UL); PADDLE_ENFORCE_EQ(out_var_handles.size() - 1, inputs_numel_.size(), ""); auto scope = local_scope_->FindVar(kLocalExecScopeName)->Get(); diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc index 51dfa2d0711f49aaefab0af3549283dbf77eee4a..f48561ea32e6a3bbc7e9f2a8326b080ad21c6d61 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc @@ -22,7 +22,7 @@ namespace framework { namespace details { void FusedBroadcastOpHandle::RunImpl() { - platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second); + platform::RecordEvent record_event(Name()); if (places_.size() == 1UL) return; diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc index be0d941c4f9c2fe8fbb1da8ec2c11868112fcf9b..6d53dac5c0a20b4340e71274a00a7f3c0cd08ff6 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc @@ -34,8 +34,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { ->Var(details::kLocalExecScopeName) ->GetMutable() = &local_scope; for (size_t j = 0; j < input_scope_idxes.size(); ++j) { - local_scope.Var("out_var" + j); - if (i == j) local_scope.Var("in_var" + j); + local_scope.Var("out_var" + std::to_string(j)); + if (i == j) local_scope.Var("in_var" + std::to_string(j)); } param_scopes_.emplace_back(&local_scope); } @@ -62,20 +62,21 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { for (size_t i = 0; i < input_scope_idxes.size(); ++i) { // add input var handle - nodes_.emplace_back( - ir::CreateNodeForTest("in_node" + i, ir::Node::Type::kVariable)); - VarHandle* in_var_handle = - new VarHandle(nodes_.back().get(), 1, input_scope_idxes[i], - "in_var" + i, place_list_[input_scope_idxes[i]]); + nodes_.emplace_back(ir::CreateNodeForTest("in_node" + std::to_string(i), + ir::Node::Type::kVariable)); + VarHandle* in_var_handle = new VarHandle( + nodes_.back().get(), 1, input_scope_idxes[i], + "in_var" + std::to_string(i), place_list_[input_scope_idxes[i]]); vars_.emplace_back(in_var_handle); op_handle_->AddInput(in_var_handle); // add output var handle for (size_t j = 0; j < place_list_.size(); ++j) { - nodes_.emplace_back( - ir::CreateNodeForTest("out_node" + i, ir::Node::Type::kVariable)); - VarHandle* out_var_handle = new VarHandle( - nodes_.back().get(), 2, j, "out_var" + i, place_list_[j]); + nodes_.emplace_back(ir::CreateNodeForTest( + "out_node" + std::to_string(i), ir::Node::Type::kVariable)); + VarHandle* out_var_handle = + new VarHandle(nodes_.back().get(), 2, j, + "out_var" + std::to_string(i), place_list_[j]); vars_.emplace_back(out_var_handle); op_handle_->AddOutput(out_var_handle); } @@ -86,7 +87,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { std::vector> send_vec; f::LoD lod{{0, 10, 20}}; for (size_t i = 0; i < input_scope_idxes.size(); ++i) { - const std::string varname("in_var" + i); + const std::string varname("in_var" + std::to_string(i)); float val_scalar = static_cast(i); send_vec.push_back( InitLoDTensor(varname, input_scope_idxes[i], lod, val_scalar)); @@ -96,7 +97,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { WaitAll(); for (size_t i = 0; i < input_scope_idxes.size(); ++i) { - const std::string& varname("out_var" + i); + const std::string& varname("out_var" + std::to_string(i)); for (size_t j = 0; j < place_list_.size(); ++j) { LoDTensorEqual(varname, send_vec[i], lod, param_scopes_[j]); } @@ -109,7 +110,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { 2, 4, 6, 3, 1, 1, 1, 1, 3, 7}; int height = static_cast(kDims[0] * 2); for (size_t i = 0; i < input_scope_idxes.size(); ++i) { - const std::string varname("in_var" + i); + const std::string varname("in_var" + std::to_string(i)); float val_scalar = static_cast(i); send_vector.push_back(InitSelectedRows(varname, input_scope_idxes[i], rows, height, val_scalar)); @@ -119,7 +120,7 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { WaitAll(); for (size_t i = 0; i < input_scope_idxes.size(); ++i) { - const std::string& varname("out_var" + i); + const std::string& varname("out_var" + std::to_string(i)); for (size_t j = 0; j < place_list_.size(); ++j) { SelectedRowsEqual(varname, input_scope_idxes[i], send_vector[i], rows, height); diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc index b0c5968499be3a959dd6103424f25056a6dc2282..c91fc81b2defc9fe6b5720ce652a9aa94b27735e 100644 --- a/paddle/fluid/framework/details/inplace_op_pass.cc +++ b/paddle/fluid/framework/details/inplace_op_pass.cc @@ -49,7 +49,7 @@ DEFINE_bool( "If this option turns on, only these op in whitelist can be inplaced." "If it turns off, all of the running op can be candidate of inplaced op." "Such as scale, elementwise_add" - "By default, it's turned on"); + "By default, it's turned off"); DECLARE_string(memory_optimize_debug); diff --git a/paddle/fluid/framework/details/memory_optimize_helper.cc b/paddle/fluid/framework/details/memory_optimize_helper.cc index 6345ba335997ec42ebc63f90e9bf6a3ed2648edc..c89a33fc959247afb74dab49056fc3fca8b9bd89 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper.cc @@ -13,13 +13,22 @@ // limitations under the License. #include "paddle/fluid/framework/details/memory_optimize_helper.h" +#include #include #include -#include +#include #include #include #include +#include +#include +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_desc.h" +#include "paddle/fluid/platform/cpu_info.h" + +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/gpu_info.h" +#endif // PADDLE_WITH_CUDA namespace paddle { namespace framework { @@ -27,10 +36,10 @@ namespace details { using paddle::framework::VarDesc; std::vector SortOpLikeDescOrder(const ir::Graph& graph) { - PADDLE_ENFORCE(graph.Has(kAllOpDescs), - "Graph has no attribute of kAllOpDescs."); + PADDLE_ENFORCE(graph.Has(kStaleProgramOpDescs), + "Graph has no attribute of kStaleProgramOpDescs."); // 1. get op desc order - auto& op_descs = graph.Get>(kAllOpDescs); + auto& op_descs = graph.Get>(kStaleProgramOpDescs); // 2. topology sort order auto nodes = graph.Nodes(); @@ -123,7 +132,13 @@ size_t NodeSize(const VarDesc& node) { } size_t NodeSize(ir::Node* n) { - auto* desc = FindVarDescInBlock(n); + VarDesc* desc = nullptr; + // some op do not have block pointer + if (n->inputs[0]->Op() != nullptr) { + desc = FindVarDescInBlock(n); + } else { + desc = n->Var(); + } return NodeSize(*desc); } @@ -166,6 +181,11 @@ struct NodeComparator { bool operator()(ir::Node* lhs, ir::Node* rhs) const { auto* lhs_desc = FindVarDescInBlock(lhs); auto* rhs_desc = FindVarDescInBlock(rhs); + // match data type + if (lhs_desc->GetDataType() != rhs_desc->GetDataType()) { + return false; + } + // match shape auto lhs_shape = lhs_desc->GetShape(); auto rhs_shape = rhs_desc->GetShape(); if ((lhs_shape[0] == -1 && rhs_shape[0] == -1) || @@ -230,6 +250,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const { return found_node; } +ir::Node* OrderedSet::FindNextBestFitNode(ir::Node* var, ir::Node* prev) const { + ir::Node* found_node = nullptr; + NodeComparator functor; + auto it = + std::find_if(nodes_.begin(), nodes_.end(), [&](const NodeVector& v) { + if (v.front() == prev) + return true; + else + return false; + }); + PADDLE_ENFORCE(it != nodes_.end(), "Not found previous in node list!"); + for (it = std::next(it); it != nodes_.end(); ++it) { + auto& candidate = it->front(); + if (functor(var, candidate)) { + found_node = candidate; + break; + } + } + return found_node; +} + bool OrderedSet::Has(ir::Node* var) const { if (mark_table_.count(var->Name())) { auto& node_in_samename = mark_table_.at(var->Name()); @@ -241,10 +282,15 @@ bool OrderedSet::Has(ir::Node* var) const { return false; } +void OrderedSet::Erase(const std::string& var) { + PADDLE_ENFORCE(mark_table_.count(var)); + nodes_.erase(mark_table_[var]); + mark_table_.erase(var); +} + void OrderedSet::Erase(ir::Node* var) { - PADDLE_ENFORCE(mark_table_.count(var->Name())); - nodes_.erase(mark_table_[var->Name()]); - mark_table_.erase(var->Name()); + PADDLE_ENFORCE(var != nullptr); + Erase(var->Name()); } std::string OrderedSet::ToString() const { @@ -259,7 +305,10 @@ std::string OrderedSet::ToString() const { bool NodeCanReused(ir::Node* node) { // valid the node is a var node - if (node == nullptr || !node->IsVar() || node->IsCtrlVar()) return false; + // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad + if (node == nullptr || !node->IsVar() || node->IsCtrlVar() || + node->Name() == kEmptyVarName) + return false; bool flag = true; // op output force generated in cpu, can not be reused. @@ -274,20 +323,37 @@ bool NodeCanReused(ir::Node* node) { return flag; } +int MinChunkSize() { + int size{0}; +#ifdef PADDLE_WITH_CUDA + size = platform::GpuMinChunkSize(); +#else + size = platform::CpuMinChunkSize(); +#endif // PADDLE_WITH_CUDA + return size; +} + bool NodeCanReused(const VarDesc& node) { auto type = node.GetType(); + // only these types holds bulk of gpu memory if (!(type == proto::VarType::LOD_TENSOR || type == proto::VarType::SELECTED_ROWS || type == proto::VarType::LOD_TENSOR_ARRAY)) { return false; } - if (node.Persistable() || node.GetShape().empty()) { + // persistable variable is parameter + if (node.Persistable()) { return false; } - // vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad - std::string name = node.Name(); - if (!name.empty() && name[0] == '@' && name[name.size() - 1] == '@') + // shape < min_chunk_size is meaningless. + // further more, fetched loss always has size = 1 + // which should not be reused. + auto shape = node.GetShape(); + int size = std::abs( + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies())); + if (shape.empty() || size < MinChunkSize()) { return false; + } return true; } @@ -397,11 +463,21 @@ void ControlFlowGraph::LiveVariableAnalysis() { } } } + + for (auto* op : ops_) { + unlived_vars_[op] = std::set(); + for (auto& var : this->LiveIn(op)) { + if (!this->LiveOut(op).count(var)) { + unlived_vars_[op].insert(var); + } + } + } } void ControlFlowGraph::RenameVarInCFGGraph(const std::string& old_node, const std::string& new_node, int begin_idx) { + std::vector need_update(ops_.size(), false); // update graph from begin idx to the end for (size_t i = begin_idx; i != ops_.size(); ++i) { auto* op = ops_[i]; @@ -416,15 +492,27 @@ void ControlFlowGraph::RenameVarInCFGGraph(const std::string& old_node, if (live_in_[op].find(old_node) != live_in_[op].end()) { live_in_[op].erase(old_node); live_in_[op].insert(new_node); + need_update[i] = true; } if (live_out_[op].find(old_node) != live_out_[op].end()) { live_out_[op].erase(old_node); live_out_[op].insert(new_node); + need_update[i] = true; + } + } + + for (size_t i = begin_idx; i < ops_.size(); ++i) { + if (!need_update[i]) continue; + auto* op = ops_[i]; + for (auto& var : this->LiveIn(op)) { + if (!this->LiveOut(op).count(var)) { + unlived_vars_[op].insert(var); + } } } } -const std::set ControlFlowGraph::LiveIn(ir::Node* op) const { +const std::set& ControlFlowGraph::LiveIn(ir::Node* op) const { auto it = live_in_.find(op); PADDLE_ENFORCE( it != live_in_.end(), @@ -432,7 +520,7 @@ const std::set ControlFlowGraph::LiveIn(ir::Node* op) const { return it->second; } -const std::set ControlFlowGraph::LiveOut(ir::Node* op) const { +const std::set& ControlFlowGraph::LiveOut(ir::Node* op) const { auto it = live_out_.find(op); PADDLE_ENFORCE( it != live_out_.end(), @@ -440,15 +528,24 @@ const std::set ControlFlowGraph::LiveOut(ir::Node* op) const { return it->second; } -const std::set ControlFlowGraph::Use(ir::Node* op) const { +const std::set& ControlFlowGraph::Use(ir::Node* op) const { auto it = uses_.find(op); PADDLE_ENFORCE( it != uses_.end(), - string::Sprintf("Expect %s in live_out, but Not Found.", op->Name())); + string::Sprintf("Expect %s in use, but Not Found.", op->Name())); + return it->second; +} + +const std::set& ControlFlowGraph::Unlived(ir::Node* op) const { + auto it = unlived_vars_.find(op); + PADDLE_ENFORCE( + it != unlived_vars_.end(), + string::Sprintf("Expect %s in unlived_set, but Not Found.", op->Name())); + return it->second; return it->second; } -const std::vector ControlFlowGraph::Ops() const { return ops_; } +const std::vector& ControlFlowGraph::Ops() const { return ops_; } std::vector& ControlFlowGraph::Ops() { return ops_; } @@ -461,7 +558,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name, for (auto* node : ops_) { if (node == op) break; for (auto& output : node->outputs) { - if (output->Name() == name) { + PADDLE_ENFORCE((output != nullptr && output->IsVar()), + "Output is empty!"); + if (output->Var() && output->Name() == name) { found_node = output; } } diff --git a/paddle/fluid/framework/details/memory_optimize_helper.h b/paddle/fluid/framework/details/memory_optimize_helper.h index 0bfaf827fea84030de48a9984197f5b39f5c9261..b5348cc66eaa446719b299b63caa340eab3e2ab9 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper.h +++ b/paddle/fluid/framework/details/memory_optimize_helper.h @@ -29,8 +29,6 @@ namespace paddle { namespace framework { namespace details { -constexpr char kAllOpDescs[] = "all_op_descs"; - std::vector SortOpLikeDescOrder(const ir::Graph& graph); // NOTE(dzh): A ordered set for node reuse in memory optimize. @@ -55,6 +53,7 @@ class OrderedSet { void Insert(ir::Node* var); void Erase(ir::Node* var); + void Erase(const std::string& var); bool Has(ir::Node* var) const; void Clear() { mark_table_.clear(); @@ -62,6 +61,7 @@ class OrderedSet { } // find the bestfit shape node block with var. ir::Node* FindBestFitNode(ir::Node* var) const; + ir::Node* FindNextBestFitNode(ir::Node* var, ir::Node* prev) const; // map store non-const iterator, can not promise const int GetNodeIndexInPool(ir::Node* var); // pool all node to string @@ -92,10 +92,11 @@ class ControlFlowGraph { void RenameVarInCFGGraph(const std::string& old_node, const std::string& new_node, int begin_idx); - const std::set LiveIn(ir::Node* op) const; - const std::set LiveOut(ir::Node* op) const; - const std::set Use(ir::Node* op) const; - const std::vector Ops() const; + const std::set& LiveIn(ir::Node* op) const; + const std::set& LiveOut(ir::Node* op) const; + const std::set& Use(ir::Node* op) const; + const std::set& Unlived(ir::Node* op) const; + const std::vector& Ops() const; std::vector& Ops(); // for ssa-graph nodes @@ -117,6 +118,7 @@ class ControlFlowGraph { VarSetMap live_out_; VarSetMap uses_; // op inputs VarSetMap defs_; // op outputs + std::unordered_map> unlived_vars_; std::vector ops_; // op sequence by topology sort }; diff --git a/paddle/fluid/framework/details/memory_optimize_helper_test.cc b/paddle/fluid/framework/details/memory_optimize_helper_test.cc index 5c13dda9e5491044d2bcbed4b24d438cc8b8a413..5389e76e0c65c7c0ee23004ca1b0a56efb4c54fe 100644 --- a/paddle/fluid/framework/details/memory_optimize_helper_test.cc +++ b/paddle/fluid/framework/details/memory_optimize_helper_test.cc @@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) { ASSERT_EQ(pool.GetNodeIndexInPool(cache), 5); // match 4:[5,2] } } + +TEST(OrderedSet, FindBestFitNode) { + OrderedSet pool; + std::vector> nodes; + ProgramDesc prog; + BlockDesc* block_desc = prog.MutableBlock(0); + auto* op_desc = block_desc->AppendOp(); + op_desc->SetType("dummy"); + std::unique_ptr op = ir::CreateNodeForTest(op_desc); + + { + auto desc = block_desc->Var("a"); + desc->SetShape({128, 128}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + { + auto desc = block_desc->Var("b"); + desc->SetShape({128, 129}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + { + auto desc = block_desc->Var("c"); + desc->SetShape({128, 128}); + std::unique_ptr node = ir::CreateNodeForTest(desc); + node->inputs.emplace_back(op.get()); + nodes.emplace_back(std::move(node)); + } + + for (auto& node : nodes) { + pool.Insert(node.get()); + } + + // FindNextBestFitNode + auto* n = nodes[0].get(); + auto* cache = pool.FindBestFitNode(n); + PADDLE_ENFORCE(cache->Name() == "a"); + cache = pool.FindNextBestFitNode(n, cache); + PADDLE_ENFORCE(cache->Name() == "c"); + cache = pool.FindNextBestFitNode(n, cache); + PADDLE_ENFORCE(cache->Name() == "b"); +} + } // namespace details } // namespace framework } // namespace paddle @@ -182,9 +228,6 @@ TEST(CFGGraph, IRGraph) { // prepare ir graph auto prog = FillProgramDesc(); ir::Graph graph(prog); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership ControlFlowGraph cfg(graph); cfg.LiveVariableAnalysis(); @@ -210,9 +253,6 @@ TEST(CFGGraph, IRGraph) { TEST(SortOpLikeDescOrder, NormalTest) { auto prog = FillProgramDesc(); ir::Graph graph(prog); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership auto nodes = SortOpLikeDescOrder(graph); auto op_descs = prog.Block(0).AllOps(); @@ -227,9 +267,6 @@ TEST(SortOpLikeDescOrder, NormalTest) { TEST(SortOpLikeDescOrder, RemoveOpDesc) { auto prog = FillProgramDesc(); ir::Graph graph(prog); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership auto nodes = graph.Nodes(); auto op_descs = prog.Block(0).AllOps(); ir::Node* found_node = nullptr; @@ -278,8 +315,6 @@ TEST(SortOpLikeDescOrder, RemoveOpDesc) { // 3. add some op_desc TEST(SortOpLikeDescOrder, AddOpDesc) { auto prog = FillProgramDesc(); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); ir::Graph graph(prog); auto find_node_in_graph = [&](std::string s) { @@ -296,9 +331,7 @@ TEST(SortOpLikeDescOrder, AddOpDesc) { // cached desc different with real one // mimic the intermidiete pass modify the programdesc. - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership - - auto op_descs = prog.Block(0).AllOps(); + std::vector op_descs = graph.OriginProgram().Block(0).AllOps(); auto op = prog.MutableBlock(0)->AppendOp(); prog.MutableBlock(0)->Var("d1")->SetType(proto::VarType::LOD_TENSOR); @@ -330,9 +363,6 @@ TEST(SortOpLikeDescOrder, AddOpDesc) { TEST(SortOpLikeDescOrder, AddAndDeleteOpDesc) { auto prog = FillProgramDesc(); ir::Graph graph(prog); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership auto find_node_in_graph = [&](std::string s) { ir::Node* ret = nullptr; @@ -346,8 +376,9 @@ TEST(SortOpLikeDescOrder, AddAndDeleteOpDesc) { return ret; }; + std::vector op_descs = graph.OriginProgram().Block(0).AllOps(); + // remove sum node - auto op_descs = prog.Block(0).AllOps(); ir::Node* found_node = nullptr; auto nodes = graph.Nodes(); for (auto node : nodes) { @@ -408,9 +439,7 @@ TEST(SortOpLikeDescOrder, AddAndDeleteOpDesc) { TEST(SortOpLikeDescOrder, AddAndReplaceOpDescInplace) { auto prog = FillProgramDesc(); ir::Graph graph(prog); - const std::vector* all_op_descs = - new std::vector(prog.Block(0).AllOps()); - graph.Set(details::kAllOpDescs, all_op_descs); // take ownership + std::vector op_descs = graph.OriginProgram().Block(0).AllOps(); auto find_node_in_graph = [&](std::string s) { ir::Node* ret = nullptr; @@ -424,7 +453,6 @@ TEST(SortOpLikeDescOrder, AddAndReplaceOpDescInplace) { return ret; }; - auto op_descs = prog.Block(0).AllOps(); // add node auto op = prog.MutableBlock(0)->AppendOp(); prog.MutableBlock(0)->Var("d1")->SetType(proto::VarType::LOD_TENSOR); diff --git a/paddle/fluid/framework/details/memory_optimize_pass.cc b/paddle/fluid/framework/details/memory_optimize_pass.cc index 41e4a834df0abab069ab1f6cd21c8479b911250c..e7284ea64438557161a0c97a6a7f45fb9bb245ca 100644 --- a/paddle/fluid/framework/details/memory_optimize_pass.cc +++ b/paddle/fluid/framework/details/memory_optimize_pass.cc @@ -69,58 +69,60 @@ std::unique_ptr MemoryOptimizePass::ApplyImpl( } for (auto& var : op->outputs) { - if (!NodeCanReused(var) || cfg_->Use(op).count(var->Name()) == 0 || - skip_set_.count(var->Name())) + if (var->IsVar() && !var->IsCtrlVar() && skip_set_.count(var->Name())) { + VLOG(3) << "Skip set contains variable of " << var->Name() + << "disable reuse on it. skipped"; continue; - ir::Node* cache = pool_.FindBestFitNode(var); - - if (var->Name() == FLAGS_memory_optimize_debug) { - VLOG(3) << "start match var " << DebugString(var) << " of op " - << op->Name(); - VLOG(3) << pool_.ToString(); - VLOG(3) << "matched in pool : " - << ((cache == nullptr) ? "False" : "True"); } + if (NodeCanReused(var) && cfg_->Use(op).count(var->Name()) == 0) { + ir::Node* cache = pool_.FindBestFitNode(var); + while (cache != nullptr && var->Name() == cache->Name()) { + VLOG(3) << "The same cache variable is cascade reused. " + << cache->Name() << " is re-filled to the pool after " + << "the reused op is finished. Current op can not " + << "replace it again. Skip this candidate."; + cache = pool_.FindNextBestFitNode(var, cache); + } + if (var->Name() == FLAGS_memory_optimize_debug) { + VLOG(3) << "start match var " << DebugString(var) << " of op " + << op->Name(); + VLOG(3) << pool_.ToString(); + VLOG(3) << "matched in pool : " + << ((cache == nullptr) ? "False" : "True"); + } - if (cache == nullptr) continue; - if (var->Name() == cache->Name()) { - VLOG(3) << "The same cache variable is cascade reused." << var->Name() - << " is re-filled to the pool after" - << "the reused op is finished. Current op can not " - << "replace it again. Skip this candidate."; - continue; - - int node_idx_in_pool = pool_.GetNodeIndexInPool(cache); - VLOG(3) << string::Sprintf( - "!!! %s, %s => %s, cache idx %d, pool size %d", - std::to_string(reuse_id++), DebugString(var), DebugString(cache), - node_idx_in_pool, static_cast(pool_.size())); - - // update CFG Graph on the fly. - // reused var maybe re-fill into the pool - cfg_->RenameVarInCFGGraph(var->Name(), cache->Name(), idx); - // NOTE(dzhwinter): we need to both update the ProgramDesc - // and IR Graph. because op_desc/var_desc is used in CreateOp, - // CreateVar when running happens. But IR Graph - // define the dependence relationship between nodes. - RenameVarInGraphDesc(var->Name(), cache->Name(), idx); - RenameVarInGraphNode(var->Name(), cache->Name(), idx, graph.get()); - - pool_.Erase(cache); - } + if (cache != nullptr) { + int node_idx_in_pool = pool_.GetNodeIndexInPool(cache); + VLOG(3) << string::Sprintf( + "!!! %s, %s => %s, cache idx %d, pool size %d", + std::to_string(reuse_id++), DebugString(var), DebugString(cache), + node_idx_in_pool, static_cast(pool_.size())); + // NOTE(dzhwinter): update the ProgramDesc/IR Graph + // and the CFG Graph on the fly. + // + // IR Graph define the dependence relationship between nodes. + // + // ProgramDesc defines the input/output vars. Its used in + // CreateOp, CreateVar when running happens. + // + // CFG Graph store the liveness information, when reuse happens + // we also need to update the variable liveness. + const std::string var_name = var->Name(); + const std::string cache_name = cache->Name(); - // fill the pool - std::unordered_set unlived_vars; - for (auto var : cfg_->LiveIn(op)) { - if (cfg_->LiveOut(op).count(var) == 0) { - unlived_vars.emplace(var); + cfg_->RenameVarInCFGGraph(var_name, cache_name, idx); + RenameVarInGraphDesc(var_name, cache_name, idx); + RenameVarInGraphNode(var_name, cache_name, idx, graph.get()); + pool_.Erase(cache_name); } } - for (auto var : unlived_vars) { - ir::Node* var_node = cfg_->GetNodeByName(var, op); - if (NodeCanReused(var_node) && !pool_.Has(var_node)) { - pool_.Insert(var_node); - } + } + // fill the pool + for (auto& var : cfg_->Unlived(op)) { + ir::Node* var_node = cfg_->GetNodeByName(var, op); + if (var_node == nullptr || var_node->IsCtrlVar()) continue; + if (NodeCanReused(var_node) && !pool_.Has(var_node)) { + pool_.Insert(var_node); } } } @@ -190,7 +192,8 @@ void MemoryOptimizePass::SubGraphOptimize(OpDesc* op_desc) const { // effect. Because it is a single op in graph. No need to // update the ir nodes. sub_op_desc->Rename(var->Name(), cache->Name()); - if (sub_op_desc->Block()->HasVar(var->Name())) { + if (sub_op_desc->Block() != nullptr && + sub_op_desc->Block()->HasVar(var->Name())) { sub_op_desc->Block()->RemoveVar(var->Name()); } } @@ -231,7 +234,13 @@ void MemoryOptimizePass::RenameVarInGraphDesc(const std::string& var, auto* op_desc = op->Op(); op_desc->RenameInput(var, cache_var); op_desc->RenameOutput(var, cache_var); - if (op_desc->Block()->HasVar(var)) op_desc->Block()->RemoveVar(var); + if (op_desc->Block() != nullptr) { + op_desc->Block()->RemoveVar(var); + } else { + LOG(WARNING) << "op " << op->Name() << " not know its block." + << "Is the op_desc created without block pointer? " + << "Can not find " << var << " in Block(0)"; + } op_desc->Flush(); } } @@ -273,8 +282,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, // redirect the input to the latest version of cache_var for (auto* node : op->inputs) { if (node->Name() == var) { - ir::Node* cache_node = graph->CreateVarNode(var_desc.get()); - var_nodes_[cache_var].emplace_back(cache_node); + ir::Node* cache_node = var_nodes_[cache_var].back(); // swap node to cache_node cache_node->outputs.insert(cache_node->outputs.end(), @@ -283,11 +291,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, auto* prev_op = node->inputs[0]; std::replace(prev_op->outputs.begin(), prev_op->outputs.end(), node, cache_node); - cache_node->inputs.emplace_back(prev_op); for (auto* next_op : node->outputs) { std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, cache_node); } + + // erase unused node + auto& nodes = var_nodes_.at(var); + nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end()); + graph->RemoveNode(node); } } @@ -307,15 +319,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, std::replace(next_op->inputs.begin(), next_op->inputs.end(), node, cache_node); } + + // erase unused node + auto& nodes = var_nodes_.at(var); + nodes.erase(std::remove(nodes.begin(), nodes.end(), node), nodes.end()); + graph->RemoveNode(node); } } } - - // release node of unused var in graph - for (auto* node : var_nodes_[var]) { - graph->RemoveNode(node); - } - var_nodes_.at(var).clear(); } } // namespace details @@ -324,4 +335,4 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var, REGISTER_PASS(memory_optimize_pass, paddle::framework::details::MemoryOptimizePass) - .RequireGraphAttr(paddle::framework::details::kAllOpDescs); + .RequireGraphAttr(paddle::framework::details::kStaleProgramOpDescs); diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc index 75f922d2cca6855a67be7284ae407e549a1a1afb..478d2ffbcf2988487893984284d4597f018f0ca0 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc @@ -392,20 +392,32 @@ void MultiDevSSAGraphBuilderBase::CreateComputationalOp(ir::Graph *result, void MultiDevSSAGraphBuilderBase::CreateAllReduceOp( ir::Graph *result, const std::string &og) const { + OpHandleBase *op_handle = nullptr; + + auto append_allreduce_op = [&]( + const std::vector &scopes, + const std::vector &places) -> OpHandleBase * { #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) - result->Get(kGraphOps).emplace_back(new AllReduceOpHandle( - result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), - local_scopes_, places_, nccl_ctxs_)); + result->Get(kGraphOps).emplace_back(new AllReduceOpHandle( + result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), + scopes, places, nccl_ctxs_)); #else - result->Get(kGraphOps).emplace_back(new AllReduceOpHandle( - result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), - local_scopes_, places_)); + result->Get(kGraphOps).emplace_back(new AllReduceOpHandle( + result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), + scopes, places)); #endif - auto *op_handle = result->Get(kGraphOps).back(); + return result->Get(kGraphOps).back(); + }; + + if (!strategy_.enable_parallel_graph_) + op_handle = append_allreduce_op(local_scopes_, places_); for (size_t i = 0; i < places_.size(); ++i) { - auto &p = places_[i]; - SetCommunicationContext(op_handle, p); + if (strategy_.enable_parallel_graph_) { + op_handle = append_allreduce_op({local_scopes_[i]}, {places_[i]}); + } + + SetCommunicationContext(op_handle, places_[i]); auto &vars = result->Get(kGraphVars)[i][og]; PADDLE_ENFORCE(!vars.empty()); auto &prev_grad = vars.back(); @@ -413,7 +425,7 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp( auto var = new VarHandle(result->CreateEmptyNode(og, ir::Node::Type::kVariable), - vars.size(), i, og, p); + vars.size(), i, og, places_[i]); vars.emplace_back(var); op_handle->AddOutput(var); } @@ -925,9 +937,21 @@ void DistSSAGraphBuilder::InsertCollectiveOp(ir::Graph *result, } void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const { - if (need_broadcast_var_ || - (UseGPU() && - strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce)) { + // broad cast received parameters when training in parameter server mode. + if (need_broadcast_var_) { + // There are 4 conditions: + // 1. GPU && Reduce: Reduce gradient then broadcast gradient to other GPUS. + // Need to broadcast received parameters to other GPU. + // 2. GPU && AllReduce: AllReduce all graident to each GPU. Need to + // broadcast received parameters to other GPU. + // 3. CPU && AllReduce: AllReduce all gradient to each thread. Need to + // broadcast received parameters to other scope. + // 4. CPU && Reduce: because all parameters share the same memory, did not + // broadcast received parameters. + if (!UseGPU() && + strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) { + return; + } if (strategy_.fuse_broadcast_op_) { CreateFusedBroadcastOp(result, bcast_var_name_set_); } else { diff --git a/paddle/fluid/framework/details/multi_devices_helper.h b/paddle/fluid/framework/details/multi_devices_helper.h index 1a2b75fbc0c28984ce5cf00e0a2ce0f804349bb1..9afbb91005c9c3a9d2e185f4dfa901ebf812ee19 100644 --- a/paddle/fluid/framework/details/multi_devices_helper.h +++ b/paddle/fluid/framework/details/multi_devices_helper.h @@ -36,13 +36,14 @@ namespace details { // map from variable name to variables. The variables, who have the same name, // will have a differsent version. The offset in the // `std::vector` is the version of varaibles. -typedef std::vector>> +typedef std::vector>> GraphVars; const char kGraphVars[] = "vars"; // aux variables to represent dependency. Useful to resolve data hazard. -typedef std::unordered_set GraphDepVars; +typedef std::unordered_set GraphDepVars; const char kGraphDepVars[] = "dep_vars"; + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index b1a82e8771b92f2d0af4a1c7732ff2da54d496a8..e0aa352e95bc3685a1f4879bffa6e86eecd7e7f9 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -70,6 +70,9 @@ class OpHandleBase { auto it = dev_ctxes_.find(place); return it != dev_ctxes_.end() ? it->second : nullptr; } + const std::map &DeviceContext() { + return dev_ctxes_; + } void SetDeviceContext(platform::Place place, platform::DeviceContext *ctx_) { dev_ctxes_[place] = ctx_; diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc index e8deb5bfc6c013addce11e2a04286a828acc1930..5b8ae8b6770df79df309bb6be16e4f2a24ee0460 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc @@ -13,22 +13,85 @@ // limitations under the License. #include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" +#include "paddle/fluid/framework/ir/graph_helper.h" namespace paddle { namespace framework { namespace details { +std::vector> +ParallelSSAGraphExecutor::SeparateMultiDevicesGraph(ir::Graph *graph) { + std::vector> graphs; + graphs.reserve(places_.size()); + for (size_t i = 0; i < places_.size(); ++i) { + ProgramDesc empty; + graphs.emplace_back(std::unique_ptr(new ir::Graph(empty))); + auto &g = graphs.back(); + g->Set(kGraphVars, new GraphVars(1UL)); + g->Set(kGraphDepVars, new GraphDepVars); + } + auto op_handles = ir::FilterByNodeWrapper(*graph); + + for (auto &op : op_handles) { + auto &dev_ctx = op->DeviceContext(); + auto &p = dev_ctx.begin()->first; + int dev_id = boost::get(p).device; + auto &dev_dummys = graphs[dev_id]->Get(kGraphDepVars); + graphs[dev_id]->AddNode(graph->RemoveNode(op->Node()).release()); + + for (auto &var : op->Inputs()) { + auto dummy_ptr = dynamic_cast(var); + if (dummy_ptr) { + dev_dummys.insert(var); + if (graph->Nodes().count(var->Node())) + graphs[dev_id]->AddNode(graph->RemoveNode(var->Node()).release()); + } + } + for (auto &var : op->Outputs()) { + auto dummy_ptr = dynamic_cast(var); + if (dummy_ptr) { + dev_dummys.insert(var); + if (graph->Nodes().count(var->Node())) + graphs[dev_id]->AddNode(graph->RemoveNode(var->Node()).release()); + } + } + } + + for (size_t dev_id = 0; dev_id < places_.size(); ++dev_id) { + auto &dev_vars = graphs[dev_id]->Get(kGraphVars)[0]; + auto &origin_vars = graph->Get(kGraphVars)[dev_id]; + for (auto &name_pair : origin_vars) { + dev_vars.emplace(name_pair.first, name_pair.second); + for (auto &version_pair : name_pair.second) { + if (graph->Nodes().count(version_pair->Node())) { + graphs[dev_id]->AddNode( + graph->RemoveNode(version_pair->Node()).release()); + } + } + } + } + + return graphs; +} + ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( const ExecutionStrategy &strategy, const std::vector &local_scopes, - const std::vector &places, - std::vector> &&graphs) + const std::vector &places, ir::Graph *graph) : strategy_(std::move(strategy)), local_scopes_(std::move(local_scopes)), pool_(places.size() >= 2 ? new ::ThreadPool(places.size()) : nullptr), places_(std::move(places)), - graphs_(std::move(graphs)) { + // TODO(Yancey1989): Copying graphs is not safely since it deleted the + // attrs. + graphs_(SeparateMultiDevicesGraph(graph)) { PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + auto seq_allreduce_pass = + ir::PassRegistry::Instance().Get("all_reduce_deps_pass"); + for (size_t i = 0; i < graphs_.size(); ++i) { + graphs_[i] = seq_allreduce_pass->Apply(std::move(graphs_[i])); + } + // set the correct size of thread pool to each device. strategy_.num_threads_ = strategy_.num_threads_ < places_.size() ? 1UL @@ -37,7 +100,7 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( << " to run the operators of the graph on each device."; for (size_t i = 0; i < places.size(); ++i) { executors_.emplace_back(new details::ThreadedSSAGraphExecutor( - strategy_, {local_scopes_[i]}, {places_[i]}, std::move(graphs_[i]))); + strategy_, local_scopes_, {places_[i]}, graphs_.at(i).get())); } } diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.h b/paddle/fluid/framework/details/parallel_ssa_graph_executor.h index c00c5bc2d1b4b78593f99c819b5a3d642150e773..1e421f2a3a51363fe368859f7a34593c8c894077 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.h @@ -18,7 +18,9 @@ #include #include "ThreadPool.h" +#include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" +#include "paddle/fluid/framework/ir/graph.h" namespace paddle { namespace framework { @@ -29,13 +31,17 @@ class ParallelSSAGraphExecutor : public SSAGraphExecutor { ParallelSSAGraphExecutor(const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, - std::vector> &&graphs); + ir::Graph *graph); ~ParallelSSAGraphExecutor() final = default; + const ir::Graph &Graph() const override { return *graphs_[0]; } FeedFetchList Run(const std::vector &fetch_tensors) override; private: + std::vector> SeparateMultiDevicesGraph( + ir::Graph *graph); + ExecutionStrategy strategy_; std::vector local_scopes_; std::unique_ptr<::ThreadPool> pool_{nullptr}; diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index ee4c8a6ecf77e5d0f23f38b763917d926afdb07a..4e2477c205db5966aa0b2d0c7a608be94a69eb82 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -139,7 +139,7 @@ void ReduceOpHandle::GatherSelectedRows( #endif void ReduceOpHandle::RunImpl() { - platform::RecordEvent record_event(Name(), dev_ctxes_.cbegin()->second); + platform::RecordEvent record_event(Name()); if (places_.size() == 1) return; // the input and output may have dummy var. @@ -153,7 +153,7 @@ void ReduceOpHandle::RunImpl() { { auto out_var_handles = DynamicCast(outputs_); - PADDLE_ENFORCE_EQ(out_var_handles.size(), 1, + PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL, "The number of output should be one."); out_var_handle = out_var_handles.front(); } diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index 91e4f9adb418978c30f512abe6924c0ace182124..7b13112986f9ad85056a3e8a5a6ed99bd0be95d5 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -63,7 +63,7 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( eptr = std::current_exception(); } - platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr); + platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun"); ++drop_scope_counter_; bool stream_end = false; diff --git a/paddle/fluid/framework/details/sequential_execution_pass.cc b/paddle/fluid/framework/details/sequential_execution_pass.cc index 879fb29d5926941e574d0080051c195293bc60a9..0b53a76e7877891509ea4d0334673ae2a1fcf949 100644 --- a/paddle/fluid/framework/details/sequential_execution_pass.cc +++ b/paddle/fluid/framework/details/sequential_execution_pass.cc @@ -40,7 +40,7 @@ std::unique_ptr SequentialExecutionPass::ApplyImpl( static std::unordered_set skip_dist_ops{ "send", "recv", "send_barrier", "fetch_barrier"}; - auto &ops = Get>(kAllOpDescs); + auto &ops = graph->Get>(kStaleProgramOpDescs); std::vector op_node_list; op_node_list.reserve(ops.size()); @@ -107,4 +107,4 @@ std::unique_ptr SequentialExecutionPass::ApplyImpl( REGISTER_PASS(sequential_execution_pass, paddle::framework::details::SequentialExecutionPass) - .RequirePassAttr(paddle::framework::details::kAllOpDescs); + .RequireGraphAttr(paddle::framework::details::kStaleProgramOpDescs); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 677a2937945b03fa577317cb4f26e09354d06957..9ba295a2b06a5ee9c3069e95fa688595fe72d6fd 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -23,9 +23,8 @@ namespace framework { namespace details { ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( const ExecutionStrategy &strategy, const std::vector &local_scopes, - const std::vector &places, - std::unique_ptr &&graph) - : graph_(std::move(graph)), + const std::vector &places, ir::Graph *graph) + : graph_(graph), pool_(strategy.num_threads_ >= 2 ? new ::ThreadPool(strategy.num_threads_) : nullptr), local_scopes_(local_scopes), @@ -37,7 +36,7 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( FeedFetchList ThreadedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { std::unique_ptr event( - new platform::RecordEvent("ThreadedSSAGraphExecutorPrepare", nullptr)); + new platform::RecordEvent("ThreadedSSAGraphExecutorPrepare")); std::unordered_map pending_ops; std::unordered_set pending_vars; auto ready_vars = std::make_shared>(); @@ -110,7 +109,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (auto &run_op_future : run_op_futures_) { run_op_future.wait(); } - ClearFetchOp(graph_.get(), &fetch_ops); + ClearFetchOp(graph_, &fetch_ops); exception_holder_.ReThrow(); } else { continue; @@ -135,7 +134,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( } PADDLE_ENFORCE(ready_ops.empty()); // Wait FetchOps. - ClearFetchOp(graph_.get(), &fetch_ops); + ClearFetchOp(graph_, &fetch_ops); return fetch_data; } @@ -219,7 +218,7 @@ void ThreadedSSAGraphExecutor::RunOp( VLOG(10) << op << " " << op->Name() << " Done "; running_ops_--; ready_var_q->Extend(op->Outputs()); - VLOG(10) << op << " " << op->Name() << "Signal posted"; + VLOG(10) << op << " " << op->Name() << " Signal posted"; } catch (...) { exception_holder_.Catch(std::current_exception()); } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 24da56c09e3e0f3894d58e5af8838c98e3e1e67c..0867f6210480ec405e7cc4ea42c74b750133ea4e 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -41,7 +41,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, - std::unique_ptr &&graph); + ir::Graph *graph); const ir::Graph &Graph() const override { return *graph_; } // Run a SSAGraph by a thread pool @@ -55,7 +55,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { details::OpHandleBase *op); private: - std::unique_ptr graph_; + ir::Graph *graph_; std::unique_ptr<::ThreadPool> pool_; std::vector local_scopes_; std::vector places_; diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 4323883fa5cc9b26a68c2980f3b7a49eca610543..c31d0beec306fe165164837cd15c95b4efd76af0 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" +#include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/framework/transfer_scope_cache.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/operators/distributed/distributed.h" diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 6338be75a4b1d3c4caf7a6f7add4d05fec690340..96530b2a3f9cfd9462627a42b2bb0fea98758f92 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -44,6 +44,7 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name, // Since we want to fetch LodTensor from a variable, the variable must // be created alreadly. Variable* g_fetch_value = scope.FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(g_fetch_value, "%s is not found.", var_name); PADDLE_ENFORCE(g_fetch_value->IsType(), "Only %s can be invoked by GetFetchVariable", typeid(FeedFetchList).name()); diff --git a/paddle/fluid/framework/inplace_op_inference_test.cc b/paddle/fluid/framework/inplace_op_inference_test.cc index 3e4d715c6f089496d1b1f7906e3f10147a073622..bf9d1dcd380cdff886301faf13b0015fd5a2ed5c 100644 --- a/paddle/fluid/framework/inplace_op_inference_test.cc +++ b/paddle/fluid/framework/inplace_op_inference_test.cc @@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) { op->SetOutput("Out", {"test2_out"}); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); - prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64}); + prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_out"); - prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) { op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"}); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); - prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024}); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); prog.MutableBlock(0)->Var("test2_out"); - prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) { prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("z0"); - prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); @@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) { prog.MutableBlock(0)->Var("o0"); prog.MutableBlock(0)->Var("y0"); prog.MutableBlock(0)->Var("z0"); - prog.MutableBlock(0)->Var("a0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("b0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("c0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("o0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("y0")->SetShape({32, 16}); - prog.MutableBlock(0)->Var("z0")->SetShape({32, 16}); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; auto in_to_outs = infer_inplace(*op, op->Block()); diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 07c2c970d4de3cecf03e4cf80e60e81e7a9595a8..ca6b0229e906c0f8bfbf9ee6781013cb4ef7bbce 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -102,6 +102,8 @@ cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DE cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) if (WITH_MKLDNN) cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) + cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor) cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass) + cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass) endif () diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h index a756dfc1b98e1de55c809c73e2c4df1e628950ae..39b0585d3a6f9b52c9ec4b0a24f8532a3410851a 100644 --- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h @@ -22,7 +22,8 @@ namespace ir { class AttentionLSTMFusePass : public FusePassBase { protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h index ad966e11e6222a4ed4c730089c454b0d1c7bd0b3..8c3c8b56c08cc09e66b20d17bf730edec0499f35 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h @@ -31,7 +31,8 @@ class ConvAffineChannelFusePass : public FusePassBase { virtual ~ConvAffineChannelFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"conv_affine_channel_fuse"}; }; @@ -40,7 +41,8 @@ class ConvEltwiseAddAffineChannelFusePass : public FusePassBase { virtual ~ConvEltwiseAddAffineChannelFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"conv_eltwiseadd_affine_channel_fuse"}; }; diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 846a14e365e6bd7f056d409130a3b246371931da..04765dd1440331fb37ed2eb05a9ce762eb2b81bc 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -169,7 +169,7 @@ std::unique_ptr ConvBNFusePass::ApplyImpl( if (has_bias && conv->Op()->Input("Bias").size() > 0) { // reuse existing conv bias node auto conv_bias_names = conv->Op()->Input("Bias"); - PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1); + PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1UL); auto* conv_bias_var = scope->FindVar(conv_bias_names[0]); auto* conv_bias_tensor = conv_bias_var->GetMutable(); PADDLE_ENFORCE_EQ(conv_bias_tensor->dims(), diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h index 2c9eb574fe8e054e0ae221f08f664b91f05d95c9..cf425a2730904d4ab21c33e66b72db0692cb087c 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h @@ -31,7 +31,8 @@ class ConvBNFusePass : public FusePassBase { virtual ~ConvBNFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"conv_bn_fuse"}; }; @@ -40,7 +41,8 @@ class ConvEltwiseAddBNFusePass : public FusePassBase { virtual ~ConvEltwiseAddBNFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"conv_eltwiseadd_bn_fuse"}; }; diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h index 3b40a5a92665c07bc2b66e6a96721f573d40393f..9259a4ac5c89b1a7d1413fb2eaaa5fc6a70348f2 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h @@ -25,7 +25,8 @@ class ConvElementwiseAdd2ActFusePass : public FusePassBase { virtual ~ConvElementwiseAdd2ActFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h index ac69aa6458fc8c19b670dea2af1251c44dc353a8..9c0b50f155821cf2bd815a6fb087e3f6cc513641 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h @@ -25,7 +25,8 @@ class ConvElementwiseAddActFusePass : public FusePassBase { virtual ~ConvElementwiseAddActFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h index f234603f5856a9238164f7fb0e5cc81ea9b7ed60..bf43bd5ce2602a3e240c56f00f66f13b79151002 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h @@ -25,7 +25,8 @@ class ConvElementwiseAddFusePass : public FusePassBase { virtual ~ConvElementwiseAddFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h index e5ad3067ec4060e41f1464395f3fc76183de3e66..fde2a0a4eecdec9ad5ac58ad8e63c26cce482682 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h @@ -14,6 +14,8 @@ #pragma once +#include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -30,7 +32,8 @@ class EmbeddingFCLSTMFusePass : public FusePassBase { virtual ~EmbeddingFCLSTMFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"embedding_fc_lstm_fuse"}; }; diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.h b/paddle/fluid/framework/ir/fc_fuse_pass.h index 6c69539d1e48268afc2435f8f73b3818d13107cd..783a052edcf84c8c437a7b2e25f0d67c0366691e 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_fuse_pass.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -29,7 +31,8 @@ class FCFusePass : public FusePassBase { virtual ~FCFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass.h index 63e1c72bfb2e2641ae5d44858b342d5e427e9045..e359a3289440fffbec622488ecf3a7f49e986574 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.h @@ -30,7 +30,8 @@ class FCGRUFusePass : public FusePassBase { virtual ~FCGRUFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"fc_gru_fuse"}; }; @@ -41,7 +42,8 @@ class MulGRUFusePass : public FusePassBase { virtual ~MulGRUFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"fc_nobias_gru_fuse"}; }; diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h index 3ee32c63a46fcc34bdccd1e14d4bbaf9668c49e9..21482615a6efef930b7328594477a51f4aaf28e7 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h @@ -14,6 +14,8 @@ #pragma once +#include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -30,7 +32,8 @@ class FCLstmFusePass : public FusePassBase { virtual ~FCLstmFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"fc_lstm_fuse"}; }; @@ -40,7 +43,8 @@ class MulLstmFusePass : public FusePassBase { virtual ~MulLstmFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"fc_nobias_lstm_fuse"}; }; diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h index b2fecc076efca333539fe81e67eee222873aee2a..0fee5274478e8b8db852774077ff5979f0aaba25 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h @@ -32,7 +32,8 @@ class FuseElewiseAddActPass : public FusePassBase { virtual ~FuseElewiseAddActPass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; std::unique_ptr FuseElewiseAddAct( std::unique_ptr graph, diff --git a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc index 0d94008ea82d0e09732d4b6448fdded94b60733c..fe844caed2e757fb080dcee398c8903b929b06e5 100644 --- a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc +++ b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc @@ -111,7 +111,7 @@ std::unique_ptr FuseReluDepthwiseConvPass::FuseReluDepthwiseConv( xg_var = subgraph.at(xg)->Var(); } - PADDLE_ENFORCE_EQ(layer_op->Input("Input").size(), 1); + PADDLE_ENFORCE_EQ(layer_op->Input("Input").size(), 1UL); PADDLE_ENFORCE_EQ(layer_op->Input("Input")[0], y_var->Name()); layer_op->SetInput("Input", {x_var->Name()}); subgraph.at(layer)->inputs.push_back(subgraph.at(x)); @@ -119,13 +119,13 @@ std::unique_ptr FuseReluDepthwiseConvPass::FuseReluDepthwiseConv( VLOG(4) << "replace " << y_var->Name() << " -> " << x_var->Name(); if (!only_forward) { - PADDLE_ENFORCE_EQ(layer_g_op->Input("Input").size(), 1); + PADDLE_ENFORCE_EQ(layer_g_op->Input("Input").size(), 1UL); PADDLE_ENFORCE_EQ(layer_g_op->Input("Input")[0], y_var->Name()); layer_g_op->SetInput("Input", {x_var->Name()}); subgraph.at(layer_g)->inputs.push_back(subgraph.at(x)); subgraph.at(x)->outputs.push_back(subgraph.at(layer_g)); - PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input")).size(), 1); + PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input")).size(), 1UL); PADDLE_ENFORCE_EQ(layer_g_op->Output(GradVarName("Input"))[0], yg_var->Name()); layer_g_op->SetOutput(GradVarName("Input"), {xg_var->Name()}); diff --git a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h index 6bd653775e42c1ee16051e205e9fa9888ea05eaa..efb49b8300e677f17d9e205800d837b88edfd2e9 100644 --- a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h +++ b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h @@ -32,7 +32,8 @@ class FuseReluDepthwiseConvPass : public FusePassBase { virtual ~FuseReluDepthwiseConvPass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; std::unique_ptr FuseReluDepthwiseConv( std::unique_ptr graph, bool only_forward) const; }; diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index 4b5c846f3271b2dd5e094020571069aff590cd2b..5e954fa9c419b249bb8a4be5a78c01da85b017b2 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -76,6 +76,9 @@ std::map> Graph::InitFromProgram( var->inputs.push_back(node); } } + Set>( + details::kStaleProgramOpDescs, + new std::vector(program.Block(0).AllOps())); return var_nodes; } diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index feb3330176490e71c51cce826fe49d5499469ad7..cfd974e4bd679fdd06739f4c943bb197865020fb 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -26,6 +26,14 @@ limitations under the License. */ namespace paddle { namespace framework { + +namespace details { + +// This attr is not recommended, because the graph should not dependence +// the program once it is built. +constexpr char kStaleProgramOpDescs[] = "stale_program_op_descs"; +} // namespace details + namespace ir { /* @@ -168,10 +176,13 @@ class Graph { return ret; } - void RemoveNode(ir::Node *node) { + std::unique_ptr RemoveNode(ir::Node *node) { PADDLE_ENFORCE(node_set_.find(node) != node_set_.end()); - node_set_.erase(node); + std::unique_ptr ret; + ret.reset(nodes_.at(node).release()); nodes_.erase(node); + node_set_.erase(node); + return ret; } // NOTE low performance, but simple and secure. @@ -184,12 +195,11 @@ class Graph { return nullptr; } - void ResolveHazard( - const std::map> &var_nodes); - - private: - std::map> InitFromProgram( - const ProgramDesc &program); + // Returns reference to the original program. + // WARN: After a series of passes, the current graph can be quite + // different from OriginProgram. Caller shouldn't assume much from + // the returned OriginProgram. + const ProgramDesc &OriginProgram() const { return program_; } // This method takes ownership of `node`. ir::Node *AddNode(ir::Node *node) { @@ -199,6 +209,13 @@ class Graph { return node; } + void ResolveHazard( + const std::map> &var_nodes); + + private: + std::map> InitFromProgram( + const ProgramDesc &program); + // NOTE: program_ shouldn't be exposed to user. const ProgramDesc program_; std::map attrs_; diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 9ea0729e1f3339c2f17371ecc8fa51325b9629bb..c0c34d186b00814fe6c6fd42beb78133233a1357 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -38,7 +38,7 @@ size_t PDPattern::id_ = 0UL; PDNode *PDPattern::NewNode(const std::string &name) { if (!name.empty()) { - PADDLE_ENFORCE_EQ(node_map_.count(name), 0, + PADDLE_ENFORCE_EQ(node_map_.count(name), 0UL, "PDNode's name should be unique, get duplicate [%s]", name); } @@ -51,7 +51,7 @@ PDNode *PDPattern::NewNode(const std::string &name) { PDNode *PDPattern::NewNode(PDNode::teller_t &&teller, const std::string &name) { if (!name.empty()) { - PADDLE_ENFORCE_EQ(node_map_.count(name), 0, + PADDLE_ENFORCE_EQ(node_map_.count(name), 0UL, "PDNode's name should be unique, get duplicate [%s]", name); } diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc index 3b738aa159ebfd77f00c9e532fbd94542e2097db..5bdc0c5faed7131b873edf9b43c847c010b6e3f3 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc @@ -38,9 +38,13 @@ std::unique_ptr IdentityScaleOpCleanPass::ApplyImpl( ->assert_is_op("scale") ->assert_op_attr("scale", 1.) ->assert_op_attr("bias", 0.); - auto scale_out = detector.mutable_pattern() - ->NewNode("scale_out") - ->assert_is_op_output("scale"); + auto scale_out = + detector.mutable_pattern() + ->NewNode("scale_out") + ->assert_is_op_output("scale") + // scale's output var should has only one consumer, or it can't be + // removed. + ->assert_more([](Node* x) { return x->outputs.size() == 1UL; }); pre_op->LinksTo({scale_in}); scale_op->LinksFrom({scale_in}).LinksTo({scale_out}); diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h index 50a654d82f0e4fb7e8e91c665397716407e6d2a5..6da592561da1e4046acbfd86c04862f69b7a97a8 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h @@ -22,7 +22,8 @@ namespace ir { class IdentityScaleOpCleanPass : public FusePassBase { protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; private: virtual ~IdentityScaleOpCleanPass() = default; diff --git a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc index 7713ed1eab88ee4fa16d52e7425075ae66f721a3..6607c026a748576f38419b275d71217f3eee0c59 100644 --- a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc +++ b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc @@ -37,6 +37,7 @@ class InferCleanGraphPass : public FusePassBase { std::unordered_set invalid_nodes; int valid_op = 0; for (auto* node : graph->Nodes()) { + PADDLE_ENFORCE_NOT_NULL(node); if (is_valid_node(node)) { invalid_nodes.insert(node); } else if (node->IsOp()) { diff --git a/paddle/fluid/framework/ir/lock_free_optimize_pass.h b/paddle/fluid/framework/ir/lock_free_optimize_pass.h index 7310f596f8a3170e84840be4bab8390b780b6577..f9157b10d9554092a5da6a6f73ecf7ceac1430dd 100644 --- a/paddle/fluid/framework/ir/lock_free_optimize_pass.h +++ b/paddle/fluid/framework/ir/lock_free_optimize_pass.h @@ -60,7 +60,8 @@ class LockFreeOptimizePass : public Pass { virtual ~LockFreeOptimizePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; private: // Create a new sgd node via current optimizer node diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h index f3ad9f1c2bf14db418629e0c607e2510f01908b8..0ef5c177bf98b354bb18fc1d2ec8e5bef4b58951 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h @@ -29,7 +29,8 @@ class ConvBiasFusePass : public FusePassBase { virtual bool is_conv3d() const { return false; } protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"conv_bias_mkldnn_fuse"}; }; /* diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..38b7fe52037c1a264e4251b7a54ef7569ee6d765 --- /dev/null +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -0,0 +1,151 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/naive_executor.h" +#include "paddle/fluid/platform/place.h" + +#include +#include "paddle/fluid/framework/op_proto_maker.h" + +namespace paddle { +namespace framework { +namespace ir { + +void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, + const std::vector& inputs, + const std::vector& outputs) { + auto* op = prog->MutableBlock(0)->AppendOp(); + op->SetType(type); + if (type == "conv2d") { + op->SetAttr("use_mkldnn", true); + op->SetAttr("name", name); + op->SetInput("Input", {inputs[0]}); + op->SetInput("Filter", {inputs[1]}); + if (inputs.size() > 2) + op->SetInput("Bias", {inputs[2]}); + else + op->SetInput("Bias", {}); + } else if (type == "elementwise_add") { + op->SetAttr("use_mkldnn", true); + op->SetInput("X", {inputs[0]}); + op->SetInput("Y", {inputs[1]}); + } + op->SetOutput("Out", outputs); + op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), + static_cast(OpRole::kForward)); +} + +// (c, weights)->conv->f +// (f)->elementwise_add->g +ProgramDesc BuildProgramDesc(bool convWithExistingBias) { + ProgramDesc prog; + std::vector nodes{"c", "weights", "f", "eltwise_bias", "g"}; + if (convWithExistingBias) nodes.push_back("conv_bias"); + for (auto& v : nodes) { + auto* var = prog.MutableBlock(0)->Var(v); + var->SetType(proto::VarType::LOD_TENSOR); + if (v == "weights" || v == "conv_bias" || v == "eltwise_bias") { + var->SetPersistable(true); + } + } + + // conv+bias, both with MKL-DNN + if (convWithExistingBias) { + SetOp(&prog, "conv2d", "conv", + std::vector({"c", "weights", "conv_bias"}), + std::vector({"f"})); + } else { + SetOp(&prog, "conv2d", "conv", std::vector({"c", "weights"}), + std::vector({"f"})); + } + SetOp(&prog, "elementwise_add", "eltwise", + std::vector({"f", "eltwise_bias"}), + std::vector({"g"})); + + return prog; +} + +void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, + const char* var_name) { + auto x = scope->Var(var_name); + auto tensor = x->GetMutable(); + tensor->mutable_data(place, proto::VarType::FP32, + ::paddle::memory::Allocator::kDefault, 1); +} + +void MainTest(bool convWithExistingBias) { + auto prog = BuildProgramDesc(convWithExistingBias); + std::unique_ptr graph(new ir::Graph(prog)); + auto place = paddle::platform::CPUPlace(); + NaiveExecutor exe{place}; + Scope scope; + // Init scope, as it is used in pass + exe.CreateVariables(prog, 0, true, &scope); + if (convWithExistingBias) { + InitTensorHolder(&scope, place, "conv_bias"); + InitTensorHolder(&scope, place, "eltwise_bias"); + } + graph->Set(kParamScopeAttr, new framework::Scope*(&scope)); + + auto pass = PassRegistry::Instance().Get("conv_bias_mkldnn_fuse_pass"); + + int original_nodes_num = graph->Nodes().size(); + + graph = pass->Apply(std::move(graph)); + + int current_nodes_num = graph->Nodes().size(); + + // Remove 3 Nodes: Conv, Bias, conv_out + // Add 1 Node: ConvBias + EXPECT_EQ(original_nodes_num - 2, current_nodes_num); + + // Assert conv_bias op in newly generated graph + int conv_bias_count = 0; + + for (auto* node : graph->Nodes()) { + if (node->IsOp() && node->Op()->Type() == "conv2d") { + auto* op = node->Op(); + ASSERT_TRUE(op->HasAttr("use_mkldnn")); + EXPECT_TRUE(boost::get(op->GetAttr("use_mkldnn"))); + // check if "conv" convolution is fused + auto op_name = boost::get(op->GetAttr("name")); + if (op_name == "conv") { + auto input_names = op->InputNames(); + ASSERT_TRUE(std::find(input_names.begin(), input_names.end(), "Bias") != + input_names.end()); + auto bias = boost::get>(op->Input("Bias")); + if (bias.size()) { + ++conv_bias_count; + } + } + } + } + EXPECT_EQ(conv_bias_count, 1); +} + +TEST(ConvBiasFusePass, bias_free_conv) { MainTest(false); } + +TEST(ConvBiasFusePass, conv_with_existing_bias) { MainTest(true); } + +TEST(ConvBiasFusePass, conv3d) { + Conv3DBiasFusePass pass; + ASSERT_TRUE(pass.is_conv3d()); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +USE_PASS(conv_bias_mkldnn_fuse_pass); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc index 9ef5c298b8cddfec094e9544dc6da9afdcaf0dab..433d89d8d3f20b3f87cd94901ebbf79cd99de813 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc @@ -44,10 +44,14 @@ struct TestIsReachable { using func = std::function; auto operator()(const std::unique_ptr& graph) -> func { - auto find_node = [](const std::unique_ptr& graph, - const std::string& name) -> Node* { + auto hash = [](const Node* node) -> std::string { + return node->Name() + std::to_string(node->id()); + }; + + auto find_node = [&](const std::unique_ptr& graph, + const std::string& name) -> Node* { for (auto& node : GraphTraits::DFS(*graph)) { - if (name == node.Name()) { + if (name == hash(&node)) { return &node; } } @@ -55,13 +59,17 @@ struct TestIsReachable { return nullptr; }; - return [&](std::string from, const std::string to) -> bool { + // update the from and to strings to hashed equivs in loop from graph traits + return [&](std::string from, std::string to) -> bool { if (from == to) return true; std::map visited; for (auto& node : GraphTraits::DFS(*graph)) { - visited[node.Name()] = false; + auto hashed = hash(&node); + if (node.Name() == from) from = hashed; + if (node.Name() == to) to = hashed; + visited[hashed] = false; } visited[from] = true; @@ -72,15 +80,15 @@ struct TestIsReachable { while (!queue.empty()) { auto cur = find_node(graph, queue.front()); queue.pop_front(); - if (cur == nullptr) return false; for (auto n : cur->outputs) { - if (n->Name() == to) return true; + auto hashed_name = hash(n); + if (hashed_name == to) return true; - if (!visited[n->Name()]) { - visited[n->Name()] = true; - queue.push_back(n->Name()); + if (!visited[hashed_name]) { + visited[hashed_name] = true; + queue.push_back(hashed_name); } } } @@ -166,6 +174,28 @@ TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) { RunPassAndAssert(&prog, "a", "relu", 1); } +TEST(ConvElementwiseAddMKLDNNFusePass, + ConvolutionProjectionAsYWithElementwiseAddRelu) { + auto prog = BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, + {"bias", "weights", "bias2", "weights2"}); + + SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"}); + // right branch + SetOp(&prog, "conv2d", + {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}}, + {"Output", "c"}); + + // left branch + SetOp(&prog, "conv2d", + {{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}}, + {"Output", "f"}); + + SetOp(&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {"Out", "d"}); + SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"}); + + RunPassAndAssert(&prog, "a", "relu", 2); +} + TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddReluNoBias) { auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"}); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc index 20e52410ffe3caa86450bc05bf3aabf5a5bce374..ccac65f3b3ad22d0f424ef9de9a7bd506e8ac862 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc @@ -21,7 +21,7 @@ namespace ir { std::unique_ptr MKLDNNPlacementPass::ApplyImpl( std::unique_ptr graph) const { - VLOG(3) << "Aplies MKL-DNN placement strategy."; + VLOG(3) << "Applies MKL-DNN placement strategy."; const auto& op_types_list = Get>("mkldnn_enabled_op_types"); for (const Node* n : graph->Nodes()) { diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6ec7e4d68b95125d630ce4a60635eb7b711e820 --- /dev/null +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc @@ -0,0 +1,136 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" + +#include +#include + +namespace paddle { +namespace framework { +namespace ir { + +void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, + const std::vector& inputs, + const std::vector& outputs, boost::tribool use_mkldnn) { + auto* op = prog->MutableBlock(0)->AppendOp(); + + op->SetType(type); + + if (!boost::indeterminate(use_mkldnn)) op->SetAttr("use_mkldnn", use_mkldnn); + + if (type == "conv2d") { + op->SetAttr("name", name); + op->SetInput("Input", {inputs[0]}); + op->SetInput("Filter", {inputs[1]}); + op->SetInput("Bias", {inputs[2]}); + } else if (type == "relu") { + op->SetInput("X", inputs); + } else if (type == "concat") { + op->SetAttr("axis", 1); + op->SetInput("X", {inputs[0], inputs[1]}); + } else if (type == "pool2d") { + op->SetInput("X", {inputs[0]}); + } else { + FAIL() << "Unexpected operator type."; + } + op->SetOutput("Out", {outputs[0]}); +} + +// operator use_mkldnn +// --------------------------------------- +// (a,b)->concat->c none +// (c,weights,bias)->conv->f none +// f->relu->g false +// g->pool->h false +// (h,weights2,bias2)->conv->k true +// k->relu->l true +ProgramDesc BuildProgramDesc() { + ProgramDesc prog; + + for (auto& v : + std::vector({"a", "b", "c", "weights", "bias", "f", "g", + "h", "weights2", "bias2", "k", "l"})) { + auto* var = prog.MutableBlock(0)->Var(v); + var->SetType(proto::VarType::SELECTED_ROWS); + if (v == "weights" || v == "bias") { + var->SetPersistable(true); + } + } + + SetOp(&prog, "concat", "concat1", std::vector({"a", "b"}), + std::vector({"c"}), boost::indeterminate); + SetOp(&prog, "conv2d", "conv1", + std::vector({"c", "weights", "bias"}), + std::vector({"f"}), boost::indeterminate); + SetOp(&prog, "relu", "relu1", std::vector({"f"}), + std::vector({"g"}), false); + SetOp(&prog, "pool2d", "pool1", std::vector({"g"}), + std::vector({"h"}), false); + SetOp(&prog, "conv2d", "conv2", + std::vector({"h", "weights2", "bias2"}), + std::vector({"k"}), true); + SetOp(&prog, "relu", "relu2", std::vector({"k"}), + std::vector({"l"}), true); + + return prog; +} + +void MainTest(std::initializer_list mkldnn_enabled_op_types, + unsigned expected_use_mkldnn_true_count) { + auto prog = BuildProgramDesc(); + + std::unique_ptr graph(new ir::Graph(prog)); + + auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass"); + pass->Set("mkldnn_enabled_op_types", + new std::unordered_set(mkldnn_enabled_op_types)); + + graph = pass->Apply(std::move(graph)); + + unsigned use_mkldnn_true_count = 0; + + for (auto* node : graph->Nodes()) { + if (node->IsOp()) { + auto* op = node->Op(); + if (op->HasAttr("use_mkldnn") && + boost::get(op->GetAttr("use_mkldnn"))) { + ++use_mkldnn_true_count; + } + } + } + + EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count); +} + +TEST(MKLDNNPlacementPass, enable_conv_relu) { + // 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 0 pool + MainTest({"conv2d", "relu"}, 3); +} + +TEST(MKLDNNPlacementPass, enable_relu_pool) { + // 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool + MainTest({"relu", "pool2d"}, 4); +} + +TEST(MKLDNNPlacementPass, enable_all) { + // 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool + MainTest({}, 4); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +USE_PASS(mkldnn_placement_pass); diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h index 3f3f0846eba1201e57a653f8e515c28d2bcdd5e3..ede0bea07ff4130a0f6b3d21d6e34222a5013170 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h @@ -31,7 +31,8 @@ class RepeatedFCReluFusePass : public FusePassBase { virtual ~RepeatedFCReluFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"repeated_fc_relu_fuse"}; }; diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h index 9f5fd1a29adf918806d8f30097d8c7f002f48f3e..06e18f9dc327bf2ffaf8d2ab64edcbddea2eb04c 100644 --- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h +++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" @@ -25,7 +27,8 @@ class SeqConcatFcFusePass : public FusePassBase { virtual ~SeqConcatFcFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h index dac9de71930c1768bdf416520caae6468449cd3d..c36c6b76a238dd21eb0c9308e780761aa9e4e27a 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h @@ -28,7 +28,8 @@ class SeqConvEltAddReluFusePass : public FusePassBase { virtual ~SeqConvEltAddReluFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"seqconv_eltadd_relu_fuse"}; }; diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h index ba2154045e62c687173565c5ad30ea4d45d3c8f4..a5db3528da36ad08bb7f4d2765ee78222c569a5c 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h @@ -42,7 +42,8 @@ class SeqPoolConcatFusePass : public FusePassBase { virtual ~SeqPoolConcatFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"seqpool_concat_fuse"}; }; diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc index 456a03192cc4e4a9d0dbe2dcb649b6c1b4d9cd5a..35d1d5129bba7043026e5489b806480775473257 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc @@ -164,7 +164,7 @@ ProgramDesc BuildProgramDesc(int num_inputs_of_concat) { }; std::vector concat_inputs; for (int i = 0; i < num_inputs_of_concat; ++i) { - std::string prefix = "seqpool_op_" + i; + std::string prefix = "seqpool_op_" + std::to_string(i); new_var(prefix + "in"); new_var(prefix + "out"); new_var(prefix + "out_unused"); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h index fb49adc3768ec99cab4321c6b90c93dfed6d32f2..c21ba65c40a8d54c315ab347e5a8a3266a143779 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h @@ -31,7 +31,8 @@ class SquaredMatSubFusePass : public FusePassBase { virtual ~SquaredMatSubFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; const std::string name_scope_{"squared_mat_sub_fuse"}; }; diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h index fb0f0ae9efdc5a25a799d6123fa658a99860cd86..a7d18ec86da1c02aef84c25c378691eb8f651015 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h @@ -30,7 +30,8 @@ class TransposeFlattenConcatFusePass : public FusePassBase { virtual ~TransposeFlattenConcatFusePass() {} protected: - std::unique_ptr ApplyImpl(std::unique_ptr graph) const; + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; }; } // namespace ir diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 0a0f8f4655bc34cdb25205ff6eaec9f96c801ebd..5f3ce60e1d94ea4078cf0b709df362bad317f621 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -27,7 +27,7 @@ enum class OpRole { kForward = 0x0000, kBackward = 0x0001, kOptimize = 0x0002, - // RPC role is for send/recv releated op + // RPC role is for send/recv related op kRPC = 0x0004, // Dist role is for split_byref/split_selected_rows/concat // used for distributed training. diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 2c1648c81fc999c6306d5b08bc243f3ad21fec04..a53a81c270aeec1b6ee4ed30e77526f4ea2e7977 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -290,7 +290,7 @@ struct OpKernelRegistrarFunctorExsecond.empty() ? nullptr : it->second[0]; } -const Variable* ExecutionContext::LegacyInputVar( - const std::string& name) const { - auto ipt = op_.Input(name); - return ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); -} - Variable* ExecutionContext::OutputVar(const std::string& name) const { auto it = ctx_.outputs.find(name); if (it == ctx_.outputs.end()) return nullptr; @@ -485,22 +477,11 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const { return it->second.empty() ? nullptr : it->second[0]; } -Variable* ExecutionContext::LegacyOutputVar(const std::string& name) const { - auto opt = op_.Output(name); - return opt == kEmptyVarName ? nullptr : scope_.FindVar(opt); -} - template <> const Tensor* ExecutionContext::Input(const std::string& name) const { return Input(name); } -template <> -const Tensor* ExecutionContext::LegacyInput( - const std::string& name) const { - return LegacyInput(name); -} - template <> const std::vector ExecutionContext::MultiInput( const std::string& name) const { @@ -523,35 +504,11 @@ const std::vector ExecutionContext::MultiInput( return res; } -template <> -const std::vector ExecutionContext::LegacyMultiInput( - const std::string& name) const { - auto names = op().Inputs(name); - std::vector res; - res.reserve(names.size()); - std::transform(names.begin(), names.end(), std::back_inserter(res), - [&](const std::string& sub_name) -> const Tensor* { - auto var = scope_.FindVar(sub_name); - if (var == nullptr) return nullptr; - PADDLE_ENFORCE( - var->IsType(), - "%s should be LoDTensor, but the received type is %s", - sub_name, ToTypeName(var->Type())); - return &(var->Get()); - }); - return res; -} - template <> Tensor* ExecutionContext::Output(const std::string& name) const { return Output(name); } -template <> -Tensor* ExecutionContext::LegacyOutput(const std::string& name) const { - return LegacyOutput(name); -} - template <> std::vector ExecutionContext::MultiOutput( const std::string& name) const { @@ -884,7 +841,8 @@ class RuntimeInferShapeContext : public InferShapeContext { const RuntimeContext& ctx_; }; -static void CheckTensorNANOrInf(const std::string& name, +static void CheckTensorNANOrInf(const std::string& op_type, + const std::string& name, const framework::Tensor& tensor) { if (tensor.memory_size() == 0) { return; @@ -894,9 +852,9 @@ static void CheckTensorNANOrInf(const std::string& name, return; } PADDLE_ENFORCE(!framework::TensorContainsInf(tensor), - "Tensor %s contains Inf", name); + "Operator %s output Tensor %s contains Inf", op_type, name); PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor), - "Tensor %s contains NAN", name); + "Operator %s output Tensor %s contains NAN", op_type, name); } void OperatorWithKernel::RuntimeInferShape(const Scope& scope, @@ -906,6 +864,16 @@ void OperatorWithKernel::RuntimeInferShape(const Scope& scope, this->InferShape(&infer_shape_ctx); } +std::vector* OperatorWithKernel::GetKernelConfig( + const OpKernelType& key) const { + auto config_iter = kernel_configs_map_.find(key); + std::vector* kernel_configs = nullptr; + if (config_iter != kernel_configs_map_.end()) { + kernel_configs = &(config_iter->second); + } + return kernel_configs; +} + void OperatorWithKernel::RunImpl(const Scope& scope, const platform::Place& place) const { RuntimeContext ctx(Inputs(), Outputs(), scope); @@ -923,7 +891,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, OpKernelMap& kernels = kernels_iter->second; auto expected_kernel_key = this->GetExpectedKernelType( - ExecutionContext(*this, scope, *dev_ctx, ctx)); + ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); @@ -942,6 +910,9 @@ void OperatorWithKernel::RunImpl(const Scope& scope, KernelTypeToString(expected_kernel_key)); } + std::vector* kernel_configs = + GetKernelConfig(expected_kernel_key); + // do data transformScope &transfer_scope; std::vector transfered_inplace_vars; auto* transfer_scope = @@ -959,7 +930,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope, this->InferShape(&infer_shape_ctx); // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext // not Scope. Imperative mode only pass inputs and get outputs. - kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx, ctx)); + kernel_iter->second( + ExecutionContext(*this, exec_scope, *dev_ctx, ctx, kernel_configs)); if (!transfered_inplace_vars.empty()) { // there is inplace variable has been transfered. @@ -976,9 +948,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope, auto* var = exec_scope.FindVar(vname); if (var == nullptr) continue; if (var->IsType()) { - CheckTensorNANOrInf(vname, var->Get()); + CheckTensorNANOrInf(type_, vname, var->Get()); } else if (var->IsType()) { - CheckTensorNANOrInf(vname, var->Get().value()); + CheckTensorNANOrInf(type_, vname, + var->Get().value()); } } } @@ -989,11 +962,14 @@ void OperatorWithKernel::TransferInplaceVarsBack( const Scope& transfer_scope) const { for (auto& var_name : inplace_vars) { VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; + auto* origin_var = scope.FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(origin_var, "The var[%s] should not be nullptr.", + var_name); auto* original_tensor = - GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name)); + GetMutableLoDTensorOrSelectedRowsValueFromVar(origin_var); auto* var = transfer_scope.FindVar(var_name); - PADDLE_ENFORCE(var != nullptr, "The var[%s] should not be nullptr", - var_name); + PADDLE_ENFORCE_NOT_NULL(var, "The var[%s] should not be nullptr.", + var_name); auto* transformed_tensor = GetLoDTensorOrSelectedRowsValueFromVar(*var); original_tensor->ShareDataWith(*transformed_tensor); } diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index e33214b44bb5d8ea5eb32d442d597a369c198bdd..55629636a816982c4debe4b5b7138558ac309eb5 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -16,9 +16,11 @@ limitations under the License. */ #include #include +#include #include #include #include +#include #include #include "glog/logging.h" // For VLOG @@ -28,6 +30,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_kernel_type.h" +#include "paddle/fluid/framework/operator_kernel_configs.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/tensor.h" @@ -184,12 +187,30 @@ class OperatorBase { const platform::Place& place) const = 0; }; +#ifdef PADDLE_WITH_CUDA +using KernelConfig = boost::variant< + std::shared_ptr>, + std::shared_ptr>, + std::shared_ptr>>; +#else +using KernelConfig = boost::variant; +#endif + +using OpKernelConfigsMap = + std::unordered_map, + OpKernelType::Hash>; + class ExecutionContext { public: ExecutionContext(const OperatorBase& op, const Scope& scope, const platform::DeviceContext& device_context, - const RuntimeContext& ctx) - : op_(op), scope_(scope), device_context_(device_context), ctx_(ctx) {} + const RuntimeContext& ctx, + std::vector* configs) + : op_(op), + scope_(scope), + device_context_(device_context), + ctx_(ctx), + kernel_configs_(configs) {} const OperatorBase& op() const { return op_; } @@ -234,31 +255,6 @@ class ExecutionContext { return it->second; } - const std::vector LegacyMultiInputVar( - const std::string& name) const { - auto names = op_.Inputs(name); - std::vector res; - res.reserve(names.size()); - std::transform(names.begin(), names.end(), std::back_inserter(res), - [this](const std::string& name) { - return name == kEmptyVarName ? nullptr - : scope_.FindVar(name); - }); - return res; - } - - std::vector LegacyMultiOutputVar(const std::string& name) const { - auto names = op_.Outputs(name); - std::vector res; - res.reserve(names.size()); - std::transform(names.begin(), names.end(), std::back_inserter(res), - [this](const std::string& name) { - return name == kEmptyVarName ? nullptr - : scope_.FindVar(name); - }); - return res; - } - template const T* Input(const std::string& name) const { auto* var = InputVar(name); @@ -271,22 +267,6 @@ class ExecutionContext { return var == nullptr ? nullptr : var->GetMutable(); } - template - const T* LegacyInput(const std::string& name) const { - auto* var = LegacyInputVar(name); - return var == nullptr ? nullptr : &var->Get(); - } - - template - T* LegacyOutput(const std::string& name) const { - auto var = LegacyOutputVar(name); - return var == nullptr ? nullptr : var->GetMutable(); - } - - const Variable* LegacyInputVar(const std::string& name) const; - - Variable* LegacyOutputVar(const std::string& name) const; - template const std::vector MultiInput(const std::string& name) const { auto it = ctx_.inputs.find(name); @@ -319,32 +299,6 @@ class ExecutionContext { return res; } - template - const std::vector LegacyMultiInput(const std::string& name) const { - auto names = op_.Inputs(name); - std::vector res; - res.reserve(names.size()); - std::transform(names.begin(), names.end(), std::back_inserter(res), - [&](const std::string& sub_name) -> const T* { - auto var = scope_.FindVar(sub_name); - return var == nullptr ? nullptr : &var->Get(); - }); - return res; - } - - template - std::vector LegacyMultiOutput(const std::string& name) const { - auto names = op_.Outputs(name); - std::vector res; - res.reserve(names.size()); - std::transform(names.begin(), names.end(), std::back_inserter(res), - [&](const std::string& sub_name) -> T* { - auto var = scope_.FindVar(sub_name); - return var == nullptr ? nullptr : var->GetMutable(); - }); - return res; - } - platform::Place GetPlace() const { return device_context_.GetPlace(); } template @@ -398,34 +352,32 @@ class ExecutionContext { return temp_tensor; } + template + T& GetKernelConfig(int idx) const { + PADDLE_ENFORCE(kernel_configs_ && kernel_configs_->size() > idx, + "%s selected kernel doesn't have kernel config %lu <= %d", + op_.Type().c_str(), kernel_configs_->size(), idx); + return *boost::get>(kernel_configs_->at(idx)); + } + private: const OperatorBase& op_; const Scope& scope_; const platform::DeviceContext& device_context_; const RuntimeContext& ctx_; + mutable std::vector* kernel_configs_; }; template <> const Tensor* ExecutionContext::Input(const std::string& name) const; -template <> -const Tensor* ExecutionContext::LegacyInput( - const std::string& name) const; - template <> const std::vector ExecutionContext::MultiInput( const std::string& name) const; -template <> -const std::vector ExecutionContext::LegacyMultiInput( - const std::string& name) const; - template <> Tensor* ExecutionContext::Output(const std::string& name) const; -template <> -Tensor* ExecutionContext::LegacyOutput(const std::string& name) const; - template <> std::vector ExecutionContext::MultiOutput( const std::string& name) const; @@ -483,6 +435,8 @@ class OperatorWithKernel : public OperatorBase { virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const; + std::vector* GetKernelConfig(const OpKernelType& key) const; + protected: virtual OpKernelType GetKernelTypeForVar( const std::string& var_name, const Tensor& tensor, @@ -508,6 +462,9 @@ class OperatorWithKernel : public OperatorBase { void TransferInplaceVarsBack(const Scope& scope, const std::vector& inplace_vars, const Scope& exec_scope) const; + + protected: + mutable OpKernelConfigsMap kernel_configs_map_; }; extern bool OpSupportGPU(const std::string& op_type); diff --git a/paddle/fluid/framework/operator_kernel_configs.h b/paddle/fluid/framework/operator_kernel_configs.h new file mode 100644 index 0000000000000000000000000000000000000000..c520c222350ceeef246dae756a7157872ae087fa --- /dev/null +++ b/paddle/fluid/framework/operator_kernel_configs.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +namespace paddle { +namespace framework { + +// Not thread-safe. Should be owned per-kernel. +template +class AlgorithmsCache { + public: + AlgorithmsCache() : search_times_(0) { hash_.clear(); } + // Caches the best algorithm for a given + // combination of tensor dimensions & compute data type. + TAlgorithm GetAlgorithm( + const std::vector& dims1, const std::vector& dims2, + const std::vector& strides, const std::vector& paddings, + const std::vector& dilations, + int algorithmFlags, // can set for different data type + std::function gen_func); + + TAlgorithm GetAlgorithm(int64_t area, int search_times, int algorithmFlags, + std::function gen_func); + + private: + std::unordered_map hash_; + int search_times_; +}; + +template +TAlgorithm framework::AlgorithmsCache::GetAlgorithm( + const std::vector& dims1, const std::vector& dims2, + const std::vector& strides, const std::vector& paddings, + const std::vector& dilations, int algorithmFlags, + std::function gen_func) { + int64_t seed = 0; + // Hash all of the inputs, use to try and look up a previously + // discovered algorithm, or fall back to generating a new one. + std::hash hashFn; + // do hash like boost + // https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x + for (const auto num : dims1) { + seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + + for (const auto num : dims2) { + seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 1; + } + + for (const auto num : strides) { + seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + + (seed >> 2) + 2; + } + + for (const auto num : paddings) { + seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + + (seed >> 2) + 3; + } + + for (const auto num : dilations) { + seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + + (seed >> 2) + 4; + } + + seed ^= hashFn(static_cast(algorithmFlags)) + 0x9e3779b9 + + (seed << 6) + (seed >> 2) + 5; + + if (seed == 0) return gen_func(); + + if (hash_.find(seed) == hash_.end()) { + TAlgorithm value = gen_func(); + hash_[seed] = value; + } + return hash_[seed]; +} + +template +TAlgorithm AlgorithmsCache::GetAlgorithm( + int64_t area, int search_times, int algorithmFlags, + std::function gen_func) { + if (hash_.find(area) != hash_.end()) { + return hash_[area]; + } + if (search_times_ < search_times) { + auto algo = gen_func(); + hash_[area] = algo; + ++search_times_; + return algo; + } + TAlgorithm algo; + int64_t min = static_cast(INT_MAX); + for (const auto& m : hash_) { + if (m.first < min) { + min = m.first; + algo = m.second; + } + } + return algo; +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ff7ef0cce2f12fe89dd087c4a5006b2cfdc5a4a9..3e1d61813ca83ebdf9435036117e79abe501b24b 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/details/all_reduce_deps_pass.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" @@ -183,9 +184,10 @@ std::vector &ParallelExecutor::GetLocalScopes() { ParallelExecutor::ParallelExecutor( const std::vector &places, const std::unordered_set &bcast_vars, - const ProgramDesc &main_program, const std::string &loss_var_name, - Scope *scope, const std::vector &local_scopes, - const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy) + const std::string &loss_var_name, Scope *scope, + const std::vector &local_scopes, + const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, + ir::Graph *graph) : member_(new ParallelExecutorPrivate(places)) { member_->global_scope_ = scope; member_->use_cuda_ = exec_strategy.use_cuda_; @@ -193,7 +195,6 @@ ParallelExecutor::ParallelExecutor( member_->use_all_reduce_ = build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce; member_->nranks_ = build_strategy.num_trainers_ * places.size(); - if (!member_->use_all_reduce_) { PADDLE_ENFORCE(places.size() > 1, "If you set build_strategy.reduce with 'Reduce'," @@ -216,14 +217,17 @@ ParallelExecutor::ParallelExecutor( } } + std::unique_ptr temp_owned_graph(graph); + // FIXME(Yancey1989): parallel graph mode get better performance // in GPU allreduce distributed training. Need an elegant way to // choice the execution strategy. - build_strategy.enable_parallel_graph_ = - EnableParallelGraphExecution(main_program, exec_strategy, build_strategy); - - VLOG(1) << "Enable ParallelGraph Execution: " - << build_strategy.enable_parallel_graph_; + build_strategy.enable_parallel_graph_ = EnableParallelGraphExecution( + *temp_owned_graph, exec_strategy, build_strategy); + if (build_strategy.enable_parallel_graph_) + VLOG(0) << "The Executor would execute the graph by ParallelGraph " + "Execution which can get better performance," + << "you can force it off by env FLAGS_enable_parallel_graph=0"; if (member_->use_cuda_) { // Bcast Parameters to all GPUs @@ -253,64 +257,54 @@ ParallelExecutor::ParallelExecutor( if (member_->local_scopes_.size() != 1 && local_scopes.empty()) { BCastParamsToDevices(bcast_vars); } - // Startup Program has been run. All local scopes has correct parameters. +// Startup Program has been run. All local scopes has correct parameters. - // Step 2. Convert main_program to SSA form and dependency graph. Also, insert - // ncclOp - std::vector> graphs; +// Step 2. Convert main_program to SSA form and dependency graph. Also, insert +// ncclOp #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) - if (build_strategy.enable_parallel_graph_) { - for (size_t i = 0; i < member_->places_.size(); ++i) { - std::unique_ptr graph = build_strategy.Apply( - main_program, {member_->places_[i]}, loss_var_name, - {member_->local_scopes_[i]}, member_->nranks_, member_->use_cuda_, - member_->nccl_ctxs_.get()); - graphs.push_back(std::move(graph)); - } - } else { - std::unique_ptr graph = build_strategy.Apply( - main_program, member_->places_, loss_var_name, member_->local_scopes_, - member_->nranks_, member_->use_cuda_, member_->nccl_ctxs_.get()); - graphs.push_back(std::move(graph)); - } + + temp_owned_graph = build_strategy.Apply( + std::move(temp_owned_graph), member_->places_, loss_var_name, + member_->local_scopes_, member_->nranks_, member_->use_cuda_, + member_->nccl_ctxs_.get()); #else - std::unique_ptr graph = build_strategy.Apply( - main_program, member_->places_, loss_var_name, member_->local_scopes_, - member_->nranks_, member_->use_cuda_); - graphs.push_back(std::move(graph)); + temp_owned_graph = build_strategy.Apply( + std::move(temp_owned_graph), member_->places_, loss_var_name, + member_->local_scopes_, member_->nranks_, member_->use_cuda_); + #endif auto max_memory_size = GetEagerDeletionThreshold(); VLOG(10) << "Eager Deletion Threshold " << static_cast(max_memory_size) / (1 << 30); if (max_memory_size >= 0) { - for (size_t i = 0; i < graphs.size(); ++i) { - graphs[i] = member_->PrepareGCAndRefCnts( - std::move(graphs[i]), static_cast(max_memory_size)); - } + graph = member_ + ->PrepareGCAndRefCnts(std::move(temp_owned_graph), + static_cast(max_memory_size)) + .release(); + } else { + graph = temp_owned_graph.release(); } // Step 3. Create vars in each scope. Passes may also create new vars. // skip control vars and empty vars std::vector var_infos; - for (auto &graph : graphs) { - for (auto &node : graph->Nodes()) { - if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { - var_infos.emplace_back(); - var_infos.back().name_ = node->Var()->Name(); - var_infos.back().type_ = node->Var()->GetType(); - var_infos.back().persistable_ = node->Var()->Persistable(); - } + for (auto &node : graph->Nodes()) { + if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { + var_infos.emplace_back(); + var_infos.back().name_ = node->Var()->Name(); + var_infos.back().type_ = node->Var()->GetType(); + var_infos.back().persistable_ = node->Var()->Persistable(); } } // If the loss_var_name is given, the number of graph should be only one. if (loss_var_name.size()) { - size_t graph_num = ir::GraphNum(*graphs[0]); + size_t graph_num = ir::GraphNum(*graph); if (graph_num > 1) { LOG(WARNING) << "The number of graph should be only one, " "but the current graph has " - << ir::GraphNum(*graphs[0]) + << ir::GraphNum(*graph) << " sub_graphs. If you want to see the nodes of the " "sub_graphs, you should use 'FLAGS_print_sub_graph_dir' " "to specify the output dir. NOTES: if you not do training, " @@ -319,18 +313,22 @@ ParallelExecutor::ParallelExecutor( } if (build_strategy.enable_parallel_graph_) { +#ifdef PADDLE_WITH_CUDA + // TODO(Yancey1989): Remove passing in the main_program when + // allreduce_seq_pass doesn't need it as the attr. member_->executor_.reset(new details::ParallelSSAGraphExecutor( - exec_strategy, member_->local_scopes_, member_->places_, - std::move(graphs))); + exec_strategy, member_->local_scopes_, member_->places_, graph)); +#else + PADDLE_THROW( + "Paddle should be compiled with CUDA for ParallelGraph Execution."); +#endif } else { if (exec_strategy.type_ == ExecutionStrategy::kDefault) { member_->executor_.reset(new details::ThreadedSSAGraphExecutor( - exec_strategy, member_->local_scopes_, member_->places_, - std::move(graphs[0]))); + exec_strategy, member_->local_scopes_, member_->places_, graph)); } else { member_->executor_.reset(new details::FastThreadedSSAGraphExecutor( - exec_strategy, member_->local_scopes_, member_->places_, - std::move(graphs[0]))); + exec_strategy, member_->local_scopes_, member_->places_, graph)); } } @@ -460,43 +458,44 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( } } +ParallelExecutor::~ParallelExecutor() { + for (auto &p : member_->places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } + delete member_; +} + bool ParallelExecutor::EnableParallelGraphExecution( - const ProgramDesc &main_program, const ExecutionStrategy &exec_strategy, + const ir::Graph &graph, const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy) const { if (!FLAGS_enable_parallel_graph) return false; bool enable_parallel_graph = true; - // TODO(Yancey1989): support sparse update in ParallelGraph mode. - for (auto &var_desc : main_program.Block(0).AllVars()) { - if (var_desc->GetType() == proto::VarType::SELECTED_ROWS) { - enable_parallel_graph = false; - } - } - // TODO(Yancey1989): support pserver mode - for (auto &op_desc : main_program.Block(0).AllOps()) { - if (op_desc->Type() == "send" || op_desc->Type() == "recv") { - enable_parallel_graph = false; - break; + for (ir::Node *node : graph.Nodes()) { + if (node->IsVar() && node->Var()) { + // TODO(Yancey1989): support sparse update in ParallelGraph mode. + if (node->Var()->GetType() == proto::VarType::SELECTED_ROWS) { + enable_parallel_graph = false; + break; + } + } else if (node->IsOp() && node->Op()) { + // TODO(Yancey1989): support pserver mode + if (node->Op()->Type() == "send" || node->Op()->Type() == "recv") { + enable_parallel_graph = false; + break; + } } } if (!member_->use_all_reduce_ || !member_->use_cuda_) - enable_parallel_graph = false; - if (build_strategy.enable_sequential_execution_ || - exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) - enable_parallel_graph = false; + if (build_strategy.enable_sequential_execution_ || + exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) + enable_parallel_graph = false; return enable_parallel_graph; } -ParallelExecutor::~ParallelExecutor() { - for (auto &p : member_->places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } - delete member_; -} - } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 121bbd55ad575477424a2fb12baab82585eae517..ddf60b39466e72822142e1dad2cfe9a97b6cf6f2 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -46,11 +46,11 @@ class ParallelExecutor { public: explicit ParallelExecutor(const std::vector &places, const std::unordered_set &bcast_vars, - const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope, const std::vector &local_scopes, const ExecutionStrategy &exec_strategy, - const BuildStrategy &build_strategy); + const BuildStrategy &build_strategy, + ir::Graph *graph); ~ParallelExecutor(); @@ -71,7 +71,7 @@ class ParallelExecutor { private: void BCastParamsToDevices(const std::unordered_set &vars) const; - bool EnableParallelGraphExecution(const ProgramDesc &main_program, + bool EnableParallelGraphExecution(const ir::Graph &graph, const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy) const; diff --git a/paddle/fluid/framework/python_headers.h b/paddle/fluid/framework/python_headers.h index 422af19a13683dc9ae6377cac1b1ab2c2ac8f96b..8f9e3fad57f7bb87e78e334e741be23751417a78 100644 --- a/paddle/fluid/framework/python_headers.h +++ b/paddle/fluid/framework/python_headers.h @@ -24,3 +24,11 @@ limitations under the License. */ #pragma pop_macro("_XOPEN_SOURCE") #pragma pop_macro("_POSIX_C_SOURCE") + +#if !defined(PYBIND11_HIDDEN) +#ifdef _WIN32 +#define PYBIND11_HIDDEN __declspec(dllexport) +#else +#define PYBIND11_HIDDEN __attribute__((visibility("hidden"))) +#endif +#endif diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 40606d9b06baf4dbebf87f3c02580e49ae6e2a70..88f5b757a8111f6a7e269ff71054dab425c0de01 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -27,6 +27,10 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_utils.h" +#endif + namespace paddle { namespace framework { @@ -37,10 +41,34 @@ class Tensor { #ifdef PADDLE_WITH_MKLDNN public: - inline mkldnn::memory::format format() const { return format_; } + // TODO(jczaja): This is depracted and will be removed + inline mkldnn::memory::format format() const { + if (layout_ == DataLayout::kMKLDNN) { + return static_cast(mem_pd_.desc().data.format); + } else { + return mkldnn::memory::format::format_undef; + } + } - inline void set_format(const mkldnn::memory::format format) { - format_ = format; + // TODO(jczaja): This is depracted and will be removed + inline void set_format( + const mkldnn::memory::format fmt, + mkldnn::memory::data_type data_type = mkldnn::memory::f32) { + mem_pd_ = paddle::platform::create_prim_desc_from_format( + paddle::framework::vectorize2int(dims()), fmt, data_type); + layout_ = DataLayout::kMKLDNN; + } + + inline mkldnn::memory::primitive_desc get_mkldnn_prim_desc() const { + return mem_pd_; + } + + inline void set_mkldnn_prim_desc( + const mkldnn::memory::primitive_desc& mem_pd) { + // Internally MKL-DNN is just copying (increasing reference counter) + // to shared_ptr. So asignment should be quite cheap + mem_pd_ = mem_pd; + layout_ = DataLayout::kMKLDNN; } protected: @@ -48,12 +76,9 @@ class Tensor { * @brief the detail format of memory block which have layout as kMKLDNN * * @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C, - * nChw16c, etc. For a MKLDNN memory block, layout will be set as - * DataLayout::kMKLDNN meanwhile detail memory format will be kept in - * this field. + * nChw16c, etc. For a MKLDNN memory block, we store memory descriptor */ - - mkldnn::memory::format format_ = mkldnn::memory::format::format_undef; + mutable mkldnn::memory::primitive_desc mem_pd_; #endif public: diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 85d15c5d3faa5a3d021b12396f9f8ea7735f9148..89166bfd15f26e066d32a7191217a9b9a8977bda 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -14,6 +14,8 @@ #include "paddle/fluid/framework/tensor_util.h" #include #include +#include +#include #include #include "paddle/fluid/framework/data_type.h" diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index 733542e4972b16a71f9e76c3076b424b7a901066..fa77b96a7bdfa28ed982db022e8e5ecaef0b443c 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -50,8 +50,6 @@ class Scope; } // namespace framework namespace operators { -template -class AlgorithmsCache; class CudnnRNNCache; @@ -144,9 +142,6 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl< #ifndef _WIN32 ncclUniqueId, platform::Communicator, #endif - operators::AlgorithmsCache, - operators::AlgorithmsCache, - operators::AlgorithmsCache, operators::CudnnRNNCache, #endif int, float>; diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 47488d4dea79f285769f29c93f7888a7f783f070..012dfc1c7f66027bc5375794e0d70ed78e70e781 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "paddle/fluid/framework/lod_tensor.h" @@ -139,6 +140,8 @@ class Autograd { } } } + + ready_op->InvokeBackwardHooks(); } } @@ -156,8 +159,10 @@ class Autograd { for (auto it : candidate->pre_ops_) { for (OpBase* pre_op : it.second) { if (!pre_op) continue; - VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- " - << it.first << " <---- " << pre_op->op_desc_->Type(); + VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id " + << candidate->trace_id_ << " <---- " << it.first << " <---- " + << pre_op->op_desc_->Type() << " trace id " + << pre_op->trace_id_; if (visited.find(pre_op) == visited.end()) { visited.insert(pre_op); queue.push_back(pre_op); @@ -207,10 +212,11 @@ framework::LoDTensor& VarBase::GradValue() { std::map> OpBase::ApplyGrad() { if (grad_op_descs_.empty() && backward_id_ <= 0) { - LOG(WARNING) << "op with no grad: " << op_desc_->Type(); + VLOG(3) << "op with no grad: " << op_desc_->Type(); return {}; } + VLOG(3) << "apply op grad: " << op_desc_->Type(); std::vector grad_outputs; if (backward_id_ > 0) { VLOG(3) << "py_layer_grad"; @@ -249,7 +255,8 @@ std::map> OpBase::ApplyGrad() { framework::Scope scope; PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place_); p.op.RuntimeInferShape(scope, place_, ctx); - p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx)); + p.func( + framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx, nullptr)); } } @@ -271,6 +278,22 @@ std::map> OpBase::ApplyGrad() { return input_vars_; } +void OpBase::InvokeBackwardHooks() { + VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size(); + + // call backward hooks + for (py::object& callable : backward_hooks_) { + callable(this); + } +} + +void OpBase::RegisterBackwardHooks(const py::object& callable) { + VLOG(3) << "Register backward hooks " << trace_id_; + + // TODO(minqiyang): check the callable format + backward_hooks_.push_back(callable); +} + void VarBase::RunBackward() { if (!pre_op_) return; diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 78205486c5534ac0c61cc6d545bdafa4dfc95695..7a9f33dc1e6cbc0c3ec1e649906fb0a8de047189 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -44,8 +44,13 @@ class PreparedOp { PreparedOp(const framework::OperatorBase& op, const framework::RuntimeContext& ctx, framework::OperatorWithKernel::OpKernelFunc func, - platform::DeviceContext* dev_ctx) - : op(op), ctx(ctx), func(func), dev_ctx(dev_ctx) {} + platform::DeviceContext* dev_ctx, + std::vector* kernel_configs) + : op(op), + ctx(ctx), + func(func), + dev_ctx(dev_ctx), + kernel_configs(kernel_configs) {} static PreparedOp Prepare(const framework::RuntimeContext& ctx, const framework::OperatorWithKernel& op, @@ -64,8 +69,9 @@ class PreparedOp { framework::OperatorWithKernel::OpKernelMap& kernels = kernels_iter->second; - auto expected_kernel_key = op.GetExpectedKernelType( - framework::ExecutionContext(op, framework::Scope(), *dev_ctx, ctx)); + auto expected_kernel_key = + op.GetExpectedKernelType(framework::ExecutionContext( + op, framework::Scope(), *dev_ctx, ctx, nullptr)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); @@ -83,7 +89,9 @@ class PreparedOp { PADDLE_THROW("op %s does not have kernel for %s", op.Type(), KernelTypeToString(expected_kernel_key)); } - return PreparedOp(op, ctx, kernel_iter->second, dev_ctx); + std::vector* kernel_configs = + op.GetKernelConfig(expected_kernel_key); + return PreparedOp(op, ctx, kernel_iter->second, dev_ctx, kernel_configs); } inline platform::DeviceContext* GetDeviceContext() const { return dev_ctx; } @@ -92,6 +100,7 @@ class PreparedOp { const framework::RuntimeContext& ctx; framework::OperatorWithKernel::OpKernelFunc func; platform::DeviceContext* dev_ctx; + std::vector* kernel_configs; }; class OpBase; @@ -105,41 +114,61 @@ class VarBase { public: VarBase() : VarBase(new framework::Variable(), new VarBase(true)) {} - // Owns `var` and `grad` + explicit VarBase(bool stop_gradient) + : VarBase(new framework::Variable(), + stop_gradient ? nullptr : new VarBase(true), stop_gradient) {} + VarBase(framework::Variable* var, VarBase* grad) - : var_desc_(nullptr), + : VarBase(var, grad, false) {} + + private: + VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient) + : name_(), + var_desc_(nullptr), var_(var), grads_(grad), - stop_gradient_(false), - pre_op_(nullptr), - pre_op_out_idx_(-1) {} - - explicit VarBase(bool stop_gradient) - : var_desc_(nullptr), - var_(new framework::Variable()), - grads_(stop_gradient ? nullptr : new VarBase(true)), + block_(nullptr), + persistable_(false), stop_gradient_(stop_gradient), pre_op_(nullptr), + pre_op_out_name_(), pre_op_out_idx_(-1) {} + public: virtual ~VarBase() { + // TODO(minqiyang): remove var desc from block desc if (var_) { delete var_; + var_ = nullptr; } if (grads_) { delete grads_; + grads_ = nullptr; } + + pre_op_ = nullptr; + pre_op_out_idx_ = -1; } - OpBase* PreOp() const { return pre_op_; } - int PreOpOutIdx() const { return pre_op_out_idx_; } + inline OpBase* PreOp() const { return pre_op_; } + inline int PreOpOutIdx() const { return pre_op_out_idx_; } - void SetStopGradient(bool stop_gradient) { stop_gradient_ = stop_gradient; } - bool IsStopGradient() const { return stop_gradient_; } + inline void SetStopGradient(bool stop_gradient) { + stop_gradient_ = stop_gradient; + } + inline bool IsStopGradient() const { return stop_gradient_; } void RunBackward(); + inline void ResetPreOp(OpBase* op) { + if (op == pre_op_) { + // clear pre_op info when op equals to var's pre_op + pre_op_ = nullptr; + pre_op_out_idx_ = -1; + } + } + void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name, int pre_op_out_idx, bool pre_op_stop_gradient) { pre_op_ = pre_op; @@ -173,11 +202,15 @@ class VarBase { return string::Sprintf("%s@IGrad", var_desc_->Name()); } + std::string name_; framework::VarDesc* var_desc_; framework::Variable* var_; VarBase* grads_; + framework::BlockDesc* block_; + bool persistable_; + private: bool stop_gradient_; OpBase* pre_op_; @@ -188,15 +221,27 @@ class VarBase { /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its * gradient. This object should be managed totally by Python intepreter. */ -class OpBase { +class PYBIND11_HIDDEN OpBase { public: OpBase() : op_desc_(nullptr), forward_id_(-1), backward_id_(-1), - place_(platform::CPUPlace()) {} + trace_id_(-1), + place_(platform::CPUPlace()), + backward_hooks_() {} virtual ~OpBase() { + // TODO(minqiyang): remove op_desc from block_desc in tracer + // + // reset all output vars' pre op + for (auto iter : output_vars_) { + for (VarBase* var : iter.second) { + var->ResetPreOp(this); + } + } + + // release resource for (framework::OpDesc* desc : grad_op_descs_) { delete desc; } @@ -204,6 +249,10 @@ class OpBase { std::map> ApplyGrad(); + void RegisterBackwardHooks(const py::object& callable); + + void InvokeBackwardHooks(); + // One of `op_desc_` or `forward_id_` is set, not both. // For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_. framework::OpDesc* op_desc_; @@ -214,6 +263,7 @@ class OpBase { // Note: each fwd op corresponds to a vector of bwd ops. std::vector grad_op_descs_; int backward_id_; + int trace_id_; platform::Place place_; @@ -228,6 +278,8 @@ class OpBase { std::vector grad_output_vars_; framework::BlockDesc* block_; + + std::vector backward_hooks_; }; class Layer { diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index bc39d11ba00a6a7c386162a1f9201c6f992c8692..0cb1676372fdd35a762e897d269550f2d1e1ac36 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -14,13 +14,32 @@ #include "paddle/fluid/imperative/tracer.h" +#include +#include +#include +#include + #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" +#ifdef WITH_GPERFTOOLS +#include "gperftools/profiler.h" +#endif + +DEFINE_string( + tracer_profile_fname, "", + "Profiler filename for imperative tracer, which generated by gperftools." + "Only valid when compiled `WITH_PROFILER=ON`. Empty if disable."); + namespace paddle { namespace imperative { +static std::once_flag gTracerProfileOnce; +#ifdef WITH_GPERFTOOLS +static bool gTracerProfilerStarted = false; +#endif + void CreateGradOp(const framework::OpDesc& op_desc, const std::unordered_set& no_grad_set, const std::vector& grad_sub_block, @@ -66,16 +85,39 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) { return result; } -void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, - const VarBasePtrMap& outputs, framework::BlockDesc* block, - const platform::Place expected_place, - const bool stop_gradient) { +Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) { + if (!FLAGS_tracer_profile_fname.empty()) { + std::call_once(gTracerProfileOnce, [] { +#ifdef WITH_GPERFTOOLS + ProfilerStart(FLAGS_tracer_profile_fname.c_str()); + gTracerProfilerStarted = true; +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_tracer_profile_fname will be ignored"; +#endif + }); + } +} + +std::set Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, + const VarBasePtrMap& outputs, + framework::BlockDesc* block, + const platform::Place expected_place, + const bool stop_gradient) { +#ifdef WITH_GPERFTOOLS + if (gTracerProfilerStarted) { + ProfilerFlush(); + } +#endif + std::map vars; framework::OpDesc* op_desc = op->op_desc_; - VLOG(3) << "tracer tracing " << op_desc->Type(); + VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id " + << op->trace_id_; op_desc->InferShape(*block); op_desc->InferVarType(block); + std::unique_ptr op_base = framework::OpRegistry::CreateOp(*op_desc); @@ -92,14 +134,16 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, invars.emplace_back(inp->var_); vars[inp->var_desc_->Name()] = inp; - if (inp->PreOp()) { + if (inp->PreOp() && !inp->IsStopGradient()) { op->pre_ops_[it.first].push_back(inp->PreOp()); op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx()); + VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type(); } else { op->pre_ops_[it.first].push_back(nullptr); } VLOG(3) << "input vname " << inp->var_desc_->Name() << " " - << inp->var_->IsInitialized(); + << inp->var_->IsInitialized() << " stop_gradient " + << inp->IsStopGradient(); } } @@ -138,8 +182,11 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->place_ = GetExpectedPlace(expected_place, inputs); PreparedOp prepared_op = PreparedOp::Prepare(ctx, *op_kernel, op->place_); prepared_op.op.RuntimeInferShape(scope, op->place_, ctx); - prepared_op.func(framework::ExecutionContext( - prepared_op.op, scope, *prepared_op.dev_ctx, prepared_op.ctx)); + prepared_op.func( + framework::ExecutionContext(prepared_op.op, scope, *prepared_op.dev_ctx, + prepared_op.ctx, prepared_op.kernel_configs)); + + std::set vars_saved_for_backward; if (!stop_gradient) { std::unique_ptr> grad_to_var( @@ -148,6 +195,7 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->grad_input_vars_.resize(op->grad_op_descs_.size()); op->grad_output_vars_.resize(op->grad_op_descs_.size()); + for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) { framework::OpDesc* grad_op_desc = op->grad_op_descs_[i]; for (auto it : grad_op_desc->Inputs()) { @@ -169,6 +217,8 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, // Douts. grad_in_vars.push_back(var->grads_->var_); } + + vars_saved_for_backward.insert(it.first); } } @@ -193,6 +243,7 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, } op->block_ = block; + return vars_saved_for_backward; } std::vector Tracer::PyTrace(OpBase* op, @@ -202,7 +253,7 @@ std::vector Tracer::PyTrace(OpBase* op, op->input_vars_[PyLayer::kFwdInp] = inputs; op->output_vars_[PyLayer::kFwdOut] = PyLayer::Apply(op->forward_id_, inputs); for (VarBase* inp : inputs) { - if (inp->PreOp()) { + if (inp->PreOp() && !inp->IsStopGradient()) { op->pre_ops_[PyLayer::kFwdInp].push_back(inp->PreOp()); op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->PreOpOutIdx()); } else { diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h index 690838215581b09ff35a0ea13f30655b77e6e187..8a0267c37f7c98a172fe0fa573955dc420952c0a 100644 --- a/paddle/fluid/imperative/tracer.h +++ b/paddle/fluid/imperative/tracer.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include @@ -39,14 +40,15 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs); class Tracer { public: - explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {} + explicit Tracer(framework::BlockDesc* root_block); virtual ~Tracer() {} - void Trace(OpBase* op, const VarBasePtrMap& inputs, - const VarBasePtrMap& outputs, framework::BlockDesc* block, - const platform::Place expected_place, - const bool stop_gradient = false); + std::set Trace(OpBase* op, const VarBasePtrMap& inputs, + const VarBasePtrMap& outputs, + framework::BlockDesc* block, + const platform::Place expected_place, + const bool stop_gradient = false); std::vector PyTrace(OpBase* op, const std::vector& inputs, bool stop_gradient = false); diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 157862016e3556902f6507e02417624363ed1029..762640d6d1ce12dff511fc7149e872efa834036c 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -16,6 +16,7 @@ add_subdirectory(utils) if (TENSORRT_FOUND) add_subdirectory(tensorrt) endif() +# add_subdirectory(anakin) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES) diff --git a/paddle/fluid/inference/anakin/CMakeLists.txt b/paddle/fluid/inference/anakin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b418af62f8cae4513bcca24f057d1fe100bbea25 --- /dev/null +++ b/paddle/fluid/inference/anakin/CMakeLists.txt @@ -0,0 +1,4 @@ +cc_library(anakin_engine SRCS engine.cc) +target_link_libraries(anakin_engine anakin anakin_saber_common) +cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine) +add_subdirectory(convert) diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5bfee861f14877b5a67bc48aeb14b8213a27370 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -0,0 +1,2 @@ +cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope) +cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc new file mode 100644 index 0000000000000000000000000000000000000000..33a5aff1de2851ad55c2df83cc48ba86f8ded754 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/fc.h" +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void FcOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + auto *y_v = scope.FindVar(op_desc.Input("Y").front()); + PADDLE_ENFORCE_NOT_NULL(y_v); + auto *y_t = y_v->GetMutable(); + + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + + auto weight_shape = framework::vectorize2int(y_t->dims()); + engine_->AddOp(op_name, "Dense", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "bias_term", false); + engine_->AddOpAttr(op_name, "axis", 1); + int out_dim = weight_shape[1]; + engine_->AddOpAttr(op_name, "out_dim", out_dim); + + weight_shape.push_back(1); + weight_shape.push_back(1); + Shape anakin_shape(weight_shape); + + framework::LoDTensor weight_tensor; + weight_tensor.Resize(y_t->dims()); + TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor); + + auto *weight1 = + GraphGlobalMem::Global().template new_block(anakin_shape); + float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); + std::copy_n(weight_tensor.data(), weight_tensor.numel(), cpu_data); + weight1->d_tensor().set_shape(anakin_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr(op_name, "weight_1", *weight1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h new file mode 100644 index 0000000000000000000000000000000000000000..b670486f12b36043a01ceb002da8756901ed01ce --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/fc.h @@ -0,0 +1,38 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class FcOpConverter : public AnakinOpConverter { + public: + FcOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~FcOpConverter() {} + + private: +}; + +static Registrar register_fc_op_converter("fc"); +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h new file mode 100644 index 0000000000000000000000000000000000000000..b9a221079dcec78fc86ebed7dfac0c59ec0f8540 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -0,0 +1,112 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include "framework/core/types.h" +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/anakin/convert/registrar.h" +#include "paddle/fluid/inference/anakin/engine.h" +#include "paddle/fluid/inference/utils/singleton.h" +#include "saber/saber_types.h" + +namespace paddle { +namespace inference { +namespace anakin { + +using AnakinNvEngine = + AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>; + +class AnakinOpConverter { + public: + AnakinOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, bool test_mode) {} + void ConvertOp(const framework::proto::OpDesc &op, + const std::unordered_set ¶meters, + const framework::Scope &scope, AnakinNvEngine *engine, + bool test_mode = false) { + framework::OpDesc op_desc(op, nullptr); + std::string op_type = op_desc.Type(); + std::shared_ptr it{nullptr}; + + if (op_type == "mul") { + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + std::string Y = op_desc.Input("Y")[0]; + std::cout << Y << parameters.count(Y) << std::endl; + if (parameters.count(Y)) { + it = OpRegister::instance()->Get("fc"); + } + } + + if (!it) { + it = OpRegister::instance()->Get(op_type); + } + PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type); + it->SetEngine(engine); + (*it)(op, scope, test_mode); + } + + void ConvertBlock(const framework::proto::BlockDesc &block, + const std::unordered_set ¶meters, + const framework::Scope &scope, AnakinNvEngine *engine) { + std::unique_lock lock(mutex_); + for (auto i = 0; i < block.ops_size(); i++) { + auto &op = block.ops(i); + ConvertOp(op, parameters, scope, engine); + } + } + void SetEngine(AnakinNvEngine *engine) { engine_ = engine; } + virtual ~AnakinOpConverter() {} + + protected: + bool test_mode_; + AnakinNvEngine *engine_{nullptr}; + + private: + std::unordered_map converters_; + framework::Scope *scope_{nullptr}; + std::mutex mutex_; +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle + +#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ + struct anakin_##op_type__##_converter \ + : public ::paddle::framework::Registrar { \ + anakin_##op_type__##_converter() { \ + ::paddle::inference:: \ + Registry::Register< \ + ::paddle::inference::anakin::Converter__>(#op_type__); \ + } \ + }; \ + anakin_##op_type__##_converter anakin_##op_type__##_converter__; \ + int TouchConverterRegister_anakin_##op_type__() { \ + anakin_##op_type__##_converter__.Touch(); \ + return 0; \ + } + +#define USE_ANAKIN_CONVERTER(op_type__) \ + extern int TouchConverterRegister_anakin_##op_type__(); \ + static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \ + TouchConverterRegister_anakin_##op_type__(); diff --git a/paddle/fluid/inference/anakin/convert/registrar.cc b/paddle/fluid/inference/anakin/convert/registrar.cc new file mode 100644 index 0000000000000000000000000000000000000000..701ebdb2d43cf524330f946ac56d32dfa884f42a --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/registrar.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/registrar.h" + +namespace paddle { +namespace inference { +namespace anakin { + +std::shared_ptr OpRegister::Get(const std::string &name) { + auto it = registry_.find(name); + if (it == registry_.end()) return nullptr; + return it->second(); +} + +OpRegister *OpRegister::instance() { + static OpRegister factory; + return &factory; +} + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/registrar.h b/paddle/fluid/inference/anakin/convert/registrar.h new file mode 100644 index 0000000000000000000000000000000000000000..afce66ca084143ae203af9a60089aa2f5d18a725 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/registrar.h @@ -0,0 +1,58 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +namespace paddle { +namespace inference { +namespace anakin { + +class AnakinOpConverter; + +class OpRegister { + public: + OpRegister() = default; + std::shared_ptr Get(const std::string &name); + static OpRegister *instance(); + void OpRegisterFn(const std::string &name, + std::function()> fn) { + registry_[name] = fn; + } + + private: + using RegisterFnType = std::function()>; + std::map()>> + registry_; +}; + +template +class Registrar { + public: + Registrar(const std::string &name, Args... args) { + std::shared_ptr converter = + std::make_shared(std::move(args)...); + OpRegister::instance()->OpRegisterFn(name, + [converter]() { return converter; }); + } +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7b8ceefe28873f0ffb9cedbb04b832ba029b7de4 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/fc.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(fc_op, test) { + auto fc_converter = OpRegister::instance()->Get("fc"); + ASSERT_TRUE(fc_converter != nullptr); + // Registrar register_fc("fc"); + // auto fc = std::make_shared(); + + std::unordered_set parameters({"mul_y"}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, scope); + validator.DeclInputVar("mul_x", {1, 1, 1, 1}); + validator.DeclParamVar("mul_y", {1, 2}); + validator.DeclOutputVar("mul_out", {1, 1, 1, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("mul"); + desc.SetInput("X", {"mul_x"}); + desc.SetInput("Y", {"mul_y"}); + desc.SetOutput("Out", {"mul_out"}); + int num_flatten_dims = 3; + desc.SetAttr("x_num_col_dims", num_flatten_dims); + validator.SetOp(*desc.Proto()); + + validator.Execute(10); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(mul); diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..38d8e596a738ac98c9f9870473f72dcc72b0e7aa --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -0,0 +1,202 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/inference/anakin/engine.h" +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/utils/singleton.h" +#include "paddle/fluid/platform/enforce.h" + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +/* + * Get a random float value between [low, high] + */ +float random(float low, float high) { + static std::random_device rd; + static std::mt19937 mt(rd()); + std::uniform_real_distribution dist(low, high); + return dist(mt); +} + +void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, + const platform::DeviceContext& ctx) { + auto dims = tensor->dims(); + size_t num_elements = analysis::AccuDims(dims, dims.size()); + PADDLE_ENFORCE_GT(num_elements, 0); + + platform::CPUPlace cpu_place; + framework::LoDTensor temp_tensor; + temp_tensor.Resize(dims); + auto* temp_data = temp_tensor.mutable_data(cpu_place); + + for (size_t i = 0; i < num_elements; i++) { + *(temp_data + i) = random(0., 1.); + } + + TensorCopySync(temp_tensor, place, tensor); +} + +/* + * Help to validate the correctness between Fluid Op and the corresponding + * anakin + * layer. + */ +class AnakinConvertValidation { + using AnakinNvEngineT = AnakinEngine; + + public: + AnakinConvertValidation() = delete; + + AnakinConvertValidation(const std::unordered_set& parameters, + const framework::Scope& scope) + : parameters_(parameters), scope_(scope), place_(0) { + PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); + engine_.reset(new AnakinEngine(true)); + } + + // Declare a Variable as input with random initialization. + void DeclInputVar(const std::string& name, + const std::vector tensor_dims) { + DeclVar(name, tensor_dims); + // should decalre anakin input here. + } + + void DeclParamVar(const std::string& name, const std::vector dim_vec) { + DeclVar(name, dim_vec); + } + + void DeclOutputVar(const std::string& name, const std::vector dim_vec) { + DeclVar(name, dim_vec); + // should declare anakin output here. + } + + void DeclVar(const std::string& name, const std::vector dim_vec) { + platform::CUDADeviceContext ctx(place_); + auto* x = scope_.Var(name); + auto* x_tensor = x->GetMutable(); + x_tensor->Resize(framework::make_ddim(dim_vec)); + RandomizeTensor(x_tensor, place_, ctx); + } + + void SetOp(const framework::proto::OpDesc& desc) { + op_ = framework::OpRegistry::CreateOp(desc); + op_desc_.reset(new framework::OpDesc(desc, nullptr)); + // should init anakin engine here. + + Singleton::Global().ConvertOp( + desc, parameters_, scope_, engine_.get(), true /*test_mode*/); + engine_->Freeze(); + for (const auto& input : op_desc_->InputArgumentNames()) { + if (parameters_.count(input)) continue; + auto& t = inference::analysis::GetFromScope(scope_, + input); + auto t_shape = framework::vectorize2int(t.dims()); + engine_->SetInputShape(input, t_shape); + } + engine_->Optimize(); + engine_->InitGraph(); + } + + // We use the set 'neglected_output' here, because some Ops like batch norm, + // the outputs specified in the op des are only used during training, + // so we should neglect those output during inference. + void Execute(int batch_size, + std::unordered_set neglected_output = {}) { + // Execute Fluid Op + platform::CUDADeviceContext ctx(place_); + op_->Run(scope_, place_); + + // std::vector input_vector; + // std::vector output_vector; + std::map inputs; + for (const auto& input : op_desc_->InputArgumentNames()) { + if (parameters_.count(input)) continue; + auto* var = scope_.FindVar(input); + auto tensor = var->GetMutable(); + inputs.insert({input, tensor}); + } + + std::map outputs; + std::vector> fluid_outputs; + for (const auto& output : op_desc_->OutputArgumentNames()) { + if (neglected_output.count(output)) continue; + std::vector fluid_out; + auto* var = scope_.FindVar(output); + auto tensor = var->GetMutable(); + framework::TensorToVector(*tensor, ctx, &fluid_out); + fluid_outputs.push_back(fluid_out); + + // size_t fluid_out_size = fluid_out.size(); + /*for (size_t i = 0; i < fluid_out_size; i++) { + std::cout << fluid_out[i] << std::endl; + }*/ + outputs.insert({output, tensor}); + } + + engine_->Execute(inputs, outputs); + int i_output = 0; + for (const auto& output : op_desc_->OutputArgumentNames()) { + if (neglected_output.count(output)) continue; + std::vector anakin_out; + auto* var = scope_.FindVar(output); + auto tensor = var->GetMutable(); + framework::TensorToVector(*tensor, ctx, &anakin_out); + + size_t anakin_out_size = anakin_out.size(); + auto fluid_out = fluid_outputs[i_output++]; + for (size_t i = 0; i < anakin_out_size; i++) { + LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], " + << "fluid[" << fluid_out[i] << "]"; + } + } + } + + framework::Scope& scope() { return scope_; } + + private: + std::unique_ptr engine_{nullptr}; + cudaStream_t stream_; + std::unique_ptr op_; + std::unique_ptr op_desc_; + const std::unordered_set& parameters_; + framework::Scope& scope_; + platform::CUDAPlace place_; +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..6549991474f4834f0c3ef74c60d294cca6bebc91 --- /dev/null +++ b/paddle/fluid/inference/anakin/engine.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/engine.h" +#include +#include +#include +#include +#include "paddle/fluid/framework/ddim.h" + +using anakin::Precision; +using anakin::OpRunType; +using paddle::framework::LoDTensor; +template +using AnakinNetT = anakin::Net; + +template +using AnakinGraphT = anakin::graph::Graph; + +namespace paddle { +namespace inference { +namespace anakin { + +template +AnakinEngine::AnakinEngine(bool need_summary) + : graph_(new AnakinGraphT()), + net_(new AnakinNetT(need_summary)) {} + +template +AnakinEngine::~AnakinEngine() {} + +template +void AnakinEngine::SetInputShape( + const std::string &name, std::vector shape) { + graph_->AddOpAttr<::anakin::PTuple>(name, "input_shape", + std::move(shape)); +} + +template +void AnakinEngine::InitGraph() { + net_->init(*graph_); +} + +template +void AnakinEngine::AddOp( + const std::string &name, const std::string &type, + const std::vector &inputs, + const std::vector &outputs) { + PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation."); +} + +template +void AnakinEngine::Execute( + const std::map &inputs, + const std::map &outputs) { + for (const auto &input : inputs) { + auto *tensor = input.second; + auto *data = tensor->data(); + auto shape = framework::vectorize2int(tensor->dims()); + ::anakin::saber::Shape anakin_shape(shape); + auto *anakin_input = net_->get_in(input.first); + ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, + anakin_shape); + anakin_input->share_from(tmp_anakin_tensor); + } + + for (const auto &output : outputs) { + auto *tensor = output.second; + auto *data = tensor->data(); + auto shape = framework::vectorize2int(tensor->dims()); + ::anakin::saber::Shape anakin_shape(shape); + auto *anakin_output = net_->get_out(output.first); + ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, + anakin_shape); + anakin_output->share_from(tmp_anakin_tensor); + } + net_->prediction(); +} + +template +void AnakinEngine::Freeze() { + PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph."); +} + +template +void AnakinEngine::Optimize() { + PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization."); +} + +template +std::unique_ptr> +AnakinEngine::Clone() { + auto *engine = new AnakinEngine(); + engine->net_ = std::move(net_->Clone()); + return std::unique_ptr(engine); +} + +template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>; +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h new file mode 100644 index 0000000000000000000000000000000000000000..d8f32f57be5aabb91ba720c6457a03f15083db43 --- /dev/null +++ b/paddle/fluid/inference/anakin/engine.h @@ -0,0 +1,80 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/engine.h" +#include "paddle/fluid/inference/utils/singleton.h" + +#include "framework/core/net/net.h" +#include "framework/core/types.h" +#include "framework/graph/graph.h" +#include "saber/saber_types.h" + +namespace anakin { + +template +class Net; + +namespace graph { +template +class Graph; +} // namespace graph +} // namespace anakin + +namespace paddle { +namespace inference { +namespace anakin { + +template +class AnakinEngine { + public: + explicit AnakinEngine(bool need_summary = false); + ~AnakinEngine(); + void InitGraph(); + void SetInputShape(const std::string &name, std::vector shape); + void AddOp(const std::string &name, const std::string &type, + const std::vector &inputs, + const std::vector &outputs); + + template + void AddOpAttr(const std::string &op_name, const std::string &attr_name, + const T &attr_value) { + PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value), + "Add operation's attribution."); + } + + std::unique_ptr Clone(); + void Freeze(); + void Optimize(); + void Execute(const std::map &inputs, + const std::map &outputs); + + private: + using NetT = ::anakin::Net; + using GraphT = ::anakin::graph::Graph; + std::unique_ptr graph_; + std::unique_ptr net_; +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc b/paddle/fluid/inference/anakin/test_anakin_engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..571294d3e22fb9489686bfcb2f3a64198099f970 --- /dev/null +++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc @@ -0,0 +1,96 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include + +#include "framework/core/net/net.h" +#include "framework/graph/graph.h" +#include "framework/graph/graph_global_mem.h" +#include "paddle/fluid/inference/anakin/engine.h" + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; +namespace paddle { +namespace inference { +namespace anakin { + +class TestAnakinEngine : public ::testing::Test { + protected: + void SetUp() override; + void TearDown() override {} + + protected: + using AnakinNvEngineT = AnakinEngine; + std::unique_ptr engine_{nullptr}; +}; + +void TestAnakinEngine::SetUp() { + engine_.reset(new AnakinEngine(true)); +} + +TEST_F(TestAnakinEngine, Execute) { + engine_->AddOp("op1", "Dense", {"x"}, {"y"}); + engine_->AddOpAttr("op1", "out_dim", 2); + engine_->AddOpAttr("op1", "bias_term", false); + engine_->AddOpAttr("op1", "axis", 1); + std::vector shape = {1, 1, 1, 2}; + Shape tmp_shape(shape); + // PBlock weight1(tmp_shape); + auto *weight1 = + GraphGlobalMem::Global().template new_block(tmp_shape); + // auto *weight1 = new PBlock(tmp_shape, AK_FLOAT); + + float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); + cpu_data[0] = 2.; + weight1->d_tensor().set_shape(tmp_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr("op1", "weight_1", *weight1); + + engine_->Freeze(); + // PTuple input_shape = {1}; + // engine_->AddOpAttr("x", "input_shape", input_shape); + engine_->SetInputShape("x", {1, 1, 1, 1}); + engine_->Optimize(); + engine_->InitGraph(); + framework::LoDTensor x; + framework::LoDTensor y; + x.Resize({1, 1, 1, 1}); + y.Resize({1, 1, 1, 2}); + auto *x_data = x.mutable_data(platform::CUDAPlace()); + float x_data_cpu[] = {1.}; + cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice); + + std::map inputs = {{"x", &x}}; + auto *y_data = y.mutable_data(platform::CUDAPlace()); + std::map outputs = {{"y", &y}}; + + engine_->Execute(inputs, outputs); + auto *y_data_gpu = y_data; + float y_data_cpu[2]; + cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost); + LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1]; +} + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc index a64f85ee9ac1a7bb8f0ed7bb8678166bbbcd5746..96befe7f8a5d16402338ac337daa96d714b4d310 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc @@ -460,77 +460,6 @@ inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) { return node.inputs.size() == n; } -NodesTSIterator::NodesTSIterator(const std::vector &source) { - PADDLE_ENFORCE(!source.empty(), - "Start points of topological sorting should not be empty!"); - // CHECK all the inputs' in-degree is 0 - for (auto *node : source) { - PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0)); - } - - std::unordered_set visited; - std::unordered_set to_visit{source.begin(), source.end()}; - - std::vector inlink_visited; - while (!to_visit.empty()) { - std::vector queue(to_visit.begin(), to_visit.end()); - for (auto *p : queue) { - if (Agent(p).deleted()) { - visited.insert(p); - to_visit.erase(p); - } - - inlink_visited.clear(); - - std::copy_if(p->inputs.begin(), p->inputs.end(), - std::back_inserter(inlink_visited), - [&](Node *x) -> bool { return visited.count(x) != 0; }); - - if (inlink_visited.size() == p->inputs.size()) { - sorted_.push_back(p); - for (auto *_ : p->outputs) { - if (!visited.count(_)) { - to_visit.insert(_); - } - } - - to_visit.erase(p); - visited.insert(p); - } - } - } -} - -NodesTSIterator::NodesTSIterator(const NodesTSIterator &other) - : sorted_(other.sorted_), cursor_(other.cursor_) {} - -Node &NodesTSIterator::operator*() { - PADDLE_ENFORCE_LT(cursor_, sorted_.size()); - return *sorted_[cursor_]; -} - -NodesTSIterator &NodesTSIterator::operator++() { - if (++cursor_ >= sorted_.size()) { - sorted_.clear(); - cursor_ = 0; - } - return *this; -} -NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) { - cursor_ = other.cursor_; - sorted_ = other.sorted_; - return *this; -} - -bool NodesTSIterator::operator==(const NodesTSIterator &other) { - return sorted_ == other.sorted_ && cursor_ == other.cursor_; -} - -Node *NodesTSIterator::operator->() { - PADDLE_ENFORCE_LT(cursor_, sorted_.size()); - return sorted_[cursor_]; -} - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h index ea88edd042aa9d46f66af1aa92f2cb273696c118..5d11c217b69f11d45c6fb6d552dc404fa8313daf 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h @@ -30,6 +30,7 @@ namespace inference { namespace analysis { using framework::ir::Graph; +using framework::ir::NodesTSIterator; const char kIsFunctionNode[] = "__is_function_node__"; const char kFunctionNodeSubGraph[] = "__function_node_sub_graph__"; @@ -132,32 +133,6 @@ struct Agent { framework::ir::Node *x_; }; -// Topological sorting iterator on nodes. -struct NodesTSIterator - : public std::iterator { - NodesTSIterator() = default; - explicit NodesTSIterator(const std::vector &source); - NodesTSIterator(NodesTSIterator &&other) - : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { - other.cursor_ = 0; - } - NodesTSIterator(const NodesTSIterator &other); - - framework::ir::Node &operator*(); - NodesTSIterator &operator++(); - // TODO(Superjomn) current implementation just compare the first - // element, need to compare the graph and all the elements in the queue and - // set. - NodesTSIterator &operator=(const NodesTSIterator &other); - bool operator==(const NodesTSIterator &other); - bool operator!=(const NodesTSIterator &other) { return !(*this == other); } - framework::ir::Node *operator->(); - - private: - std::vector sorted_; - size_t cursor_{0}; -}; - // The nodes those have no input will be treated as start points. static std::vector ExtractStartPoints(const Graph &g) { std::vector result; diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index e92273b4dd94f11e0e90c91fd82dafe42bf158f3..522ab495227e9b8c52b8d38db696fa9b785ba642 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -89,7 +89,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(params_file_); CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and // params_file_ fields. - // Gpu releated. + // Gpu related. CP_MEMBER(use_gpu_); CP_MEMBER(device_id_); CP_MEMBER(memory_pool_init_size_mb_); @@ -97,13 +97,13 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(enable_memory_optim_); CP_MEMBER(static_memory_optim_); CP_MEMBER(static_memory_optim_force_update_); - // TensorRT releated. + // TensorRT related. CP_MEMBER(use_tensorrt_); CP_MEMBER(tensorrt_workspace_size_); CP_MEMBER(tensorrt_max_batchsize_); CP_MEMBER(tensorrt_min_subgraph_size_); CP_MEMBER(tensorrt_precision_mode_); - // MKLDNN releated. + // MKLDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 712e010db4340bde55945f89c488ba8cc38a1926..467d4411376381df950bb582f9c73410284a5e2d 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) { bool AnalysisPredictor::Run(const std::vector &inputs, std::vector *output_data, int batch_size) { + if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) { + paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); + } VLOG(3) << "Predictor::predict"; inference::Timer timer; timer.tic(); @@ -392,7 +395,7 @@ std::unique_ptr CreatePaddlePredictor< AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { VLOG(3) << "create AnalysisConfig"; if (config.use_gpu()) { - // 1. GPU memeroy + // 1. GPU memory PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f); PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d", config.gpu_device_id()); @@ -726,7 +729,7 @@ bool AnalysisPredictor::need_collect_var_shapes_for_memory_optim() { return need; } -std::string AnalysisPredictor::GetSeriazlizedProgram() const { +std::string AnalysisPredictor::GetSerializedProgram() const { return inference_program_->Proto()->SerializeAsString(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 014df4ee8b6d86232212736c43a9aff32ffee011..d5445c58e45ae64a8cfab03cb610e3677729338b 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -74,7 +74,7 @@ class AnalysisPredictor : public PaddlePredictor { void SetMkldnnThreadID(int tid); - std::string GetSeriazlizedProgram() const override; + std::string GetSerializedProgram() const override; protected: // For memory optimization. diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index 002ba90e40e69d565f5a54e374a3f0083b84273f..6696839b53fb21c274843afd86b5d8b5c2042c51 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -214,8 +214,8 @@ TEST(AnalysisPredictor, memory_optim) { { // The first predictor help to cache the memory optimize strategy. auto predictor = CreatePaddlePredictor(config); - LOG(INFO) << "serialized program: " << predictor->GetSeriazlizedProgram(); - ASSERT_FALSE(predictor->GetSeriazlizedProgram().empty()); + LOG(INFO) << "serialized program: " << predictor->GetSerializedProgram(); + ASSERT_FALSE(predictor->GetSerializedProgram().empty()); // Run several times to check the parameters are not reused by mistake. for (int i = 0; i < 5; i++) { diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 6cd18277d63200f5bccf180a7ae3196b0ce126ff..f83537f064187e67a08c8bbce52707d1c824abeb 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -92,7 +92,7 @@ void PaddleBuf::Reset(void *data, size_t length) { void PaddleBuf::Free() { if (memory_owned_ && data_) { - PADDLE_ENFORCE_GT(length_, 0); + PADDLE_ENFORCE_GT(length_, 0UL); free(static_cast(data_)); data_ = nullptr; length_ = 0; diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index e18bc02d92eb517fa20dc83811694b8ac80ae316..048286a843f0190a8139cb86eda4f3a3a40d89a1 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() { bool NativePaddlePredictor::Run(const std::vector &inputs, std::vector *output_data, int batch_size) { + if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) { + paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); + } VLOG(3) << "Predictor::predict"; Timer timer; timer.tic(); @@ -290,7 +293,7 @@ std::unique_ptr CreatePaddlePredictor< NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) { VLOG(3) << "create NativePaddlePredictor"; if (config.use_gpu) { - // 1. GPU memeroy + // 1. GPU memory PADDLE_ENFORCE_GE( config.fraction_of_gpu_memory, 0.f, "fraction_of_gpu_memory in the config should be set to range (0., 1.]"); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 47361b3279e14dd65a0e6e7f864e508ef1183045..c1c6227cdd8b2042f6765c7932327ecae246c260 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -212,12 +212,12 @@ struct AnalysisConfig { std::string prog_file_; std::string params_file_; - // GPU releated. + // GPU related. bool use_gpu_{false}; int device_id_{0}; uint64_t memory_pool_init_size_mb_{100}; // initial size is 100MB. - // TensorRT releated. + // TensorRT related. bool use_tensorrt_{false}; // For workspace_size, refer it from here: // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 8ac8bc529183edc2f8f888ca7ba14611acaadc10..c9a45b4aa3b4037d3725622fc960848bc1ccfb2c 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -16,6 +16,12 @@ /*! \file paddle_api.h */ +/*! \mainpage Paddle Inference APIs + * \section intro_sec Introduction + * The Paddle inference library aims to offer an high performance inference SDK + * for Paddle users. + */ + #include #include #include @@ -34,26 +40,49 @@ enum PaddleDType { }; /** - *\brief Memory menager for PaddleTensor. + * \brief Memory manager for `PaddleTensor`. * - *The PaddleBuf holds a buffer for data input or output. The memory can be - *allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf - *should be reused for better performance. + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf + * should be reused for better performance. * - *For user allocated memory, the following API can be used: - *- PaddleBuf(void* data, size_t length) to set an external memory by - *specifying - * the memory address and length. - *- Reset(void* data, size_t length) to reset the PaddleBuf with an external + * For user allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external *memory. - *ATTENTION, for user allocated memory, deallocation should be done by users + * ATTENTION, for user allocated memory, deallocation should be done by users *externally after the program finished. The PaddleBuf won't do any allocation *or deallocation. * - *To have the PaddleBuf allocate and manage the memory: - *- PaddleBuf(size_t length) will allocate a memory of size `length`. - *- Resize(size_t length) resize the memory to no less than `length`, ATTENTION + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION * if the allocated memory is larger than `length`, nothing will done. + * + * Usage: + * + * Let PaddleBuf manage the memory internally. + * \code{cpp} + * const int num_elements = 128; + * PaddleBuf buf(num_elements * sizeof(float)); + * \endcode + * + * Or + * \code{cpp} + * PaddleBuf buf; + * buf.Resize(num_elements * sizeof(float)); + * \endcode + * Works the exactly the same. + * + * One can also make the `PaddleBuf` use the external memory. + * \code{cpp} + * PaddleBuf buf; + * void* external_memory = new float[num_elements]; + * buf.Reset(external_memory, num_elements*sizeof(float)); + * ... + * delete[] external_memory; // manage the memory lifetime outside. + * \endcode */ class PaddleBuf { public: @@ -78,7 +107,7 @@ class PaddleBuf { /** Tell whether the buffer is empty. */ bool empty() const { return length_ == 0; } - /** Get the memory address. + /** Get the data's memory address. */ void* data() const { return data_; } /** Get the memory length. @@ -110,7 +139,8 @@ struct PaddleTensor { }; enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; -/** Tensor without copy, currently only supports AnalysisPredictor. + +/** Tensor without copy, currently only supports `AnalysisPredictor`. */ class ZeroCopyTensor { public: @@ -218,7 +248,7 @@ class PaddlePredictor { /** \brief Get the serialized model program that executes in inference phase. * Its data type is ProgramDesc, which is a protobuf message. */ - virtual std::string GetSeriazlizedProgram() const { + virtual std::string GetSerializedProgram() const { assert(false); // Force raise error. return "NotImplemented"; } @@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config { * * Usage: * + * \code{.cpp} * NativeConfig config; * ... // change the configs. * auto native_predictor = CreatePaddlePredictor(config); + * \endcode * * FOR EXTENSION DEVELOPER: * Different predictors are designated by config type. Similar configs can be diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 039389a4cf99da6c2576c148d8c294e5d79aa7a8..92c24647e87a096e7cfbbf69876b678fe48842a4 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/inference/api/paddle_pass_builder.h" - +#ifdef PADDLE_WITH_CUDA +#include +#endif #include namespace paddle { @@ -66,8 +68,54 @@ void GpuPassStrategy::EnableMKLDNN() { LOG(ERROR) << "GPU not support MKLDNN yet"; } +GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { + passes_.assign({ + "infer_clean_graph_pass", // + "identity_scale_op_clean_pass", // + "conv_affine_channel_fuse_pass", // + "conv_eltwiseadd_affine_channel_fuse_pass", // + "conv_bn_fuse_pass", // +#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be + // guaranteed at least v7 + "conv_elementwise_add_act_fuse_pass", // + "conv_elementwise_add2_act_fuse_pass", // + "conv_elementwise_add_fuse_pass", // +#endif + }); + + for (int i = 6; i >= 3; i--) { + passes_.push_back("transpose_flatten" + std::to_string(i) + + "_concat_fuse_pass"); + } + use_gpu_ = true; +} + void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) { analysis_passes_.push_back(pass); } +CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { + // NOTE the large fusions should be located in the front, so that they will + // not be damaged by smaller ones. + passes_.assign({ + "infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "seqpool_concat_fuse_pass", // + "seqconv_eltadd_relu_fuse_pass", // + // "embedding_fc_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "fc_gru_fuse_pass", // + "mul_gru_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // + "repeated_fc_relu_fuse_pass", // + "squared_mat_sub_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_eltwiseadd_bn_fuse_pass", // + "is_test_pass", // + "identity_scale_op_clean_pass", // + }); + use_gpu_ = false; +} } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index aa353f12ca7333713e2d640cce6b2dfbea3c4e26..2524d89fcd1322e105ad2217347aa2380448f2bc 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder { */ class CpuPassStrategy : public PassStrategy { public: - CpuPassStrategy() : PassStrategy({}) { - // NOTE the large fusions should be located in the front, so that they will - // not be damaged by smaller ones. - passes_.assign({ - "infer_clean_graph_pass", // - "attention_lstm_fuse_pass", // - "seqpool_concat_fuse_pass", // - "seqconv_eltadd_relu_fuse_pass", // - // "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // - "mul_lstm_fuse_pass", // - "fc_gru_fuse_pass", // - "mul_gru_fuse_pass", // - "seq_concat_fc_fuse_pass", // - "fc_fuse_pass", // - "repeated_fc_relu_fuse_pass", // - "squared_mat_sub_fuse_pass", // - "conv_bn_fuse_pass", // - "conv_eltwiseadd_bn_fuse_pass", // - "is_test_pass", // - "identity_scale_op_clean_pass", // - }); - use_gpu_ = false; - } + CpuPassStrategy(); explicit CpuPassStrategy(const CpuPassStrategy &other) : PassStrategy(other.AllPasses()) {} @@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy { */ class GpuPassStrategy : public PassStrategy { public: - GpuPassStrategy() : PassStrategy({}) { - passes_.assign({ - "infer_clean_graph_pass", // - "identity_scale_op_clean_pass", // - "conv_affine_channel_fuse_pass", // - "conv_eltwiseadd_affine_channel_fuse_pass", // - "conv_bn_fuse_pass", // -#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be - // guaranteed at least v7 - "conv_elementwise_add_act_fuse_pass", // - "conv_elementwise_add2_act_fuse_pass", // - "conv_elementwise_add_fuse_pass", // -#endif - }); - - for (int i = 6; i >= 3; i--) { - passes_.push_back("transpose_flatten" + std::to_string(i) + - "_concat_fuse_pass"); - } - use_gpu_ = true; - } + GpuPassStrategy(); explicit GpuPassStrategy(const GpuPassStrategy &other) : PassStrategy(other.AllPasses()) { diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 7ecd9e35332843e3a391cdad5ce32220d890abd1..55ab04bfe16ec6a3d97c443f59c72e7b85fb1899 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -60,10 +60,13 @@ set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2") download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc) +# TODO(luotao, Superjom) Disable DAM test, temporarily fix +# https://github.com/PaddlePaddle/Paddle/issues/15032#issuecomment-455990914. +# After inference framework refactor, will reopen it. # normal DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator SERIAL) +#inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator SERIAL) # small DAM set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index c27c39f40a2067dd2bd2150e4b1e53eab7cdf06e..36282b3efe5756da55b056c09e94aa352e3dcf8a 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) { #define NEW_TENSOR(name__) \ auto name__##_tensor = predictor->GetInputTensor(#name__); - auto base_predictor = CreatePaddlePredictor(config); + std::vector> predictors; + predictors.emplace_back(CreatePaddlePredictor(config)); + for (int tid = 1; tid < FLAGS_num_threads; tid++) { + predictors.emplace_back(predictors.front()->Clone()); + } double total_time_of_threads{0}; std::vector threads; for (int tid = 0; tid < FLAGS_num_threads; tid++) { threads.emplace_back([&, tid] { - // To ensure the thread binding correctly, - // please clone inside the threadpool. - auto predictor = base_predictor->Clone(); + auto &predictor = predictors[tid]; NEW_TENSOR(data_lod_attention); NEW_TENSOR(cell_init); NEW_TENSOR(data); diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc index dd953e0dccbb3749bfcc87966453c6976dfefa10..cca2ab1ee148b568e714c24dded7cd72403f0e5f 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc @@ -56,14 +56,14 @@ struct DataRecord { std::vector slot_data; split_to_float(data[1], ' ', &slot_data); std::string name = data[0]; - PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0, + PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL, "line %d, %s should be divisible", num_lines, name); datasets[name].emplace_back(std::move(slot_data)); } num_samples = num_lines / num_slots; PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast(num_lines), "num samples should be divisible"); - PADDLE_ENFORCE_GT(num_samples, 0); + PADDLE_ENFORCE_GT(num_samples, 0UL); } void Prepare(int bs) { @@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) { SetConfig(&config); config.SwitchUseFeedFetchOps(false); - auto base_predictor = CreatePaddlePredictor(config); + std::vector> predictors; + predictors.emplace_back(CreatePaddlePredictor(config)); + for (int tid = 1; tid < FLAGS_num_threads; tid++) { + predictors.emplace_back(predictors.front()->Clone()); + } double total_time_of_threads{0}; std::vector threads; for (int tid = 0; tid < FLAGS_num_threads; tid++) { threads.emplace_back([&, tid] { - // To ensure the thread binding correctly, - // please clone inside the threadpool. - auto predictor = base_predictor->Clone(); + auto &predictor = predictors[tid]; std::vector> inputs; PrepareZeroCopyInputs(predictor, &inputs); auto output_tensor = predictor->GetOutputTensor(out_var_name); diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 2811eb4946ea025cf6c7ab197c4e603df86f6f2d..2e53fddfe7f6f0c5b31ff069fb1661f143022841 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -17,8 +17,10 @@ #include #include +#include #include #include // NOLINT +#include #include #ifdef WITH_GPERFTOOLS #include @@ -252,7 +254,11 @@ void TestMultiThreadPrediction( int batch_size = FLAGS_batch_size; int num_times = FLAGS_repeat; std::vector threads; - auto main_predictor = CreateTestPredictor(config, use_analysis); + std::vector> predictors; + predictors.emplace_back(CreateTestPredictor(config, use_analysis)); + for (int tid = 1; tid < num_threads; tid++) { + predictors.emplace_back(predictors.front()->Clone()); + } size_t total_time{0}; for (int tid = 0; tid < num_threads; ++tid) { @@ -260,9 +266,7 @@ void TestMultiThreadPrediction( // Each thread should have local inputs and outputs. // The inputs of each thread are all the same. std::vector outputs_tid; - // To ensure the thread binding correctly, - // please clone inside the threadpool. - auto predictor = main_predictor->Clone(); + auto &predictor = predictors[tid]; #ifdef PADDLE_WITH_MKLDNN if (use_analysis) { static_cast(predictor.get()) diff --git a/paddle/fluid/inference/tests/test.cmake b/paddle/fluid/inference/tests/test.cmake index 29f0f034a2aab50330d4d0127b870a5cb00d56a5..6c5fe043ffa3f3dcafe2dbbebd6244467f859abf 100644 --- a/paddle/fluid/inference/tests/test.cmake +++ b/paddle/fluid/inference/tests/test.cmake @@ -1,18 +1,43 @@ +include(ExternalProject) set(INFERENCE_URL "http://paddle-inference-dist.cdn.bcebos.com" CACHE STRING "inference download url") set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING "A path setting inference demo download directories.") -function (inference_download install_dir url filename) - message(STATUS "Download inference test stuff from ${url}/${filename}") - file(DOWNLOAD "${url}/${filename}" "${install_dir}/${filename}") - message(STATUS "finish downloading ${filename}") + +function(inference_download INSTALL_DIR URL FILENAME) + message(STATUS "Download inference test stuff from ${URL}/${FILENAME}") + string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME}) + ExternalProject_Add( + extern_inference_download_${FILENAME_EX} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${INSTALL_DIR} + URL ${URL}/${FILENAME} + DOWNLOAD_COMMAND wget -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} + DOWNLOAD_DIR ${INSTALL_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND "" + ) endfunction() -function (inference_download_and_uncompress install_dir url filename) - inference_download(${install_dir} ${url} ${filename}) - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${install_dir}/${filename} - WORKING_DIRECTORY ${install_dir} - ) +function(inference_download_and_uncompress INSTALL_DIR URL FILENAME) + message(STATUS "Download inference test stuff from ${URL}/${FILENAME}") + string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME}) + set(EXTERNAL_PROJECT_NAME "extern_inference_download_${FILENAME_EX}") + set(UNPACK_DIR "${INSTALL_DIR}/src/${EXTERNAL_PROJECT_NAME}") + ExternalProject_Add( + ${EXTERNAL_PROJECT_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${INSTALL_DIR} + URL ${URL}/${FILENAME} + DOWNLOAD_DIR ${INSTALL_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${UNPACK_DIR} ${INSTALL_DIR} + ) endfunction() set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec") diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 75fa611c0d701dd81dfe5b33231655e0959c7dbf..861f69f4d2143b16bdec546d92ce7bd13ca53ed3 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -171,9 +171,7 @@ void TestInference(const std::string& dirname, // Enable the profiler paddle::platform::EnableProfiler(state); { - paddle::platform::RecordEvent record_event( - "init_program", - paddle::platform::DeviceContextPool::Instance().Get(place)); + paddle::platform::RecordEvent record_event("init_program"); inference_program = InitProgram(&executor, scope, dirname, is_combined); } @@ -230,9 +228,7 @@ void TestInference(const std::string& dirname, // Run repeat times to profile the performance for (int i = 0; i < repeat; ++i) { - paddle::platform::RecordEvent record_event( - "run_inference", - paddle::platform::DeviceContextPool::Instance().Get(place)); + paddle::platform::RecordEvent record_event("run_inference"); if (PrepareContext) { // Note: if you change the inference_program, you need to call diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index 6f3e512fb0b68df5e86eba3e50a255c18f75214f..e3d6c2f511ef083ef9ecc1fe8df96051b2b85cc2 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -111,6 +111,8 @@ size_t BestFitAllocator::NumFreeChunks() const { } void BestFitAllocator::Free(Allocation* allocation) { auto* bf_allocation = dynamic_cast(allocation); + PADDLE_ENFORCE_NOT_NULL(bf_allocation, + "The input allocation is not BestFitAllocation."); auto chunk_it = bf_allocation->ChunkIterator(); PADDLE_ENFORCE(!chunk_it->is_free); chunk_it->is_free = true; diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index e983ae327d69389526ae4cae226b0eb324759700..1936f9d4cd83c53cf7b322ab29a3e0d92e042abc 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -356,7 +356,7 @@ void MemInfo::Minus(const size_t &size) { usage_ -= size; } -uint64_t MemInfo::GetPeakUsage() { return peak_usage_; } +uint64_t MemInfo::GetPeakUsage() const { return peak_usage_; } LegacyMemMonitor::~LegacyMemMonitor() { for (auto &item : gpu_mem_info_) delete item.second; @@ -380,10 +380,10 @@ void LegacyMemMonitor::Minus(const int &device, const size_t &size) { gpu_mem_info_[device]->Minus(size); } -uint64_t LegacyMemMonitor::GetMemUsage(const int &device) { +uint64_t LegacyMemMonitor::GetMemUsage(const int &device) const { return gpu_mem_info_.find(device) == gpu_mem_info_.end() ? 0 - : gpu_mem_info_[device]->GetPeakUsage(); + : gpu_mem_info_.at(device)->GetPeakUsage(); } void LegacyMemMonitor::PrintMemUsage() { diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h index ccbc8c70d8e9a16e7edb1be54bf80bec3b368eca..d9bdae153da6439598f76f5cac226897e6e0c596 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.h +++ b/paddle/fluid/memory/allocation/legacy_allocator.h @@ -27,20 +27,20 @@ namespace allocation { class MemInfo { public: MemInfo() : usage_(0), peak_usage_(0) {} - MemInfo(const MemInfo &) = delete; - MemInfo &operator=(const MemInfo &) = delete; // return a flag to indicate current operation will create a peak point or not bool Add(const size_t &); void Minus(const size_t &); - uint64_t GetPeakUsage(); + uint64_t GetPeakUsage() const; private: /* current memory usage*/ uint64_t usage_; uint64_t peak_usage_; std::mutex mutex_; + + DISABLE_COPY_AND_ASSIGN(MemInfo); }; class LegacyMemMonitor { @@ -56,11 +56,11 @@ class LegacyMemMonitor { void Add(const int &, const size_t &); void Minus(const int &, const size_t &); - uint64_t GetMemUsage(const int &); + uint64_t GetMemUsage(const int &) const; void PrintMemUsage(); - protected: + private: MemUsage gpu_mem_info_; }; diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index e099425b94221bf1229e936fc1781615d13dbc26..a3f2a69aef52b6f55aa09e6dee2c22c048626c0d 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -66,7 +66,7 @@ set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler tree2col) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search) if (WITH_GPU) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu) @@ -97,3 +97,4 @@ if (WITH_PYTHON) endif() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") +add_subdirectory(benchmark) diff --git a/paddle/fluid/operators/activation_cudnn.cu.cc b/paddle/fluid/operators/activation_cudnn.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..494c02374a9faa22486644c9b9c7d586c86d41b0 --- /dev/null +++ b/paddle/fluid/operators/activation_cudnn.cu.cc @@ -0,0 +1,40 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/platform/cudnn_desc.h" + +namespace paddle { +namespace operators { +using framework::Tensor; +using platform::ActivationDescriptor; +using platform::TensorDescriptor; + +template +class CudnnActivationKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + framework::Tensor *X, *Out; + ExtractActivationTensor(context, X, Out); + ActivationDescriptor act_desc; + TensorDescriptor x_desc, out_desc; + x_desc.set(detail::Ref(X)); + out_desc.set(detail::Ref(Out)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/activation_cudnn_op.cu.cc b/paddle/fluid/operators/activation_cudnn_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..a382414d5c473a9c36f92a9af56837da819e96a4 --- /dev/null +++ b/paddle/fluid/operators/activation_cudnn_op.cu.cc @@ -0,0 +1,175 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/platform/cudnn_desc.h" + +namespace paddle { +namespace operators { +using framework::Tensor; +using platform::ActivationDescriptor; +using platform::TensorDescriptor; +using platform::CUDADeviceContext; + +template +struct CudnnActivationFunctor { + using ELEMENT_TYPE = T; + CudnnActivationFunctor(const CUDADeviceContext& ctx, const T& c, + const cudnnActivationMode_t& m) + : ctx_(ctx), coef_(c), mode_(m) {} + void operator()(const Tensor& x, Tensor* out) { + ActivationDescriptor act_desc; + act_desc.set(mode_, coef_); + TensorDescriptor x_desc, out_desc; + x_desc.set(x); + out_desc.set(detail::Ref(out)); + PADDLE_ENFORCE(platform::dynload::cudnnActivationForward( + ctx_.cudnn_handle(), act_desc.desc(), + platform::CudnnDataType::kOne(), x_desc.desc(), x.data(), + platform::CudnnDataType::kZero(), out_desc.desc(), + out->mutable_data(ctx_.GetPlace()))); + } + const CUDADeviceContext& ctx_; + const T coef_; + const cudnnActivationMode_t mode_; +}; + +template +struct CudnnActivationGradFunctor { + using ELEMENT_TYPE = T; + CudnnActivationGradFunctor(const CUDADeviceContext& ctx, const T& c, + const cudnnActivationMode_t& m) + : ctx_(ctx), coef_(c), mode_(m) {} + void operator()(const Tensor& x, const Tensor& out, const Tensor dout, + Tensor* dx) { + ActivationDescriptor act_desc; + act_desc.set(mode_, coef_); + TensorDescriptor x_desc, out_desc, dout_desc, dx_desc; + x_desc.set(x); + out_desc.set(out); + dout_desc.set(dout); + dx_desc.set(detail::Ref(dx)); + PADDLE_ENFORCE(platform::dynload::cudnnActivationBackward( + ctx_.cudnn_handle(), act_desc.desc(), + platform::CudnnDataType::kOne(), out_desc.desc(), out.data(), + dout_desc.desc(), dout.data(), x_desc.desc(), x.data(), + platform::CudnnDataType::kZero(), dx_desc.desc(), + dx->mutable_data(ctx_.GetPlace()))); + } + const CUDADeviceContext& ctx_; + const T coef_; + const cudnnActivationMode_t mode_; +}; + +template +struct CudnnReluFunctor : public CudnnActivationFunctor { + explicit CudnnReluFunctor(const CUDADeviceContext& ctx) + : CudnnActivationFunctor(ctx, 0.0, CUDNN_ACTIVATION_RELU) {} +}; +template +struct CudnnReluGradFunctor : public CudnnActivationGradFunctor { + explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx) + : CudnnActivationGradFunctor(ctx, 0.0, CUDNN_ACTIVATION_RELU) {} +}; + +template +struct CudnnRelu6Functor : public CudnnActivationFunctor { + explicit CudnnRelu6Functor(const CUDADeviceContext& ctx) + : CudnnActivationFunctor(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {} +}; +template +struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor { + explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx) + : CudnnActivationGradFunctor(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) { + } +}; + +template +struct CudnnSigmoidFunctor : public CudnnActivationFunctor { + explicit CudnnSigmoidFunctor(const CUDADeviceContext& ctx) + : CudnnActivationFunctor(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {} +}; +template +struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor { + explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx) + : CudnnActivationGradFunctor(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {} +}; + +template +struct CudnnTanhFunctor : public CudnnActivationFunctor { + explicit CudnnTanhFunctor(const CUDADeviceContext& ctx) + : CudnnActivationFunctor(ctx, 0.0, CUDNN_ACTIVATION_TANH) {} +}; +template +struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor { + explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx) + : CudnnActivationGradFunctor(ctx, 0.0, CUDNN_ACTIVATION_TANH) {} +}; + +template +class CudnnActivationKernel + : public framework::OpKernel { + public: + using T = typename Functor::ELEMENT_TYPE; + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* X = nullptr; + framework::Tensor* Out = nullptr; + ExtractActivationTensor(context, &X, &Out); + Out->mutable_data(context.GetPlace()); + auto& dev_ctx = context.template device_context(); + Functor functor(dev_ctx); + functor(detail::Ref(X), Out); + } +}; + +template +class CudnnActivationGradKernel + : public framework::OpKernel { + public: + using T = typename Functor::ELEMENT_TYPE; + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor *X, *Out, *dOut; + X = Out = dOut = nullptr; + framework::Tensor* dX = nullptr; + ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX); + dX->mutable_data(context.GetPlace()); + auto& dev_ctx = context.template device_context(); + Functor functor(dev_ctx); + functor(detail::Ref(X), detail::Ref(Out), detail::Ref(dOut), dX); + } +}; + +} // namespace operators +} // namespace paddle + +namespace plat = paddle::platform; +namespace ops = paddle::operators; + +#define FOR_EACH_CUDNN_OP_FUNCTOR(__macro) \ + __macro(relu, CudnnReluFunctor, CudnnReluGradFunctor); \ + __macro(relu6, CudnnRelu6Functor, CudnnRelu6GradFunctor); \ + __macro(sigmoid, CudnnTanhFunctor, CudnnTanhGradFunctor); \ + __macro(tanh, CudnnTanhFunctor, CudnnTanhGradFunctor) + +#define REGISTER_ACTIVATION_CUDNN_KERNEL(act_type, functor, grad_functor) \ + REGISTER_OP_KERNEL(act_type, CUDNN, plat::CUDAPlace, \ + ops::CudnnActivationKernel>, \ + ops::CudnnActivationKernel>); \ + REGISTER_OP_KERNEL( \ + act_type##_grad, CUDNN, plat::CUDAPlace, \ + ops::CudnnActivationGradKernel>, \ + ops::CudnnActivationGradKernel>); + +FOR_EACH_CUDNN_OP_FUNCTOR(REGISTER_ACTIVATION_CUDNN_KERNEL); diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 189db2317d0544014d9c74e0fd5e9ead54925b9c..2feb8e4c4787440fd086c597fa2a7f97204e34ac 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -16,29 +16,36 @@ limitations under the License. */ #include #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h" #include "paddle/fluid/platform/port.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/cudnn_helper.h" +#endif namespace paddle { namespace operators { using paddle::framework::Tensor; -#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ - class OP_NAME##OpMaker \ - : public ::paddle::framework::OpProtoAndCheckerMaker { \ - public: \ - void Make() override { \ - AddInput("X", "Input of " #OP_NAME " operator"); \ - AddOutput("Out", "Output of " #OP_NAME " operator"); \ - AddAttr("use_mkldnn", \ - "(bool, default false) Only used in mkldnn kernel") \ - .SetDefault(false); \ - AddAttr( \ - "is_test", \ - "(bool, default false) Set to true for inference only, false " \ - "for training. Some layers may run faster when this is true.") \ - .SetDefault(false); \ - AddComment(#OP_COMMENT); \ - } \ +#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ + class OP_NAME##OpMaker \ + : public ::paddle::framework::OpProtoAndCheckerMaker { \ + public: \ + void Make() override { \ + AddInput("X", "Input of " #OP_NAME " operator"); \ + AddOutput("Out", "Output of " #OP_NAME " operator"); \ + AddAttr("use_mkldnn", \ + "(bool, default false) Only used in mkldnn kernel") \ + .SetDefault(false); \ + AddAttr("use_cudnn", \ + "(bool, default false) Only used in cudnn kernel, need " \ + "install cudnn") \ + .SetDefault(false); \ + AddAttr( \ + "is_test", \ + "(bool, default false) Set to true for inference only, false " \ + "for training. Some layers may run faster when this is true.") \ + .SetDefault(false); \ + AddComment(OP_COMMENT); \ + } \ } #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \ @@ -67,6 +74,12 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, const std::string& name) { framework::LibraryType library{framework::LibraryType::kPlain}; framework::DataLayout layout = framework::DataLayout::kAnyLayout; +#ifdef PADDLE_WITH_CUDA + auto it1 = oper.Attrs().find("use_cudnn"); + if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) { + library = framework::LibraryType::kCUDNN; + } +#endif #ifdef PADDLE_WITH_MKLDNN auto it = oper.Attrs().find("use_mkldnn"); if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() && @@ -124,7 +137,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel { UNUSED constexpr char SigmoidDoc[] = R"DOC( Sigmoid Activation Operator -$$out = \frac{1}{1 + e^{-x}}$$ +$$out = \\frac{1}{1 + e^{-x}}$$ )DOC"; @@ -187,14 +200,14 @@ $out = |x|$ UNUSED constexpr char CeilDoc[] = R"DOC( Ceil Activation Operator. -$out = ceil(x)$ +$out = \left \lceil x \right \rceil$ )DOC"; UNUSED constexpr char FloorDoc[] = R"DOC( Floor Activation Operator. -$out = floor(x)$ +$out = \left \lfloor x \right \rfloor$ )DOC"; @@ -252,7 +265,7 @@ $out = \ln(1 + e^{x})$ UNUSED constexpr char SoftsignDoc[] = R"DOC( Softsign Activation Operator. -$$out = \frac{x}{1 + |x|}$$ +$$out = \\frac{x}{1 + \|x\|}$$ )DOC"; diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index c7df3ea58a91579e35ff0d486516271a6daf054f..1f5ae7fb5cd2e1c14190602d2c35e6c3755cfd70 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -11,6 +11,7 @@ limitations under the License. */ #pragma once #include +#include #include #include #include @@ -24,6 +25,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detail/safe_ref.h" +#include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/platform/float16.h" #ifdef PADDLE_WITH_MKLDNN @@ -41,53 +43,115 @@ static std::unordered_set InplaceOpSet = { "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid", }; +static bool IsInplace(const std::string& op) { + bool inplace = InplaceOpSet.count(op); + // for op_grad + const int kGradSuffixLen = 4; + if (op.size() > kGradSuffixLen && + op.compare(op.size() - kGradSuffixLen - 1, kGradSuffixLen, "grad")) { + inplace = + InplaceOpSet.count(op.substr(0, op.size() - (kGradSuffixLen + 1))); + } + return inplace; +} + /* The following operator can be used to process SelectedRows, because the * output of those operator for zero is zero too. */ static std::unordered_set CanBeUsedBySelectedRows = { "abs", "abs_grad", "square", "square_grad", "sqrt", "sqrt_grad"}; -static bool IsInplace(std::string op) { return InplaceOpSet.count(op); } - -template -class ActivationKernel - : public framework::OpKernel { - public: - using T = typename Functor::ELEMENT_TYPE; - - void Compute(const framework::ExecutionContext& context) const override { +inline void ExtractActivationTensor(const framework::ExecutionContext& context, + const framework::Tensor** X, + framework::Tensor** Out) { + auto x_var = context.InputVar("X"); + auto out_var = context.OutputVar("Out"); + PADDLE_ENFORCE(x_var != nullptr, + "Cannot get input Variable X, variable name = %s", + context.op().Input("X")); + PADDLE_ENFORCE(out_var != nullptr, + "Cannot get output Variable Out, variable name = %s", + context.op().Output("Out")); + if (CanBeUsedBySelectedRows.count(context.op().Type())) { + *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var); + *Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( + out_var); + } else { + *X = context.Input("X"); + *Out = context.Output("Out"); + } + + PADDLE_ENFORCE(*Out != nullptr, + "Cannot get output tensor Out, variable name = %s", + context.op().Output("Out")); +} + +inline void ExtractActivationGradTensor( + const framework::ExecutionContext& context, const framework::Tensor** X, + const framework::Tensor** Out, const framework::Tensor** dOut, + framework::Tensor** dX) { + auto out_var = context.InputVar("Out"); + auto out_grad_var = context.InputVar(framework::GradVarName("Out")); + auto x_grad_var = context.OutputVar(framework::GradVarName("X")); + PADDLE_ENFORCE(out_var != nullptr, + "Cannot get input Variable Out, variable name = %s", + context.op().Input("Out")); + PADDLE_ENFORCE(out_grad_var != nullptr, + "Cannot get input Variable %s, variable name = %s", + framework::GradVarName("Out"), + context.op().Input(framework::GradVarName("Out"))); + PADDLE_ENFORCE(x_grad_var != nullptr, + "Cannot get output Variable %s, variable name = %s", + framework::GradVarName("X"), + context.op().Output(framework::GradVarName("X"))); + + if (CanBeUsedBySelectedRows.count(context.op().Type())) { + *Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var); + *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar( + *out_grad_var); + *dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( + x_grad_var); + } else { + *Out = context.Input("Out"); + *dOut = context.Input(framework::GradVarName("Out")); + *dX = context.Output(framework::GradVarName("X")); + } + PADDLE_ENFORCE(*dX != nullptr, + "Cannot get output tensor %s, variable name = %s", + framework::GradVarName("X"), + context.op().Output(framework::GradVarName("X"))); + + bool inplace = IsInplace(context.op().Type()); + if (!inplace) { auto x_var = context.InputVar("X"); - auto out_var = context.OutputVar("Out"); PADDLE_ENFORCE(x_var != nullptr, - "Cannot get input Variable X, variable name = %s", + "Cannot get input tensor X, variable name = %s", context.op().Input("X")); - PADDLE_ENFORCE(out_var != nullptr, - "Cannot get output Variable Out, variable name = %s", - context.op().Output("Out")); - - framework::Tensor X, *Out; - if (CanBeUsedBySelectedRows.count(context.op().Type())) { - X = detail::Ref( - paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var), - "Cannot get input Tensor X, variable name = %s", - context.op().Input("X")); - Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( - out_var); + *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var); } else { - X = detail::Ref(context.Input("X"), - "Cannot get input Tensor X, variable name = %s", - context.op().Input("X")); - Out = context.Output("Out"); + *X = context.Input("X"); } + } else { + VLOG(10) << " Inplace activation of Op : " << context.op().Type(); + *X = *dX; + } +} - PADDLE_ENFORCE(Out != nullptr, - "Cannot get output tensor Out, variable name = %s", - context.op().Output("Out")); +template +class ActivationKernel + : public framework::OpKernel { + public: + using T = typename Functor::ELEMENT_TYPE; + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* X = nullptr; + framework::Tensor* Out = nullptr; + ExtractActivationTensor(context, &X, &Out); Out->mutable_data(context.GetPlace()); - auto x = framework::EigenVector::Flatten(X); - auto out = framework::EigenVector::Flatten(*Out); + + auto x = framework::EigenVector::Flatten(detail::Ref(X)); + auto out = framework::EigenVector::Flatten(detail::Ref(Out)); auto* place = context.template device_context().eigen_device(); Functor functor; @@ -106,55 +170,15 @@ class ActivationGradKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - auto out_var = context.InputVar("Out"); - auto out_grad_var = context.InputVar(framework::GradVarName("Out")); - auto x_grad_var = context.OutputVar(framework::GradVarName("X")); - PADDLE_ENFORCE(out_var != nullptr, - "Cannot get input Variable Out, variable name = %s", - context.op().Input("Out")); - PADDLE_ENFORCE(out_grad_var != nullptr, - "Cannot get input Variable %s, variable name = %s", - framework::GradVarName("Out"), - context.op().Input(framework::GradVarName("Out"))); - PADDLE_ENFORCE(x_grad_var != nullptr, - "Cannot get output Variable %s, variable name = %s", - framework::GradVarName("X"), - context.op().Output(framework::GradVarName("X"))); - - framework::Tensor Out, dOut, *dX; - if (CanBeUsedBySelectedRows.count(context.op().Type())) { - Out = detail::Ref( - paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var), - "Cannot get input Tensor Out, variable name = %s", - context.op().Input("Out")); - dOut = - detail::Ref(paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar( - *out_grad_var), - "Cannot get input Tensor %s, variable name = %s", - framework::GradVarName("Out"), - context.op().Input(framework::GradVarName("Out"))); - dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( - x_grad_var); - } else { - Out = detail::Ref(context.Input("Out"), - "Cannot get input Tensor Out, variable name = %s", - context.op().Input("Out")); - dOut = detail::Ref( - context.Input(framework::GradVarName("Out")), - "Cannot get input Tensor %s, variable name = %s", - framework::GradVarName("Out"), - context.op().Input(framework::GradVarName("Out"))); - dX = context.Output(framework::GradVarName("X")); - } - PADDLE_ENFORCE(dX != nullptr, - "Cannot get output tensor %s, variable name = %s", - framework::GradVarName("X"), - context.op().Output(framework::GradVarName("X"))); + const framework::Tensor *X, *Out, *dOut; + framework::Tensor* dX = nullptr; + X = Out = dOut = nullptr; + ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX); dX->mutable_data(context.GetPlace()); - - auto dout = framework::EigenVector::Flatten(dOut); - auto out = framework::EigenVector::Flatten(Out); - auto dx = framework::EigenVector::Flatten(*dX); + auto dout = framework::EigenVector::Flatten(detail::Ref(dOut)); + auto out = framework::EigenVector::Flatten(detail::Ref(Out)); + auto dx = framework::EigenVector::Flatten(detail::Ref(dX)); + auto x = framework::EigenVector::Flatten(detail::Ref(X)); auto* place = context.template device_context().eigen_device(); Functor functor; @@ -162,27 +186,7 @@ class ActivationGradKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - bool inplace = functor.Inplace(); - if (!inplace) { - auto x_var = context.InputVar("X"); - PADDLE_ENFORCE(x_var != nullptr, - "Cannot get input tensor X, variable name = %s", - context.op().Input("X")); - framework::Tensor X; - if (CanBeUsedBySelectedRows.count(context.op().Type())) { - X = detail::Ref( - paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var)); - } else { - X = detail::Ref(context.Input("X")); - } - - auto x = framework::EigenVector::Flatten(X); - functor(*place, x, out, dout, dx); - } else { - VLOG(10) << " Inplace activation "; - auto x = framework::EigenVector::Flatten(*dX); - functor(*place, x, out, dout, dx); - } + functor(*place, x, out, dout, dx); } }; @@ -214,7 +218,6 @@ struct SigmoidFunctor : public BaseActivationFunctor { template struct SigmoidGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("sigmoid"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -269,7 +272,6 @@ struct ExpFunctor : public BaseActivationFunctor { template struct ExpGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("exp"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -288,7 +290,6 @@ struct ReluFunctor : public BaseActivationFunctor { template struct ReluGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("relu"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -301,8 +302,28 @@ template struct GeluFunctor : public BaseActivationFunctor { template void operator()(Device d, X x, Out out) const { +// Because the execute or device context can not be deliver here, it keep the +// marco for NVCC. +#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \ + !defined(__OSX__) && !defined(PADDLE_WITH_CUDA) + auto x_data = x.data(); + auto out_data = out.data(); + int n = std::min(x.size(), out.size()); + + std::memset(out_data, 0, n * sizeof(T)); + math::CBlas::AXPY(n, static_cast(M_SQRT1_2), x_data, 1, out_data, 1); + math::CBlas::VMERF(n, out_data, out_data, VML_LA); + for (int i = 0; i < n; i++) { + out_data[i] += static_cast(1); + } + math::CBlas::VMUL(n, x_data, out_data, out_data); + for (int i = 0; i < n; i++) { + out_data[i] *= static_cast(0.5); + } +#else auto temp = (x * static_cast(M_SQRT1_2)).erf(); out.device(d) = x * static_cast(0.5) * (static_cast(1) + temp); +#endif } }; @@ -331,7 +352,6 @@ struct TanhFunctor : public BaseActivationFunctor { template struct TanhGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("tanh"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -437,7 +457,6 @@ struct SqrtFunctor : public BaseActivationFunctor { template struct SqrtGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("sqrt"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -456,7 +475,6 @@ struct CeilFunctor : public BaseActivationFunctor { template struct ZeroGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("ceil"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -573,7 +591,6 @@ struct ReciprocalFunctor : public BaseActivationFunctor { template struct ReciprocalGradFunctor : public BaseActivationFunctor { - bool Inplace() const { return IsInplace("reciprocal"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -673,7 +690,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - bool Inplace() const { return IsInplace("relu6"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -755,7 +771,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - bool Inplace() const { return IsInplace("soft_relu"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { @@ -936,7 +951,6 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"slope", &slope}, {"offset", &offset}}; } - bool Inplace() { return IsInplace("hard_sigmoid"); } template void operator()(Device d, X x, Out out, dOut dout, dX dx) const { diff --git a/paddle/fluid/operators/alloc_continuous_space_op.cc b/paddle/fluid/operators/alloc_continuous_space_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..df0e9911cf7186e952cfd7fbf7f43889e9098c84 --- /dev/null +++ b/paddle/fluid/operators/alloc_continuous_space_op.cc @@ -0,0 +1,211 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +static framework::proto::VarType::Type kDefaultDtype = + framework::proto::VarType::Type::VarType_Type_BOOL; + +template +class AllocContinuousSpaceKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto &in_var_names = context.Inputs("Input"); + auto &out_var_names = context.Outputs("Output"); + auto &in_vars = context.MultiInputVar("Input"); + auto out_vars = context.MultiOutputVar("Output"); + + PADDLE_ENFORCE_GT(in_var_names.size(), static_cast(0)); + PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size()); + + for (size_t i = 0; i < in_var_names.size(); ++i) { + // Only support LoDTensor + PADDLE_ENFORCE_NOT_NULL(in_vars[i], "%s should not be nullptr,", + in_var_names[i]); + PADDLE_ENFORCE_NOT_NULL(out_vars[i], "%s should not be nullptr,", + out_var_names[i]); + PADDLE_ENFORCE(in_vars[i]->IsType()); + PADDLE_ENFORCE(out_vars[i]->IsType()); + } + + auto in_tensors = context.MultiInput("Input"); + + if (context.Attr("check_name")) { + for (size_t i = 0; i < in_var_names.size(); ++i) { + PADDLE_ENFORCE_EQ(in_var_names[i], out_var_names[i]); + } + } else { + // Init the output as input + for (size_t i = 0; i < in_tensors.size(); ++i) { + out_vars[i]->GetMutable()->Resize( + in_tensors[i]->dims()); + } + } + + auto &dev_ctx = context.template device_context(); + + // Get numel and dtype + size_t numel = 0; + auto dtype = kDefaultDtype; + GetMemSizeAndDtype(in_tensors, in_var_names, &numel, &dtype); + + // Alloc the continuous space + auto fused_tensor = context.Output("FusedOutput"); + fused_tensor->Resize(framework::make_ddim({static_cast(numel)})) + .mutable_data(context.GetPlace(), dtype); + + // Init the continuous space + auto out_tensors = context.MultiOutput("Output"); + int64_t offset = 0; + if (context.Attr("copy_data")) { + for (size_t i = 0; i < in_var_names.size(); ++i) { + int64_t len = out_tensors[i]->numel(); + auto sub_tensor = fused_tensor->Slice(offset, offset + len); + offset += len; + framework::TensorCopy(*out_tensors[i], context.GetPlace(), dev_ctx, + &sub_tensor); + } + } else if (context.Attr("set_constant")) { + math::SetConstant set_constant; + set_constant(dev_ctx, fused_tensor, + static_cast(context.Attr("constant"))); + } + + // Make the outputs point to the continuous space. + offset = 0; + for (size_t i = 0; i < out_tensors.size(); ++i) { + int64_t len = out_tensors[i]->numel(); + auto dim = out_tensors[i]->dims(); + out_tensors[i] + ->ShareDataWith(fused_tensor->Slice(offset, offset + len)) + .Resize(dim); + offset += len; + VLOG(10) << "alloc_space_for_vars: output(" << out_var_names[i] + << ") ,dim:(" << dim << ")" + << " Address: " << out_tensors[i]->data(); + } + } + + void GetMemSizeAndDtype( + const std::vector &lod_tensors, + const std::vector var_names, size_t *numel, + framework::proto::VarType::Type *dtype) const { + PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size()); + *numel = 0; + for (size_t i = 0; i < var_names.size(); ++i) { + PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.", + var_names[i]); + + auto p_dtype = lod_tensors[i]->type(); + if (*dtype == kDefaultDtype) { + PADDLE_ENFORCE_NE(p_dtype, kDefaultDtype, "%s's type should not be %s.", + var_names[i], kDefaultDtype); + *dtype = p_dtype; + } + PADDLE_ENFORCE_EQ(p_dtype, *dtype, "Input vars is not equal."); + + auto size = lod_tensors[i]->numel(); + PADDLE_ENFORCE_GT(size, 0); + VLOG(10) << "alloc_space_for_vars: input(" << var_names[i] << ") ,dim:(" + << lod_tensors[i]->dims() << ")"; + *numel += size; + } + } +}; + +class AllocContinuousSpaceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override {} +}; + +class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Input", + "(vector) The input tensors of" + " alloc_continuous_space operator.") + .AsDuplicable(); + AddOutput("Output", + "(vector) The output " + "tensors of alloc_continuous_space operator. And the address " + "of output tensors are continuous, they are sliced from the " + "tensor of FusedOutput.") + .AsDuplicable(); + AddOutput("FusedOutput", + "(LoDTensor) The output tensor " + "of alloc_continuous_space operator. And the tensors of" + " Output is sliced from the tensor of FusedOutput."); + AddAttr("copy_data", "Whether to copy the Input value to Output.") + .SetDefault(false); + AddAttr("set_constant", + "Whether to set the Output with a constant value.") + .SetDefault(false); + AddAttr("constant", + "If set_constant is true, the constant value will be used " + "to set the Output.") + .SetDefault(0.0); + AddAttr("check_name", + "Whether to check the name of Input and Output to ensure " + "they are the same separately.") + .SetDefault(false); + AddComment(R"DOC( +AllocContinuousSpace Operator. + +alloc_continuous_space is used to make the address of Output +continuous according to the Input. This Op will alloc a big tensor +according to the tensors of Input, the dtype is the same with those input tensors, +the size is the sum of those input tensors' numel, and the dim of the big +tensor is {sum(numel)}. And the big tensor is stored in FusedOutput. +The tensors of Output are sliced from the tensor of FusedOutput. +Note that, the dtype of Input should be the same, and the dim of Input +and Output should equal. +The tensors of Input and Output could be the same or different. And +alloc_continuous_space allows copying the value of Input to Output, or +setting the Output with a constant value. + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(alloc_continuous_space, + paddle::operators::AllocContinuousSpaceOp, + paddle::operators::AllocContinuousSpaceOpMaker); +namespace ops = paddle::operators; +REGISTER_OP_CPU_KERNEL( + alloc_continuous_space, + ops::AllocContinuousSpaceKernel, + ops::AllocContinuousSpaceKernel, + ops::AllocContinuousSpaceKernel); + +#ifdef PADDLE_WITH_CUDA +REGISTER_OP_CUDA_KERNEL( + alloc_continuous_space, + ops::AllocContinuousSpaceKernel, + ops::AllocContinuousSpaceKernel, + ops::AllocContinuousSpaceKernel); +#endif diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index b6996be4b0984bcee3b16da268d79708a68b65b3..912ec79910301b67bc520b1aa78d3fa1fd165d1f 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -293,7 +293,7 @@ class AttentionLSTMKernel : public framework::OpKernel { int len = x_lod[0][i + 1] - x_lod[0][i]; max_seq_len = max_seq_len < len ? len : max_seq_len; } - PADDLE_ENFORCE_EQ(x_lod.size(), 1, "Input(X)'s lod size must be 1."); + PADDLE_ENFORCE_EQ(x_lod.size(), 1UL, "Input(X)'s lod size must be 1."); PADDLE_ENFORCE_EQ(c0->dims()[0], N, "C0 dims should be %d x %d.", N, D); fc_out->Resize({max_seq_len, 1}); diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index 7f2bde55c98277b9fd4b3374657001c42d673d43..cf78c83297a87beb08a8b8e6e4b182f03f1909d3 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -123,7 +123,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase { auto& dev_ctx = *pool.Get(dev_place); framework::RuntimeContext run_ctx(Inputs(), Outputs(), scope); - framework::ExecutionContext ctx(*this, scope, dev_ctx, run_ctx); + framework::ExecutionContext ctx(*this, scope, dev_ctx, run_ctx, nullptr); const LoDTensorArray* ids = ctx.Input("Ids"); const LoDTensorArray* scores = ctx.Input("Scores"); diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h index 6aefc5446f167eebb0da673b3fbdf7ed128daa98..0b883c3158fb922caae2e731875bbb8d43a1e9ca 100644 --- a/paddle/fluid/operators/beam_search_decode_op.h +++ b/paddle/fluid/operators/beam_search_decode_op.h @@ -122,7 +122,7 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( auto cpu_place = std::unique_ptr( new paddle::platform::CPUPlace()); - paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place.get()); + paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place); framework::LoD lod; lod.push_back(source_level_lod); diff --git a/paddle/fluid/operators/benchmark/CMakeLists.txt b/paddle/fluid/operators/benchmark/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..54008336a9f67f0123ba1cfa6fcea35b79b7ac4c --- /dev/null +++ b/paddle/fluid/operators/benchmark/CMakeLists.txt @@ -0,0 +1,3 @@ +cc_test(op_tester SRCS op_tester.cc op_tester_config.cc + DEPS memory timer framework_proto proto_desc lod_tensor op_registry + device_context scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..064903c299d947df3c6b42d916fce8dcbd85eebb --- /dev/null +++ b/paddle/fluid/operators/benchmark/op_tester.cc @@ -0,0 +1,336 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/benchmark/op_tester.h" +#include +#include "gflags/gflags.h" +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_info.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/variable_helper.h" +#include "paddle/fluid/platform/init.h" +#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/timer.h" +#include "paddle/fluid/pybind/pybind.h" + +namespace paddle { +namespace operators { +namespace benchmark { + +DEFINE_string(op_config_list, "", "Path of op config file."); +DEFINE_int32(specified_config_id, -1, "Test the specified op config."); + +void OpTester::Init(const std::string &filename) { + Init(OpTesterConfig(filename)); +} + +void OpTester::Init(const OpTesterConfig &config) { + config_ = config; + + auto &op_desc_info = framework::OpInfoMap::Instance(); + // Initialize the OpDesc + if (op_desc_info.Has(config_.op_type)) { + type_ = config_.op_type; + op_desc_.SetType(config_.op_type); + + CreateInputVarDesc(); + CreateOutputVarDesc(); + } else { + LOG(FATAL) << "Op \"" << config_.op_type << "\" is not registered."; + } + + if (config_.device_id >= 0) { + place_ = paddle::platform::CUDAPlace(config_.device_id); + } else { + place_ = paddle::platform::CPUPlace(); + } + + framework::InitDevices(false); + scope_.reset(new paddle::framework::Scope()); + + op_ = framework::OpRegistry::CreateOp(op_desc_); + CreateVariables(scope_.get()); +} + +void OpTester::Run() { + if (config_.print_debug_string) { + LOG(INFO) << DebugString(); + } + + // Warm up + RunImpl(); + + platform::Timer timer; + if (config_.profile) { + if (platform::is_cpu_place(place_)) { + platform::EnableProfiler(platform::ProfilerState::kCPU); + } else { +#ifdef PADDLE_WITH_CUDA + platform::EnableProfiler(platform::ProfilerState::kAll); + platform::SetDeviceId(config_.device_id); +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif + } + + timer.Start(); + for (int i = config_.repeat; i > 0; --i) { + RunImpl(); + } + timer.Pause(); + platform::DisableProfiler(platform::EventSortingKey::kDefault, + "op_tester_profiler"); + } else { + timer.Start(); + for (int i = config_.repeat; i > 0; --i) { + RunImpl(); + } + timer.Pause(); + } + config_.runtime = timer.ElapsedMS() / config_.repeat; + LOG(INFO) << "=== Run " << config_.repeat + << " times, latency: " << config_.runtime << " ms ==="; +} + +void OpTester::RunImpl() { + op_->Run(*scope_, place_); + platform::DeviceContextPool::Instance().Get(place_)->Wait(); + scope_->DropKids(); +} + +std::vector OpTester::GetOpProtoInputNames() { + std::vector input_names; + const framework::proto::OpProto &proto = + framework::OpInfoMap::Instance().Get(type_).Proto(); + for (int i = 0; i != proto.inputs_size(); ++i) { + const auto &input = proto.inputs(i); + input_names.push_back(input.name()); + } + return input_names; +} + +std::vector OpTester::GetOpProtoOutputNames() { + std::vector output_names; + const framework::proto::OpProto &proto = + framework::OpInfoMap::Instance().Get(type_).Proto(); + for (int i = 0; i != proto.outputs_size(); ++i) { + const auto &output = proto.outputs(i); + output_names.push_back(output.name()); + } + return output_names; +} + +void OpTester::CreateInputVarDesc() { + std::vector input_names = GetOpProtoInputNames(); + for (auto &name : input_names) { + const OpInputConfig *input = config_.GetInput(name); + if (input == nullptr) { + LOG(FATAL) << "The input " << name << " of op " << config_.op_type + << " is not correctlly provided."; + } + + std::string var_name = config_.op_type + "." + name; + framework::VarDesc *var = Var(var_name); + // Need to support more type + var->SetType(framework::proto::VarType::LOD_TENSOR); + var->SetPersistable(false); + var->SetDataType(framework::proto::VarType::FP32); + var->SetShape(input->dims); + + op_desc_.SetInput(name, {var_name}); + input_lods_[var_name] = input->lod; + } +} + +void OpTester::CreateOutputVarDesc() { + std::vector output_names = GetOpProtoOutputNames(); + for (auto &name : output_names) { + std::string var_name = config_.op_type + "." + name; + framework::VarDesc *var = Var(var_name); + // Need to support more type + var->SetType(framework::proto::VarType::LOD_TENSOR); + var->SetPersistable(false); + var->SetDataType(framework::proto::VarType::FP32); + + op_desc_.SetOutput(name, {var_name}); + } +} + +framework::VarDesc *OpTester::Var(const std::string &name) { + auto it = vars_.find(name); + if (it != vars_.end()) { + return it->second.get(); + } + auto *var = new framework::VarDesc(name); + vars_[name].reset(var); + return var; +} + +template +void OpTester::SetupTensor(framework::LoDTensor *tensor, + const std::vector &shape, T lower, + T upper) { + static unsigned int seed = 100; + std::mt19937 rng(seed++); + std::uniform_real_distribution uniform_dist(0, 1); + + T *ptr = tensor->mutable_data(framework::make_ddim(shape), place_); + if (platform::is_cpu_place(place_)) { + for (int i = 0; i < tensor->numel(); ++i) { + ptr[i] = static_cast(uniform_dist(rng) * (upper - lower) + lower); + } + } else { + framework::LoDTensor cpu_tensor; + T *cpu_ptr = cpu_tensor.mutable_data(framework::make_ddim(shape), + platform::CPUPlace()); + for (int i = 0; i < cpu_tensor.numel(); ++i) { + cpu_ptr[i] = static_cast(uniform_dist(rng) * (upper - lower) + lower); + } + TensorCopySync(cpu_tensor, place_, tensor); + } +} + +void OpTester::CreateVariables(framework::Scope *scope) { + for (auto &item : vars_) { + auto &var = item.second; + if (var->Name() == framework::kEmptyVarName) { + continue; + } + + auto *ptr = scope->Var(var->Name()); + framework::InitializeVariable(ptr, var->GetType()); + if (var->Persistable()) { + VLOG(3) << "Create Variable " << var->Name() + << " global, which pointer is " << ptr; + } else { + VLOG(3) << "Create Variable " << var->Name() + << " locally, which pointer is " << ptr; + } + } + + for (auto &item : input_lods_) { + // Allocate memory for input tensor + auto &var_name = item.first; + VLOG(3) << "Allocate memory for tensor " << var_name; + + auto &var_desc = vars_[var_name]; + std::vector shape = var_desc->GetShape(); + + auto *var = scope->Var(var_name); + auto *tensor = var->GetMutable(); + SetupTensor(tensor, shape, static_cast(0.0), + static_cast(1.0)); + + VLOG(3) << "Set lod for tensor " << var_name; + std::vector> &lod_vec = item.second; + framework::LoD lod; + for (size_t i = 0; i < lod_vec.size(); ++i) { + lod.push_back(lod_vec[i]); + } + tensor->set_lod(lod); + } +} + +static std::string GenSpaces(int count) { + std::stringstream ss; + for (int i = 0; i < count; ++i) { + ss << " "; + } + return ss.str(); +} + +std::string OpTester::DebugString() { + std::stringstream ss; + int count = 0; + for (auto &item : vars_) { + auto &var = item.second; + ss << GenSpaces(count++) << "vars {\n"; + ss << GenSpaces(count) << "name: \"" << var->Name() << "\"\n"; + ss << GenSpaces(count++) << "type: {\n"; + ss << GenSpaces(count) << "type: LOD_TENSOR\n"; + ss << GenSpaces(count++) << "lod_tensor {\n"; + ss << GenSpaces(count++) << "tensor {\n"; + ss << GenSpaces(count) << "data_type: FP32\n"; + std::vector shape = var->GetShape(); + for (auto d : shape) { + ss << GenSpaces(count) << "dims: " << d << "\n"; + } + ss << GenSpaces(--count) << "}\n"; + ss << GenSpaces(--count) << "}\n"; + ss << GenSpaces(--count) << "}\n"; + ss << GenSpaces(count) << "persistable: " << var->Persistable() << "\n"; + ss << GenSpaces(--count) << "}\n"; + } + ss << GenSpaces(count++) << "ops {\n"; + for (auto &name : op_desc_.InputNames()) { + ss << GenSpaces(count++) << "inputs {\n"; + ss << GenSpaces(count) << "parameters: \"" << name << "\"\n"; + ss << GenSpaces(count) << "arguments: \"" << op_desc_.Input(name)[0] + << "\"\n"; + ss << GenSpaces(--count) << "}\n"; + } + for (auto &name : op_desc_.OutputNames()) { + ss << GenSpaces(count++) << "outputs {\n"; + ss << GenSpaces(count) << "parameters: \"" << name << "\"\n"; + ss << GenSpaces(count) << "arguments: \"" << op_desc_.Output(name)[0] + << "\"\n"; + ss << GenSpaces(--count) << "}\n"; + } + ss << GenSpaces(count) << "type: " << op_desc_.Type() << "\n"; + ss << GenSpaces(--count) << "}\n"; + return ss.str(); +} + +TEST(op_tester, base) { + if (!FLAGS_op_config_list.empty()) { + std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary); + PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", + FLAGS_op_config_list.c_str()); + std::vector op_configs; + while (!fin.eof()) { + OpTesterConfig config; + bool result = config.Init(fin); + if (result) { + op_configs.push_back(config); + } + } + if (FLAGS_specified_config_id >= 0 && + FLAGS_specified_config_id < static_cast(op_configs.size())) { + OpTester tester; + tester.Init(op_configs[FLAGS_specified_config_id]); + tester.Run(); + } else { + for (size_t i = 0; i < op_configs.size(); ++i) { + OpTester tester; + tester.Init(op_configs[i]); + tester.Run(); + } + } + } else { + OpTester tester; + OpTesterConfig config; + config.op_type = "elementwise_add"; + config.inputs.resize(2); + config.inputs[0].name = "X"; + config.inputs[0].dims = {64, 64}; + config.inputs[1].name = "Y"; + config.inputs[1].dims = {64, 1}; + tester.Init(config); + tester.Run(); + } +} + +} // namespace benchmark +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/benchmark/op_tester.h b/paddle/fluid/operators/benchmark/op_tester.h new file mode 100644 index 0000000000000000000000000000000000000000..8f150b23ad783acdfd203d471d578ab6aae71494 --- /dev/null +++ b/paddle/fluid/operators/benchmark/op_tester.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/benchmark/op_tester_config.h" + +namespace paddle { +namespace operators { +namespace benchmark { + +class OpTester { + public: + OpTester() {} + + void Init(const std::string &filename); + void Init(const OpTesterConfig &config); + + void Run(); + + std::string DebugString(); + + private: + std::vector GetOpProtoInputNames(); + std::vector GetOpProtoOutputNames(); + + void CreateInputVarDesc(); + void CreateOutputVarDesc(); + + framework::VarDesc *Var(const std::string &name); + void CreateVariables(framework::Scope *scope); + + template + void SetupTensor(framework::LoDTensor *input, + const std::vector &shape, T lower, T upper); + + void RunImpl(); + + private: + OpTesterConfig config_; + std::string type_; + framework::OpDesc op_desc_; + std::unordered_map> vars_; + std::unordered_map>> input_lods_; + std::unique_ptr op_; + platform::Place place_; + std::unique_ptr scope_; +}; + +} // namespace benchmark +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc new file mode 100644 index 0000000000000000000000000000000000000000..8336804ec07d2b7b176f55ad4113452086296494 --- /dev/null +++ b/paddle/fluid/operators/benchmark/op_tester_config.cc @@ -0,0 +1,188 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/benchmark/op_tester_config.h" +#include +#include "glog/logging.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace benchmark { + +static const char kStartSeparator[] = "{"; +static const char kEndSeparator[] = "}"; +static const char kSepBetweenItems[] = ";"; + +static bool StartWith(const std::string& str, const std::string& substr) { + return str.find(substr) == 0; +} + +static bool EndWith(const std::string& str, const std::string& substr) { + return str.rfind(substr) == (str.length() - substr.length()); +} + +static void EraseEndSep(std::string* str, + std::string substr = kSepBetweenItems) { + if (EndWith(*str, substr)) { + str->erase(str->length() - substr.length(), str->length()); + } +} + +void OpInputConfig::ParseDims(std::istream& is) { + std::string dims_str; + is >> dims_str; + + dims.clear(); + std::string token; + std::istringstream token_stream(dims_str); + while (std::getline(token_stream, token, 'x')) { + dims.push_back(std::stoi(token)); + } +} + +void OpInputConfig::ParseLoD(std::istream& is) { + std::string lod_str; + std::string start_sep = + std::string(kStartSeparator) + std::string(kStartSeparator); + std::string end_sep = std::string(kEndSeparator) + std::string(kEndSeparator); + + std::string sep; + is >> sep; + if (StartWith(sep, start_sep)) { + lod_str += sep; + while (!EndWith(sep, end_sep)) { + is >> sep; + lod_str += sep; + } + } + EraseEndSep(&lod_str); + PADDLE_ENFORCE_GE(lod_str.length(), 4U); + VLOG(4) << "lod: " << lod_str << ", length: " << lod_str.length(); + + // Parse the lod_str + lod.clear(); + for (size_t i = 1; i < lod_str.length() - 1;) { + if (lod_str[i] == '{') { + std::vector level; + while (lod_str[i] != '}') { + ++i; + + std::string number; + while (lod_str[i] >= '0' && lod_str[i] <= '9') { + number += lod_str[i]; + ++i; + } + level.push_back(atoi(number.c_str())); + } + lod.push_back(level); + } else if (lod_str[i] == '}') { + ++i; + } + } +} + +OpInputConfig::OpInputConfig(std::istream& is) { + std::string sep; + is >> sep; + if (sep == kStartSeparator) { + while (sep != kEndSeparator) { + is >> sep; + if (sep == "name" || sep == "name:") { + is >> name; + EraseEndSep(&name); + } else if (sep == "dims" || sep == "dims:") { + ParseDims(is); + } else if (sep == "lod" || sep == "lod:") { + ParseLoD(is); + } + } + } +} + +OpTesterConfig::OpTesterConfig(const std::string& filename) { + std::ifstream fin(filename, std::ios::in | std::ios::binary); + PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", + filename.c_str()); + + Init(fin); +} + +bool OpTesterConfig::Init(std::istream& is) { + std::string sep; + is >> sep; + if (sep == kStartSeparator) { + while (sep != kEndSeparator) { + is >> sep; + if (sep == "op_type" || sep == "op_type:") { + is >> op_type; + } else if (sep == "device_id" || sep == "device_id:") { + is >> device_id; + } else if (sep == "repeat" || sep == "repeat:") { + is >> repeat; + } else if (sep == "profile" || sep == "profile:") { + is >> profile; + } else if (sep == "print_debug_string" || sep == "print_debug_string:") { + is >> print_debug_string; + } else if (sep == "input" || sep == "input:") { + OpInputConfig input_config(is); + inputs.push_back(input_config); + } else if (sep == "attrs" || sep == "attrs:") { + ParseAttrs(is); + } else { + if (sep != kEndSeparator) { + return false; + } + } + } + } else { + return false; + } + return true; +} + +bool OpTesterConfig::ParseAttrs(std::istream& is) { + std::string sep; + is >> sep; + if (sep == kStartSeparator) { + while (true) { + std::string key; + is >> key; + if (key == kEndSeparator) { + break; + } + + std::string value; + is >> value; + EraseEndSep(&key, ":"); + EraseEndSep(&value); + + attrs[key] = value; + } + } + return true; +} + +const OpInputConfig* OpTesterConfig::GetInput(const std::string& name) { + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs[i].name == name) { + return &inputs[i]; + } + } + return nullptr; +} + +} // namespace benchmark +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/benchmark/op_tester_config.h b/paddle/fluid/operators/benchmark/op_tester_config.h new file mode 100644 index 0000000000000000000000000000000000000000..c2ff6dafc053eb7202a686954d53ae6f3d62d02e --- /dev/null +++ b/paddle/fluid/operators/benchmark/op_tester_config.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +namespace paddle { +namespace operators { +namespace benchmark { + +struct OpInputConfig { + OpInputConfig() {} + explicit OpInputConfig(std::istream& is); + + void ParseDims(std::istream& is); + void ParseLoD(std::istream& is); + + std::string name; + std::vector dims; + std::vector> lod; +}; + +struct OpTesterConfig { + OpTesterConfig() {} + explicit OpTesterConfig(const std::string& filename); + + bool Init(std::istream& is); + + bool ParseAttrs(std::istream& is); + + const OpInputConfig* GetInput(const std::string& name); + + std::string op_type; + std::vector inputs; + std::unordered_map attrs; + int device_id{-1}; // CPU: -1 + int repeat{1}; + int profile{0}; + int print_debug_string{0}; + double runtime{0.0}; +}; + +} // namespace benchmark +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/controlflow/compare_op.cc b/paddle/fluid/operators/controlflow/compare_op.cc index 688457d4a75168577302e45817ef0463d6ff3718..5d3f9b43f8c08d356319fa0b9ccaf808811d3d39 100644 --- a/paddle/fluid/operators/controlflow/compare_op.cc +++ b/paddle/fluid/operators/controlflow/compare_op.cc @@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker { comment.type)); AddInput("Y", string::Sprintf("the right hand operand of %s operator", comment.type)); + AddAttr( + "axis", + "The start dimension index for broadcasting Y onto X. [default -1]") + .SetDefault(-1) + .EqualGreaterThan(-1); AddAttr("force_cpu", "Force fill output variable to cpu " "memory. Otherwise, fill output variable to the running " @@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is calculated by $%s$ )DOC", comment.equation)); - AddAttr( - "axis", - "The start dimension index for broadcasting Y onto X. [default -1]") - .SetDefault(-1) - .EqualGreaterThan(-1); } }; diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc index db6ff7825690176ded0ab957764ed8411d3cd804..1a157688f3d02185d18b66ff5ba3613b6cf438ad 100644 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ b/paddle/fluid/operators/controlflow/get_places_op.cc @@ -52,7 +52,7 @@ class GetPlacesOp : public framework::OperatorBase { device_count = is_gpu ? CUDADevCount() : std::thread::hardware_concurrency(); } - PADDLE_ENFORCE_NE(device_count, 0, "Cannot indicate %s device count", + PADDLE_ENFORCE_NE(device_count, 0UL, "Cannot indicate %s device count", is_gpu ? "GPU" : "CPU"); auto out_var_name = Output("Out"); diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc index f5208e7a601f4dd33b486e5840178022f66431e5..9e5ccd928e9d6012c1da3baa17521dcac0c8ff2f 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc @@ -42,6 +42,7 @@ using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; using DataLayout = platform::DataLayout; template using ScalingParamType = typename platform::CudnnDataType::ScalingParamType; +using framework::AlgorithmsCache; template class CUDNNConvOpKernel : public framework::OpKernel { @@ -169,18 +170,8 @@ class CUDNNConvOpKernel : public framework::OpKernel { workspace_size_limit, &algo)); VLOG(3) << "cuDNN forward algo " << algo; } else if (exhaustive_search && (!half_float)) { - AlgorithmsCache* algo_cache = nullptr; - if (ctx.scope().FindVar(kCUDNNFwdAlgoCache)) { - algo_cache = - ctx.scope() - .FindVar(kCUDNNFwdAlgoCache) - ->GetMutable>(); - } else { - algo_cache = - const_cast(ctx.scope()) - .Var(kCUDNNFwdAlgoCache) - ->GetMutable>(); - } + AlgorithmsCache& algo_cache = + ctx.GetKernelConfig>(0); cudnn_workspace = ctx.AllocateTmpTensor( framework::make_ddim( @@ -188,7 +179,7 @@ class CUDNNConvOpKernel : public framework::OpKernel { dev_ctx); cudnn_workspace_ptr = static_cast(cudnn_workspace.data()); - algo = algo_cache->GetAlgorithm( + algo = algo_cache.GetAlgorithm( x_dims, f_dims, strides, paddings, dilations, 0, [&]() { int returned_algo_count; std::array @@ -382,22 +373,11 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); if (exhaustive_search) { - AlgorithmsCache* data_algo_cache; - if (ctx.scope().FindVar(kCUDNNBwdDataAlgoCache)) { - data_algo_cache = - ctx.scope() - .FindVar(kCUDNNBwdDataAlgoCache) - ->GetMutable< - AlgorithmsCache>(); - } else { - data_algo_cache = - const_cast(ctx.scope()) - .Var(kCUDNNBwdDataAlgoCache) - ->GetMutable< - AlgorithmsCache>(); - } - - data_algo = data_algo_cache->GetAlgorithm( + AlgorithmsCache& data_algo_cache = + ctx.GetKernelConfig>( + 0); + + data_algo = data_algo_cache.GetAlgorithm( x_dims, f_dims, strides, paddings, dilations, 0, [&]() { int returned_algo_count; std::array { if (filter_grad) { T* filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); if (exhaustive_search) { - AlgorithmsCache* f_algo_cache; - if (ctx.scope().FindVar(kCUDNNBwdFilterAlgoCache)) { - f_algo_cache = - ctx.scope() - .FindVar(kCUDNNBwdFilterAlgoCache) - ->GetMutable< - AlgorithmsCache>(); - } else { - f_algo_cache = - const_cast(ctx.scope()) - .Var(kCUDNNBwdFilterAlgoCache) - ->GetMutable< - AlgorithmsCache>(); - } - - filter_algo = f_algo_cache->GetAlgorithm( + AlgorithmsCache& f_algo_cache = + ctx.GetKernelConfig< + AlgorithmsCache>(1); + + filter_algo = f_algo_cache.GetAlgorithm( x_dims, f_dims, strides, paddings, dilations, 0, [&]() { int returned_algo_count; std::array #include #include +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/cudnn_helper.h" DECLARE_uint64(conv_workspace_size_limit); @@ -46,100 +47,5 @@ static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = 4; static constexpr size_t kNUM_CUDNN_BWD_DATA_ALGS = 5; #endif -template -class AlgorithmsCache { - public: - AlgorithmsCache() : search_times_(0) { hash_.clear(); } - // Caches the best algorithm for a given - // combination of tensor dimensions & compute data type. - TAlgorithm GetAlgorithm( - const std::vector& dims1, const std::vector& dims2, - const std::vector& strides, const std::vector& paddings, - const std::vector& dilations, - int algorithmFlags, // can set for different data type - std::function gen_func); - - TAlgorithm GetAlgorithm(int64_t area, int search_times, int algorithmFlags, - std::function gen_func); - - private: - std::unordered_map hash_; - std::mutex mutex_; - - int search_times_; -}; - -template -TAlgorithm AlgorithmsCache::GetAlgorithm( - const std::vector& dims1, const std::vector& dims2, - const std::vector& strides, const std::vector& paddings, - const std::vector& dilations, int algorithmFlags, - std::function gen_func) { - std::lock_guard lock(mutex_); - int64_t seed = 0; - // Hash all of the inputs, use to try and look up a previously - // discovered algorithm, or fall back to generating a new one. - std::hash hashFn; - // do hash like boost - // https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x - for (const auto num : dims1) { - seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - - for (const auto num : dims2) { - seed ^= hashFn(num) + 0x9e3779b9 + (seed << 6) + (seed >> 2) + 1; - } - - for (const auto num : strides) { - seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + - (seed >> 2) + 2; - } - - for (const auto num : paddings) { - seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + - (seed >> 2) + 3; - } - - for (const auto num : dilations) { - seed ^= hashFn(static_cast(num)) + 0x9e3779b9 + (seed << 6) + - (seed >> 2) + 4; - } - - seed ^= hashFn(static_cast(algorithmFlags)) + 0x9e3779b9 + - (seed << 6) + (seed >> 2) + 5; - - if (seed == 0) return gen_func(); - - if (hash_.find(seed) == hash_.end()) { - TAlgorithm value = gen_func(); - hash_[seed] = value; - } - return hash_[seed]; -} - -template -TAlgorithm AlgorithmsCache::GetAlgorithm( - int64_t area, int search_times, int algorithmFlags, - std::function gen_func) { - if (hash_.find(area) != hash_.end()) { - return hash_[area]; - } - if (search_times_ < search_times) { - auto algo = gen_func(); - hash_[area] = algo; - ++search_times_; - return algo; - } - TAlgorithm algo; - int64_t min = static_cast(INT_MAX); - for (const auto& m : hash_) { - if (m.first < min) { - min = m.first; - algo = m.second; - } - } - return algo; -} - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc index d8b997cca613f660046106512fc03bf55f9b992d..64152829b4f000e545054e528edca33dfe96ec56 100644 --- a/paddle/fluid/operators/conv_fusion_op.cu.cc +++ b/paddle/fluid/operators/conv_fusion_op.cu.cc @@ -30,6 +30,8 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; using ScopedActivationDescriptor = platform::ScopedActivationDescriptor; using DataLayout = platform::DataLayout; +using framework::AlgorithmsCache; + template using ScalingParamType = typename platform::CudnnDataType::ScalingParamType; @@ -139,38 +141,21 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { } return fwd_perf_stat[0].algo; }; - AlgorithmsCache* algo_cache = nullptr; + AlgorithmsCache& algo_cache = + ctx.GetKernelConfig>(0); int search_times = ctx.Attr("search_times"); search_times = std::max( static_cast(FLAGS_cudnn_exhaustive_search_times), search_times); + // TODO(dangqingqing): Unify this if-else. if (search_times > 0) { // The searched algo will be cached by `search_times` times for // different input dimension. For other dimensions, select the algo // of closest area. - auto var_name = ctx.Inputs("AlgoCache")[0]; - algo_cache = - ctx.scope() - .FindVar(var_name) - ->GetMutable>(); - algo = algo_cache->GetAlgorithm(x_dims[2] * x_dims[3], search_times, 0, - search_func); + algo = algo_cache.GetAlgorithm(x_dims[2] * x_dims[3], search_times, 0, + search_func); } else { - // Cache searched algo in Var(kCUDNNFwdAlgoCache). - // all conv ops use the same kCUDNNFwdAlgoCache variable. - if (ctx.scope().FindVar(kCUDNNFwdAlgoCache)) { - algo_cache = - ctx.scope() - .FindVar(kCUDNNFwdAlgoCache) - ->GetMutable>(); - } else { - // TODO(qingqing) remove const_cast - algo_cache = - const_cast(ctx.scope().parent()) - ->Var(kCUDNNFwdAlgoCache) - ->GetMutable>(); - } - algo = algo_cache->GetAlgorithm(x_dims, f_dims, strides, paddings, - dilations, 0, search_func); + algo = algo_cache.GetAlgorithm(x_dims, f_dims, strides, paddings, + dilations, 0, search_func); } VLOG(3) << "choose algo " << algo; } diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index bd788f03e7d666aad7ce6f0c63cea30f029e3491..ca6bc4df0fe2c6cddaf548d3e708e777172a0841 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include #ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/platform/cudnn_helper.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -80,6 +81,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( framework::OpKernelType::kDefaultCustomizedTypeValue; framework::LibraryType library{framework::LibraryType::kPlain}; // TODO(pzelazko-intel): enable MKLDNN layout when it's ready + auto input_data_type = ctx.Input("Input")->type(); std::string data_format = ctx.Attr("data_format"); framework::DataLayout layout = framework::StringToDataLayout(data_format); @@ -93,11 +95,14 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( platform::CanMKLDNNBeUsed(ctx)) { library = framework::LibraryType::kMKLDNN; layout = framework::DataLayout::kMKLDNN; - customized_type_value = kConvMKLDNNFP32; + customized_type_value = + (input_data_type == framework::DataTypeTrait::DataType || + input_data_type == framework::DataTypeTrait::DataType) + ? kConvMKLDNNINT8 + : kConvMKLDNNFP32; } #endif - auto input_data_type = ctx.Input("Input")->type(); if (input_data_type != framework::proto::VarType::INT8 && input_data_type != framework::proto::VarType::UINT8) { auto filter_data_type = ctx.Input("Filter")->type(); @@ -109,8 +114,20 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( "float16 can only be used when CUDNN is used"); } - return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, - library, customized_type_value); + auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, + library, customized_type_value); +#ifdef PADDLE_WITH_CUDA + std::vector& configs = kernel_configs_map_[type]; + // TODO(dangqingqing): Currently conv_fusion_op use cudnn but sets use_cudnn + // to false. It should be fixed and then here should only create if library + // is kCUDNN. + if (configs.empty()) { + std::shared_ptr> p( + new framework::AlgorithmsCache()); + configs.push_back(p); + } +#endif + return type; } void Conv2DOpMaker::Make() { @@ -222,7 +239,7 @@ void Conv2DOpMaker::Make() { .SetDefault(4096); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " - "convolution, whether enable exhaustive search ", + "convolution, whether enable exhaustive search " "for cuDNN convolution or not, defalut is False.") .SetDefault(false); AddComment(R"DOC( @@ -341,7 +358,7 @@ void Conv3DOpMaker::Make() { .SetDefault(4096); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " - "convolution, whether enable exhaustive search ", + "convolution, whether enable exhaustive search " "for cuDNN convolution or not, defalut is False.") .SetDefault(false); AddComment(R"DOC( @@ -410,9 +427,25 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( } #endif - return framework::OpKernelType(ctx.Input("Input")->type(), - ctx.GetPlace(), layout_, library_, - customized_type_value); + auto type = framework::OpKernelType(ctx.Input("Input")->type(), + ctx.GetPlace(), layout_, library_, + customized_type_value); +#ifdef PADDLE_WITH_CUDA + if (library_ == framework::LibraryType::kCUDNN) { + std::vector& configs = kernel_configs_map_[type]; + if (configs.empty()) { + std::shared_ptr> + p(new framework::AlgorithmsCache()); + configs.push_back(p); + + std::shared_ptr< + framework::AlgorithmsCache> + p2(new framework::AlgorithmsCache()); + configs.push_back(p2); + } + } +#endif + return type; } class Conv2dGradMaker : public framework::SingleGradOpDescMaker { diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index 86a140f15219001126283aa8b3f76d72fddb28fc..c994c6f642d286d9b52ada667058b064ff242ce6 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -127,6 +127,12 @@ void Conv2DTransposeOpMaker::Make() { "output feature channels," "H is the height of the filter, and W is the width of the filter. " "We enforce groups number == 1 in the convolution transpose scenario."); + AddInput("Bias", + "(Tensor) Bias to be added to each output of filter application." + "The format of output tensor is X (one-dimensional) of size equal" + "to the number of output channels. Only used with MKL-DNN.") + .AsDispensable(); + AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. " "The format of output tensor is also NCHW."); diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index 81c9e9e543191d9b2d606217d726cc783be97fea..e053ae57739d3d96209e9ca180cc041f8b55396e 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -84,12 +84,12 @@ class CRFDecodingOp : public framework::OperatorWithKernel { "Output(ViterbiPath) should be not null."); auto emission_dims = ctx->GetInputDim("Emission"); - PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(emission_dims.size(), 2, "The Input(Emission) should be a 2-D tensor."); PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed."); auto transition_dims = ctx->GetInputDim("Transition"); - PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(transition_dims.size(), 2, "The Input(Transition) should be a 2-D tensor."); PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 1968e54b00601139e252f0480ca3ae1fc08904f4..3adc7baebddd06ced74afea1e77017beb57582e8 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -32,14 +32,23 @@ class CrossEntropyOp : public framework::OperatorWithKernel { int rank = x_dims.size(); PADDLE_ENFORCE_EQ(rank, label_dims.size(), "Input(X) and Input(Label) shall have the same rank."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + } if (ctx->Attrs().Get("soft_label")) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "If Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); + if (check) { + PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], + "If Attr(soft_label) == true, the last dimension of " + "Input(X) and Input(Label) should be equal."); + } } else { PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, "If Attr(softLabel) == false, the last dimension of " @@ -82,20 +91,32 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { "Input(Y@Grad) and Input(X) should have the same rank."); PADDLE_ENFORCE_EQ(label_dims.size(), rank, "Input(Label) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "The Input(X) and Input(Label) should have the same " + "shape except the last dimension."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(dy_dims, 0, rank - 1), + "The Input(X) and Input(Y@Grad) should have the same " + "shape except the last dimension."); + } PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, "The last dimension of Input(Y@Grad) should be 1."); if (ctx->Attrs().Get("soft_label")) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); + if (check) { + PADDLE_ENFORCE_EQ( + x_dims[rank - 1], label_dims[rank - 1], + "When Attr(soft_label) == true, the last dimension of " + "Input(X) and Input(Label) should be equal."); + } } else { PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, "When Attr(soft_label) == false, the last dimension of " diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index d5bc25d19cba4de6f059612e3e8c4a65b2edd0f9..45bce6e5203f8c1dbb744e0f954f7f0a71c53372 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -140,9 +140,6 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker { "Scales of the history data batch, " "will apply to output when training") .AsIntermediate(); - AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel") - .SetDefault(false); AddComment(R"DOC( Data Normalization. diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index f6fbe97565c43c306ea885c765c0a665492fa317..933a28f3f909ad48dc5a62b9b04beccbe60e7d6b 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -33,6 +33,7 @@ detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) +detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cc b/paddle/fluid/operators/detection/anchor_generator_op.cc index f2984d1af2f26d901bc30ecfd519d5268a60278a..4a333b559f82e6d39d2d4345c8ad58bc8d430c69 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.cc +++ b/paddle/fluid/operators/detection/anchor_generator_op.cc @@ -85,7 +85,7 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { " For instance, the anchor size of 64 means the area of this anchor " "equals to 64**2.") .AddCustomChecker([](const std::vector& anchor_sizes) { - PADDLE_ENFORCE_GT(anchor_sizes.size(), 0, + PADDLE_ENFORCE_GT(anchor_sizes.size(), 0UL, "Size of anchor_sizes must be at least 1."); for (size_t i = 0; i < anchor_sizes.size(); ++i) { PADDLE_ENFORCE_GT(anchor_sizes[i], 0.0, @@ -103,7 +103,7 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { "(vector) List of variances to be used " "in box regression deltas") .AddCustomChecker([](const std::vector& variances) { - PADDLE_ENFORCE_EQ(variances.size(), 4, + PADDLE_ENFORCE_EQ(variances.size(), 4UL, "Must and only provide 4 variance."); for (size_t i = 0; i < variances.size(); ++i) { PADDLE_ENFORCE_GT(variances[i], 0.0, @@ -117,7 +117,7 @@ class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(std::vector(2, 16.0)) .AddCustomChecker([](const std::vector& stride) { PADDLE_ENFORCE_EQ( - stride.size(), 2, + stride.size(), 2UL, "Must and only provide 2 stride for width and height."); for (size_t i = 0; i < stride.size(); ++i) { PADDLE_ENFORCE_GT(stride[i], 0.0, diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..945d575a6446429a0ec34a603356c2c99263a776 --- /dev/null +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc @@ -0,0 +1,169 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; + +class BoxDecoderAndAssignOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE( + ctx->HasInput("PriorBox"), + "Input(PriorBox) of BoxDecoderAndAssignOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("PriorBoxVar"), + "Input(PriorBoxVar) of BoxDecoderAndAssignOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("TargetBox"), + "Input(TargetBox) of BoxDecoderAndAssignOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("BoxScore"), + "Input(BoxScore) of BoxDecoderAndAssignOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("DecodeBox"), + "Output(DecodeBox) of BoxDecoderAndAssignOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("OutputAssignBox"), + "Output(OutputAssignBox) of BoxDecoderAndAssignOp should not be null."); + + auto prior_box_dims = ctx->GetInputDim("PriorBox"); + auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); + auto target_box_dims = ctx->GetInputDim("TargetBox"); + auto box_score_dims = ctx->GetInputDim("BoxScore"); + + PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2, + "The rank of Input of PriorBox must be 2"); + PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]"); + PADDLE_ENFORCE_EQ(prior_box_var_dims.size(), 1, + "The rank of Input of PriorBoxVar must be 1"); + PADDLE_ENFORCE_EQ(prior_box_var_dims[0], 4, + "The shape of PriorBoxVar is [4]"); + PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, + "The rank of Input of TargetBox must be 2"); + PADDLE_ENFORCE_EQ(box_score_dims.size(), 2, + "The rank of Input of BoxScore must be 2"); + PADDLE_ENFORCE_EQ(prior_box_dims[0], target_box_dims[0], + "The first dim of prior_box and target_box is roi nums " + "and should be same!"); + PADDLE_ENFORCE_EQ(prior_box_dims[0], box_score_dims[0], + "The first dim of prior_box and box_score is roi nums " + "and should be same!"); + PADDLE_ENFORCE_EQ(target_box_dims[1], box_score_dims[1] * prior_box_dims[1], + "The shape of target_box is [N, classnum * 4], The shape " + "of box_score is [N, classnum], The shape of prior_box " + "is [N, 4]"); + + ctx->SetOutputDim("DecodeBox", framework::make_ddim({target_box_dims[0], + target_box_dims[1]})); + ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox"); + ctx->SetOutputDim( + "OutputAssignBox", + framework::make_ddim({prior_box_dims[0], prior_box_dims[1]})); + ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox"); + } +}; + +class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput( + "PriorBox", + "(Tensor, default Tensor) " + "Box list PriorBox is a 2-D Tensor with shape [N, 4] which holds N " + "boxes and each box is represented as [xmin, ymin, xmax, ymax], " + "[xmin, ymin] is the left top coordinate of the anchor box, " + "if the input is image feature map, they are close to the origin " + "of the coordinate system. [xmax, ymax] is the right bottom " + "coordinate of the anchor box."); + AddInput("PriorBoxVar", + "(Tensor, default Tensor, optional) " + "PriorBoxVar is a 2-D Tensor with shape [N, 4] which holds N " + "group of variance. PriorBoxVar will set all elements to 1 by " + "default.") + .AsDispensable(); + AddInput("TargetBox", + "(LoDTensor or Tensor) " + "This input can be a 2-D LoDTensor with shape " + "[N, classnum*4]. It holds N targets for N boxes."); + AddInput("BoxScore", + "(LoDTensor or Tensor) " + "This input can be a 2-D LoDTensor with shape " + "[N, classnum], each box is represented as [classnum] which is " + "the classification probabilities."); + AddAttr("box_clip", + "(float, default 4.135, np.log(1000. / 16.)) " + "clip box to prevent overflowing") + .SetDefault(4.135f); + AddOutput("DecodeBox", + "(LoDTensor or Tensor) " + "the output tensor of op with shape [N, classnum * 4] " + "representing the result of N target boxes decoded with " + "M Prior boxes and variances for each class."); + AddOutput("OutputAssignBox", + "(LoDTensor or Tensor) " + "the output tensor of op with shape [N, 4] " + "representing the result of N target boxes decoded with " + "M Prior boxes and variances with the best non-background class " + "by BoxScore."); + AddComment(R"DOC( + +Bounding Box Coder. + +Decode the target bounding box with the prior_box information. + +The Decoding schema is described below: + + $$ + ox = (pw \\times pxv \\times tx + px) - \\frac{tw}{2} + $$ + $$ + oy = (ph \\times pyv \\times ty + py) - \\frac{th}{2} + $$ + $$ + ow = \\exp (pwv \\times tw) \\times pw + \\frac{tw}{2} + $$ + $$ + oh = \\exp (phv \\times th) \\times ph + \\frac{th}{2} + $$ + +where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width +and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the +prior_box's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`, +`phv` denote the variance of the prior_box and `ox`, `oy`, `ow`, `oh` denote the +decoded coordinates, width and height in decode_box. + +decode_box is obtained after box decode, then assigning schema is described below: + +For each prior_box, use the best non-background class's decoded values to +update the prior_box locations and get output_assign_box. So, the shape of +output_assign_box is the same as PriorBox. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(box_decoder_and_assign, ops::BoxDecoderAndAssignOp, + ops::BoxDecoderAndAssignOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + box_decoder_and_assign, + ops::BoxDecoderAndAssignKernel, + ops::BoxDecoderAndAssignKernel); diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..25e6545eb59bde5e080dc907f9ecd4281062413f --- /dev/null +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu @@ -0,0 +1,147 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/detection/box_decoder_and_assign_op.h" +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace operators { + +template +__global__ void DecodeBoxKernel(const T* prior_box_data, + const T* prior_box_var_data, + const T* target_box_data, const int roi_num, + const int class_num, const T box_clip, + T* output_box_data) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < roi_num * class_num) { + int i = idx / class_num; + int j = idx % class_num; + T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1; + T prior_box_height = + prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1; + T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2; + T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2; + + int offset = i * class_num * 4 + j * 4; + T dw = prior_box_var_data[2] * target_box_data[offset + 2]; + T dh = prior_box_var_data[3] * target_box_data[offset + 3]; + if (dw > box_clip) { + dw = box_clip; + } + if (dh > box_clip) { + dh = box_clip; + } + T target_box_center_x = 0, target_box_center_y = 0; + T target_box_width = 0, target_box_height = 0; + target_box_center_x = + prior_box_var_data[0] * target_box_data[offset] * prior_box_width + + prior_box_center_x; + target_box_center_y = + prior_box_var_data[1] * target_box_data[offset + 1] * prior_box_height + + prior_box_center_y; + target_box_width = expf(dw) * prior_box_width; + target_box_height = expf(dh) * prior_box_height; + + output_box_data[offset] = target_box_center_x - target_box_width / 2; + output_box_data[offset + 1] = target_box_center_y - target_box_height / 2; + output_box_data[offset + 2] = + target_box_center_x + target_box_width / 2 - 1; + output_box_data[offset + 3] = + target_box_center_y + target_box_height / 2 - 1; + } +} + +template +__global__ void AssignBoxKernel(const T* prior_box_data, + const T* box_score_data, T* output_box_data, + const int roi_num, const int class_num, + T* output_assign_box_data) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < roi_num) { + int i = idx; + T max_score = -1; + int max_j = -1; + for (int j = 0; j < class_num; ++j) { + T score = box_score_data[i * class_num + j]; + if (score > max_score && j > 0) { + max_score = score; + max_j = j; + } + } + if (max_j > 0) { + for (int pno = 0; pno < 4; pno++) { + output_assign_box_data[i * 4 + pno] = + output_box_data[i * class_num * 4 + max_j * 4 + pno]; + } + } else { + for (int pno = 0; pno < 4; pno++) { + output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno]; + } + } + } +} + +template +class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), + "This kernel only runs on GPU device."); + auto* prior_box = context.Input("PriorBox"); + auto* prior_box_var = context.Input("PriorBoxVar"); + auto* target_box = context.Input("TargetBox"); + auto* box_score = context.Input("BoxScore"); + auto* output_box = context.Output("DecodeBox"); + auto* output_assign_box = + context.Output("OutputAssignBox"); + + auto roi_num = target_box->dims()[0]; + auto class_num = box_score->dims()[1]; + auto* target_box_data = target_box->data(); + auto* prior_box_data = prior_box->data(); + auto* prior_box_var_data = prior_box_var->data(); + auto* box_score_data = box_score->data(); + output_box->mutable_data({roi_num, class_num * 4}, context.GetPlace()); + output_assign_box->mutable_data({roi_num, 4}, context.GetPlace()); + T* output_box_data = output_box->data(); + T* output_assign_box_data = output_assign_box->data(); + + int block = 512; + int grid = (roi_num * class_num + block - 1) / block; + auto& device_ctx = context.cuda_device_context(); + + const T box_clip = context.Attr("box_clip"); + + DecodeBoxKernel<<>>( + prior_box_data, prior_box_var_data, target_box_data, roi_num, class_num, + box_clip, output_box_data); + + context.device_context().Wait(); + int assign_grid = (roi_num + block - 1) / block; + AssignBoxKernel<<>>( + prior_box_data, box_score_data, output_box_data, roi_num, class_num, + output_assign_box_data); + context.device_context().Wait(); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + box_decoder_and_assign, + ops::BoxDecoderAndAssignCUDAKernel, + ops::BoxDecoderAndAssignCUDAKernel); diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h new file mode 100644 index 0000000000000000000000000000000000000000..e66a8351f4761fc805dbd2e44f237c751642d816 --- /dev/null +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class BoxDecoderAndAssignKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* prior_box = context.Input("PriorBox"); + auto* prior_box_var = context.Input("PriorBoxVar"); + auto* target_box = context.Input("TargetBox"); + auto* box_score = context.Input("BoxScore"); + auto* output_box = context.Output("DecodeBox"); + auto* output_assign_box = + context.Output("OutputAssignBox"); + int roi_num = target_box->dims()[0]; + int class_num = box_score->dims()[1]; + auto* target_box_data = target_box->data(); + auto* prior_box_data = prior_box->data(); + auto* prior_box_var_data = prior_box_var->data(); + auto* box_score_data = box_score->data(); + output_box->mutable_data({roi_num, class_num * 4}, context.GetPlace()); + output_assign_box->mutable_data({roi_num, 4}, context.GetPlace()); + T* output_box_data = output_box->data(); + T* output_assign_box_data = output_assign_box->data(); + const T bbox_clip = context.Attr("box_clip"); + + for (int i = 0; i < roi_num; ++i) { + T prior_box_width = prior_box_data[i * 4 + 2] - prior_box_data[i * 4] + 1; + T prior_box_height = + prior_box_data[i * 4 + 3] - prior_box_data[i * 4 + 1] + 1; + T prior_box_center_x = prior_box_data[i * 4] + prior_box_width / 2; + T prior_box_center_y = prior_box_data[i * 4 + 1] + prior_box_height / 2; + for (int j = 0; j < class_num; ++j) { + int64_t offset = i * class_num * 4 + j * 4; + T dw = std::min(prior_box_var_data[2] * target_box_data[offset + 2], + bbox_clip); + T dh = std::min(prior_box_var_data[3] * target_box_data[offset + 3], + bbox_clip); + T target_box_center_x = 0, target_box_center_y = 0; + T target_box_width = 0, target_box_height = 0; + target_box_center_x = + prior_box_var_data[0] * target_box_data[offset] * prior_box_width + + prior_box_center_x; + target_box_center_y = prior_box_var_data[1] * + target_box_data[offset + 1] * + prior_box_height + + prior_box_center_y; + target_box_width = std::exp(dw) * prior_box_width; + target_box_height = std::exp(dh) * prior_box_height; + + output_box_data[offset] = target_box_center_x - target_box_width / 2; + output_box_data[offset + 1] = + target_box_center_y - target_box_height / 2; + output_box_data[offset + 2] = + target_box_center_x + target_box_width / 2 - 1; + output_box_data[offset + 3] = + target_box_center_y + target_box_height / 2 - 1; + } + + T max_score = -1; + int max_j = -1; + for (int j = 0; j < class_num; ++j) { + T score = box_score_data[i * class_num + j]; + if (score > max_score && j > 0) { + max_score = score; + max_j = j; + } + } + + if (max_j > 0) { + for (int pno = 0; pno < 4; pno++) { + output_assign_box_data[i * 4 + pno] = + output_box_data[i * class_num * 4 + max_j * 4 + pno]; + } + } else { + for (int pno = 0; pno < 4; pno++) { + output_assign_box_data[i * 4 + pno] = prior_box_data[i * 4 + pno]; + } + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h index 3591681fc3f6951dfc8d73e8edce38180b771eaf..42137215e21af1a529563ecc995a54d610120beb 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.h +++ b/paddle/fluid/operators/detection/density_prior_box_op.h @@ -72,7 +72,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel { #ifdef PADDLE_WITH_MKLML #pragma omp parallel for #endif - for (int i = 0; i < fixed_ratios.size(); i++) { + for (size_t i = 0; i < fixed_ratios.size(); i++) { sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i])); } @@ -115,11 +115,10 @@ class DensityPriorBoxOpKernel : public framework::OpKernel { } } if (clip) { - platform::Transform trans; - ClipFunctor clip_func; - trans(ctx.template device_context(), - boxes->data(), boxes->data() + boxes->numel(), - boxes->data(), clip_func); + T* dt = boxes->data(); + std::transform(dt, dt + boxes->numel(), dt, [](T v) -> T { + return std::min(std::max(v, 0.), 1.); + }); } framework::Tensor var_t; var_t.mutable_data( @@ -141,7 +140,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel { #pragma omp parallel for collapse(2) #endif for (int i = 0; i < box_num; ++i) { - for (int j = 0; j < variances.size(); ++j) { + for (size_t j = 0; j < variances.size(); ++j) { e_vars(i, j) = variances[j]; } } diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index 4e226abbb51c271502f0ca5419d488643b5a1a82..d3e26256b50f2d7010fee3738802d59173678b34 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -46,13 +46,6 @@ inline void ExpandAspectRatios(const std::vector& input_aspect_ratior, } } -template -struct ClipFunctor { - HOSTDEVICE inline T operator()(T in) const { - return std::min(std::max(in, 0.), 1.); - } -}; - template class PriorBoxOpKernel : public framework::OpKernel { public: @@ -101,31 +94,30 @@ class PriorBoxOpKernel : public framework::OpKernel { boxes->mutable_data(ctx.GetPlace()); vars->mutable_data(ctx.GetPlace()); - auto e_boxes = framework::EigenTensor::From(*boxes); + T* b_t = boxes->data(); for (int h = 0; h < feature_height; ++h) { for (int w = 0; w < feature_width; ++w) { T center_x = (w + offset) * step_width; T center_y = (h + offset) * step_height; T box_width, box_height; - int idx = 0; for (size_t s = 0; s < min_sizes.size(); ++s) { auto min_size = min_sizes[s]; if (min_max_aspect_ratios_order) { box_width = box_height = min_size / 2.; - e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; - e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; - e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; - e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; - idx++; + b_t[0] = (center_x - box_width) / img_width; + b_t[1] = (center_y - box_height) / img_height; + b_t[2] = (center_x + box_width) / img_width; + b_t[3] = (center_y + box_height) / img_height; + b_t += 4; if (max_sizes.size() > 0) { auto max_size = max_sizes[s]; // square prior with size sqrt(minSize * maxSize) box_width = box_height = sqrt(min_size * max_size) / 2.; - e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; - e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; - e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; - e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; - idx++; + b_t[0] = (center_x - box_width) / img_width; + b_t[1] = (center_y - box_height) / img_height; + b_t[2] = (center_x + box_width) / img_width; + b_t[3] = (center_y + box_height) / img_height; + b_t += 4; } // priors with different aspect ratios for (size_t r = 0; r < aspect_ratios.size(); ++r) { @@ -135,11 +127,11 @@ class PriorBoxOpKernel : public framework::OpKernel { } box_width = min_size * sqrt(ar) / 2.; box_height = min_size / sqrt(ar) / 2.; - e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; - e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; - e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; - e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; - idx++; + b_t[0] = (center_x - box_width) / img_width; + b_t[1] = (center_y - box_height) / img_height; + b_t[2] = (center_x + box_width) / img_width; + b_t[3] = (center_y + box_height) / img_height; + b_t += 4; } } else { // priors with different aspect ratios @@ -147,21 +139,21 @@ class PriorBoxOpKernel : public framework::OpKernel { float ar = aspect_ratios[r]; box_width = min_size * sqrt(ar) / 2.; box_height = min_size / sqrt(ar) / 2.; - e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; - e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; - e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; - e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; - idx++; + b_t[0] = (center_x - box_width) / img_width; + b_t[1] = (center_y - box_height) / img_height; + b_t[2] = (center_x + box_width) / img_width; + b_t[3] = (center_y + box_height) / img_height; + b_t += 4; } if (max_sizes.size() > 0) { auto max_size = max_sizes[s]; // square prior with size sqrt(minSize * maxSize) box_width = box_height = sqrt(min_size * max_size) / 2.; - e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; - e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; - e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; - e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; - idx++; + b_t[0] = (center_x - box_width) / img_width; + b_t[1] = (center_y - box_height) / img_height; + b_t[2] = (center_x + box_width) / img_width; + b_t[3] = (center_y + box_height) / img_height; + b_t += 4; } } } @@ -169,11 +161,10 @@ class PriorBoxOpKernel : public framework::OpKernel { } if (clip) { - platform::Transform trans; - ClipFunctor clip_func; - trans(ctx.template device_context(), - boxes->data(), boxes->data() + boxes->numel(), - boxes->data(), clip_func); + T* dt = boxes->data(); + std::transform(dt, dt + boxes->numel(), dt, [](T v) -> T { + return std::min(std::max(v, 0.), 1.); + }); } framework::Tensor var_t; @@ -181,6 +172,10 @@ class PriorBoxOpKernel : public framework::OpKernel { framework::make_ddim({1, static_cast(variances.size())}), ctx.GetPlace()); auto var_et = framework::EigenTensor::From(var_t); + +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for +#endif for (size_t i = 0; i < variances.size(); ++i) { var_et(0, i) = variances[i]; } @@ -190,8 +185,15 @@ class PriorBoxOpKernel : public framework::OpKernel { vars->Resize({box_num, static_cast(variances.size())}); auto e_vars = framework::EigenMatrix::From(*vars); - e_vars = var_et.broadcast(Eigen::DSizes(box_num, 1)); +#ifdef PADDLE_WITH_MKLML +#pragma omp parallel for collapse(2) +#endif + for (int i = 0; i < box_num; ++i) { + for (int j = 0; j < variances.size(); ++j) { + e_vars(i, j) = variances[j]; + } + } vars->Resize(var_dim); } }; // namespace operators diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index 2a69ad4b53c26f5e2e0547e75e0d9c6518a8bcba..ab01bdf7ca8c5a369bd8838b1acc734364666992 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -144,34 +144,40 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { "The ignore threshold to ignore confidence loss.") .SetDefault(0.7); AddComment(R"DOC( - This operator generate yolov3 loss by given predict result and ground + This operator generates yolov3 loss based on given predict result and ground truth boxes. The output of previous network is in shape [N, C, H, W], while H and W - should be the same, specify the grid size, each grid point predict given - number boxes, this given number is specified by anchors, it should be - half anchors length, which following will be represented as S. In the - second dimention(the channel dimention), C should be S * (class_num + 5), - class_num is the box categoriy number of source dataset(such as coco), - so in the second dimention, stores 4 box location coordinates x, y, w, h - and confidence score of the box and class one-hot key of each anchor box. + should be the same, H and W specify the grid size, each grid point predict + given number boxes, this given number, which following will be represented as S, + is specified by the number of anchors, In the second dimension(the channel + dimension), C should be equal to S * (class_num + 5), class_num is the object + category number of source dataset(such as 80 in coco dataset), so in the + second(channel) dimension, apart from 4 box location coordinates x, y, w, h, + also includes confidence score of the box and class one-hot key of each anchor box. - While the 4 location coordinates if $$tx, ty, tw, th$$, the box predictions - correspnd to: + Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box predictions + should be as follows: $$ - b_x = \sigma(t_x) + c_x - b_y = \sigma(t_y) + c_y + b_x = \\sigma(t_x) + c_x + $$ + $$ + b_y = \\sigma(t_y) + c_y + $$ + $$ b_w = p_w e^{t_w} + $$ + $$ b_h = p_h e^{t_h} $$ - While $$c_x, c_y$$ is the left top corner of current grid and $$p_w, p_h$$ - is specified by anchors. + In the equation above, :math:`c_x, c_y` is the left top corner of current grid + and :math:`p_w, p_h` is specified by anchors. As for confidence score, it is the logistic regression value of IoU between anchor boxes and ground truth boxes, the score of the anchor box which has - the max IoU should be 1, and if the anchor box has IoU bigger then ignore + the max IoU should be 1, and if the anchor box has IoU bigger than ignore thresh, the confidence score loss of this anchor box will be ignored. Therefore, the yolov3 loss consist of three major parts, box location loss, @@ -186,13 +192,13 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { In order to trade off box coordinate losses between big boxes and small boxes, box coordinate losses will be mutiplied by scale weight, which is - calculated as follow. + calculated as follows. $$ weight_{box} = 2.0 - t_w * t_h $$ - Final loss will be represented as follow. + Final loss will be represented as follows. $$ loss = (loss_{xy} + loss_{wh}) * weight_{box} diff --git a/paddle/fluid/operators/distributed/brpc/brpc_client.cc b/paddle/fluid/operators/distributed/brpc/brpc_client.cc index b8e63f42e2040730ac79c57651d86d9e3176fa01..a1a3443348129b5cdf057592fced8fdff238ac09 100644 --- a/paddle/fluid/operators/distributed/brpc/brpc_client.cc +++ b/paddle/fluid/operators/distributed/brpc/brpc_client.cc @@ -80,7 +80,7 @@ VarHandlePtr BRPCClient::AsyncSendVar(const std::string& ep, google::protobuf::Closure* done = brpc::NewCallback( &HandleSendResponse, cntl, response, var_h, ch_ptr, ch_ctx, this); - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); ch_ctx->stub->SendVariable(cntl, &request, response, done); @@ -184,7 +184,7 @@ VarHandlePtr BRPCClient::_AsyncGetVar(const std::string& ep, google::protobuf::Closure* done = brpc::NewCallback( &HandleGetResponse, cntl, response, var_h, ch_ptr, ch_ctx, this); - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); if (method_name == kGetMonomerRPC) { ch_ctx->stub->GetMonomerVariable(cntl, &req, response, done); @@ -272,7 +272,7 @@ VarHandlePtr BRPCClient::AsyncPrefetchVar(const std::string& ep, &cntl->request_attachment(), out_var_name_val, false, 0, table_name_val); - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); google::protobuf::Closure* done = brpc::NewCallback( &HandleGetResponse, cntl, response, var_h, ch_ptr, ch_ctx, this); @@ -311,7 +311,7 @@ VarHandlePtr BRPCClient::AsyncSendFetchBarrier(const std::string& ep, VarHandlePtr var_h( new VarHandle(ep, method, FETCH_BARRIER_MESSAGE, nullptr, nullptr)); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); google::protobuf::Closure* done = brpc::NewCallback( &HandleFetchBarrierResponse, cntl, response, var_h, ch_ptr, ch_ctx, this); @@ -406,7 +406,7 @@ VarHandlePtr BRPCClient::AsyncSendVarMessage( sendrecv::VoidMessage* response = new sendrecv::VoidMessage(); cntl->set_timeout_ms(time_out); - platform::RecordRPCEvent record_event(method_name, nullptr); + platform::RecordRPCEvent record_event(method_name); VarHandlePtr var_h( new VarHandle(ep, method_name, req.varname(), nullptr, nullptr)); diff --git a/paddle/fluid/operators/distributed/grpc/grpc_client.cc b/paddle/fluid/operators/distributed/grpc/grpc_client.cc index 52310f8d04db6a5df9967c0a5ec9a5e95a24cdab..61e94dae3c7a107e10fa5e5518651014cec078bc 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_client.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_client.cc @@ -89,7 +89,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep, // stub context s->response_call_back_ = nullptr; - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); auto call = s->stub_g_.PrepareUnaryCall( s->context_.get(), "/sendrecv.SendRecvService/SendVariable", req, &cq_); @@ -184,7 +184,7 @@ VarHandlePtr GRPCClient::_AsyncGetVar( // stub context s->response_call_back_ = ProcGetResponse; - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); auto call = s->stub_g_.PrepareUnaryCall(s->context_.get(), rpc_path, buf, &cq_); @@ -235,7 +235,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep, // stub context s->response_call_back_ = ProcGetResponse; - platform::RecordRPCEvent record_event(method, p_ctx); + platform::RecordRPCEvent record_event(method); auto call = s->stub_g_.PrepareUnaryCall( s->context_.get(), "/sendrecv.SendRecvService/PrefetchVariable", req, @@ -265,7 +265,7 @@ VarHandlePtr GRPCClient::AsyncSendBatchBarrier(const std::string& ep, sendrecv::VariableMessage req; req.set_varname(BATCH_BARRIER_MESSAGE); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); @@ -290,7 +290,7 @@ VarHandlePtr GRPCClient::AsyncSendFetchBarrier(const std::string& ep, sendrecv::VariableMessage req; req.set_varname(FETCH_BARRIER_MESSAGE); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); auto rpc = s->stub_->AsyncGetVariable(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); @@ -317,7 +317,7 @@ VarHandlePtr GRPCClient::AsyncGetMonomerBarrier(const std::string& ep, sendrecv::VariableMessage req; req.set_varname(var_name); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); auto rpc = s->stub_->AsyncGetMonomerBarrier(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); @@ -342,7 +342,7 @@ VarHandlePtr GRPCClient::AsyncSendComplete(const std::string& ep, sendrecv::VariableMessage req; req.set_varname(COMPLETE_MESSAGE); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); @@ -372,7 +372,7 @@ VarHandlePtr GRPCClient::AsyncCheckpointNotify(const std::string& ep, req.set_varname(CHECKPOINT_SAVE_MESSAGE); req.set_out_varname(dir); - platform::RecordRPCEvent record_event(method, nullptr); + platform::RecordRPCEvent record_event(method); auto rpc = s->stub_->AsyncCheckpointNotify(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); diff --git a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc index 6df4fd36f95b127a0bbc0725b83c4494b160785f..6e65aa5fae83536d229be63fbaf7874bd45f967d 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc @@ -38,7 +38,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, ::grpc::ByteBuffer* msg, const std::string& out_name, const int trainer_id, const std::string& table_name) { - platform::RecordRPCEvent record_event("serial", &ctx); + platform::RecordRPCEvent record_event("serial"); VarMsg request; TensorPayload* payload = nullptr; @@ -147,7 +147,7 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg, const platform::DeviceContext& ctx, const framework::Scope* scope, framework::Variable** var, int* trainer_id) { - platform::RecordRPCEvent record_event("deserial", &ctx); + platform::RecordRPCEvent record_event("deserial"); operators::distributed::GRPCVariableResponse resp(scope, &ctx); PADDLE_ENFORCE(resp.Parse(msg) == 0, "parse bytebuffer to tensor error!"); *var = resp.GetVar(); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc index 6a6741d8fc54d22addca91b75dfabf5950c1a35a..7aaa607f1585c98fe2dd816e8d66e5c6fd171e80 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc @@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel { } else { functor.RunMidWise(n, pre, post); } - z->set_layout(DataLayout::kMKLDNN); - z->set_format(x->format()); + z->set_mkldnn_prim_desc(x->get_mkldnn_prim_desc()); } else { PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN && x->format() != memory::format::format_undef, @@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel { auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd); // create mkldnn memory for dst - memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data); + auto dst_mem_pd = sum_pd.dst_primitive_desc(); + memory dst_memory = memory(dst_mem_pd, z_data); std::vector inputs; inputs.push_back(srcs[0]); @@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel { pipeline.push_back(sum_prim); stream(stream::kind::eager).submit(pipeline).wait(); - z->set_layout(DataLayout::kMKLDNN); - z->set_format( - (memory::format)dst_memory.get_primitive_desc().desc().data.format); + z->set_mkldnn_prim_desc(dst_mem_pd); } } }; @@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel { auto* out = dout; auto *x = dout, *y = dout; - auto set_mkldnn_format = [](Tensor* in, const Tensor* out) { - in->set_layout(DataLayout::kMKLDNN); - in->set_format(out->format()); - }; - if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) { if (dx->dims() == dy->dims()) { auto blas = math::GetBlas(ctx); if (dx) { blas.VCOPY(dout->numel(), dout->data(), dx->mutable_data(ctx.GetPlace())); - set_mkldnn_format(dx, dout); + dx->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc()); } if (dy) { blas.VCOPY(dout->numel(), dout->data(), dy->mutable_data(ctx.GetPlace())); - set_mkldnn_format(dy, dout); + dy->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc()); } } } else { diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 6aa4c76b9ce7f52f5816ea136e04b32a7d2e8d44..44a2f37b66772425a835c26e94c37b500e8a5d19 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -146,7 +146,11 @@ REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp); REGISTER_OP_CPU_KERNEL( - expand, ops::ExpandKernel); + expand, ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CPU_KERNEL( expand_grad, - ops::ExpandGradKernel); + ops::ExpandGradKernel, + ops::ExpandGradKernel); diff --git a/paddle/fluid/operators/expand_op.cu b/paddle/fluid/operators/expand_op.cu index d95c9b61802b5fe7059e1c95a50776db5aa7ad93..50a506b294db14f0d170c60a0ed760dcf280ad60 100644 --- a/paddle/fluid/operators/expand_op.cu +++ b/paddle/fluid/operators/expand_op.cu @@ -15,7 +15,11 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - expand, ops::ExpandKernel); + expand, ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CUDA_KERNEL( expand_grad, - ops::ExpandGradKernel); + ops::ExpandGradKernel, + ops::ExpandGradKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index 8aff9111412030265491289bbdb03cf688d59ad8..3bb07d383548e6f4be810c96d2a916c0fe5e45f5 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -21,26 +21,17 @@ limitations under the License. */ namespace paddle { namespace operators { -template -using EigenVectorArrayMap = - Eigen::TensorMap>; - -template -using ConstEigenVectorArrayMap = - Eigen::TensorMap>; +template +struct Compare { + public: + bool operator()(const T a, const T b) { return (std::abs(a) < std::abs(b)); } +}; template struct FindAbsMaxFunctor { void operator()(const platform::CPUDeviceContext& ctx, const T* in, const int num, T* out) { - Eigen::DSizes idim(num); - Eigen::DSizes odim(1); - Eigen::TensorMap> in_e(in, idim); - Eigen::TensorMap> out_e(out, odim); - - out_e = in_e.abs().maximum(); + *out = std::abs(*(std::max_element(in + 0, in + num, Compare()))); } }; @@ -55,10 +46,8 @@ struct ClipAndFakeQuantFunctor { platform::Transform trans; trans(ctx, in.data(), in.data() + in.numel(), out->mutable_data(ctx.GetPlace()), ClipFunctor(-s, s)); - auto in_e = framework::EigenVector::Flatten(in); auto out_e = framework::EigenVector::Flatten(*out); - - out_e.device(*ctx.eigen_device()) = (bin_cnt / s * in_e).round(); + out_e.device(*ctx.eigen_device()) = (bin_cnt / s * out_e).round(); } }; diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index 38e57a41ed253eab4d0713af8bb14bac19041f6d..eb4617a9359353820fc41b9ad1c8db5327fdacde 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -47,7 +47,7 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4, "Fully Connected input should be 2-D or 4-D tensor."); } - PADDLE_ENFORCE_EQ(w_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(w_dims.size(), 2, "Fully Connected input should be 2-D tensor."); int in_num_col_dims = ctx->Attrs().Get("in_num_col_dims"); PADDLE_ENFORCE_GT( diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc index fe4c73f4723355d4b56d075423de29b45b9cd4e4..80caf70b08e65932d6ccb90a5293d072b2b2bc72 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc @@ -23,6 +23,9 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { + if (ctx->IsRuntime()) { + return; + } PADDLE_ENFORCE(ctx->HasInput("W"), "Input W of FusedEmbeddingSeqPoolOp should not be null."); PADDLE_ENFORCE(ctx->HasInput("Ids"), @@ -42,36 +45,15 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { // we only support sum now PADDLE_ENFORCE_EQ(combiner, "sum"); - int64_t last_dim = table_dims[1]; - for (int i = 1; i != ids_dims.size(); ++i) { - last_dim *= ids_dims[i]; - } - - if (ctx->IsRuntime()) { - framework::Variable* ids_var = - boost::get(ctx->GetInputVarPtrs("Ids")[0]); - const auto& ids_lod = ids_var->Get().lod(); + int64_t last_dim = FusedEmbeddingSeqPoolLastDim(table_dims, ids_dims); + // in compile time, the lod level of ids must be 1 + framework::VarDesc* ids_desc = + boost::get(ctx->GetInputVarPtrs("Ids")[0]); + PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1); - // in run time, the LoD of ids must be 1 - PADDLE_ENFORCE(ids_lod.size(), 1u, - "The LoD level of Input(Ids) must be 1"); - PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty"); - - int64_t batch_size = ids_lod[0].size() - 1; - - // in run time, the shape from Ids -> output - // should be [seq_length, 1] -> [batch_size, embedding_size] - ctx->SetOutputDim("Out", framework::make_ddim({batch_size, last_dim})); - } else { - // in compile time, the lod level of ids must be 1 - framework::VarDesc* ids_desc = - boost::get(ctx->GetInputVarPtrs("Ids")[0]); - PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1); - - // in compile time, the shape from Ids -> output - // should be [-1, 1] -> [-1, embedding_size] - ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim})); - } + // in compile time, the shape from Ids -> output + // should be [-1, 1] -> [-1, embedding_size] + ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim})); } protected: diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index 758432fd9e4197302e0bd8f76a1ca7c524026a70..f13c02038606e52337b7ef85545e37054e54b631 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/jit/kernels.h" namespace paddle { namespace operators { @@ -37,36 +37,38 @@ struct EmbeddingVSumFunctor { const LoDTensor *table_t, const LoDTensor *ids_t, LoDTensor *output_t) { auto *table = table_t->data(); - int64_t row_number = table_t->dims()[0]; - int64_t row_width = table_t->dims()[1]; - int64_t last_dim = output_t->dims()[1]; + int64_t table_height = table_t->dims()[0]; + int64_t table_width = table_t->dims()[1]; + int64_t out_width = output_t->dims()[1]; const int64_t *ids = ids_t->data(); auto ids_lod = ids_t->lod()[0]; - int64_t ids_count = ids_t->numel() / ids_lod.back(); - + int64_t idx_width = ids_t->numel() / ids_lod.back(); auto *output = output_t->mutable_data(context.GetPlace()); - auto blas = math::GetBlas(context); - for (int64_t i = 0; i != ids_lod.size() - 1; ++i) { - size_t begin = ids_lod[i] * ids_count; - for (int64_t j = 0; j != ids_count; ++j) { - PADDLE_ENFORCE_LT(ids[begin], row_number); - PADDLE_ENFORCE_GE(ids[begin], 0, "ids %d", i); - blas.VCOPY(row_width, table + ids[begin + j] * row_width, - output + i * last_dim + j * row_width); - } - - for (int64_t r = (ids_lod[i] + 1) * ids_count; - r < ids_lod[i + 1] * ids_count; ++r) { - PADDLE_ENFORCE_LT(ids[r], row_number); - PADDLE_ENFORCE_GE(ids[r], 0, "ids %d", i); - blas.AXPY(row_width, 1., table + ids[r] * row_width, - output + i * last_dim + (r % ids_count) * row_width); - } + PADDLE_ENFORCE_LE(table_width * idx_width, out_width); + PADDLE_ENFORCE_GT(ids_lod.size(), 1UL, "The LoD[0] could NOT be empty"); + + jit::emb_seq_pool_attr_t attr(table_height, table_width, 0, idx_width, + out_width, jit::SeqPoolType::kSum); + for (size_t i = 0; i != ids_lod.size() - 1; ++i) { + attr.index_height = ids_lod[i + 1] - ids_lod[i]; + auto emb_seqpool = jit::Get, + platform::CPUPlace>(attr); + emb_seqpool(table, ids + ids_lod[i] * idx_width, output + i * out_width, + &attr); } } }; +inline int FusedEmbeddingSeqPoolLastDim(const framework::DDim &table_dims, + const framework::DDim &ids_dims) { + int64_t last_dim = table_dims[1]; + for (int i = 1; i != ids_dims.size(); ++i) { + last_dim *= ids_dims[i]; + } + return last_dim; +} + template class FusedEmbeddingSeqPoolKernel : public framework::OpKernel { public: @@ -76,6 +78,17 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel { const LoDTensor *table_var = context.Input("W"); const std::string &combiner_type = context.Attr("combiner"); + int64_t last_dim = + FusedEmbeddingSeqPoolLastDim(table_var->dims(), ids_t->dims()); + const auto &ids_lod = ids_t->lod(); + // in run time, the LoD of ids must be 1 + PADDLE_ENFORCE(ids_lod.size(), 1UL, + "The LoD level of Input(Ids) must be 1"); + int64_t batch_size = ids_lod[0].size() - 1; + // in run time, the shape from Ids -> output + // should be [seq_length, 1] -> [batch_size, last_dim] + output_t->Resize({batch_size, last_dim}); + if (combiner_type == "sum") { EmbeddingVSumFunctor functor; functor(context, table_var, ids_t, output_t); @@ -111,7 +124,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { auto *ids_data = ids->data(); int64_t ids_num = ids->numel(); auto lod = ids->lod()[0]; - int64_t row_width = d_output->dims()[1]; + int64_t out_width = d_output->dims()[1]; framework::Vector *new_rows = d_table->mutable_rows(); new_rows->resize(ids_num); @@ -122,15 +135,13 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { T *d_table_data = d_table_value->mutable_data(context.GetPlace()); const T *d_output_data = d_output->data(); - auto blas = math::GetBlas(context); + auto vbroadcast = jit::Get, + platform::CPUPlace>(out_width); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { int64_t h = static_cast(lod[i + 1] - lod[i]); - int64_t in_offset = lod[i] * row_width; - const T *out_pos = d_output_data + i * row_width; - T *in_pos = d_table_data + in_offset; - for (int r = 0; r != h; ++r) { - blas.VCOPY(row_width, out_pos, in_pos + r * row_width); - } + const T *src = d_output_data + i * out_width; + T *dst = d_table_data + lod[i] * out_width; + vbroadcast(src, dst, h, out_width); } } else { LOG(ERROR) << "Dense is not supported in fused_embedding_seq_pool_op now"; diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc index e9e2a3b1f5c1c00bb2e95b6171ecd09bfe7a0d21..8ecdf2ed9d40e7f5dc9226c635a8c8e6406a76ba 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc @@ -37,7 +37,7 @@ void FusionRepeatedFCReluOp::InferShape( "Output(Out) of FusionRepeatedFCReluOp should not be null."); auto i_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(i_dims.size(), 2UL, "Input shape size should be 2"); + PADDLE_ENFORCE_EQ(i_dims.size(), 2, "Input shape size should be 2"); auto w_dims = ctx->GetInputsDim("W"); auto b_dims = ctx->GetInputsDim("Bias"); @@ -49,7 +49,7 @@ void FusionRepeatedFCReluOp::InferShape( "inpute width should be equal with weight height"); for (size_t i = 1; i < sz; ++i) { - PADDLE_ENFORCE_EQ(w_dims[i].size(), 2UL, + PADDLE_ENFORCE_EQ(w_dims[i].size(), 2, "Every weight shape size should be 2."); PADDLE_ENFORCE_EQ(framework::product(b_dims[i]), w_dims[i][1], "The length of Bias must be equal with w_dims[1]."); diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc index aaef46de0d3b88720a762abb000e42d560fbd8cf..d091da5aa8a7e7ec30798d68021bfd2b9b87b32f 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc @@ -39,7 +39,7 @@ void FusionSeqExpandConcatFCOp::InferShape( auto ins_dims = ctx->GetInputsDim("X"); auto w_dims = ctx->GetInputDim("FCWeight"); // (M0+M1+M2+..) x D - PADDLE_ENFORCE_EQ(w_dims.size(), 2UL, "Input(FCWeight)'s rank must be 2."); + PADDLE_ENFORCE_EQ(w_dims.size(), 2, "Input(FCWeight)'s rank must be 2."); const int D = w_dims[1]; int sum = ins_dims[0][1]; for (size_t i = 1; i < ins_dims.size(); ++i) { diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc index b181140db750a8d1b74c0b6cc93259a208fe5b06..d48bdafe0aa38cb860b54b2e41ebad3421b93bce 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc @@ -39,7 +39,7 @@ void FusionSeqPoolConcatOp::InferShape( // The output height should be confirmed in Compute, // since input lod is not accessible here. - PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2UL, + PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, "The dims size of first input should be 2."); ctx->SetOutputDim("Out", {-1, ins_dims[0][axis] * static_cast(n)}); } diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc index 8c8b079633aacb711aa304ec7016c37c6bec61ce..8493f4468fc994964116d99dc85dd34fb19a44cc 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc @@ -42,7 +42,7 @@ void FusionSquaredMatSubOp::InferShape( auto y_dims = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(), "Input tensors dims size should be equal."); - PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "Input tensors should be a Matrix."); + PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input tensors should be a Matrix."); PADDLE_ENFORCE_EQ(x_dims[1], y_dims[0], "Inputs Matrix should be multiply."); ctx->SetOutputDim("SquaredX", x_dims); diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index e18d9841bb87c6a684d53e1bceb6c20a37dcfcfa..cbdffa0db8277dbf7257c3b3c1d03c1b459d5b2b 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -170,13 +170,48 @@ class GroupNormGradMaker : public framework::SingleGradOpDescMaker { } }; +class GroupNormInplaceInToOut : public framework::InplaceInToOut { + public: + using InplaceInToOut::InplaceInToOut; + + protected: + std::unordered_map Apply( + const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + return {{"X", "Y"}}; + } +}; + +class GroupNormGradInplaceInToOut : public framework::InplaceInToOut { + public: + using InplaceInToOut::InplaceInToOut; + + protected: + std::unordered_map Apply( + const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + return {{framework::GradVarName("Y"), framework::GradVarName("X")}}; + } +}; + +class GroupNormOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return {{"X", /*->*/ "Y"}}; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(group_norm, ops::GroupNormOp, ops::GroupNormOpMaker, - ops::GroupNormGradMaker); -REGISTER_OPERATOR(group_norm_grad, ops::GroupNormGradOp); + ops::GroupNormOpInferVarType, ops::GroupNormGradMaker, + ops::GroupNormInplaceInToOut); +REGISTER_OPERATOR(group_norm_grad, ops::GroupNormGradOp, + ops::GroupNormGradInplaceInToOut); REGISTER_OP_CPU_KERNEL( group_norm, ops::GroupNormKernel, ops::GroupNormKernel); diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index b2c2c7954b79658e66f1524a81bcad0b7bf22c35..7a29f80ff1ce413519ea9cea6a35747bdced5885 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/operators/hash_op.h" #include -#include namespace paddle { namespace operators { @@ -27,6 +26,9 @@ class HashOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext *ctx) const override { + if (ctx->IsRuntime()) { + return; + } PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of HashOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), @@ -36,15 +38,8 @@ class HashOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(dims.size(), 2UL, "The input of hash_op's dimensions must be 2"); std::vector out_dims; - out_dims.reserve(dims.size() + 1); - // copy all dims except the last one - for (int i = 0u; i != dims.size() - 1; ++i) { - out_dims.emplace_back(dims[i]); - } int num_hash = ctx->Attrs().Get("num_hash"); - out_dims.emplace_back(num_hash); - // keep the last dim to 1 - out_dims.emplace_back(1); + HashOutputSize(dims, out_dims, num_hash); ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); ctx->ShareLoD("X", /*->*/ "Out"); @@ -71,4 +66,4 @@ $$Out = scale * X$$ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker); -REGISTER_OP_CPU_KERNEL(hash, ops::HashKerel, ops::HashKerel); +REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel, ops::HashKernel); diff --git a/paddle/fluid/operators/hash_op.h b/paddle/fluid/operators/hash_op.h index 9781bb0f453642cefb3eb59a05389c339a7de39d..9e7ad5235ff483a2fc0cfbb8bc35c620084bb896 100644 --- a/paddle/fluid/operators/hash_op.h +++ b/paddle/fluid/operators/hash_op.h @@ -17,21 +17,34 @@ limitations under the License. */ extern "C" { #include } +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { namespace operators { -// template + +inline void HashOutputSize(const framework::DDim& in_dims, + std::vector& out_dims, // NOLINT + int num_hash) { + out_dims.reserve(in_dims.size() + 1); + // copy all dims except the last one + for (int i = 0u; i != in_dims.size() - 1; ++i) { + out_dims.emplace_back(in_dims[i]); + } + out_dims.emplace_back(num_hash); + // keep the last dim to 1 + out_dims.emplace_back(1); +} + template -class HashKerel : public framework::OpKernel { +class HashKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& context) const { auto* out_t = context.Output("Out"); auto* in_t = context.Input("X"); int mod_by = context.Attr("mod_by"); int num_hash = context.Attr("num_hash"); - auto* output = out_t->mutable_data(context.GetPlace()); auto in_dims = in_t->dims(); auto in_lod = in_t->lod(); @@ -39,6 +52,11 @@ class HashKerel : public framework::OpKernel { static_cast(in_dims[0]), in_lod[0].back(), "The actual input data's size mismatched with LoD information."); + std::vector out_dims; + HashOutputSize(in_dims, out_dims, num_hash); + out_t->Resize(framework::make_ddim(out_dims)); + auto* output = out_t->mutable_data(context.GetPlace()); + auto seq_length = in_dims[0]; auto last_dim = in_dims[in_dims.size() - 1]; auto* input = in_t->data(); @@ -49,6 +67,7 @@ class HashKerel : public framework::OpKernel { } input += last_dim; } + out_t->set_lod(in_t->lod()); } }; diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index de91ba6270ac2ed22c8380878c0a0037fb1629c0..10d01af982d01800bdd2d5d59761cfb09e2a8139 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -84,13 +84,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault("bilinear"); AddAttr( "align_corners", - "an optinal bool. Defaults to True. " + "an optional bool. Defaults to True. " "If True, the centers of 4 corner pixels of the input and output " "tensors are aligned, preserving the values at the corner pixels, " - "if Flase, are not aligned") + "If False, are not aligned") .SetDefault(true); AddAttr("align_mode", - "(int, default \'1\'), optional for bilinear interpolation" + "(int, default \'1\'), optional for bilinear interpolation, " "can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , " "can be \'1\' for src_idx = scale*dst_index .") .SetDefault(1); diff --git a/paddle/fluid/operators/is_empty_op.cc b/paddle/fluid/operators/is_empty_op.cc index ba50bdf34baf2b9b0748b24c98c274aa18e22e36..092a6eae6f5b7edcc5656522377de10a08a01ea8 100644 --- a/paddle/fluid/operators/is_empty_op.cc +++ b/paddle/fluid/operators/is_empty_op.cc @@ -34,9 +34,8 @@ class IsEmptyOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - framework::OpKernelType kt = framework::OpKernelType( - ctx.Input("X")->type(), platform::CPUPlace()); - return kt; + auto *x = ctx.Input("X"); + return framework::OpKernelType(x->type(), x->place()); } }; @@ -58,7 +57,6 @@ It will just return product(tensor.ddims()) > 0; } // namespace paddle namespace ops = paddle::operators; - REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/is_empty_op.cu.cc b/paddle/fluid/operators/is_empty_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..3c256503baf6ba3bc8f8dff866a2ce9c57ec5bf1 --- /dev/null +++ b/paddle/fluid/operators/is_empty_op.cu.cc @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/is_empty_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + is_empty, ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel); diff --git a/paddle/fluid/operators/is_empty_op.h b/paddle/fluid/operators/is_empty_op.h index 3e3af22fa8d842b6a1e67418446f1a40949e046b..4f6419eb577709836275481cf617c07ea6c7f4c0 100644 --- a/paddle/fluid/operators/is_empty_op.h +++ b/paddle/fluid/operators/is_empty_op.h @@ -28,6 +28,9 @@ class IsEmptyOpKernel : public framework::OpKernel { // get output auto* output_tensor = context.Output("Out"); + // Note: is_empty is always executed on CPU and the output data should + // always be allocated for CPUPlace. We reigister CUDA kernel for this op to + // avoid the unnecessary data transform. output_tensor->mutable_data(platform::CPUPlace())[0] = framework::product(input_tensor->dims()) == 0; } diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 97ddf223aefcdfaf8a488f93a152336c1ed458f4..3088280bb90174e6195a349c07a3435e131e2b33 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -301,6 +301,76 @@ void BenchSeqPoolKernel() { } } +template +void BenchEmbSeqPoolKernel() { + std::vector pool_types = {jit::SeqPoolType::kSum}; + int64_t tbl_h = 1e4; + for (int tbl_w : {10, 16, 256}) { + Tensor table; + table.Resize({tbl_h, tbl_w}); + RandomVec(tbl_h * tbl_w, table.mutable_data(PlaceType()), -2.f, 2.f); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 9, 13, 16}) { + int64_t out_w = tbl_w * idx_w; + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + Tensor idx, out; + idx.Resize({idx_h, idx_w}); + out.Resize({out_w}); + RandomVec(idx_h * idx_w, + idx.mutable_data(PlaceType()), 0, + tbl_h - 1); + const int64_t* idx_data = idx.data(); + T* o_data = out.mutable_data(PlaceType()); + BenchAllImpls, PlaceType>( + attr, table_data, idx_data, o_data, &attr); + } + } + } + } +} + +template +void BenchSgdKernel() { + const T lr = 0.1; + auto UnDuplicatedRandomVec = [](int n, const int64_t lower, + const int64_t upper) -> std::vector { + PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1); + PADDLE_ENFORCE_GT(n, 0); + std::vector all, out; + for (int i = 0; i < n; ++i) { + all.push_back(i); + } + std::random_shuffle(all.begin(), all.end()); + out.insert(out.begin(), all.begin(), all.begin() + n); + return out; + }; + for (int param_h : {1, 1000}) { + for (int grad_w : {1, 2, 8, 16, 30, 256}) { + // only benchmark inplace + Tensor param; + param.Resize({param_h, grad_w}); + T* param_data = param.mutable_data(PlaceType()); + RandomVec(param_h * grad_w, param_data, -2.f, 2.f); + for (int rows_size = 1; rows_size <= std::min(param_h, 10); ++rows_size) { + Tensor grad; + grad.Resize({rows_size, grad_w}); + std::vector rows = + UnDuplicatedRandomVec(rows_size, 0, rows_size - 1); + RandomVec(rows_size * grad_w, grad.mutable_data(PlaceType()), + -2.f, 2.f); + const T* grad_data = grad.data(); + const int64_t* rows_data = rows.data(); + jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size); + BenchAllImpls, PlaceType>( + attr, &lr, param_data, grad_data, rows_data, param_data, &attr); + } + } + } +} + template void BenchMatMulKernel() { for (int m : {1, 2, 3, 4}) { @@ -339,6 +409,88 @@ void BenchSoftmaxKernel() { } } +template +void BenchLayerNormKernel() { + const T epsilon = 9.99999975e-06; + for (int n : {1, 2, 10}) { + for (int x_dim_0 : {1, 9, 17, 50}) { + int left = n * x_dim_0; + for (int x_dim_1 : TestSizes()) { + int right = x_dim_1; + int sz = left * right; + Tensor x, mean, var, scale, bias, out; + x.Resize({n, x_dim_0, x_dim_1}); + out.Resize({n, x_dim_0, x_dim_1}); + mean.Resize({n, x_dim_0}); + var.Resize({n, x_dim_0}); + scale.Resize({x_dim_1}); + bias.Resize({x_dim_1}); + + RandomVec(sz, x.mutable_data(PlaceType()), -2.f, 2.f); + RandomVec(left, mean.mutable_data(PlaceType()), -2.f, 2.f); + RandomVec(left, var.mutable_data(PlaceType()), -2.f, 2.f); + RandomVec(right, scale.mutable_data(PlaceType()), -2.f, 2.f); + RandomVec(right, bias.mutable_data(PlaceType()), -2.f, 2.f); + + const T* scale_data = scale.data(); + const T* bias_data = bias.data(); + T* x_data = x.data(); + T* mean_data = mean.data(); + T* var_data = var.data(); + T* out_data = out.mutable_data(PlaceType()); + + BenchAllImpls, PlaceType>( + right, x_data, out_data, mean_data, var_data, scale_data, bias_data, + left, epsilon, right); + } + } + } +} + +template +void BenchCRFDecodingKernel() { + constexpr int state_trans_base_idx = 2; + for (int seq_len : {1, 11, 17, 50}) { + for (int tag_num : TestSizes()) { + int x_sz = seq_len * tag_num; + int w_sz = (tag_num + state_trans_base_idx) * tag_num; + Tensor x, w, alpha, track; + x.Resize({seq_len, tag_num}); + w.Resize({tag_num + state_trans_base_idx, tag_num}); + alpha.Resize({seq_len, tag_num}); + track.Resize({seq_len, tag_num}); + + RandomVec(x_sz, x.mutable_data(PlaceType()), -2.f, 2.f); + RandomVec(w_sz, w.mutable_data(PlaceType()), -2.f, 2.f); + + const T* x_data = x.data(); + const T* w_data = w.data(); + T* alpha_data = alpha.mutable_data(PlaceType()); + int* track_data = track.mutable_data(PlaceType()); + + BenchAllImpls, PlaceType>( + tag_num, seq_len, x_data, w_data, alpha_data, track_data, tag_num); + } + } +} + +template +void BenchVBroadcastKernel() { + for (int64_t w : {1, 16, 64, 100, 256}) { + Tensor x; + x.Resize({w}); + RandomVec(w, x.mutable_data(PlaceType())); + const T* x_data = x.data(); + for (int h : TestSizes()) { + Tensor y; + y.Resize({h * w}); + T* y_data = y.mutable_data(PlaceType()); + BenchAllImpls, PlaceType>( + w, x_data, y_data, static_cast(h), w); + } + } +} + using T = float; using CPUPlace = paddle::platform::CPUPlace; @@ -363,6 +515,7 @@ BENCH_FP32_CPU(kVSquare) { BenchXYNKernel(); } BENCH_FP32_CPU(kVExp) { BenchXYNKernel(); } BENCH_FP32_CPU(kVSigmoid) { BenchXYNKernel(); } BENCH_FP32_CPU(kVTanh) { BenchXYNKernel(); } +BENCH_FP32_CPU(kVCopy) { BenchXYNKernel(); } // lstm and peephole BENCH_FP32_CPU(kLSTMCtHt) { BenchLSTMKernel(); } @@ -376,12 +529,35 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel(); } // seq pool function BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel(); } +// embedding seq pool function +BENCH_FP32_CPU(kEmbSeqPool) { + BenchEmbSeqPoolKernel(); +} + +// sgd function +BENCH_FP32_CPU(kSgd) { BenchSgdKernel(); } + // matmul BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel(); } // softmax BENCH_FP32_CPU(kSoftmax) { BenchSoftmaxKernel(); } +// layernorm +BENCH_FP32_CPU(kLayerNorm) { + BenchLayerNormKernel(); +} + +// crfdecoding +BENCH_FP32_CPU(kCRFDecoding) { + BenchCRFDecodingKernel(); +} + +// vbroadcast function +BENCH_FP32_CPU(kVBroadcast) { + BenchVBroadcastKernel(); +} + // Benchmark all jit kernels including jitcode, mkl and refer. // To use this tool, run command: ./benchmark [options...] // Options: diff --git a/paddle/fluid/operators/jit/gen/CMakeLists.txt b/paddle/fluid/operators/jit/gen/CMakeLists.txt index efc7eb79d36c5cf9fac4ac40db4e2e28cb242e22..99244ea9bd919a018732b75d1ab811e8bf338516 100644 --- a/paddle/fluid/operators/jit/gen/CMakeLists.txt +++ b/paddle/fluid/operators/jit/gen/CMakeLists.txt @@ -31,3 +31,6 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC) USE_JITKERNEL_GEN(kSeqPool) USE_JITKERNEL_GEN(kHMax) USE_JITKERNEL_GEN(kHSum) +USE_JITKERNEL_GEN(kEmbSeqPool) +USE_JITKERNEL_GEN(kSgd) +USE_JITKERNEL_GEN(kVBroadcast) diff --git a/paddle/fluid/operators/jit/gen/act.h b/paddle/fluid/operators/jit/gen/act.h index 1664dfa906bf213b7820bb702ad25413686c4265..13d98577e21db9041686822f57cb4992e5ad71ec 100644 --- a/paddle/fluid/operators/jit/gen/act.h +++ b/paddle/fluid/operators/jit/gen/act.h @@ -268,7 +268,7 @@ class VActJitCode : public VActFunc { this->genCode(); } - const char* name() const override { + std::string name() const override { std::string base = "VActJitCode"; switch (type_) { case operand_type::RELU: @@ -292,7 +292,7 @@ class VActJitCode : public VActFunc { default: break; } - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/blas.h b/paddle/fluid/operators/jit/gen/blas.h index e9911392666eb59aefa8caecb4d819aaf07ec9cd..70312bbe5e97fcf465ce13ef71e5acc9bab4874e 100644 --- a/paddle/fluid/operators/jit/gen/blas.h +++ b/paddle/fluid/operators/jit/gen/blas.h @@ -41,7 +41,7 @@ class VXXJitCode : public JitCode { this->genCode(); } - virtual const char* name() const override { + std::string name() const override { std::string base = "VXXJitCode"; if (scalar_index_ == 1) { base += "_Scalar"; @@ -62,7 +62,7 @@ class VXXJitCode : public JitCode { } base += (with_relu_ ? "_Relu" : ""); base += "_D" + std::to_string(num_); - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/embseqpool.cc b/paddle/fluid/operators/jit/gen/embseqpool.cc new file mode 100644 index 0000000000000000000000000000000000000000..23837a3fb9886ae8a839d4b31bd57916168ea53c --- /dev/null +++ b/paddle/fluid/operators/jit/gen/embseqpool.cc @@ -0,0 +1,149 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/jit/gen/embseqpool.h" +#include // offsetof +#include +#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones +#include "paddle/fluid/operators/jit/registry.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +void EmbSeqPoolJitCode::genCode() { + preCode(); + constexpr int block = YMM_FLOAT_BLOCK; + constexpr int max_num_regs = 8; + const int num_block = tbl_w_ / block; + const int num_groups = num_block / max_num_regs; + const size_t block_size = sizeof(float) * block; + std::vector groups(num_groups, max_num_regs); + int rest_num_regs = num_block % max_num_regs; + if (rest_num_regs > 0) { + groups.push_back(rest_num_regs); + } + + // protect param_dst + mov(reg_ptr_param_dst, param_dst); + mov(reg_idx_width_in_byte, + qword[param_attr + offsetof(emb_seq_pool_attr_t, index_width)]); + mov(reg_idx_height, + qword[param_attr + offsetof(emb_seq_pool_attr_t, index_height)]); + mov(rax, sizeof(int64_t)); + mul(reg_idx_width_in_byte); + mov(reg_idx_width_in_byte, rax); + const size_t tbl_width_in_byte = sizeof(float) * tbl_w_; + int acc_num_regs = 0; + for (int num_regs : groups) { + Label l_next_idx_w, l_next_idx_h, l_save_now; + xor_(reg_idx_w_i_in_byte, reg_idx_w_i_in_byte); + mov(reg_ptr_dst_i, reg_ptr_param_dst); + add(reg_ptr_dst_i, acc_num_regs * block_size); + + L(l_next_idx_w); + { + // h == 0 + mov(reg_ptr_idx_i, param_idx); + add(reg_ptr_idx_i, reg_idx_w_i_in_byte); + mov(reg_idx, qword[reg_ptr_idx_i]); + mov(rax, tbl_width_in_byte); + mul(reg_idx); + mov(reg_ptr_tbl_i, rax); // reg is offset now + add(reg_ptr_tbl_i, param_tbl); // reg is ptr_i now + size_t w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_tbl_i + w_offset]); + w_offset += block_size; + } + add(reg_ptr_idx_i, reg_idx_width_in_byte); + + // end condition of idx h + mov(reg_idx_h_end, reg_idx_height); + mov(rax, reg_idx_width_in_byte); + mul(reg_idx_h_end); + mov(reg_idx_h_end, rax); + add(reg_idx_h_end, reg_idx_w_i_in_byte); + add(reg_idx_h_end, param_idx); + + cmp(reg_ptr_idx_i, reg_idx_h_end); + jge(l_save_now, T_NEAR); + L(l_next_idx_h); + { + mov(reg_idx, qword[reg_ptr_idx_i]); + mov(reg_ptr_tbl_i, reg_idx); + mov(rax, tbl_width_in_byte); + mul(reg_idx); + mov(reg_ptr_tbl_i, rax); + add(reg_ptr_tbl_i, param_tbl); + size_t w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i), ptr[reg_ptr_tbl_i + w_offset]); + vaddps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), + ymm_t(reg_i)); + w_offset += block_size; + } + add(reg_ptr_idx_i, reg_idx_width_in_byte); + cmp(reg_ptr_idx_i, reg_idx_h_end); + jl(l_next_idx_h, T_NEAR); + } // end of idx h + L(l_save_now); + // avg or sqrt here, if needed + w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ptr[reg_ptr_dst_i + w_offset], ymm_t(reg_i + num_regs)); + w_offset += block_size; + } + add(reg_ptr_dst_i, tbl_width_in_byte); + add(reg_idx_w_i_in_byte, sizeof(int64_t)); + cmp(reg_idx_w_i_in_byte, reg_idx_width_in_byte); + jl(l_next_idx_w, T_NEAR); + } // end of idx w + + acc_num_regs += num_regs; + add(param_tbl, num_regs * block_size); // do not use acc_num_regs + } // end of groups + postCode(); +} + +class EmbSeqPoolCreator : public JitCodeCreator { + public: + bool UseMe(const emb_seq_pool_attr_t& attr) const override { + return platform::MayIUse(platform::avx) && + attr.table_width % YMM_FLOAT_BLOCK == 0; + } + size_t CodeSize(const emb_seq_pool_attr_t& attr) const override { + return 96 + (attr.table_width / YMM_FLOAT_BLOCK) * 96 * 8; + } + std::unique_ptr CreateJitCode( + const emb_seq_pool_attr_t& attr) const override { + PADDLE_ENFORCE_GT(attr.table_height, 0); + PADDLE_ENFORCE_GT(attr.table_width, 0); + PADDLE_ENFORCE_GT(attr.index_height, 0); + PADDLE_ENFORCE_GT(attr.index_width, 0); + PADDLE_ENFORCE_GT(attr.out_width, 0); + return make_unique(attr, CodeSize(attr)); + } +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle + +namespace gen = paddle::operators::jit::gen; + +REGISTER_JITKERNEL_GEN(kEmbSeqPool, gen::EmbSeqPoolCreator); diff --git a/paddle/fluid/operators/jit/gen/embseqpool.h b/paddle/fluid/operators/jit/gen/embseqpool.h new file mode 100644 index 0000000000000000000000000000000000000000..5afcfbdc1786bef160864fcde06f8738207751be --- /dev/null +++ b/paddle/fluid/operators/jit/gen/embseqpool.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#include +#include "glog/logging.h" +#include "paddle/fluid/operators/jit/gen/jitcode.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +class EmbSeqPoolJitCode : public JitCode { + public: + explicit EmbSeqPoolJitCode(const emb_seq_pool_attr_t& attr, + size_t code_size = 256 * 1024, + void* code_ptr = nullptr) + : JitCode(code_size, code_ptr), + tbl_w_(attr.table_width), + type_(attr.pool_type) { + if (type_ != SeqPoolType::kSum) { + LOG(FATAL) << "Only support sum pool yet "; + } + this->genCode(); + } + + std::string name() const override { + std::string base = "EmbSeqPoolJitCode"; + if (type_ == SeqPoolType::kSum) { + base += "_Sum"; + } else if (type_ == SeqPoolType::kAvg) { + base += "_Avg"; + } else if (type_ == SeqPoolType::kSqrt) { + base += "_Sqrt"; + } + base += ("_W" + std::to_string(tbl_w_)); + return base; + } + void genCode() override; + + private: + int tbl_w_; + SeqPoolType type_; + reg64_t param_tbl{abi_param1}; + reg64_t param_idx{abi_param2}; + reg64_t param_dst{abi_param3}; + reg64_t param_attr{abi_param4}; + + reg64_t reg_tmp{rax}; + + reg64_t reg_idx_width_in_byte{r8}; + reg64_t reg_idx_height{r9}; + + reg64_t reg_ptr_tbl_i{r10}; + reg64_t reg_idx{r10}; // could use same of reg_ptr_tbl_i + reg64_t reg_ptr_idx_i{r11}; + reg64_t reg_ptr_dst_i{r12}; + reg64_t reg_ptr_param_dst{r13}; // rdx is used in mul so protect param_dst + + reg64_t reg_idx_w_i_in_byte{r14}; + reg64_t reg_idx_h_end{r15}; +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/gen/gru.h b/paddle/fluid/operators/jit/gen/gru.h index a4d7222a3459d175fc5eaf5cdf0e7a1a610f8b0c..d91f828e6aa7673265a460524dfcad119758aa77 100644 --- a/paddle/fluid/operators/jit/gen/gru.h +++ b/paddle/fluid/operators/jit/gen/gru.h @@ -49,7 +49,7 @@ class GRUJitCode : public VActFunc { this->genCode(); } - const char* name() const override { + std::string name() const override { std::string base = "GRUJitCode"; if (id_ == 0) { base += "_H1"; @@ -81,7 +81,7 @@ class GRUJitCode : public VActFunc { }; AddTypeStr(act_gate_); AddTypeStr(act_cand_); - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/hopv.h b/paddle/fluid/operators/jit/gen/hopv.h index c336fe73fe5b6bcce32348656b7bccc12ea92f4c..28d213e5e48749f84405454a2708d9289b9d290c 100644 --- a/paddle/fluid/operators/jit/gen/hopv.h +++ b/paddle/fluid/operators/jit/gen/hopv.h @@ -35,14 +35,14 @@ class HOPVJitCode : public JitCode { this->genCode(); } - virtual const char* name() const override { + std::string name() const override { std::string base = "VXXJitCode"; if (type_ == operand_type::MAX) { base += "_MAX"; } else { base += "_SUM"; } - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h index 91058f6cf66c24a404ca9ca5b6a05acfab4c7741..39847d1b65f771976c4dde5a3e34cc40e33851e6 100644 --- a/paddle/fluid/operators/jit/gen/jitcode.h +++ b/paddle/fluid/operators/jit/gen/jitcode.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include "paddle/fluid/operators/jit/gen_base.h" #include "paddle/fluid/platform/cpu_info.h" @@ -30,7 +31,8 @@ namespace gen { // Application Binary Interface constexpr Xbyak::Operand::Code abi_param1(Xbyak::Operand::RDI), abi_param2(Xbyak::Operand::RSI), abi_param3(Xbyak::Operand::RDX), - abi_param4(Xbyak::Operand::RCX); + abi_param4(Xbyak::Operand::RCX), abi_param5(Xbyak::Operand::R8), + abi_param6(Xbyak::Operand::R9); constexpr Xbyak::Operand::Code g_abi_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12, @@ -59,7 +61,7 @@ typedef enum { } operand_type; #define DECLARE_JIT_CODE(codename) \ - const char* name() const override { return #codename; } + std::string name() const override { return #codename; } class JitCode : public GenBase, public Xbyak::CodeGenerator { public: diff --git a/paddle/fluid/operators/jit/gen/lstm.h b/paddle/fluid/operators/jit/gen/lstm.h index d4753bca23de91c74415d41c372cde1610712ef7..fa560b6230d7164be907f0172fb1d91860c05db2 100644 --- a/paddle/fluid/operators/jit/gen/lstm.h +++ b/paddle/fluid/operators/jit/gen/lstm.h @@ -53,7 +53,7 @@ class LSTMJitCode : public VActFunc { this->genCode(); } - const char* name() const override { + std::string name() const override { std::string base = "LSTMJitCode"; if (use_peephole_) { base += "_Peephole"; @@ -85,7 +85,7 @@ class LSTMJitCode : public VActFunc { AddTypeStr(act_gate_); AddTypeStr(act_cand_); AddTypeStr(act_cell_); - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/matmul.h b/paddle/fluid/operators/jit/gen/matmul.h index 7976e3112dae8ecca31f67dd897927dcdcf68e8e..881cea581acc27a7aa7d395c041d40a4d3281947 100644 --- a/paddle/fluid/operators/jit/gen/matmul.h +++ b/paddle/fluid/operators/jit/gen/matmul.h @@ -36,11 +36,11 @@ class MatMulJitCode : public JitCode { this->genCode(); } - virtual const char* name() const override { + std::string name() const override { std::string base = "MatMulJitCode"; base = base + "_M" + std::to_string(m_) + "_N" + std::to_string(n_) + "_K" + std::to_string(k_); - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/seqpool.h b/paddle/fluid/operators/jit/gen/seqpool.h index c464c2eac852f1838beda4f78c3cb37ceae66242..e909bc7c7939ee5cb7a2d367c7a452b96e6a91c2 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.h +++ b/paddle/fluid/operators/jit/gen/seqpool.h @@ -32,13 +32,13 @@ class SeqPoolJitCode : public JitCode { : JitCode(code_size, code_ptr), w_(attr.w), type_(attr.type) { if (!(type_ == SeqPoolType::kSum || type_ == SeqPoolType::kAvg || type_ == SeqPoolType::kSqrt)) { - LOG(FATAL) << "Only support sum pool yet "; + LOG(FATAL) << "Only supported pool type: sum, avg and sqrt."; } fp_h_[0] = 1.f; this->genCode(); } - virtual const char* name() const override { + std::string name() const override { std::string base = "SeqPoolJitCode"; if (type_ == SeqPoolType::kSum) { base += "_Sum"; @@ -48,7 +48,7 @@ class SeqPoolJitCode : public JitCode { base += "_Sqrt"; } base += ("_W" + std::to_string(w_)); - return base.c_str(); + return base; } void genCode() override; diff --git a/paddle/fluid/operators/jit/gen/sgd.cc b/paddle/fluid/operators/jit/gen/sgd.cc new file mode 100644 index 0000000000000000000000000000000000000000..a745a27f9543a75f6915c9316aad62fa41305bb1 --- /dev/null +++ b/paddle/fluid/operators/jit/gen/sgd.cc @@ -0,0 +1,130 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/jit/gen/sgd.h" +#include // offsetof +#include +#include "paddle/fluid/operators/jit/registry.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +void SgdJitCode::genCode() { + preCode(); + constexpr int block = YMM_FLOAT_BLOCK; + constexpr int max_num_regs = 7; + const int num_block = w_ / block; + const int num_groups = num_block / max_num_regs; + const size_t block_size = sizeof(float) * block; + const size_t width_size = w_ * sizeof(float); + std::vector groups(num_groups, max_num_regs); + int rest_num_regs = num_block % max_num_regs; + if (rest_num_regs > 0) { + groups.push_back(rest_num_regs); + } + + vbroadcastss(ymm_lr, ptr[param_lr]); + // protect rdx + mov(reg_ptr_grad_i, param_grad); + mov(reg_ptr_rows_i, param_rows); + + mov(reg_rows_size_in_byte, + qword[param_attr + offsetof(sgd_attr_t, selected_rows_size)]); + mov(rax, sizeof(int64_t)); + mul(reg_rows_size_in_byte); + mov(reg_rows_size_in_byte, rax); + add(reg_rows_size_in_byte, reg_ptr_rows_i); + + Label l_next_row; + L(l_next_row); + { + mov(reg_row, qword[reg_ptr_rows_i]); + mov(rax, width_size); + mul(reg_row); + mov(reg_row, rax); + + mov(reg_ptr_param_i, param_param); + mov(reg_ptr_out_i, param_out); + add(reg_ptr_param_i, reg_row); + add(reg_ptr_out_i, reg_row); + + size_t w_offset = 0; + for (int num_regs : groups) { + // load grad + size_t inner_offfset = w_offset; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i), ptr[reg_ptr_grad_i + inner_offfset]); + inner_offfset += block_size; + } + + // load param + inner_offfset = w_offset; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_param_i + inner_offfset]); + inner_offfset += block_size; + } + + // compute out + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmulps(ymm_t(reg_i), ymm_t(reg_i), ymm_lr); + vsubps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), ymm_t(reg_i)); + } + + // save out + inner_offfset = w_offset; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ptr[reg_ptr_out_i + inner_offfset], ymm_t(reg_i + num_regs)); + inner_offfset += block_size; + } + w_offset += (block_size * num_regs); + } + + add(reg_ptr_grad_i, width_size); + add(reg_ptr_rows_i, sizeof(int64_t)); + cmp(reg_ptr_rows_i, reg_rows_size_in_byte); + jl(l_next_row, T_NEAR); + } + + postCode(); +} + +class SgdCreator : public JitCodeCreator { + public: + bool UseMe(const sgd_attr_t& attr) const override { + return platform::MayIUse(platform::avx) && + attr.grad_width % YMM_FLOAT_BLOCK == 0; + } + size_t CodeSize(const sgd_attr_t& attr) const override { + return 96 + (attr.grad_width / YMM_FLOAT_BLOCK) * 32 * 8; + } + std::unique_ptr CreateJitCode( + const sgd_attr_t& attr) const override { + PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width); + PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height); + PADDLE_ENFORCE_GE(attr.selected_rows_size, 0); + return make_unique(attr, CodeSize(attr)); + } +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle + +namespace gen = paddle::operators::jit::gen; + +REGISTER_JITKERNEL_GEN(kSgd, gen::SgdCreator); diff --git a/paddle/fluid/operators/jit/gen/sgd.h b/paddle/fluid/operators/jit/gen/sgd.h new file mode 100644 index 0000000000000000000000000000000000000000..317edcd2bcb5fea1f14f32260fd16c9c706eaf00 --- /dev/null +++ b/paddle/fluid/operators/jit/gen/sgd.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#include +#include "glog/logging.h" +#include "paddle/fluid/operators/jit/gen/jitcode.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +class SgdJitCode : public JitCode { + public: + explicit SgdJitCode(const sgd_attr_t& attr, size_t code_size = 256 * 1024, + void* code_ptr = nullptr) + : JitCode(code_size, code_ptr), w_(attr.grad_width) { + this->genCode(); + } + + DECLARE_JIT_CODE(SgdJitCode); + void genCode() override; + + private: + int w_; + reg64_t param_lr{abi_param1}; + reg64_t param_param{abi_param2}; + reg64_t param_grad{abi_param3}; + reg64_t param_rows{abi_param4}; + reg64_t param_out{abi_param5}; + reg64_t param_attr{abi_param6}; + + ymm_t ymm_lr = ymm_t(15); + + reg64_t reg_ptr_grad_i{r10}; + reg64_t reg_ptr_rows_i{r11}; + reg64_t reg_rows_size_in_byte{r12}; + reg64_t reg_row{r13}; + reg64_t reg_ptr_param_i{r14}; + reg64_t reg_ptr_out_i{r15}; +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/gen/vbroadcast.cc b/paddle/fluid/operators/jit/gen/vbroadcast.cc new file mode 100644 index 0000000000000000000000000000000000000000..3f9fbdbd821acae0940c5a7b8d9a5eb2432712ff --- /dev/null +++ b/paddle/fluid/operators/jit/gen/vbroadcast.cc @@ -0,0 +1,91 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/jit/gen/vbroadcast.h" +#include +#include +#include "paddle/fluid/operators/jit/registry.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +void VBroadcastJitCode::genCode() { + preCode(); + constexpr int block = YMM_FLOAT_BLOCK; + constexpr int max_num_regs = 16; + const int num_block = w_ / block; + const int num_groups = num_block / max_num_regs; + const size_t block_size = sizeof(float) * block; + std::vector groups(num_groups, max_num_regs); + int rest_num_regs = num_block % max_num_regs; + if (rest_num_regs > 0) { + groups.push_back(rest_num_regs); + } + + // protect param_h + mov(reg_height, param_h); + Label l_next_h; + xor_(reg_h_i, reg_h_i); + mov(reg_ptr_dst_i, param_dst); + L(l_next_h); + { + mov(reg_ptr_src_i, param_src); + for (int num_regs : groups) { + size_t w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ymm_t(reg_i), ptr[reg_ptr_src_i + w_offset]); + w_offset += block_size; + } + add(reg_ptr_src_i, num_regs * block_size); + + w_offset = 0; + for (int reg_i = 0; reg_i < num_regs; ++reg_i) { + vmovups(ptr[reg_ptr_dst_i + w_offset], ymm_t(reg_i)); + w_offset += block_size; + } + add(reg_ptr_dst_i, num_regs * block_size); + } // end of groups + inc(reg_h_i); + cmp(reg_h_i, reg_height); + jl(l_next_h, T_NEAR); + } // end of l_next_h + + postCode(); +} + +class VBroadcastCreator : public JitCodeCreator { + public: + bool UseMe(const int64_t& w) const override { + return platform::MayIUse(platform::avx) && w % YMM_FLOAT_BLOCK == 0; + } + size_t CodeSize(const int64_t& w) const override { + return 96 + (w / YMM_FLOAT_BLOCK) * 16 * 8; + } + std::unique_ptr CreateJitCode(const int64_t& w) const override { + PADDLE_ENFORCE_GT(w, 0); + return make_unique(w, CodeSize(w)); + } +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle + +namespace gen = paddle::operators::jit::gen; + +REGISTER_JITKERNEL_GEN(kVBroadcast, gen::VBroadcastCreator); diff --git a/paddle/fluid/operators/jit/gen/vbroadcast.h b/paddle/fluid/operators/jit/gen/vbroadcast.h new file mode 100644 index 0000000000000000000000000000000000000000..27c75f6f710e9514c7d91181e7f447d9dd997081 --- /dev/null +++ b/paddle/fluid/operators/jit/gen/vbroadcast.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#include +#include "glog/logging.h" +#include "paddle/fluid/operators/jit/gen/jitcode.h" + +namespace paddle { +namespace operators { +namespace jit { +namespace gen { + +class VBroadcastJitCode : public JitCode { + public: + explicit VBroadcastJitCode(const int64_t& w, size_t code_size = 256 * 1024, + void* code_ptr = nullptr) + : JitCode(code_size, code_ptr), w_(w) { + this->genCode(); + } + + DECLARE_JIT_CODE(VBroadcastJitCode); + void genCode() override; + + private: + int w_; + reg64_t param_src{abi_param1}; + reg64_t param_dst{abi_param2}; + reg64_t param_h{abi_param3}; + reg64_t param_w{abi_param4}; + + reg64_t reg_height{r9}; + reg64_t reg_h_i{r10}; + reg64_t reg_ptr_src_i{r11}; + reg64_t reg_ptr_dst_i{r12}; +}; + +} // namespace gen +} // namespace jit +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/jit/gen_base.cc b/paddle/fluid/operators/jit/gen_base.cc index 3cd5f6554bdc188ce9ea0c0b85c84d032c509600..f3603875ad7bda1fc688f9c053e0d37f7bb31f02 100644 --- a/paddle/fluid/operators/jit/gen_base.cc +++ b/paddle/fluid/operators/jit/gen_base.cc @@ -17,7 +17,13 @@ #include #include #include +#include "paddle/fluid/memory/allocation/cpu_allocator.h" // for posix_memalign #include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/platform/enforce.h" + +#ifndef _WIN32 +#define posix_memalign_free free +#endif DEFINE_bool(dump_jitcode, false, "Whether to dump the jitcode to file"); @@ -40,6 +46,17 @@ void GenBase::dumpCode(const unsigned char* code) const { } } +void* GenBase::operator new(size_t size) { + void* ptr; + constexpr size_t alignment = 32ul; + PADDLE_ENFORCE_EQ(posix_memalign(&ptr, alignment, size), 0, + "GenBase Alloc %ld error!", size); + PADDLE_ENFORCE(ptr, "Fail to allocate GenBase CPU memory: size = %d .", size); + return ptr; +} + +void GenBase::operator delete(void* ptr) { posix_memalign_free(ptr); } + std::vector packed_groups(int n, int k, int* block_out, int* rest_out) { int block; int max_num_regs; diff --git a/paddle/fluid/operators/jit/gen_base.h b/paddle/fluid/operators/jit/gen_base.h index d808a332472ae86240cb63356cb417123523366a..a7c7a35a7ea35bd80333b04f001d4ab5b5d1e06b 100644 --- a/paddle/fluid/operators/jit/gen_base.h +++ b/paddle/fluid/operators/jit/gen_base.h @@ -16,6 +16,7 @@ #include #include // for unique_ptr +#include #include #include "paddle/fluid/operators/jit/kernel_base.h" @@ -28,7 +29,7 @@ namespace jit { class GenBase : public Kernel { public: virtual ~GenBase() = default; - virtual const char* name() const = 0; + virtual std::string name() const = 0; virtual size_t getSize() const = 0; virtual const unsigned char* getCodeInternal() = 0; template @@ -42,6 +43,11 @@ class GenBase : public Kernel { return reinterpret_cast(const_cast(code)); } + void* operator new(size_t size); + void operator delete(void* ptr); + void* operator new[](size_t size) { return operator new(size); } + void operator delete[](void* ptr) { operator delete(ptr); } + protected: void dumpCode(const unsigned char* code) const; }; diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index e7292fe2bd8031aa5bbff68e7c2305a238085bf1..eb1c410b6f9a31c3f97a274c5e5ff55bf1c32ea0 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -36,6 +36,8 @@ const char* to_string(KernelType kt) { ONE_CASE(kVScal); ONE_CASE(kVAddBias); ONE_CASE(kVRelu); + ONE_CASE(kVBroadcast); + ONE_CASE(kVCopy); ONE_CASE(kVIdentity); ONE_CASE(kVExp); ONE_CASE(kVSquare); @@ -54,6 +56,8 @@ const char* to_string(KernelType kt) { ONE_CASE(kHMax); ONE_CASE(kHSum); ONE_CASE(kSoftmax); + ONE_CASE(kEmbSeqPool); + ONE_CASE(kSgd); default: PADDLE_THROW("Not support type: %d, or forget to add it.", kt); return "NOT JITKernel"; diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index d5773d65940127ea0a9b77ed2760bd371b778f4c..d85c719c1c58c88ec244f1f6ad8343d66391241d 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -172,6 +172,23 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) { return os; } +inline std::ostream& operator<<(std::ostream& os, + const emb_seq_pool_attr_t& attr) { + os << "table_height[" << attr.table_height << "],table_width[" + << attr.table_width << "],index_height[" << attr.index_height + << "],index_width[" << attr.index_width << "],output_width[" + << attr.out_width << "],pool_type[" << to_string(attr.pool_type) << "]"; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const sgd_attr_t& attr) { + os << "param_height[" << attr.param_height << "],param_width[" + << attr.param_width << "],grad_height[" << attr.grad_height + << "],grad_width[" << attr.grad_width << "],selected_rows_size[" + << attr.selected_rows_size << "]"; + return os; +} + inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) { os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]"; return os; diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 4a8f61146a1921fa1d5f6b7e15af40cd45d31a22..96e162a21bff2a5624f35ada615c9a9a17ad3c75 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -13,6 +13,7 @@ * limitations under the License. */ #pragma once +#include #include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/platform/macros.h" @@ -20,34 +21,38 @@ namespace paddle { namespace operators { namespace jit { -// TODO(TJ): reorder by alphabet typedef enum { kNone = 0, - kVMul = 1, - kVAdd = 2, - kVAddRelu, - kVSub, - kVScal, - kVAddBias, - kVRelu, - kVIdentity, - kVSquare, - kVExp, - kVSigmoid, - kVTanh, - kLSTMCtHt, - kLSTMC1H1, + // sort by alphabet + kCRFDecoding = 1, + kEmbSeqPool = 2, kGRUH1, kGRUHtPart1, kGRUHtPart2, - kCRFDecoding, + kHSum, // horizontal max + kHMax, // horizontal sum + kLSTMCtHt, + kLSTMC1H1, kLayerNorm, + kMatMul, kNCHW16CMulNC, kSeqPool, - kMatMul, - kHSum, // horizontal max - kHMax, // horizontal sum kSoftmax, + kVAdd, + kVAddBias, + kVAddRelu, + kVBroadcast, + kVCopy, + kVExp, + kVIdentity, + kVMul, + kVRelu, + kVScal, + kSgd, + kVSigmoid, + kVSquare, + kVSub, + kVTanh, } KernelType; typedef enum { @@ -130,6 +135,13 @@ struct GRUTuples { typedef void (*func_type)(gru_t*, const gru_attr_t*); }; +template +struct VBroadcastTuples { + typedef T data_type; + typedef int64_t attr_type; + typedef void (*func_type)(const T*, T*, int64_t, int64_t); +}; + typedef struct seq_pool_attr_s { int h, w; // h should always be the first one SeqPoolType type; @@ -145,6 +157,54 @@ struct SeqPoolTuples { typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*); }; +typedef struct emb_seq_pool_attr_s { + int64_t table_height, table_width; + int64_t index_height, index_width; + int64_t out_width; + SeqPoolType pool_type; + emb_seq_pool_attr_s() = default; + explicit emb_seq_pool_attr_s(int64_t tbl_height, int64_t tbl_width, + int64_t idx_height, int64_t idx_width, + int64_t output_width, + SeqPoolType seqpool_type = SeqPoolType::kSum) + : table_height(tbl_height), + table_width(tbl_width), + index_height(idx_height), + index_width(idx_width), + out_width(output_width), + pool_type(seqpool_type) {} +} emb_seq_pool_attr_t; + +template +struct EmbSeqPoolTuples { + typedef T data_type; + typedef emb_seq_pool_attr_t attr_type; + typedef void (*func_type)(const T*, const int64_t*, T*, + const emb_seq_pool_attr_t*); +}; + +typedef struct sgd_attr_s { + int64_t param_height, param_width; + int64_t grad_height, grad_width; + int64_t selected_rows_size; + sgd_attr_s() = default; + explicit sgd_attr_s(int64_t param_h, int64_t param_w, int64_t grad_h, + int64_t grad_w, int64_t selected_rows_sz) + : param_height(param_h), + param_width(param_w), + grad_height(grad_h), + grad_width(grad_w), + selected_rows_size(selected_rows_sz) {} +} sgd_attr_t; + +template +struct SgdTuples { + typedef T data_type; + typedef sgd_attr_t attr_type; + typedef void (*func_type)(const T*, const T*, const T*, const int64_t*, T*, + const sgd_attr_t*); +}; + typedef struct matmul_attr_s { int m, n, k; void* packed_weight{nullptr}; diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc index 1e4a8884e78c5d3c1748988f05ecf461a6f0eb94..1c2fddcae79d8b89e1169d5bcb364b3ff2e42dd3 100644 --- a/paddle/fluid/operators/jit/kernel_key.cc +++ b/paddle/fluid/operators/jit/kernel_key.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/kernel_key.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace operators { @@ -23,14 +24,35 @@ size_t JitCodeKey(const int& d) { return d; } +template <> +size_t JitCodeKey(const int64_t& d) { + return d; +} + +// TODO(TJ): refine and benchmark JitCodeKey generatation constexpr int act_type_shift = 3; // suppot 2^3 act types +static inline int act_type_convert(KernelType type) { + if (type == kVIdentity) { + return 0; + } else if (type == kVExp) { + return 1; + } else if (type == kVRelu) { + return 2; + } else if (type == kVSigmoid) { + return 3; + } else if (type == kVTanh) { + return 4; + } + PADDLE_THROW("Unsupported act type %d", type); + return 0; +} template <> size_t JitCodeKey(const lstm_attr_t& attr) { size_t key = attr.d; - int gate_key = static_cast(attr.act_gate) << 1; - int cand_key = static_cast(attr.act_cand) << (1 + act_type_shift); - int cell_key = static_cast(attr.act_cell) << (1 + act_type_shift * 2); + int gate_key = act_type_convert(attr.act_gate) << 1; + int cand_key = act_type_convert(attr.act_cand) << (1 + act_type_shift); + int cell_key = act_type_convert(attr.act_cell) << (1 + act_type_shift * 2); return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key + attr.use_peephole; } @@ -38,8 +60,8 @@ size_t JitCodeKey(const lstm_attr_t& attr) { template <> size_t JitCodeKey(const gru_attr_t& attr) { size_t key = attr.d; - return (key << (act_type_shift * 2)) + static_cast(attr.act_gate) + - (static_cast(attr.act_cand) << act_type_shift); + return (key << (act_type_shift * 2)) + act_type_convert(attr.act_gate) + + (act_type_convert(attr.act_cand) << act_type_shift); } template <> @@ -56,6 +78,16 @@ size_t JitCodeKey(const matmul_attr_t& attr) { return (key << shift * 2) + ((static_cast(attr.n)) << shift) + attr.k; } +template <> +size_t JitCodeKey(const emb_seq_pool_attr_t& attr) { + return attr.table_width; +} + +template <> +size_t JitCodeKey(const sgd_attr_t& attr) { + return attr.grad_width; +} + } // namespace jit } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index f9e5aea32e7cd48e9b39c4c3ee0e30f4a5c84f6f..f69417c370b653d93cce04a2248ad809168670da 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -9,7 +9,11 @@ USE_JITKERNEL_MORE(kVAdd, mkl) USE_JITKERNEL_MORE(kVScal, mkl) USE_JITKERNEL_MORE(kVExp, mkl) USE_JITKERNEL_MORE(kVSquare, mkl) +USE_JITKERNEL_MORE(kVCopy, mkl) USE_JITKERNEL_MORE(kVSigmoid, mkl) USE_JITKERNEL_MORE(kVTanh, mkl) USE_JITKERNEL_MORE(kSeqPool, mkl) USE_JITKERNEL_MORE(kSoftmax, mkl) +USE_JITKERNEL_MORE(kEmbSeqPool, mkl) +USE_JITKERNEL_MORE(kSgd, mkl) +USE_JITKERNEL_MORE(kVBroadcast, mkl) diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index 4c999131ab116ebe3484355158993558b02cc4b2..4f51353bce834325e6c659399a374e4fbc40d4b7 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -154,6 +154,21 @@ bool VSquareKernel::UseMe(const int& d) const { return d > 7; } +template <> +bool VCopyKernel::UseMe(const int& d) const { + return d > 15; +} + +template <> +bool VBroadcastKernel::UseMe(const int64_t& d) const { + return d > 127; +} + +template <> +bool VBroadcastKernel::UseMe(const int64_t& attr) const { + return true; +} + template <> bool VSigmoidKernel::UseMe(const int& d) const { return d > 7; @@ -174,6 +189,26 @@ bool SeqPoolKernel::UseMe(const seq_pool_attr_t& attr) const { return true; } +template <> +bool EmbSeqPoolKernel::UseMe(const emb_seq_pool_attr_t& attr) const { + return true; +} + +template <> +bool EmbSeqPoolKernel::UseMe(const emb_seq_pool_attr_t& attr) const { + return true; +} + +template <> +bool SgdKernel::UseMe(const sgd_attr_t& attr) const { + return true; +} + +template <> +bool SgdKernel::UseMe(const sgd_attr_t& attr) const { + return true; +} + template <> bool MatMulKernel::UseMe(const matmul_attr_t& attr) const { return platform::MayIUse(platform::avx); @@ -203,6 +238,7 @@ AWALYS_USE_ME_WITH_DOUBLE(VExp); AWALYS_USE_ME_WITH_DOUBLE(VSigmoid); AWALYS_USE_ME_WITH_DOUBLE(VTanh); AWALYS_USE_ME_WITH_DOUBLE(VSquare); +AWALYS_USE_ME_WITH_DOUBLE(VCopy); AWALYS_USE_ME_WITH_DOUBLE(Softmax); #undef AWALYS_USE_ME_WITH_DOUBLE @@ -224,9 +260,13 @@ REGISTER_MKL_KERNEL(kVAdd, VAdd); REGISTER_MKL_KERNEL(kVScal, VScal); REGISTER_MKL_KERNEL(kVExp, VExp); REGISTER_MKL_KERNEL(kVSquare, VSquare); +REGISTER_MKL_KERNEL(kVCopy, VCopy); +REGISTER_MKL_KERNEL(kVBroadcast, VBroadcast); REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid); REGISTER_MKL_KERNEL(kVTanh, VTanh); REGISTER_MKL_KERNEL(kSeqPool, SeqPool); +REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool); REGISTER_MKL_KERNEL(kSoftmax, Softmax); +REGISTER_MKL_KERNEL(kSgd, Sgd); #undef REGISTER_MKL_KERNEL diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index 8130b87326f1887f232022ab30fa7bf42b0723e7..db2d6faed4fdcfebedb9d9eb752831259af30186 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/operators/jit/kernel_base.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace operators { @@ -49,6 +50,13 @@ void VCopy(const T* x, T* y, int n); template void VAXPY(T a, const T* x, T* y, int n); +template +void VBroadcast(const T* x, T* y, int64_t y_h, int64_t x_len) { + for (int64_t h = 0; h < y_h; ++h) { + VCopy(x, y + h * x_len, x_len); + } +} + template void VSigmoid(const T* x, T* y, int n) { const T min = SIGMOID_THRESHOLD_MIN; @@ -91,6 +99,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) { } } +template +void EmbSeqPool(const T* table, const int64_t* idx, T* out, + const emb_seq_pool_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + auto check_idx_value_valid = [&](int64_t i) { + PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", + idx[i], i); + PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + }; + + for (int64_t w = 0; w != attr->index_width; ++w) { + check_idx_value_valid(w); + VCopy(table + idx[w] * attr->table_width, out + w * attr->table_width, + attr->table_width); + } + + for (int64_t h = 1; h < attr->index_height; ++h) { + for (int64_t w = 0; w < attr->index_width; ++w) { + int64_t i = h * attr->index_width + w; + check_idx_value_valid(i); + VAXPY(static_cast(1), table + idx[i] * attr->table_width, + out + w * attr->table_width, attr->table_width); + } + } +} + template void ASum(const T* x, T* res, int n); @@ -115,6 +149,32 @@ void Softmax(const T* x, T* y, int n, int bs) { } } +template +void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, + T* out, const sgd_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width); + PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height); + T scalar = -lr[0]; + int width = attr->grad_width; + if (out == param) { + for (int64_t i = 0; i < attr->selected_rows_size; ++i) { + auto h_idx = rows[i]; + PADDLE_ENFORCE_LT(h_idx, attr->param_height); + PADDLE_ENFORCE_GE(h_idx, 0); + VAXPY(scalar, grad + i * width, out + h_idx * width, width); + } + } else { + for (int64_t i = 0; i < attr->selected_rows_size; ++i) { + auto h_idx = rows[i]; + PADDLE_ENFORCE_LT(h_idx, attr->param_height); + PADDLE_ENFORCE_GE(h_idx, 0); + VScal(&scalar, grad + i * width, out + h_idx * width, width); + VAdd(param + h_idx * width, out + h_idx * width, out + h_idx * width, + width); + } + } +} + #define DECLARE_MKL_KERNEL(name, tuples) \ template \ class name##Kernel : public KernelMore> { \ @@ -139,11 +199,18 @@ DECLARE_MKL_KERNEL(VExp, XYNTuples); DECLARE_MKL_KERNEL(VSigmoid, XYNTuples); DECLARE_MKL_KERNEL(VTanh, XYNTuples); DECLARE_MKL_KERNEL(VSquare, XYNTuples); +DECLARE_MKL_KERNEL(VCopy, XYNTuples); DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples); +DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples); + DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples); +DECLARE_MKL_KERNEL(Sgd, SgdTuples); + +DECLARE_MKL_KERNEL(VBroadcast, VBroadcastTuples); + #undef DECLARE_MKL_KERNEL } // namespace mkl diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index 9f2935828ca300dbdb71b0fefb6b9883cb45e4b0..ffab9c1457b932b3211e6aa75954bb1435f8e34c 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -13,6 +13,7 @@ USE_JITKERNEL_REFER(kVAddRelu) USE_JITKERNEL_REFER(kVSub) USE_JITKERNEL_REFER(kVScal) USE_JITKERNEL_REFER(kVAddBias) +USE_JITKERNEL_REFER(kVCopy) USE_JITKERNEL_REFER(kVRelu) USE_JITKERNEL_REFER(kVIdentity) USE_JITKERNEL_REFER(kVExp) @@ -32,3 +33,6 @@ USE_JITKERNEL_REFER(kVSquare) USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kSoftmax) +USE_JITKERNEL_REFER(kEmbSeqPool) +USE_JITKERNEL_REFER(kSgd) +USE_JITKERNEL_REFER(kVBroadcast) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index b8adb40ec7e1b64df2b04a3201292db235af7b19..c279d1b2ca4f53bb6bc5da0cab41e9086ed475bd 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -30,6 +30,7 @@ REGISTER_REFER_KERNEL(kVScal, VScal); REGISTER_REFER_KERNEL(kVAddBias, VAddBias); REGISTER_REFER_KERNEL(kVRelu, VRelu); +REGISTER_REFER_KERNEL(kVCopy, VCopy); REGISTER_REFER_KERNEL(kVIdentity, VIdentity); REGISTER_REFER_KERNEL(kVSquare, VSquare); REGISTER_REFER_KERNEL(kVExp, VExp); @@ -57,4 +58,10 @@ REGISTER_REFER_KERNEL(kHSum, HSum); REGISTER_REFER_KERNEL(kSoftmax, Softmax); +REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool); + +REGISTER_REFER_KERNEL(kSgd, Sgd); + +REGISTER_REFER_KERNEL(kVBroadcast, VBroadcast); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 0c4a985f8e8ece0a6169478fa3a9b111f5a6f3b4..b3b2097828c5b6d647fd6bfe14a6e8bff04409e0 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -16,6 +16,7 @@ #include #include +#include #include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" @@ -69,6 +70,20 @@ void VAddBias(const T* a, const T* x, T* y, int n) { } } +template +void VCopy(const T* x, T* y, int n) { + std::memcpy(y, x, n * sizeof(T)); +} + +// x shape: (x_len) +// y shape: (h, x_len) +template +void VBroadcast(const T* x, T* y, int64_t y_h, int64_t x_len) { + for (int64_t h = 0; h < y_h; ++h) { + VCopy(x, y + h * x_len, x_len); + } +} + template void VRelu(const T* x, T* y, int n) { for (int i = 0; i < n; ++i) { @@ -414,6 +429,67 @@ void Softmax(const T* x, T* y, int n, int bs = 1) { } } +// embedding seq pool +// table is a matrix with (tbl_h, tbl_w) +// idx is a matrix with (idx_h, idx_w) +// output is a vector with length tbl_w * idx_w +template +void EmbSeqPool(const T* table, const int64_t* idx, T* out, + const emb_seq_pool_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + + auto check_idx_value_valid = [&](int64_t i) { + PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", + idx[i], i); + PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + }; + + for (int64_t w = 0; w != attr->index_width; ++w) { + check_idx_value_valid(w); + std::memcpy(out + w * attr->table_width, table + idx[w] * attr->table_width, + attr->table_width * sizeof(T)); + } + + for (int64_t h = 1; h < attr->index_height; ++h) { + for (int64_t w = 0; w < attr->index_width; ++w) { + int64_t i = h * attr->index_width + w; + check_idx_value_valid(i); + VAdd(table + idx[i] * attr->table_width, out + w * attr->table_width, + out + w * attr->table_width, attr->table_width); + } + } +} + +// SGD algorithm: +// lr is pointor of learning rate scalar +// param is an input matrix with (param_h, param_w) +// grad is an input matrix with (grad_h, grad_w), here grad_w == param_w +// selected_rows is a vectot with size selected_rows_size( <= grad_h ) +// out is an output matrix with (param_h, param_w) +// +// support both regular and sparse grad +// regular SGD: out[:] = param[:] - lr[0] * grad[:]; +// sparse SGD: out[rows[i]][:] = param[rows[i]][:] - lr[0] * grad[i][:] +// +// Note: when use sparse SGD, and if out != param, +// the out rows which are not selected have not beed changed, which maybe empty +template +void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, + T* out, const sgd_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width); + PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height); + for (int64_t i = 0; i < attr->selected_rows_size; ++i) { + auto h_idx = rows[i]; + PADDLE_ENFORCE_LT(h_idx, attr->param_height); + PADDLE_ENFORCE_GE(h_idx, 0); + for (int64_t j = 0; j < attr->grad_width; ++j) { + out[h_idx * attr->grad_width + j] = + param[h_idx * attr->grad_width + j] - + lr[0] * grad[i * attr->grad_width + j]; + } + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -438,6 +514,7 @@ DECLARE_REFER_KERNEL(VExp, XYNTuples); DECLARE_REFER_KERNEL(VSigmoid, XYNTuples); DECLARE_REFER_KERNEL(VTanh, XYNTuples); DECLARE_REFER_KERNEL(VSquare, XYNTuples); +DECLARE_REFER_KERNEL(VCopy, XYNTuples); // lstm_t*, const lstm_attr_t* DECLARE_REFER_KERNEL(LSTMCtHt, LSTMTuples); @@ -462,6 +539,12 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples); DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples); +DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples); + +DECLARE_REFER_KERNEL(Sgd, SgdTuples); + +DECLARE_REFER_KERNEL(VBroadcast, VBroadcastTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 237e588d35cc3b33658a830db34676967818aab6..cdec14dc4383897f4ae24fc89b99fe00c713cf42 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -1,17 +1,18 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. */ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include #include #include #include @@ -25,8 +26,8 @@ DEFINE_double(acc, 1e-5, "Test accuracy threshold."); template -void RandomVec(const int n, T* a, const T lower = static_cast(-20.f), - const T upper = static_cast(20.f)) { +void RandomVec(const int n, T* a, const T lower = static_cast(-2.f), + const T upper = static_cast(2.f)) { static unsigned int seed = 100; std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); @@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast(-20.f), } template -void ExpectEQ(const T* target, const T* refer, int n) { +void ExpectEQ(const T* target, const T* refer, size_t n) { if (std::is_floating_point::value) { - for (int i = 0; i < n; ++i) { - EXPECT_NEAR(target[i], refer[i], FLAGS_acc); + for (size_t i = 0; i < n; ++i) { + EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i; } } else { - for (int i = 0; i < n; ++i) { - EXPECT_EQ(target[i], refer[i]); + for (size_t i = 0; i < n; ++i) { + EXPECT_EQ(target[i], refer[i]) << " at index : " << i; } } } @@ -156,6 +157,26 @@ struct TestFuncWithRefer, std::vector, T> { } }; +template +struct TestFuncWithRefer, std::vector, + std::vector, int64_t, + typename jit::VBroadcastTuples::attr_type> { + void operator()(const typename jit::VBroadcastTuples::func_type tgt, + const std::vector& x, const std::vector& yref, + int64_t h, + const typename jit::VBroadcastTuples::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(x.size(), static_cast(attr)); + EXPECT_EQ(yref.size(), x.size() * h); + std::vector y(yref.size()); + const T* x_data = x.data(); + const T* yref_data = yref.data(); + T* y_data = y.data(); + tgt(x_data, y_data, h, attr); + ExpectEQ(y_data, yref_data, yref.size()); + } +}; + template struct TestFuncWithRefer, std::vector, std::vector> { void operator()(const typename jit::XYNTuples::func_type tgt, @@ -259,7 +280,7 @@ struct TestFuncWithRefer, std::vector, std::vector, const std::vector& x, const std::vector& yref, const typename jit::SeqPoolTuples::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); - EXPECT_EQ(x.size() % yref.size(), 0); + EXPECT_EQ(x.size() % yref.size(), static_cast(0)); int w = yref.size(); std::vector y(w); const T* x_data = x.data(); @@ -270,6 +291,71 @@ struct TestFuncWithRefer, std::vector, std::vector, } }; +template +struct TestFuncWithRefer, std::vector, + std::vector, std::vector, + typename jit::EmbSeqPoolTuples::attr_type> { + void operator()(const typename jit::EmbSeqPoolTuples::func_type tgt, + const std::vector& table, const std::vector& idx, + const std::vector& oref, + const typename jit::EmbSeqPoolTuples::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(table.size(), + static_cast(attr.table_height * attr.table_width)); + EXPECT_EQ(idx.size(), + static_cast(attr.index_height * attr.index_width)); + EXPECT_EQ(oref.size(), + static_cast(attr.table_width * attr.index_width)); + const T* table_data = table.data(); + const int64_t* idx_data = idx.data(); + const T* oref_data = oref.data(); + int o_w = oref.size(); + std::vector out(o_w); + T* o_data = out.data(); + tgt(table_data, idx_data, o_data, &attr); + ExpectEQ(o_data, oref_data, o_w); + } +}; + +template +struct TestFuncWithRefer, T, std::vector, std::vector, + std::vector, std::vector, + typename jit::SgdTuples::attr_type> { + void operator()(const typename jit::SgdTuples::func_type tgt, const T lr, + const std::vector& param, const std::vector& grad, + const std::vector& rows, const std::vector& oref, + const typename jit::SgdTuples::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(param.size(), + static_cast(attr.param_height * attr.param_width)); + EXPECT_EQ(grad.size(), + static_cast(attr.grad_height * attr.grad_width)); + EXPECT_EQ(rows.size(), static_cast(attr.selected_rows_size)); + EXPECT_EQ(param.size(), oref.size()); + const T* param_data = param.data(); + const T* grad_data = grad.data(); + const int64_t* rows_data = rows.data(); + const T* oref_data = oref.data(); + + std::vector out(oref.size()); + T* o_data = out.data(); + tgt(&lr, param_data, grad_data, rows_data, o_data, &attr); + // only the selected rows should be equal + for (size_t i = 0; i < rows.size(); ++i) { + ExpectEQ(o_data + rows[i] * attr.grad_width, + oref_data + rows[i] * attr.grad_width, attr.grad_width); + } + + // inplace + std::copy(param.begin(), param.end(), out.begin()); + tgt(&lr, o_data, grad_data, rows_data, o_data, &attr); + for (size_t i = 0; i < rows.size(); ++i) { + ExpectEQ(o_data + rows[i] * attr.grad_width, + oref_data + rows[i] * attr.grad_width, attr.grad_width); + } + } +}; + template struct TestFuncWithRefer, std::vector, std::vector, std::vector, @@ -292,6 +378,63 @@ struct TestFuncWithRefer, std::vector, std::vector, } }; +template +struct TestFuncWithRefer, std::vector, + std::vector, std::vector, std::vector, + std::vector, std::vector, int, float, int> { + void operator()(const typename jit::LayerNormTuples::func_type tgt, + std::vector& x, std::vector& outref, // NOLINT + std::vector& mean, std::vector& var, // NOLINT + const std::vector& scale, const std::vector& bias, + int left, const float epsilon, int right) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(x.size(), static_cast(left * right)); + EXPECT_EQ(outref.size(), static_cast(left * right)); + EXPECT_EQ(mean.size(), static_cast(left)); + EXPECT_EQ(var.size(), static_cast(left)); + EXPECT_EQ(scale.size(), static_cast(right)); + EXPECT_EQ(bias.size(), static_cast(right)); + std::vector outtgt(outref.size()); + const T* scale_data = scale.data(); + const T* bias_data = bias.data(); + T* x_data = x.data(); + T* mean_data = mean.data(); + T* var_data = var.data(); + T* outref_data = outref.data(); + T* outtgt_data = outtgt.data(); + + tgt(x_data, outtgt_data, mean_data, var_data, scale_data, bias_data, left, + epsilon, right); + ExpectEQ(outtgt_data, outref_data, left * right); + } +}; + +template +struct TestFuncWithRefer, int, std::vector, + std::vector, std::vector, std::vector, + int> { + void operator()(const typename jit::CRFDecodingTuples::func_type tgt, + const int seq_len, const std::vector& x, + const std::vector& w, std::vector& alpharef, // NOLINT + std::vector& trackref, int tag_num) { // NOLINT + constexpr int state_trans_base_idx = 2; + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(x.size(), static_cast(seq_len * tag_num)); + EXPECT_EQ(w.size(), + static_cast((tag_num + state_trans_base_idx) * tag_num)); + EXPECT_EQ(alpharef.size(), static_cast(seq_len * tag_num)); + EXPECT_EQ(trackref.size(), static_cast(seq_len * tag_num)); + std::vector alphatgt(alpharef.size()); + std::vector tracktgt(trackref.size()); + + memcpy(trackref.data(), tracktgt.data(), tag_num * sizeof(int)); + tgt(seq_len, (const T*)x.data(), (const T*)w.data(), alphatgt.data(), + tracktgt.data(), tag_num); + ExpectEQ(alpharef.data(), alphatgt.data(), seq_len * tag_num); + ExpectEQ(trackref.data(), tracktgt.data(), seq_len * tag_num); + } +}; + template void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { @@ -324,7 +467,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { } template -void TestXYZNKernel() { +void TestKernelXYZNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -357,7 +500,7 @@ void TestXYZNKernel() { } template -void TestAXYNKernel() { +void TestKernelAXYNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -383,7 +526,7 @@ void TestAXYNKernel() { } template -void TestXRNKernel() { +void TestKernelXRNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; FLAGS_acc = 1e-4; @@ -391,7 +534,7 @@ void TestXRNKernel() { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(d); - RandomVec(d, x.data(), -2.f, 2.f); + RandomVec(d, x.data()); T ref_res; ref(x.data(), &ref_res, d); TestAllImpls, PlaceType, std::vector, T>(d, x, @@ -401,7 +544,7 @@ void TestXRNKernel() { } template -void TestXYNKernel() { +void TestKernelXYNTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int d : TestSizes()) { auto ref = jit::GetRefer>(); @@ -409,7 +552,7 @@ void TestXYNKernel() { std::vector x(d), yref(d); std::vector xinp(d); // inplace test - RandomVec(d, x.data(), -2.f, 2.f); + RandomVec(d, x.data()); std::copy(x.begin(), x.end(), xinp.begin()); const T* x_data = x.data(); @@ -426,10 +569,12 @@ void TestXYNKernel() { } template -void TestLSTMKernel() { +void TestKernelLSTMTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; - for (int d : TestSizes()) { + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int d : test_sizes) { for (bool use_peephole : {true, false}) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { @@ -441,7 +586,7 @@ void TestLSTMKernel() { EXPECT_TRUE(ref != nullptr); std::vector xsrc(4 * d), wp(3 * d), ct_1(d); std::vector ct_ref(d), ht_ref(d), checked(2 * d); - RandomVec(4 * d, xsrc.data(), -2.f, 2.f); + RandomVec(4 * d, xsrc.data()); RandomVec(3 * d, wp.data(), -1.f, 1.f); RandomVec(d, ct_1.data(), -1.f, 1.f); // x could be changed after compute, so copy to save src @@ -476,10 +621,12 @@ void TestLSTMKernel() { } template -void TestGRUKernel() { +void TestKernelGRUTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector all_acts = {"sigmoid", "tanh", "relu", "identity"}; - for (int d : TestSizes()) { + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int d : test_sizes) { for (auto& act_gate : all_acts) { for (auto& act_cand : all_acts) { const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate), @@ -487,8 +634,8 @@ void TestGRUKernel() { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector xsrc(3 * d), ht_1(d), ht_ref(d); - RandomVec(3 * d, xsrc.data(), -2.f, 2.f); - RandomVec(d, ht_1.data(), -2.f, 2.f); + RandomVec(3 * d, xsrc.data()); + RandomVec(d, ht_1.data()); // x could be changed after compute, so copy to save src std::vector x(xsrc.size()); std::copy(xsrc.begin(), xsrc.end(), x.begin()); @@ -510,19 +657,21 @@ void TestGRUKernel() { } template -void TestSeqPoolKernel() { +void TestKernelSeqPoolTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); std::vector pool_types = { jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt}; + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); for (auto type : pool_types) { - for (int w : TestSizes()) { + for (int w : test_sizes) { jit::seq_pool_attr_t attr(w, type); - for (int h : TestSizes()) { + for (int h : test_sizes) { attr.h = h; auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(h * w), yref(w); - RandomVec(h * w, x.data(), -2.f, 2.f); + RandomVec(h * w, x.data()); const T* x_data = x.data(); T* yref_data = yref.data(); ref(x_data, yref_data, &attr); @@ -535,11 +684,11 @@ void TestSeqPoolKernel() { } template -void TestMatMulKernel() { +void TestKernelMatMulTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); auto last_acc = FLAGS_acc; - // TODO(intel): fix MKL acc issue - // https://github.com/PaddlePaddle/Paddle/issues/15447 + // export MKL_CBWR=AVX would make MKL force to use AVX + // export KMP_DETERMINISTIC_REDUCTION=yes would make the result deterministic FLAGS_acc = 1e-3; for (int m : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) { @@ -547,8 +696,8 @@ void TestMatMulKernel() { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector a(m * k), b(k * n), c(m * n); - RandomVec(m * k, a.data(), -2.f, 2.f); - RandomVec(k * n, b.data(), -2.f, 2.f); + RandomVec(m * k, a.data()); + RandomVec(k * n, b.data()); const T* a_data = a.data(); const T* b_data = b.data(); T* c_data = c.data(); @@ -563,14 +712,14 @@ void TestMatMulKernel() { } template -void TestSoftmaxKernel() { +void TestKernelSoftmaxTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { auto ref = jit::GetRefer>(); EXPECT_TRUE(ref != nullptr); std::vector x(bs * n), y(bs * n); - RandomVec(bs * n, x.data(), -2.f, 2.f); + RandomVec(bs * n, x.data()); const T* x_data = x.data(); T* y_data = y.data(); @@ -588,7 +737,97 @@ void TestSoftmaxKernel() { } template -void TestNCHW16CMulNCKernel() { +void TestKernelEmbSeqPoolTuples() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + int64_t tbl_h = 1e4; + std::vector pool_types = { + jit::SeqPoolType::kSum}; // only support sum yet + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000)); + for (int tbl_w : test_sizes) { + std::vector table(tbl_h * tbl_w); + RandomVec(tbl_h * tbl_w, table.data()); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 9, 13, 16}) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + std::vector idx(idx_h * idx_w); + RandomVec(idx_h * idx_w, idx.data(), 0, tbl_h - 1); + int64_t out_w = tbl_w * idx_w; + std::vector oref(out_w); + const int64_t* idx_data = idx.data(); + T* o_data = oref.data(); + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + ref(table_data, idx_data, o_data, &attr); + + TestAllImpls, PlaceType, std::vector, + std::vector, std::vector>(attr, table, idx, + oref, attr); + } + } + } + } +} + +template +void TestKernelSgdTuples() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + const T lr = 0.1; + auto UnDuplicatedRandomVec = [](int n, const int64_t lower, + const int64_t upper) -> std::vector { + PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1); + PADDLE_ENFORCE_GT(n, 0); + std::vector all, out; + for (int i = 0; i < n; ++i) { + all.push_back(i); + } + std::random_shuffle(all.begin(), all.end()); + out.insert(out.begin(), all.begin(), all.begin() + n); + return out; + }; + for (int param_h : {1, 10}) { + for (int grad_w : TestSizes()) { + std::vector param(param_h * grad_w); + std::vector param_out(param_h * grad_w); + RandomVec(param_h * grad_w, param.data()); + const T* param_data = param.data(); + T* out_data = param_out.data(); + for (int rows_size = 1; rows_size <= param_h; ++rows_size) { + std::vector grad(rows_size * grad_w); + std::vector rows = + UnDuplicatedRandomVec(rows_size, 0, rows_size - 1); + RandomVec(rows_size * grad_w, grad.data()); + const int64_t* rows_data = rows.data(); + const T* grad_data = grad.data(); + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size); + ref(&lr, param_data, grad_data, rows_data, out_data, &attr); + + // inplace test + std::vector inp(param.size()); + std::copy(param.begin(), param.end(), inp.begin()); + T* inp_data = inp.data(); + ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr); + // only the selected rows should be equal + for (int i = 0; i < rows_size; ++i) { + ExpectEQ(inp_data + rows[i] * grad_w, out_data + rows[i] * grad_w, + grad_w); + } + + TestAllImpls, PlaceType, T, std::vector, + std::vector, std::vector, std::vector>( + attr, lr, param, grad, rows, param_out, attr); + } + } + } +} + +template +void TestKernelNCHW16CMulNCTuples() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); const int n = 3, c = 16 * 4, h = 10, w = 10; auto ref = jit::GetRefer>(); @@ -596,8 +835,8 @@ void TestNCHW16CMulNCKernel() { int sz = n * c * h * w; std::vector x(sz), y(n * c), zref(sz); std::vector ztgt(sz), zjit(sz); - RandomVec(sz, x.data(), -2.f, 2.f); - RandomVec(n * c, y.data(), -2.f, 2.f); + RandomVec(sz, x.data()); + RandomVec(n * c, y.data()); const T* x_data = x.data(); const T* y_data = y.data(); @@ -640,129 +879,166 @@ void TestNCHW16CMulNCKernel() { } } -// XYZNTuple -TEST(JITKernel, kVMul) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVAdd) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVAddRelu) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -TEST(JITKernel, kVSub) { - TestXYZNKernel(); - TestXYZNKernel(); -} - -// AXYNTuples -TEST(JITKernel, kVScal) { - TestAXYNKernel(); - TestAXYNKernel(); -} - -TEST(JITKernel, kVAddBias) { - TestAXYNKernel(); - TestAXYNKernel(); -} - -// XRNTuples -TEST(JITKernel, kHMax) { - TestXRNKernel(); - TestXRNKernel(); -} - -TEST(JITKernel, kHSum) { - TestXRNKernel(); - TestXRNKernel(); -} - -// XYNTuples -TEST(JITKernel, kVRelu) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVIdentity) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVSquare) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVExp) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVSigmoid) { - TestXYNKernel(); - TestXYNKernel(); -} - -TEST(JITKernel, kVTanh) { - TestXYNKernel(); - TestXYNKernel(); -} - -// LSTM -TEST(JITKernel, kLSTMCtHt) { - TestLSTMKernel(); - TestLSTMKernel(); -} +template +void TestKernelLayerNormTuples() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + const T epsilon = 9.99999975e-06; + for (int n : {1, 2, 10}) { + for (int x_dim_0 : {1, 9, 17, 50}) { + int left = n * x_dim_0; + for (int x_dim_1 : TestSizes()) { + int right = x_dim_1; + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + int sz = left * right; + std::vector x(sz), mean(left), var(left), scale(right), bias(right), + outref(sz); + RandomVec(sz, x.data()); + RandomVec(left, mean.data()); + RandomVec(left, var.data()); + RandomVec(right, scale.data()); + RandomVec(right, bias.data()); + + const T* scale_data = scale.data(); + const T* bias_data = bias.data(); + T* x_data = x.data(); + T* mean_data = mean.data(); + T* var_data = var.data(); + T* outref_data = outref.data(); -TEST(JITKernel, kLSTMC1H1) { - TestLSTMKernel(); - TestLSTMKernel(); -} + ref(x_data, outref_data, mean_data, var_data, scale_data, bias_data, + left, epsilon, right); -// GRU -TEST(JITKernel, kGRUH1) { - TestGRUKernel(); - TestGRUKernel(); + TestAllImpls, PlaceType, std::vector, + std::vector, std::vector, std::vector, + std::vector, std::vector, int, float>( + right, x, outref, mean, var, scale, bias, left, epsilon, right); + } + } + } } -TEST(JITKernel, kGRUHtPart1) { - TestGRUKernel(); - TestGRUKernel(); +template +void TestKernelCRFDecodingTuples() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + constexpr int state_trans_base_idx = 2; + auto test_sizes = TestSizes(); + test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 2000)); + for (int seq_len : {1, 11, 17, 50}) { + for (int tag_num : test_sizes) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + int x_sz = seq_len * tag_num; + int w_sz = (tag_num + state_trans_base_idx) * tag_num; + std::vector x(x_sz), w(w_sz), alpharef(x_sz); + std::vector trackref(x_sz); + RandomVec(x_sz, x.data()); + RandomVec(w_sz, w.data()); + + ref(seq_len, (const T*)x.data(), (const T*)w.data(), alpharef.data(), + trackref.data(), tag_num); + + TestAllImpls, PlaceType, int, + std::vector, std::vector, std::vector, + std::vector, int>(tag_num, seq_len, x, w, alpharef, + trackref, tag_num); + } + } } -TEST(JITKernel, kGRUHtPart2) { - TestGRUKernel(); - TestGRUKernel(); -} +template +void TestKernelVBroadcastTuples() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + for (int w : TestSizes()) { + std::vector x(w); + RandomVec(w, x.data()); + const T* x_data = x.data(); + for (int64_t h : {1, 2, 6}) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + std::vector y(w * h); + T* y_data = y.data(); + ref(x_data, y_data, h, w); -TEST(JITKernel, kSeqPool) { - TestSeqPoolKernel(); - TestSeqPoolKernel(); + TestAllImpls, PlaceType, std::vector, + std::vector, int64_t>(static_cast(w), x, y, h, + static_cast(w)); + } + } } -TEST(JITKernel, kMatMul) { - TestMatMulKernel(); - TestMatMulKernel(); -} +#define TEST_CPU_KERNEL(test_tuple, kernel_type) \ + TEST(JITKernel, kernel_type) { \ + TestKernel##test_tuple(); \ + TestKernel##test_tuple(); \ + } -TEST(JITKernel, kSoftmax) { - TestSoftmaxKernel(); - TestSoftmaxKernel(); +TEST_CPU_KERNEL(XYZNTuples, kVMul); +TEST_CPU_KERNEL(XYZNTuples, kVAdd); +TEST_CPU_KERNEL(XYZNTuples, kVAddRelu); +TEST_CPU_KERNEL(XYZNTuples, kVSub); + +TEST_CPU_KERNEL(AXYNTuples, kVScal); +TEST_CPU_KERNEL(AXYNTuples, kVAddBias); + +TEST_CPU_KERNEL(XRNTuples, kHMax); +TEST_CPU_KERNEL(XRNTuples, kHSum); + +TEST_CPU_KERNEL(XYNTuples, kVRelu); +TEST_CPU_KERNEL(XYNTuples, kVIdentity); +TEST_CPU_KERNEL(XYNTuples, kVSquare); +TEST_CPU_KERNEL(XYNTuples, kVExp); +TEST_CPU_KERNEL(XYNTuples, kVSigmoid); +TEST_CPU_KERNEL(XYNTuples, kVTanh); +TEST_CPU_KERNEL(XYNTuples, kVCopy); + +TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt); +TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1); + +TEST_CPU_KERNEL(GRUTuples, kGRUH1); +TEST_CPU_KERNEL(GRUTuples, kGRUHtPart1); +TEST_CPU_KERNEL(GRUTuples, kGRUHtPart2); + +TEST_CPU_KERNEL(NCHW16CMulNCTuples, kNCHW16CMulNC); + +TEST_CPU_KERNEL(SeqPoolTuples, kSeqPool); +TEST_CPU_KERNEL(MatMulTuples, kMatMul); +TEST_CPU_KERNEL(SoftmaxTuples, kSoftmax); +TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool); +TEST_CPU_KERNEL(SgdTuples, kSgd); +TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm); +TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding); +TEST_CPU_KERNEL(VBroadcastTuples, kVBroadcast); + +TEST(JITKernel_key, lstm) { + jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr2(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh); + jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh); + + auto key1 = jit::JitCodeKey(attr1); + auto key2 = jit::JitCodeKey(attr2); + auto key3 = jit::JitCodeKey(attr3); + auto key4 = jit::JitCodeKey(attr4); + + EXPECT_TRUE(key1 != key2); + EXPECT_TRUE(key2 == key3); + EXPECT_TRUE(key3 != key4); } -TEST(JITKernel, kNCHW16CMulNC) { - TestNCHW16CMulNCKernel(); - TestNCHW16CMulNCKernel(); -} +TEST(JITKernel_key, gru) { + jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr2(9, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh); + jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity); -// TODO(yihua/TJ): add crf decoding and layer norm unit tests + auto key1 = jit::JitCodeKey(attr1); + auto key2 = jit::JitCodeKey(attr2); + auto key3 = jit::JitCodeKey(attr3); + auto key4 = jit::JitCodeKey(attr4); -TEST(JITKernel, pool) { - // TODO(TJ): add some test + EXPECT_TRUE(key1 != key2); + EXPECT_TRUE(key2 == key3); + EXPECT_TRUE(key3 != key4); } +// TODO(TJ): add more test about key and pool diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index f83fe355b85566d229a2673d8f27cfb5ca4831d5..b9db6daf0825b573bfc7f684266212f998c91627 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -44,11 +44,11 @@ class LayerNormOp : public framework::OperatorWithKernel { int left = static_cast(matrix_dim[0]); int right = static_cast(matrix_dim[1]); if (ctx->HasInput("Scale")) { - PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], right); } if (ctx->HasInput("Bias")) { - PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], right); } diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index 1da14631e35608d479e1b861228d52d6d57def79..e17b6cb59898524d793f3cc78a09232f5b664617 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -144,12 +144,12 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { "Output(LogLikelihood) should be not null."); auto emission_dims = ctx->GetInputDim("Emission"); - PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(emission_dims.size(), 2, "The Input(Emission) should be a 2-D tensor."); PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed."); auto transition_dims = ctx->GetInputDim("Transition"); - PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(transition_dims.size(), 2, "The Input(Transition) should be a 2-D tensor."); PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], @@ -202,13 +202,13 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { "Input(LogLikelihood@GRAD) shoudl be not null."); auto emission_exps_dims = ctx->GetInputDim("EmissionExps"); - PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 2, "The Input(EmissionExps) should be a 2-D tensor."); PADDLE_ENFORCE(emission_exps_dims[0], "An empty mini-batch is not allowed."); auto transition_exps_dims = ctx->GetInputDim("TransitionExps"); - PADDLE_ENFORCE_EQ(transition_exps_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(transition_exps_dims.size(), 2, "The Input(TransitionExps) should be a 2-D tensor."); PADDLE_ENFORCE_EQ( transition_exps_dims[0] - 2, transition_exps_dims[1], diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index c4a2282e16483dbe78a32a4148c5bc4349dde3dc..f5c802986e0573e81b3ab6187b57657b52b37215 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -64,7 +64,7 @@ class LoadCombineOp : public framework::OperatorBase { auto *tensor = out_var->GetMutable(); // Error checking - PADDLE_ENFORCE(static_cast(buffer), "Cannot read more"); + PADDLE_ENFORCE(static_cast(*buffer), "Cannot read more"); // Get data from fin to tensor DeserializeFromStream(*buffer, tensor, dev_ctx); @@ -90,6 +90,10 @@ class LoadCombineOp : public framework::OperatorBase { tensor->ShareDataWith(fp16_tensor); } } + buffer->peek(); + PADDLE_ENFORCE(buffer->eof(), + "You are not allowed to load partial data via " + "load_combine_op, use load_op instead."); } }; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index a7d0fd4856edc74237151c64f286d468ad86e7ca..56c6e37ae3c62e1f9af66ef6ed16111dc1e93d9d 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -129,6 +129,7 @@ class LookupTableGradKernel : public framework::OpKernel { "must be either LoDTensor or SelectedRows"); } + int64_t padding_idx = context.Attr("padding_idx"); bool is_sparse = context.Attr("is_sparse"); // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. @@ -187,10 +188,15 @@ class LookupTableGradKernel : public framework::OpKernel { memset(d_table_data, 0, d_table->numel() * sizeof(T)); for (int64_t i = 0; i < ids->numel(); ++i) { - PADDLE_ENFORCE_LT(ids_data[i], N); - PADDLE_ENFORCE_GE(ids_data[i], 0); - for (int j = 0; j < D; ++j) { - d_table_data[ids_data[i] * D + j] += d_output_data[i * D + j]; + if (padding_idx != kNoPadding && ids_data[i] == padding_idx) { + // the gradient of padding_idx should be 0, already done by memset, so + // do nothing. + } else { + PADDLE_ENFORCE_LT(ids_data[i], N); + PADDLE_ENFORCE_GE(ids_data[i], 0); + for (int j = 0; j < D; ++j) { + d_table_data[ids_data[i] * D + j] += d_output_data[i * D + j]; + } } } } diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 7d62d2d020ec2e3a29ad8720a8f04fead3a90a63..ca998826dd0118ab4b1ecc23bed8ef882f1bcc92 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -151,9 +151,10 @@ class LSTMKernel : public framework::OpKernel { lstm_value.output_value = out_t.data(); lstm_value.state_value = cell_t.data(); lstm_value.state_active_value = cell_pre_act_t.data(); + T cell_clip = 0.0; math::LstmUnitFunctor::compute( - device_ctx, lstm_value, frame_size, cur_batch_size, gate_act, - cell_act, cand_act); + device_ctx, lstm_value, frame_size, cur_batch_size, cell_clip, + gate_act, cell_act, cand_act); lstm_value.prev_state_value = lstm_value.state_value; } @@ -311,10 +312,15 @@ class LSTMGradKernel : public framework::OpKernel { lstm_grad.prev_state_grad = c0_g ? ordered_c0_g.data() : nullptr; } + // lstm_value.output_value not used in bp, set to nullptr + // lstm_grad.state_active_grad not used in bp, set to nullptr + lstm_value.output_value = nullptr; + lstm_grad.state_active_grad = nullptr; int cur_batch_size = bend - bstart; + T cell_clip = 0.0; math::LstmUnitGradFunctor::compute( device_ctx, lstm_value, lstm_grad, frame_size, cur_batch_size, - gate_act, cell_act, cand_act); + cell_clip, gate_act, cell_act, cand_act); if (n > 0) { int pre_h_start = static_cast(batch_starts[n - 1]); diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index 7a62bc9f828e4d3485628747cdf52c60c5354144..2728aa8a4ee21a9e1fe3deddcdba4c35a6aba7bc 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -73,12 +73,6 @@ class LSTMPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("C0"), "Input(C0) of LSTMP operator should not be null after " "Input(H0) provided."); - auto h_dims = ctx->GetInputDim("H0"); - auto c_dims = ctx->GetInputDim("C0"); - PADDLE_ENFORCE(h_dims == c_dims, - "The dimension of Input(H0) and Input(C0) " - "should be the same."); - ctx->SetOutputDim("OrderedP0", {h_dims[0], proj_dims[1]}); } auto b_dims = ctx->GetInputDim("Bias"); @@ -180,11 +174,6 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker { "This LoDTensor is obtained in the forward and used in the " "backward.") .AsIntermediate(); - AddOutput("OrderedP0", - "(Tensor) the projection of the initial hidden state " - "H0. This is a tensor with shape (N x P), where N is the " - "batch size and P is the hidden size.") - .AsIntermediate(); AddAttr("use_peepholes", "(bool, defalut: True) " "whether to enable diagonal/peephole connections.") @@ -193,6 +182,16 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker { "(bool, defalut: False) " "whether to compute reversed LSTMP.") .SetDefault(false); + AddAttr("cell_clip", + "(float, defalut: 0.0) " + "Clip for Tensor for cell state tensor when clip value is " + "greater than 0.0") + .SetDefault(0.0); + AddAttr("proj_clip", + "(float, defalut: 0.0) " + "Clip for Tensor for projection tensor when clip value is " + "greater than 0.0") + .SetDefault(0.0); AddAttr( "gate_activation", "(string, default: sigmoid)" diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 370dd04d1449a8e211febf9a4f9e90e6f5008e20..c7d6e4205f8862526904e4fa767a2f4c4a2d8481 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" @@ -21,17 +22,50 @@ limitations under the License. */ #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/lstm_compute.h" #include "paddle/fluid/operators/math/sequence2batch.h" +#include "paddle/fluid/platform/transform.h" namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; using Tensor = framework::Tensor; +using platform::Transform; template using EigenMatrix = framework::EigenMatrix; +template +class _ClipFunctor { + public: + explicit _ClipFunctor(const T min, const T max) : min_(min), max_(max) {} + HOSTDEVICE T operator()(const T& x) const { + if (x < min_) + return min_; + else if (x > max_) + return max_; + else + return x; + } + + private: + T min_; + T max_; +}; + +template +class _ClipGradFunctor { + public: + explicit _ClipGradFunctor(const T min, const T max) : min_(min), max_(max) {} + HOSTDEVICE T operator()(const T& x, const T& y) const { + return (y > min_ && y < max_) ? x : 0; + } + + private: + T min_; + T max_; +}; + template inline void ReorderInitState(const DeviceContext& ctx, const framework::Tensor& src, @@ -67,9 +101,11 @@ class LSTMPKernel : public framework::OpKernel { auto* bias = ctx.Input("Bias"); auto* hidden_t0 = ctx.Input("H0"); - auto* ordered_proj0 = ctx.Output("OrderedP0"); auto* cell_t0 = ctx.Input("C0"); + auto proj_clip = static_cast(ctx.Attr("proj_clip")); + auto cell_clip = static_cast(ctx.Attr("cell_clip")); + auto* batch_gate = ctx.Output("BatchGate"); batch_gate->mutable_data(ctx.GetPlace()); auto* proj_out = ctx.Output("Projection"); @@ -110,6 +146,7 @@ class LSTMPKernel : public framework::OpKernel { } lstmp_value.prev_state_value = nullptr; Tensor ordered_c0; + Tensor ordered_h0; framework::Vector order(batch_gate->lod()[2]); @@ -169,18 +206,9 @@ class LSTMPKernel : public framework::OpKernel { // Since the batch computing for LSTMP reorders the input sequence // according to their length. The initialized hidden state also needs // to reorder. - - Tensor ordered_h0; - ordered_proj0->mutable_data(ctx.GetPlace()); ReorderInitState(device_ctx, *hidden_t0, order, &ordered_h0, true); - blas.MatMul(ordered_h0, false, *proj_weight, false, static_cast(1.0), - ordered_proj0, static_cast(0.0)); - if (proj_act != math::detail::ActivationType::kIdentity) { - auto proj0_dev = EigenMatrix::From(*ordered_proj0); - ActCompute(cell_act, place, proj0_dev, proj0_dev); - } - blas.MatMul(*ordered_proj0, false, *weight, false, static_cast(1.0), + blas.MatMul(ordered_h0, false, *weight, false, static_cast(1.0), &gate_t, static_cast(1.0)); } @@ -189,8 +217,8 @@ class LSTMPKernel : public framework::OpKernel { lstmp_value.state_value = cell_t.data(); lstmp_value.state_active_value = cell_pre_act_t.data(); math::LstmUnitFunctor::compute( - device_ctx, lstmp_value, frame_size, cur_batch_size, gate_act, - cell_act, cand_act); + device_ctx, lstmp_value, frame_size, cur_batch_size, cell_clip, + gate_act, cell_act, cand_act); lstmp_value.prev_state_value = lstmp_value.state_value; blas.MatMul(hidden_t, false, *proj_weight, false, static_cast(1.0), &proj_t, static_cast(0.0)); @@ -198,6 +226,14 @@ class LSTMPKernel : public framework::OpKernel { auto proj_t_dev = EigenMatrix::From(proj_t); ActCompute(cell_act, place, proj_t_dev, proj_t_dev); } + if (proj_clip && proj_clip > 0.0) { + T* x_data = proj_t.data(); + int64_t numel = proj_t.numel(); + Transform trans; + trans(ctx.template device_context(), x_data, + x_data + numel, x_data, + _ClipFunctor(-1.0 * proj_clip, proj_clip)); + } } math::Batch2LoDTensorFunctor to_seq; @@ -239,6 +275,9 @@ class LSTMPGradKernel : public framework::OpKernel { auto* proj_out = ctx.Input("Projection"); auto* cell_out = ctx.Input("Cell"); + auto proj_clip = static_cast(ctx.Attr("proj_clip")); + auto cell_clip = static_cast(ctx.Attr("cell_clip")); + auto* batch_gate = ctx.Input("BatchGate"); auto* batch_cell_pre_act = ctx.Input("BatchCellPreAct"); auto* batch_hidden = ctx.Input("BatchHidden"); @@ -253,7 +292,6 @@ class LSTMPGradKernel : public framework::OpKernel { auto* bias_g = ctx.Output(framework::GradVarName("Bias")); auto* h0 = ctx.Input("H0"); - auto* ordered_proj0 = ctx.Input("OrderedP0"); auto* c0 = ctx.Input("C0"); auto* h0_g = ctx.Output(framework::GradVarName("H0")); @@ -363,6 +401,17 @@ class LSTMPGradKernel : public framework::OpKernel { Tensor cur_proj = batch_proj.Slice(bstart, bend); Tensor proj_g = batch_proj_g.Slice(bstart, bend); + + if (proj_clip && proj_clip > 0.0) { + T* dx_data = proj_g.data(); + T* x_data = cur_proj.data(); + int64_t numel = proj_g.numel(); + Transform trans; + trans(ctx.template device_context(), dx_data, + dx_data + numel, x_data, dx_data, + _ClipGradFunctor(-1.0 * proj_clip, proj_clip)); + } + if (proj_act != math::detail::ActivationType::kIdentity) { auto cur_proj_dev = EigenMatrix::From(cur_proj); auto proj_g_dev = EigenMatrix::From(proj_g); @@ -405,9 +454,14 @@ class LSTMPGradKernel : public framework::OpKernel { } int cur_batch_size = bend - bstart; + // lstmp_value.output_value not used in bp, set to null + // lstmp_grad.state_active_grad not used in bp, set to null + lstmp_value.output_value = nullptr; + lstmp_grad.state_active_grad = nullptr; + math::LstmUnitGradFunctor::compute( device_ctx, lstmp_value, lstmp_grad, frame_size, cur_batch_size, - gate_act, cell_act, cand_act); + cell_clip, gate_act, cell_act, cand_act); if (n > 0) { int pre_h_start = static_cast(batch_starts[n - 1]); @@ -426,31 +480,14 @@ class LSTMPGradKernel : public framework::OpKernel { ReorderInitState(device_ctx, *h0, order, &ordered_h0, true); if (weight_g) { - blas.MatMul(*ordered_proj0, true, gate_g, false, - static_cast(1.0), weight_g, static_cast(1.0)); + blas.MatMul(ordered_h0, true, gate_g, false, static_cast(1.0), + weight_g, static_cast(1.0)); } } if (h0 && (h0_g || proj_weight_g)) { ordered_h0_g.mutable_data(h0_g->dims(), ctx.GetPlace()); - Tensor proj0_g; - proj0_g.Resize({in_dims[0], proj_weight->dims()[1]}); - proj0_g.mutable_data(ctx.GetPlace()); blas.MatMul(gate_g, false, *weight, true, static_cast(1.0), - &proj0_g, static_cast(0.0)); - if (proj_act != math::detail::ActivationType::kIdentity) { - auto proj0_dev = EigenMatrix::From(*ordered_proj0); - auto proj0_g_dev = EigenMatrix::From(proj0_g); - ActGradCompute(cell_act, place, proj0_dev, proj0_dev, proj0_g_dev, - proj0_g_dev); - } - if (h0_g) { - blas.MatMul(proj0_g, false, *proj_weight, true, static_cast(1.0), - &ordered_h0_g, static_cast(0.0)); - } - if (proj_weight_g) { - blas.MatMul(ordered_h0, true, proj0_g, false, static_cast(1.0), - proj_weight_g, static_cast(1.0)); - } + &ordered_h0_g, static_cast(0.0)); } } } diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 4b6eef18d8b967af5f3a5df0dee750620e7e412a..d4837696241b8c4e3cca4f2afe872c6be559853c 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -39,6 +39,7 @@ math_library(cross_entropy) math_library(cos_sim_functor) math_library(depthwise_conv DEPS cub) math_library(im2col) +math_library(sample_prob) math_library(sampler) math_library(gru_compute DEPS activation_functions math_function) diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 61d021ef627f1ccd90b992c2078a7f3ca879422d..d66778a6fe05c0460c805581ee6ffd6d5e9d746e 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam( __syncthreads(); } + if ((num_used_threads & 0x1) != 0) { + // If num_used_threads is a odd number, merge local top_beam of thread 0 + // and num_used_threads - 1 + if (tid_of_seq == 0) { + int index_in_sh = (num_used_threads - 1 + tid) * beam_size; + for (int i = 0; i < beam_size; i++) { + Insert(top_beam_local, top_beam[index_in_sh], beam_size); + index_in_sh++; + } + } + } + num_used_threads = num_used_threads >> 1; if (tid_of_seq < num_used_threads) { int index_in_sh = (num_used_threads + tid) * beam_size; diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index f67f57827bc03e134bf87edd5bf033adb5098916..ce8109f64d62b0d412419107881952f1b4ffc75e 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -184,6 +184,9 @@ class Blas { template void VINV(int n, const T* a, T* y) const; + template + void VMERF(int n, const T* a, T* y, int64_t mode) const; + private: const DeviceContext& context_; }; @@ -290,6 +293,11 @@ class BlasT : private Blas { Base()->template VINV(args...); } + template + void VMERF(ARGS... args) const { + Base()->template VMERF(args...); + } + private: const Blas* Base() const { return static_cast*>(this); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 972366bc093f4b7f0a090cf31213f75ccd89fd82..ba995dabecbfab8c4952bb7efeaa381f8078821a 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -123,6 +123,11 @@ struct CBlas { static void VINV(ARGS... args) { platform::dynload::vsInv(args...); } + + template + static void VMERF(ARGS... args) { + platform::dynload::vmsErf(args...); + } }; template <> @@ -223,6 +228,11 @@ struct CBlas { static void VINV(ARGS... args) { platform::dynload::vdInv(args...); } + + template + static void VMERF(ARGS... args) { + platform::dynload::vmdErf(args...); + } }; #else @@ -625,6 +635,19 @@ void Blas::VINV(int n, const T *a, T *y) const { #endif } +template <> +template +void Blas::VMERF(int n, const T *a, T *y, + int64_t mode) const { +#ifdef PADDLE_WITH_MKLML + CBlas::VMERF(n, a, y, mode); +#else + for (int i = 0; i < n; ++i) { + y[i] = std::erf(a[i]); + } +#endif +} + } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h b/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h index 2e3779ff0845294e71f27801049c010e0a585e6b..ad79c58063a8a12c703979fe32a8e671a5ade857 100644 --- a/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h +++ b/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h @@ -32,7 +32,8 @@ namespace detail { template void naive_lstm_forward_one_sequence(Op op, LstmMetaValue value, - int frame_size, ActivationType active_node, + int frame_size, T cell_clip, + ActivationType active_node, ActivationType active_gate, ActivationType active_state) { T r_value_in; @@ -67,7 +68,7 @@ void naive_lstm_forward_one_sequence(Op op, LstmMetaValue value, op(&r_value_in, &r_value_ig, &r_value_fg, &r_value_og, &r_prev_state, &r_state, &r_state_atv, &r_out, &r_checkI, &r_checkF, &r_checkO, - active_node, active_gate, active_state); + &cell_clip, active_node, active_gate, active_state); value_in[i] = r_value_in; value_ig[i] = r_value_ig; @@ -82,7 +83,7 @@ void naive_lstm_forward_one_sequence(Op op, LstmMetaValue value, template void naive_lstm_backward_one_sequence(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - ActivationType active_node, + T cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { T r_value_in; @@ -135,7 +136,7 @@ void naive_lstm_backward_one_sequence(Op op, LstmMetaValue value, &r_grad_ig, &r_grad_fg, &r_grad_og, &r_prev_state, &r_prev_state_grad, &r_state, &r_state_grad, &r_state_atv, &r_output_grad, &r_checkI, &r_checkF, &r_checkO, &r_checkIGrad, &r_checkFGrad, &r_checkOGrad, - active_node, active_gate, active_state); + &cell_clip, active_node, active_gate, active_state); grad_in[i] = r_grad_in; grad_ig[i] = r_grad_ig; @@ -154,7 +155,8 @@ void naive_lstm_backward_one_sequence(Op op, LstmMetaValue value, template void avx_lstm_forward_one_sequence(Op op, LstmMetaValue value, - int frame_size, ActivationType active_node, + int frame_size, T cell_clip, + ActivationType active_node, ActivationType active_gate, ActivationType active_state) { #ifdef __AVX__ @@ -194,7 +196,7 @@ void avx_lstm_forward_one_sequence(Op op, LstmMetaValue value, op(&r_value_in, &r_value_ig, &r_value_fg, &r_value_og, &r_prev_state, &r_state, &r_state_atv, &r_out, &r_checkI, &r_checkF, &r_checkO, - active_node, active_gate, active_state); + &cell_clip, active_node, active_gate, active_state); value_in[i] = r_value_in; value_ig[i] = r_value_ig; @@ -210,7 +212,7 @@ void avx_lstm_forward_one_sequence(Op op, LstmMetaValue value, template void avx_lstm_backward_one_sequence(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - ActivationType active_node, + T cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { #ifdef __AVX__ @@ -268,7 +270,7 @@ void avx_lstm_backward_one_sequence(Op op, LstmMetaValue value, &r_grad_ig, &r_grad_fg, &r_grad_og, &r_prev_state, &r_prev_state_grad, &r_state, &r_state_grad, &r_state_atv, &r_output_grad, &r_checkI, &r_checkF, &r_checkO, &r_checkIGrad, &r_checkFGrad, &r_checkOGrad, - active_node, active_gate, active_state); + &cell_clip, active_node, active_gate, active_state); grad_in[i] = r_grad_in; grad_ig[i] = r_grad_ig; @@ -292,27 +294,27 @@ void avx_lstm_backward_one_sequence(Op op, LstmMetaValue value, template void cpu_lstm_forward(Op op, LstmMetaValue value, int frame_size, - ActivationType active_node, ActivationType active_gate, - ActivationType active_state) { + T cell_clip, ActivationType active_node, + ActivationType active_gate, ActivationType active_state) { if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same::value)) { - avx_lstm_forward_one_sequence(op, value, frame_size, active_node, - active_gate, active_state); + avx_lstm_forward_one_sequence(op, value, frame_size, cell_clip, + active_node, active_gate, active_state); } else { - naive_lstm_forward_one_sequence(op, value, frame_size, active_node, - active_gate, active_state); + naive_lstm_forward_one_sequence(op, value, frame_size, cell_clip, + active_node, active_gate, active_state); } } template void cpu_lstm_backward(Op op, LstmMetaValue value, LstmMetaGrad grad, - int frame_size, ActivationType active_node, + int frame_size, T cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same::value)) { - avx_lstm_backward_one_sequence(op, value, grad, frame_size, active_node, - active_gate, active_state); + avx_lstm_backward_one_sequence(op, value, grad, frame_size, cell_clip, + active_node, active_gate, active_state); } else { - naive_lstm_backward_one_sequence(op, value, grad, frame_size, + naive_lstm_backward_one_sequence(op, value, grad, frame_size, cell_clip, active_node, active_gate, active_state); } } diff --git a/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h b/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h index 2aecb69237fdf344ebc0bfe72d9c7c147f06358d..e0ca9e7f5b2f4a8bb837768d645b5103aa3e6760 100644 --- a/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h +++ b/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h @@ -31,7 +31,8 @@ namespace detail { */ template __global__ void KeLstmForward(Op op, LstmMetaValue value, int frame_size, - int batch_size, ActivationType active_node, + int batch_size, T cell_clip, + ActivationType active_node, ActivationType active_gate, ActivationType active_state) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -72,7 +73,7 @@ __global__ void KeLstmForward(Op op, LstmMetaValue value, int frame_size, op(&r_value_in, &r_value_ig, &r_value_fg, &r_value_og, &r_prev_state, &r_state, &r_state_atv, &r_out, &r_checkI, &r_checkF, &r_checkO, - active_node, active_gate, active_state); + &cell_clip, active_node, active_gate, active_state); value.gate_value[frame_idx] = r_value_in; value.gate_value[frame_idx + frame_size] = r_value_ig; @@ -91,7 +92,8 @@ __global__ void KeLstmForward(Op op, LstmMetaValue value, int frame_size, template __global__ void KeLstmBackward(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - int batch_size, ActivationType active_node, + int batch_size, T cell_clip, + ActivationType active_node, ActivationType active_gate, ActivationType active_state) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -148,8 +150,8 @@ __global__ void KeLstmBackward(Op op, LstmMetaValue value, op(&r_value_in, &r_value_ig, &r_value_fg, &r_value_og, &r_grad_in, &r_grad_ig, &r_grad_fg, &r_grad_og, &r_prev_state, &r_prev_state_grad, &r_state, &r_state_grad, &r_state_atv, &r_output_grad, &r_checkI, &r_checkF, - &r_checkO, &r_checkIGrad, &r_checkFGrad, &r_checkOGrad, active_node, - active_gate, active_state); + &r_checkO, &r_checkIGrad, &r_checkFGrad, &r_checkOGrad, &cell_clip, + active_node, active_gate, active_state); grad.gate_grad[frame_idx] = r_grad_in; grad.gate_grad[frame_idx + frame_size] = r_grad_ig; @@ -185,8 +187,8 @@ __global__ void KeLstmBackward(Op op, LstmMetaValue value, template void gpu_lstm_forward(const platform::DeviceContext& context, Op op, LstmMetaValue value, int frame_size, int batch_size, - ActivationType active_node, ActivationType active_gate, - ActivationType active_state) { + T cell_clip, ActivationType active_node, + ActivationType active_gate, ActivationType active_state) { dim3 threads; dim3 grid; if (batch_size == 1) { @@ -205,12 +207,12 @@ void gpu_lstm_forward(const platform::DeviceContext& context, Op op, if (batch_size == 1) { KeLstmForward<<>>( - op, value, frame_size, batch_size, active_node, active_gate, + op, value, frame_size, batch_size, cell_clip, active_node, active_gate, active_state); } else { KeLstmForward<<>>( - op, value, frame_size, batch_size, active_node, active_gate, + op, value, frame_size, batch_size, cell_clip, active_node, active_gate, active_state); } } @@ -218,7 +220,7 @@ void gpu_lstm_forward(const platform::DeviceContext& context, Op op, template void gpu_lstm_backward(const platform::DeviceContext& context, Op op, LstmMetaValue value, LstmMetaGrad grad, - int frame_size, int batch_size, + int frame_size, int batch_size, T cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { dim3 threads; @@ -239,13 +241,13 @@ void gpu_lstm_backward(const platform::DeviceContext& context, Op op, if (batch_size == 1) { KeLstmBackward<<>>( - op, value, grad, frame_size, batch_size, active_node, active_gate, - active_state); + op, value, grad, frame_size, batch_size, cell_clip, active_node, + active_gate, active_state); } else { KeLstmBackward<<>>( - op, value, grad, frame_size, batch_size, active_node, active_gate, - active_state); + op, value, grad, frame_size, batch_size, cell_clip, active_node, + active_gate, active_state); } } diff --git a/paddle/fluid/operators/math/detail/lstm_kernel.h b/paddle/fluid/operators/math/detail/lstm_kernel.h index cbe73d62938d7c4c03a2c8731665260624417fd7..8149686c97a030b91e0c4de708b9abf07f83203d 100644 --- a/paddle/fluid/operators/math/detail/lstm_kernel.h +++ b/paddle/fluid/operators/math/detail/lstm_kernel.h @@ -29,7 +29,7 @@ class lstm { public: HOSTDEVICE void operator()(T *value_in, T *value_ig, T *value_fg, T *value_og, T *prev_state, T *state, T *state_atv, T *output, - T *checkI, T *checkF, T *checkO, + T *checkI, T *checkF, T *checkO, T *cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { @@ -37,6 +37,15 @@ class lstm { *value_ig = activation(*value_ig + (*prev_state) * (*checkI), active_gate); *value_fg = activation(*value_fg + (*prev_state) * (*checkF), active_gate); *state = (*value_in) * (*value_ig) + (*prev_state) * (*value_fg); + + if (*cell_clip > 0.0) { + if (*state < -1.0 * (*cell_clip)) { + *state = -1.0 * (*cell_clip); + } + if (*state > *cell_clip) { + *state = *cell_clip; + } + } *value_og = activation(*value_og + (*state) * (*checkO), active_gate); *state_atv = activation(*state, active_state); *output = (*value_og) * (*state_atv); @@ -52,7 +61,7 @@ class lstm { __m256 *value_fg, __m256 *value_og, __m256 *prev_state, __m256 *state, __m256 *state_atv, __m256 *output, __m256 *checkI, - __m256 *checkF, __m256 *checkO, + __m256 *checkF, __m256 *checkO, T *cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { @@ -65,6 +74,13 @@ class lstm { active_gate); *state = _mm256_add_ps(_mm256_mul_ps(*value_in, *value_ig), _mm256_mul_ps(*prev_state, *value_fg)); + + if (*cell_clip > 0.0f) { + __m256 min = _mm256_set1_ps(0.0f - *cell_clip); + __m256 max = _mm256_set1_ps(*cell_clip); + *state = _mm256_min_ps(max, *state); + *state = _mm256_max_ps(min, *state); + } *value_og = activation( _mm256_add_ps(*value_og, _mm256_mul_ps(*state, *checkO)), active_gate); *state_atv = activation(*state, active_state); @@ -86,15 +102,26 @@ class lstm { T *prev_state, T *prev_state_grad, T *state, T *state_grad, T *state_atv, T *output_grad, T *checkI, T *checkF, T *checkO, T *checkIGrad, - T *checkFGrad, T *checkOGrad, + T *checkFGrad, T *checkOGrad, T *cell_clip, ActivationType active_node, ActivationType active_gate, ActivationType active_state) { *grad_og = activation((*output_grad) * (*state_atv), *value_og, active_gate); - *state_grad += - activation((*output_grad) * (*value_og), *state_atv, active_state) + - (*grad_og) * (*checkO); + if (*cell_clip > 0.0f) { + if (*state >= (*cell_clip) || *state <= (0.0f - (*cell_clip))) { + *state_grad = 0.0f; + } else { + *state_grad += + activation((*output_grad) * (*value_og), *state_atv, active_state) + + (*grad_og) * (*checkO); + } + } else { + *state_grad += + activation((*output_grad) * (*value_og), *state_atv, active_state) + + (*grad_og) * (*checkO); + } + *grad_in = activation((*state_grad) * (*value_ig), *value_in, active_node); *grad_ig = activation((*state_grad) * (*value_in), *value_ig, active_gate); *grad_fg = @@ -117,15 +144,24 @@ class lstm { __m256 *prev_state, __m256 *prev_state_grad, __m256 *state, __m256 *state_grad, __m256 *state_atv, __m256 *output_grad, __m256 *checkI, __m256 *checkF, __m256 *checkO, __m256 *checkIGrad, - __m256 *checkFGrad, __m256 *checkOGrad, ActivationType active_node, - ActivationType active_gate, ActivationType active_state) { + __m256 *checkFGrad, __m256 *checkOGrad, T *cell_clip, + ActivationType active_node, ActivationType active_gate, + ActivationType active_state) { *grad_og = activation(_mm256_mul_ps(*output_grad, *state_atv), *value_og, active_gate); - *state_grad = - _mm256_add_ps(activation(_mm256_mul_ps(*output_grad, *value_og), - *state_atv, active_state), - *state_grad); - *state_grad = _mm256_add_ps(_mm256_mul_ps(*grad_og, *checkO), *state_grad); + if (*cell_clip > 0.0f) { + T *state_ = reinterpret_cast(state); + if (*state_ >= (*cell_clip) || *state_ <= (0.0f - (*cell_clip))) { + *state_grad = _mm256_set1_ps(0.0f); + } else { + *state_grad = + _mm256_add_ps(activation(_mm256_mul_ps(*output_grad, *value_og), + *state_atv, active_state), + *state_grad); + *state_grad = + _mm256_add_ps(_mm256_mul_ps(*grad_og, *checkO), *state_grad); + } + } *grad_in = activation(_mm256_mul_ps(*state_grad, *value_ig), *value_in, active_node); *grad_ig = activation(_mm256_mul_ps(*state_grad, *value_in), *value_ig, diff --git a/paddle/fluid/operators/math/lstm_compute.cc b/paddle/fluid/operators/math/lstm_compute.cc index b6882b4fd8e6db8592a282410888d5625bae742a..94bbcbb50670d9f0b11b77cf6a54a99c227521bf 100644 --- a/paddle/fluid/operators/math/lstm_compute.cc +++ b/paddle/fluid/operators/math/lstm_compute.cc @@ -24,12 +24,12 @@ template struct LstmUnitFunctor { static void compute(const platform::CPUDeviceContext& context, LstmMetaValue value, int frame_size, int batch_size, - const detail::ActivationType& gate_act, + T cell_clip, const detail::ActivationType& gate_act, const detail::ActivationType& cell_act, const detail::ActivationType& cand_act) { for (int b = 0; b < batch_size; b++) { detail::cpu_lstm_forward(detail::forward::lstm(), value, frame_size, - cand_act, gate_act, cell_act); + cell_clip, cand_act, gate_act, cell_act); value.gate_value += frame_size * 4; value.state_value += frame_size; value.state_active_value += frame_size; @@ -45,13 +45,14 @@ template struct LstmUnitGradFunctor { static void compute(const platform::CPUDeviceContext& context, LstmMetaValue value, LstmMetaGrad grad, - int frame_size, int batch_size, + int frame_size, int batch_size, T cell_clip, const detail::ActivationType& gate_act, const detail::ActivationType& cell_act, const detail::ActivationType& cand_act) { for (int b = 0; b < batch_size; b++) { detail::cpu_lstm_backward(detail::backward::lstm(), value, grad, - frame_size, cand_act, gate_act, cell_act); + frame_size, cell_clip, cand_act, gate_act, + cell_act); value.gate_value += frame_size * 4; value.state_value += frame_size; diff --git a/paddle/fluid/operators/math/lstm_compute.cu b/paddle/fluid/operators/math/lstm_compute.cu index 1233000083d6efc31fcbc527e8e9efb83224b4e3..e7445d3d40ae92ff66e7d33a38bfdebfc8455f0a 100644 --- a/paddle/fluid/operators/math/lstm_compute.cu +++ b/paddle/fluid/operators/math/lstm_compute.cu @@ -24,12 +24,12 @@ template struct LstmUnitFunctor { static void compute(const platform::CUDADeviceContext& context, LstmMetaValue value, int frame_size, int batch_size, - const detail::ActivationType& gate_act, + T cell_clip, const detail::ActivationType& gate_act, const detail::ActivationType& cell_act, const detail::ActivationType& cand_act) { detail::gpu_lstm_forward(context, detail::forward::lstm(), value, - frame_size, batch_size, cand_act, gate_act, - cell_act); + frame_size, batch_size, cell_clip, cand_act, + gate_act, cell_act); } }; @@ -37,13 +37,13 @@ template struct LstmUnitGradFunctor { static void compute(const platform::CUDADeviceContext& context, LstmMetaValue value, LstmMetaGrad grad, - int frame_size, int batch_size, + int frame_size, int batch_size, T cell_clip, const detail::ActivationType& gate_act, const detail::ActivationType& cell_act, const detail::ActivationType& cand_act) { detail::gpu_lstm_backward(context, detail::backward::lstm(), value, grad, - frame_size, batch_size, cand_act, gate_act, - cell_act); + frame_size, batch_size, cell_clip, cand_act, + gate_act, cell_act); } }; diff --git a/paddle/fluid/operators/math/lstm_compute.h b/paddle/fluid/operators/math/lstm_compute.h index ca2f78e6f318ce39bd2272bbce20f6a6f98fe430..80af5639387aaf6a983365e13c3478353c27a617 100644 --- a/paddle/fluid/operators/math/lstm_compute.h +++ b/paddle/fluid/operators/math/lstm_compute.h @@ -50,7 +50,7 @@ template class LstmUnitFunctor { public: static void compute(const DeviceContext &context, LstmMetaValue value, - int frame_size, int batch_size, + int frame_size, int batch_size, T cell_clip, const detail::ActivationType &gate_act, const detail::ActivationType &cell_act, const detail::ActivationType &cand_act); @@ -61,7 +61,7 @@ class LstmUnitGradFunctor { public: static void compute(const DeviceContext &context, LstmMetaValue value, LstmMetaGrad grad, int frame_size, int batch_size, - const detail::ActivationType &gate_act, + T cell_clip, const detail::ActivationType &gate_act, const detail::ActivationType &cell_act, const detail::ActivationType &cand_act); }; diff --git a/paddle/fluid/operators/math/sample_prob.cc b/paddle/fluid/operators/math/sample_prob.cc new file mode 100644 index 0000000000000000000000000000000000000000..99aa318453eae161807353198a78e11085cd6237 --- /dev/null +++ b/paddle/fluid/operators/math/sample_prob.cc @@ -0,0 +1,26 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/math/sample_prob.h" + +namespace paddle { +namespace operators { +namespace math { + +template class SampleWithProb; +template class SampleWithProb; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu new file mode 100644 index 0000000000000000000000000000000000000000..8f9391591560cc3f76ac67f43121c4b1cff90e12 --- /dev/null +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -0,0 +1,161 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include + +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/sample_prob.h" +#include "paddle/fluid/operators/math/sampler.h" + +namespace paddle { +namespace operators { +namespace math { + +using Tensor = framework::Tensor; + +template +__device__ T gpu_adjust_prob(const T prob, const int num_samples, + const int num_tries) { + if (num_samples == num_tries) { + return prob * num_samples; + } else { + return -expm1(num_tries * log1p(-prob)); + } +} + +class GPULogUniformSampler { + public: + __device__ int64_t Sample(float random, const int range, + const float log_range) const; + __device__ float Probability(int64_t value, const float log_range) const; +}; + +__device__ int64_t GPULogUniformSampler::Sample(float random, const int range, + const float log_range) const { + // Got Log Uniform distribution from uniform distribution by + // inverse_transform_sampling method + const int64_t value = static_cast(exp(random * log_range)) - 1; + // Mathematically, value should be <= range_, but might not be due to some + // floating point roundoff, so we mod by range_. + return value % range; +} + +__device__ float GPULogUniformSampler::Probability( + int64_t value, const float log_range) const { + // Given f(x) = 1/[(x+1) * log_range_] + // The value's probability is integral of f(x) from value to (value + 1) + return (log((value + 2.0) / (value + 1.0))) / log_range; +} + +template +__global__ void SamplingCondidate( + const size_t n, const int num_tries, const int range, const float log_range, + const int num_true, const std::size_t num_samples, + const int64_t* label_data, int64_t* samples_data, T* probabilities_data) { + const int num_sampled_classes = num_true + num_samples; + + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = 0; + GPULogUniformSampler sampler; + + for (; idx < n; idx += blockDim.x * gridDim.x) { + int col_idx = idx % num_sampled_classes; + int row_idx = idx / num_sampled_classes; + if (col_idx < num_true) { + samples_data[idx] = label_data[row_idx * num_true + col_idx]; + } else { + samples_data[idx] = samples_data[col_idx]; + } + probabilities_data[idx] = sampler.Probability(samples_data[idx], log_range); + probabilities_data[idx] = + gpu_adjust_prob(probabilities_data[idx], num_samples, num_tries); + } +} + +template +int UniqSampler(const Sampler& sampler, const std::size_t num_samples, + int64_t* samples_data) { + // sample num_samles unique samples for an example, note that they are not + // all negative samples + std::unordered_set tmp_samples; + tmp_samples.clear(); + int num_tries = 0; + int j = 0; + while (j < num_samples) { + ++num_tries; + auto v = sampler.Sample(); + auto insert_ok = tmp_samples.insert(v).second; + if (!insert_ok) { + continue; + } + samples_data[j] = v; + ++j; + } + return num_tries; +} + +template +void GPUSampleWithProb::operator()( + const platform::CUDADeviceContext& context, const int seed, + const int dict_size, const bool uniq, const std::size_t num_samples, + const Tensor* L, Tensor* S, Tensor* P) { + // UNDERSTAND: dimension issues + const auto lbl_dim = L->dims(); + const int batch_size = lbl_dim[0]; + const int num_true = lbl_dim[1]; + const int num_sampled_classes = num_true + num_samples; + framework::DDim ret_dim{batch_size, num_sampled_classes}; + + // UNDERSTAND: raw data view + const int64_t* label_data = L->data(); + int64_t* samples_data = S->data(); + T* probabilities_data = P->data(); + + int s_size = num_samples; + framework::DDim s_dim{s_size}; + Tensor s; + int64_t* s_data = s.mutable_data(s_dim, platform::CPUPlace()); + + math::LogUniformSampler sampler(dict_size, seed); + + int range = dict_size; + float log_range = log(range + 1); + + int num_tries = UniqSampler(sampler, num_samples, s_data); + VLOG(1) << "num_tries: " << num_tries; + PADDLE_ENFORCE(cudaMemcpy(samples_data + num_true, s_data, + sizeof(int64_t) * num_samples, + cudaMemcpyHostToDevice)); + + int threads = 512; + const size_t size = batch_size * num_sampled_classes; + int grid = (batch_size * num_sampled_classes + threads - 1) / threads; + SamplingCondidate<<>>( + size, num_tries, range, log_range, num_true, num_samples, label_data, + samples_data, probabilities_data); +} + +template class GPUSampleWithProb; +template class GPUSampleWithProb; +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h new file mode 100644 index 0000000000000000000000000000000000000000..e5a6d84cb2b0527c606e62a19ef02d669945ecb1 --- /dev/null +++ b/paddle/fluid/operators/math/sample_prob.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/sampler.h" + +namespace paddle { +namespace operators { +namespace math { + +using Tensor = framework::Tensor; + +/* UNDERSTAND: utility function to adjust probability for unique sampling, +return whatever as it is if not using unique samping */ +template +static T adjust_prob(const T prob, const int num_samples, const int num_tries) { + if (num_samples == num_tries) { + return prob * num_samples; + } else { + return -expm1(num_tries * log1p(-prob)); + } +} + +template +class SampleWithProb { + public: + void operator()(const DeviceContext& context, const Sampler& sampler, + const std::size_t num_samples, const Tensor* L, Tensor* S, + Tensor* P) { + // UNDERSTAND: dimension issues + const auto lbl_dim = L->dims(); + const int batch_size = lbl_dim[0]; + const int num_true = lbl_dim[1]; + const int num_sampled_classes = num_true + num_samples; + framework::DDim ret_dim{batch_size, num_sampled_classes}; + + // UNDERSTAND: raw data view + const int64_t* label_data = L->data(); + int64_t* samples_data = + S->mutable_data(ret_dim, context.GetPlace()); + T* probabilities_data = P->mutable_data(ret_dim, context.GetPlace()); + + // temp sets for unique sampling + std::unordered_set tmp_samples; + int j = 0; // column index + // add true labels, not that efficient + while (j < num_true) { + for (int i = 0; i < batch_size; ++i) { + auto samples_index = i * num_sampled_classes + j; + auto v = label_data[i * num_true + j]; + samples_data[samples_index] = v; + probabilities_data[samples_index] = sampler.Probability(v); + } + ++j; + } + + // sample num_samles unique samples for an example, note that they are not + // all negative samples + tmp_samples.clear(); + int num_tries = 0; + while (j < num_sampled_classes) { + ++num_tries; + auto v = sampler.Sample(); + auto insert_ok = tmp_samples.insert(v).second; + if (!insert_ok) { + continue; + } + auto p = sampler.Probability(v); + for (int i = 0; i < batch_size; ++i) { + auto samples_index = i * num_sampled_classes + j; + samples_data[samples_index] = v; + probabilities_data[samples_index] = p; + } + ++j; + } + + // compute Q(y|x), because of unique sampling, probabilities need to be + // adjusted + for (int k = 0; k < num_sampled_classes; ++k) { + for (int i = 0; i < batch_size; ++i) { + auto samples_index = i * num_sampled_classes + k; + probabilities_data[samples_index] = adjust_prob( + probabilities_data[samples_index], num_samples, num_tries); + } + } + } +}; + +#ifdef PADDLE_WITH_CUDA +template +class GPUSampleWithProb { + public: + void operator()(const platform::CUDADeviceContext& context, const int seed, + const int dict_size, const bool uniq, + const std::size_t num_samples, const Tensor* L, Tensor* S, + Tensor* P); +}; +#endif +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index e16b6f78d16ce29cc493c4c795c7fe97a4bf2550..43559940d925e6fff29f0c5c66ec1a3dc717aaf4 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -52,11 +52,6 @@ class MKLDNNActivationKernel "Wrong layout/format set for Input x tensor"); Functor functor; - - auto attrs = functor.GetAttrs(); - for (auto &attr : attrs) { - *attr.second = ctx.Attr(attr.first); - } functor(ctx); } }; @@ -76,11 +71,6 @@ class MKLDNNActivationGradKernel "is_test attribute should be set to False in training phase."); Functor functor; - - auto attrs = functor.GetAttrs(); - for (auto &attr : attrs) { - *attr.second = ctx.Attr(attr.first); - } functor(ctx); } }; @@ -106,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, std::vector src_tz = framework::vectorize2int(x->dims()); - auto src_format = - src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format(); + auto src_format = x->format(); const std::string key = gethash(src_tz, algorithm); const std::string key_src_data = @@ -137,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx, if (p_fwd == nullptr) { // create mkldnn memory for input X - auto src_md = platform::MKLDNNMemDesc( - src_tz, platform::MKLDNNGetDataType(), src_format); auto src_memory = std::shared_ptr( - new memory({src_md, mkldnn_engine}, to_void_cast(x_data))); + new memory(x->get_mkldnn_prim_desc(), to_void_cast(x_data))); // save src_memory to be referred in backward path dev_ctx.SetBlob(key_src_mem, src_memory); @@ -187,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, pipeline.push_back(*p_fwd); stream(stream::kind::eager).submit(pipeline).wait(); - y->set_layout(DataLayout::kMKLDNN); - y->set_format(GetMKLDNNFormat(*dst_memory)); + y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc()); } template @@ -206,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx, std::vector diff_dst_tz = framework::vectorize2int(diff_y->dims()); - auto diff_y_format = - diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format(); - const std::string key = gethash(diff_dst_tz, algorithm); const std::string key_src_data = key + ctx.op().Input("Out") + "@eltwise_fwd_src_data"; @@ -220,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx, key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem"; const std::string key_fwd_pd = key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd"; - const std::string key_with_layouts = - key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format); + const std::string key_with_layouts = key + std::to_string(*p_src_layout) + + "-" + std::to_string(diff_y->format()); const std::string key_diff_src_mem = key_with_layouts + "@eltwise_diff_src_mem"; const std::string key_diff_dst_mem = @@ -235,7 +218,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx, std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); PADDLE_ENFORCE(src_memory != nullptr, "Fail to find src_memory in device context"); - src_memory->set_data_handle(*p_src_data.get()); + src_memory->set_data_handle(*p_src_data); std::shared_ptr diff_src_memory; @@ -244,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx, if (p_grad == nullptr) { // create mkldnn memory for input diff_y - auto diff_dst_md = platform::MKLDNNMemDesc( - diff_dst_tz, platform::MKLDNNGetDataType(), diff_y_format); auto diff_dst_memory = std::shared_ptr( - new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data))); + new memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data))); dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory); // retrieve eltwise primitive desc from device context @@ -291,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx, pipeline.push_back(*p_grad); stream(stream::kind::eager).submit(pipeline).wait(); - diff_x->set_layout(DataLayout::kMKLDNN); - diff_x->set_format(GetMKLDNNFormat(*diff_src_memory)); + diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc()); } template diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index bddca232e6c8a2a7fde998877006e37ee6d3d0dc..04e45d4853907bb7d6b5ce362892a2183fd4b60e 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; // create mkldnn memory from input x tensor - mkldnn::memory::format input_format = - platform::MKLDNNFormatForSize(src_tz.size(), x->format()); // keys for backward pass const std::string key = BatchNormMKLDNNHandler::GetHash( - src_tz, epsilon, flags, global_stats, input_format, + src_tz, epsilon, flags, global_stats, x->format(), ctx.op().Output("SavedMean")); const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd"; - auto user_src_md = platform::MKLDNNMemDesc( - {src_tz}, platform::MKLDNNGetDataType(), input_format); + auto user_src_md = x->get_mkldnn_prim_desc().desc(); // create primitive descriptor for batch norm forward using bn_fwd_types = bn_type_traits; @@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine, key); - auto src_memory = - handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data)); + auto src_memory = handler.AcquireSrcMemory(x->get_mkldnn_prim_desc(), + to_void_cast(x_data)); // crate mkldnn memory for weights(scale/shift) auto scaleshift_memory = @@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { variance_memory, false); } - y->set_layout(DataLayout::kMKLDNN); - y->set_format(platform::GetMKLDNNFormat(*dst_memory)); + y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc()); std::vector pipeline; pipeline.push_back(*batch_norm_p); @@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { using bn_bwd_types = bn_type_traits; - mkldnn::memory::format dst_format = - platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format()); - mkldnn::memory::format input_format = platform::MKLDNNFormatForSize(src_tz.size(), x->format()); @@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { // keys from forward pass const std::string key = BatchNormMKLDNNHandler::GetHash( - src_tz, epsilon, flags, false, input_format, + src_tz, epsilon, flags, false, x->format(), ctx.op().Input("SavedMean")); const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd"; // keys for primitives reuse const std::string key_with_hash = key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false, - input_format); + x->format()); const std::string key_batch_norm_bwd_p = key_with_hash + "@batch_norm_bwd_p"; const std::string key_batch_norm_src_mem_p = @@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { primitive reorder_diff_dst; bool is_diff_dst_reordered = false; - auto user_diff_dst_memory = memory( - {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine}, - to_void_cast(diff_y_data)); + auto user_diff_dst_memory = + memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data)); // MKLDNN requires a single piece of memory for scale and shift/bias data const size_t scaleshift_size = 2 * ic; @@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory); // set layout/format of output tensors - diff_x->set_layout(DataLayout::kMKLDNN); - diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc() - .desc() - .data.format); + diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc()); } else { // primitives already exist UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p, to_void_cast(x_data)); @@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { } // set layout/format of output tensors - diff_x->set_layout(DataLayout::kMKLDNN); - diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc() - .desc() - .data.format); + diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc()); } // execute optional reorder and batch_norm backward primitive diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 7ad674056f0d753d79408a11eff1aca47a84998a..54c6a71111a2cc2f9e5004922ae5d3541a9d0a70 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input, return mem_prim_desc; } -static mkldnn::memory::format GetDstMemFormat( - const concat::primitive_desc& concat_pd) { - return (memory::format)concat_pd.dst_primitive_desc().desc().data.format; -} - static platform::CPUPlace GetCpuPlace( const paddle::framework::ExecutionContext& ctx) { auto place = ctx.GetPlace(); @@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place); stream(stream::kind::eager).submit({concat}).wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(GetDstMemFormat(concat_pd)); + output->set_mkldnn_prim_desc(concat_pd.dst_primitive_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 0ce174654e85175f0b949f860a00afafc548ed3e..14ca3e8073b9512732876e512a30968b15884495 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -96,12 +96,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto* bias = ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; auto* output = ctx.Output("Output"); - PADDLE_ENFORCE(input->layout() == DataLayout::kMKLDNN && - input->format() != memory::format::format_undef, - "Wrong layout/format set for Input tensor"); - PADDLE_ENFORCE(filter->layout() == DataLayout::kMKLDNN && - filter->format() != memory::format::format_undef, - "Wrong layout/format set for Filter tensor"); + PADDLE_ENFORCE(input->layout() == DataLayout::kMKLDNN); + PADDLE_ENFORCE(filter->layout() == DataLayout::kMKLDNN); PADDLE_ENFORCE(input->dims().size() == 4 || input->dims().size() == 5, "Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW"); PADDLE_ENFORCE(filter->dims().size() == 4 || filter->dims().size() == 5, @@ -148,14 +144,19 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector pipeline; - auto src_format = input->format(); - mkldnn::memory::format weights_format = - GetWeightsFormat(filter->format(), g, is_conv3d); - - auto user_src_md = platform::MKLDNNMemDesc( - {src_tz}, platform::MKLDNNGetDataType(), src_format); - auto user_weights_md = platform::MKLDNNMemDesc( - {weights_tz}, platform::MKLDNNGetDataType(), weights_format); + // For convolution with groups we need to recreate primitive descriptor + // as Paddle tensor is not having group dims while mkldnn treats + // group as another dimensions + mkldnn::memory::primitive_desc user_weights_mpd = + filter->get_mkldnn_prim_desc(); + if (g > 1) { + mkldnn::memory::format weights_format = + GetWeightsFormat(filter->format(), g, is_conv3d); + auto user_weights_md = platform::MKLDNNMemDesc( + {weights_tz}, platform::MKLDNNGetDataType(), weights_format); + user_weights_mpd = + mkldnn::memory::primitive_desc(user_weights_md, mkldnn_engine); + } /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose @@ -165,7 +166,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto chosen_memory_format = platform::data_format_to_memory_format(data_format); - weights_format = mkldnn::memory::format::any; + mkldnn::memory::format weights_format = mkldnn::memory::format::any; // Check the format for user's special output if (chosen_memory_format != mkldnn::memory::format::any) { if (is_conv3d) { @@ -205,10 +206,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { platform::ConvMKLDNNHandler handler(conv_pd, dev_ctx, mkldnn_engine, key); // create mkldnn memory from input tensors (data/weights) - auto user_src_memory_p = - handler.AcquireSrcMemory(user_src_md, to_void_cast(input_data)); + auto user_src_memory_p = handler.AcquireSrcMemory( + input->get_mkldnn_prim_desc(), to_void_cast(input_data)); auto user_weights_memory_p = handler.AcquireWeightsMemory( - user_weights_md, to_void_cast(filter_data)); + user_weights_mpd, to_void_cast(filter_data)); // create reorder primitive if the input format is not the preferred one auto src_memory_p = @@ -281,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { pipeline.push_back(*conv_p); stream(stream::kind::eager).submit(pipeline).wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(GetMKLDNNFormat(*dst_memory_p)); + output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc()); } void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const { const bool is_test = ctx.Attr("is_test"); @@ -947,8 +947,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { // push primitive to stream and wait until it's executed pipeline.push_back(*conv_bwd_weights_p); - filter_grad->set_layout(DataLayout::kMKLDNN); - filter_grad->set_format(GetMKLDNNFormat(*diff_weights_memory_p)); + auto filter_grad_mpd = diff_weights_memory_p->get_primitive_desc(); + filter_grad->set_mkldnn_prim_desc(filter_grad_mpd); } if (input_grad) { @@ -971,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { pipeline.push_back(*conv_bwd_data_p); - input_grad->set_layout(DataLayout::kMKLDNN); - input_grad->set_format(GetMKLDNNFormat(*diff_src_memory_p)); + input_grad->set_mkldnn_prim_desc(diff_src_memory_p->get_primitive_desc()); } stream(stream::kind::eager).submit(pipeline).wait(); } @@ -990,12 +989,12 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, ::paddle::platform::CPUPlace, U8, - ops::kConvMKLDNNFP32, + ops::kConvMKLDNNINT8, ops::ConvMKLDNNOpKernel); REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, ::paddle::platform::CPUPlace, S8, - ops::kConvMKLDNNFP32, + ops::kConvMKLDNNINT8, ops::ConvMKLDNNOpKernel); REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d_grad, MKLDNN, diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 317d4cebe26b81ff03c212e6328233d5152ed1b4..79a0c5c7683d677daeb4feea10deab86407f944c 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { pipeline.push_back(*conv_p); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc()); } private: diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 262b7408a7f5f65c4d97120914c16f38ce5fdbe7..accc9a9d71ffccf2812d57a7516eaf7e0f83275c 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/dequantize_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { namespace operators { @@ -30,6 +31,18 @@ using framework::DataLayout; using mkldnn::stream; using platform::GetMKLDNNFormat; +std::string CreateKey(const paddle::framework::ExecutionContext& ctx, + const mkldnn::memory::data_type& src_dt, + const std::vector& src_tz, const float scale_data) { + std::string key; + key.reserve(platform::MKLDNNHandler::MaxKeyLength); + platform::MKLDNNHandler::AppendKey(&key, std::to_string(src_dt)); + platform::MKLDNNHandler::AppendKeyDims(&key, src_tz); + platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data)); + platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output")); + return key; +} + template class DeQuantOpKernel : public framework::OpKernel { public: @@ -51,31 +64,55 @@ class DeQuantOpKernel : public framework::OpKernel { mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type()); mkldnn::memory::format src_fmt = input->format(); + std::string key = CreateKey(ctx, src_dt, src_tz, reorder_scale[0]); + const std::string key_prim = key + "@reorder_p"; + const std::string key_src_mem = key + "@src_mem"; + const std::string key_dst_mem = key + "@dst_mem"; + + std::shared_ptr src_memory; + std::shared_ptr dst_memory; + std::shared_ptr reorder_p; + reorder_p = std::static_pointer_cast(dev_ctx.GetBlob(key_prim)); + + if (reorder_p == nullptr) { + mkldnn::primitive_attr attri; + int mask = 0; + attri.set_output_scales(mask, reorder_scale); + + auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt); + auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); + src_memory = + std::make_shared(src_pd, to_void_cast(input_data)); + std::shared_ptr src_memory_p = + std::shared_ptr(new primitive::at(*src_memory)); + + auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32, + memory::format::nchw); + auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine); + dst_memory = std::make_shared( + dst_pd, to_void_cast(output_data)); + + auto reorder_pd = std::shared_ptr( + new reorder::primitive_desc(src_pd, dst_pd, attri)); + reorder_p = std::shared_ptr( + new reorder(*reorder_pd, *src_memory_p, *dst_memory)); + dev_ctx.SetBlob(key_prim, reorder_p); + dev_ctx.SetBlob(key_src_mem, src_memory); + dev_ctx.SetBlob(key_dst_mem, dst_memory); + } else { + src_memory = std::static_pointer_cast( + dev_ctx.GetBlob(key_src_mem)); + src_memory->set_data_handle(to_void_cast(input_data)); + + dst_memory = std::static_pointer_cast( + dev_ctx.GetBlob(key_dst_mem)); + dst_memory->set_data_handle(output->mutable_data(ctx.GetPlace())); + } - mkldnn::primitive_attr attri; - int mask = 0; - attri.set_output_scales(mask, reorder_scale); - - auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt); - auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); - auto src_memory = - std::make_shared(src_pd, to_void_cast(input_data)); - std::shared_ptr src_memory_p = - std::shared_ptr(new primitive::at(*src_memory)); - - auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32, - memory::format::nchw); - auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine); - auto dst_memory = mkldnn::memory(dst_pd, to_void_cast(output_data)); - - auto reorder_pd = std::shared_ptr( - new reorder::primitive_desc(src_pd, dst_pd, attri)); - auto reorder_p = std::shared_ptr( - new reorder(*reorder_pd, *src_memory_p, dst_memory)); pipeline.push_back(*reorder_p); stream(stream::kind::eager).submit(pipeline).wait(); - output->set_format(GetMKLDNNFormat(dst_memory)); + output->set_format(GetMKLDNNFormat(*dst_memory)); } }; diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index e595f1a627cfefbb91b070b898046cf135dc4988..3a926a716f54a094eba11d63c3b29de27dff274b 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -282,7 +282,7 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel { ? mkldnn::inner_product_backward_weights::desc( src, diff_weights, bias, diff_dst) : mkldnn::inner_product_backward_weights::desc( - src, diff_weights, bias, diff_dst); + src, diff_weights, diff_dst); return mkldnn::inner_product_backward_weights::primitive_desc( bwd_weight_desc, engine, pd); diff --git a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc index 76b00b396c1349eff5db1059268e7cf280a8fc64..d01e8dbf4ce0c92bb81fc76df68d5424f9da0717 100644 --- a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc @@ -42,8 +42,12 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel { // The format of output is set as the mkldnn's format // TODO(@mozga-intel) The format of matrix sets inside the another layers. - tensor->set_layout(DataLayout::kMKLDNN); - tensor->set_format(mkldnn::memory::format::oihw); + // TODO(jczaja): Remove this hack after checking performance on block layout + + auto tensor_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(tensor->dims()), + mkldnn::memory::format::oihw); + tensor->set_mkldnn_prim_desc(tensor_mem_pd); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 097ba01d401dbc7969e30f576cac2567c874ed99..4ff27ab12280b56abdf72056fe69ec713f2f2f46 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { auto e_mid = framework::EigenTensor::From(*mid); e_mid = e_mid.constant(k); - auto dims = paddle::framework::vectorize2int(x->dims()); - - auto src_md = paddle::platform::MKLDNNMemDesc( - dims, mkldnn::memory::data_type::f32, x->format()); + auto src_md = x->get_mkldnn_prim_desc().desc(); auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward, mkldnn::lrn_across_channels, @@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { beta, k}; - auto src_memory_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine}; + auto src_memory_pd = x->get_mkldnn_prim_desc(); if (!is_test) { const std::string key = ctx.op().Output("Out"); @@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { src_memory->set_data_handle( static_cast(const_cast(input_data))); - auto dst_memory = mkldnn::memory(forward_pd->dst_primitive_desc(), - static_cast(output_data)); + auto dst_memory_pd = forward_pd->dst_primitive_desc(); + auto dst_memory = + mkldnn::memory(dst_memory_pd, static_cast(output_data)); auto workspace_memory = insert_to_context( key_workspace_memory, dev_ctx, forward_pd->workspace_primitive_desc()); run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory); - - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat(dst_memory)); + out->set_mkldnn_prim_desc(dst_memory_pd); } else { auto forward_pd = mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine}; @@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { src_memory_pd, static_cast(const_cast(input_data))}; auto workspace_memory = mkldnn::memory{forward_pd.workspace_primitive_desc()}; + auto dst_memory_pd = forward_pd.dst_primitive_desc(); auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(), static_cast(output_data)); run_primitive(forward_pd, src_memory, workspace_memory, dst_memory); - - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat(dst_memory)); + out->set_mkldnn_prim_desc(dst_memory_pd); } } }; diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index f4bad7b712b2b078ed68f0a3d0e751d9ae2d6191..5d8e81921157cbdf35f7016741ab45c362b7261f 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/operators/pool_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { namespace operators { @@ -29,23 +30,23 @@ using mkldnn::stream; using platform::to_void_cast; // Generate keys for storing/retriving primitives for this operator -// TODO(jczaja): Make hashing function more optimial -static std::string gethash(const memory::dims& input_dims, - const std::string& pooling_type, - const std::vector& ksize, - const std::vector& strides, - const std::vector& paddings, - const memory::data_type& dt, - const std::string& suffix) { - auto dims2str = [](const memory::dims& operand_dims) { - std::string dstr = ""; - for (size_t i = 0; i < operand_dims.size(); ++i) { - dstr += std::to_string(operand_dims[i]) + "-"; - } - return dstr; - }; - return dims2str(input_dims) + dims2str(ksize) + dims2str(strides) + - dims2str(paddings) + std::to_string(dt) + pooling_type + suffix; +std::string CreateKey(const paddle::framework::ExecutionContext& ctx, + const memory::dims& input_dims, + const std::string& pooling_type, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const memory::data_type& dt, const std::string& suffix) { + std::string key; + key.reserve(platform::MKLDNNHandler::MaxKeyLength); + platform::MKLDNNHandler::AppendKeyDims(&key, input_dims); + platform::MKLDNNHandler::AppendKey(&key, pooling_type); + platform::MKLDNNHandler::AppendKeyVec(&key, ksize); + platform::MKLDNNHandler::AppendKeyVec(&key, strides); + platform::MKLDNNHandler::AppendKeyVec(&key, paddings); + platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt)); + platform::MKLDNNHandler::AppendKey(&key, suffix); + return key; } static inline int ComputeCeiledOutput(int input_size, int kernel_size, @@ -114,8 +115,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { mkldnn::memory::data_type dt = paddle::framework::ToMKLDNNDataType(input->type()); - const std::string key = gethash(src_tz, pooling_type, ksize, strides, - paddings, dt, ctx.op().Output("Out")); + const std::string key = CreateKey(ctx, src_tz, pooling_type, ksize, strides, + paddings, dt, ctx.op().Output("Out")); const std::string key_pool_p = key + "@pool_p"; const std::string key_pool_pd = key + "@pool_pd"; const std::string key_pool_src_mem_p = key + "@pool_src_mem_p"; @@ -198,7 +199,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { } // push primitive to stream and wait until it's executed - std::vector pipeline{*(pool_p.get())}; + std::vector pipeline{*pool_p}; stream(stream::kind::eager).submit(pipeline).wait(); output->set_layout(DataLayout::kMKLDNN); @@ -294,8 +295,8 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { // Get an unique name from "argument" name of "Out" variable // This name will be used as key when referring info from device context const std::string key = - gethash(diff_src_tz, pooling_type, ksize, strides, paddings, - memory::data_type::f32, ctx.op().Input("Out")); + CreateKey(ctx, diff_src_tz, pooling_type, ksize, strides, paddings, + memory::data_type::f32, ctx.op().Input("Out")); const std::string key_pool_bwd_p = key + "@pool_bwd_p"; const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p"; const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p"; @@ -367,8 +368,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { dev_ctx.SetBlob(key_pool_diff_dst_mem_p, diff_dst_memory); pool_bwd_p = std::make_shared( - pool_bwd_pd, *(diff_dst_memory.get()), *workspace_memory, - *(diff_src_memory)); + pool_bwd_pd, *diff_dst_memory, *workspace_memory, *diff_src_memory); dev_ctx.SetBlob(key_pool_bwd_p, pool_bwd_p); } else { @@ -404,7 +404,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { if (is_diff_dst_reordered) { pipeline.push_back(reorder_diff_dst); } - pipeline.push_back(*(pool_bwd_p.get())); + pipeline.push_back(*pool_bwd_p); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); in_x_grad->set_layout(DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 0638e42873376bcec6e4de61494da46d1f0073d1..04cd60be964a3967a45e73122324c4b3fdf0b3d0 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -30,6 +30,18 @@ using framework::DataLayout; using mkldnn::stream; using platform::GetMKLDNNFormat; +std::string CreateKey(const paddle::framework::ExecutionContext& ctx, + const std::vector& src_tz, const float scale_data, + const bool is_negative) { + std::string key; + key.reserve(platform::MKLDNNHandler::MaxKeyLength); + platform::MKLDNNHandler::AppendKeyDims(&key, src_tz); + platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data)); + platform::MKLDNNHandler::AppendKey(&key, std::to_string(is_negative)); + platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output")); + return key; +} + template class QuantOpKernel : public framework::OpKernel { public: @@ -47,32 +59,61 @@ class QuantOpKernel : public framework::OpKernel { const T* input_data = input->data(); - mkldnn::primitive_attr attri; - int mask = 0; - attri.set_output_scales(mask, {scale_data}); - - auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32, - input->format()); - auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); - auto src_memory = - std::make_shared(src_pd, to_void_cast(input_data)); - std::shared_ptr src_memory_p = - std::shared_ptr(new primitive::at(*src_memory)); - bool is_negative = ctx.Attr("is_negative_input"); - std::shared_ptr dst_pd; + std::string key = CreateKey(ctx, src_tz, scale_data, is_negative); + const std::string key_prim = key + "@reorder_p"; + const std::string key_src_mem = key + "@src_mem"; + const std::string key_dst_mem = key + "@dst_mem"; + + std::shared_ptr src_memory; std::shared_ptr dst_memory; - if (is_negative) { - platform::ConvMKLDNNHandler::SetDstMemory( - ctx, output, dst_tz, engine, dst_pd, dst_memory); + std::shared_ptr reorder_p; + reorder_p = std::static_pointer_cast(dev_ctx.GetBlob(key_prim)); + + if (reorder_p == nullptr) { + mkldnn::primitive_attr attri; + int mask = 0; + attri.set_output_scales(mask, {scale_data}); + + auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32, + input->format()); + auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); + src_memory = + std::make_shared(src_pd, to_void_cast(input_data)); + std::shared_ptr src_memory_p = + std::shared_ptr(new primitive::at(*src_memory)); + + std::shared_ptr dst_pd; + if (is_negative) { + platform::ConvMKLDNNHandler::SetDstMemory( + ctx, output, dst_tz, engine, dst_pd, dst_memory); + } else { + platform::ConvMKLDNNHandler::SetDstMemory( + ctx, output, dst_tz, engine, dst_pd, dst_memory); + } + auto reorder_pd = std::shared_ptr( + new reorder::primitive_desc(src_pd, *dst_pd, attri)); + reorder_p = std::shared_ptr( + new reorder(*reorder_pd, *src_memory_p, *dst_memory)); + + dev_ctx.SetBlob(key_prim, reorder_p); + dev_ctx.SetBlob(key_src_mem, src_memory); + dev_ctx.SetBlob(key_dst_mem, dst_memory); } else { - platform::ConvMKLDNNHandler::SetDstMemory( - ctx, output, dst_tz, engine, dst_pd, dst_memory); + src_memory = std::static_pointer_cast( + dev_ctx.GetBlob(key_src_mem)); + src_memory->set_data_handle(to_void_cast(input_data)); + + dst_memory = std::static_pointer_cast( + dev_ctx.GetBlob(key_dst_mem)); + auto place = ctx.GetPlace(); + if (is_negative) { + dst_memory->set_data_handle(output->mutable_data(place)); + } else { + dst_memory->set_data_handle(output->mutable_data(place)); + } } - auto reorder_pd = std::shared_ptr( - new reorder::primitive_desc(src_pd, *dst_pd, attri)); - auto reorder_p = std::shared_ptr( - new reorder(*reorder_pd, *src_memory_p, *dst_memory)); + pipeline.push_back(*reorder_p); stream(stream::kind::eager).submit(pipeline).wait(); output->set_layout(DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..44e8281424ba6937dad2c2dee1db4dee96b3b2eb --- /dev/null +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -0,0 +1,94 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "mkldnn.hpp" +#include "paddle/fluid/framework/data_layout_transform.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/requantize_op.h" +#include "paddle/fluid/platform/mkldnn_helper.h" + +namespace paddle { +namespace operators { + +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::reorder; +using platform::to_void_cast; +using Tensor = framework::Tensor; +using framework::DataLayout; +using mkldnn::stream; +using platform::GetMKLDNNFormat; + +template +class ReQuantOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("Input"); + auto scale_in = ctx.Attr("Scale_in"); + auto scale_out = ctx.Attr("Scale_out"); + auto* output = ctx.Output("Output"); + auto& dev_ctx = + ctx.template device_context(); + const auto& engine = dev_ctx.GetEngine(); + + std::vector pipeline; + std::vector src_tz = paddle::framework::vectorize2int(input->dims()); + std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); + mkldnn::memory::data_type src_dt = + paddle::framework::ToMKLDNNDataType(input->type()); + mkldnn::memory::data_type dst_dt = src_dt; // TODO(Xiaoli) support + // requantize from different + // data type (e.g., s8 to u8) + mkldnn::memory::format src_fmt = memory::format::nhwc; + mkldnn::memory::format dst_fmt = memory::format::nhwc; + + const T* input_data = input->data(); + T* output_data = output->mutable_data(ctx.GetPlace()); + float scale_shift = scale_out / scale_in; + + mkldnn::primitive_attr attri; + int mask = 0; + attri.set_output_scales(mask, {scale_shift}); + + auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt); + auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); + auto src_memory = + std::make_shared(src_pd, to_void_cast(input_data)); + std::shared_ptr src_memory_p = + std::shared_ptr(new primitive::at(*src_memory)); + + auto dst_md = platform::MKLDNNMemDesc({dst_tz}, dst_dt, dst_fmt); + auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine); + auto dst_memory = mkldnn::memory(dst_pd, to_void_cast(output_data)); + + auto reorder_pd = std::shared_ptr( + new reorder::primitive_desc(src_pd, dst_pd, attri)); + + auto reorder_p = std::shared_ptr( + new reorder(*reorder_pd, *src_memory_p, dst_memory)); + pipeline.push_back(*reorder_p); + stream(stream::kind::eager).submit(pipeline).wait(); + + output->set_layout(DataLayout::kMKLDNN); + output->set_format(GetMKLDNNFormat(dst_memory)); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_KERNEL(requantize, MKLDNN, ::paddle::platform::CPUPlace, + ops::ReQuantOpKernel, ops::ReQuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index d2b149535426d097fea4b8fffa9efe82bd6edc64..0ce552219458859e147ba207c94270bf84a1fe75 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -66,8 +66,7 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { "Fail to find softmax primitive in device context"); if (softmax_p == nullptr) { softmax_p = std::make_shared( - *(softmax_pd_.get()), - *(static_cast(src_memory_p.get())), + *softmax_pd_, *(static_cast(src_memory_p.get())), *(static_cast(dst_memory_p.get()))); dev_ctx_.SetBlob(prim_key, softmax_p); } else { @@ -88,8 +87,8 @@ class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { "Fail to find softmax backward primitive in device context"); if (softmax_bwd_p == nullptr) { softmax_bwd_p = std::make_shared( - *softmax_bwd_pd_, *(dst_memory_p.get()), *(diff_dst_memory_p.get()), - *(diff_src_memory_p.get())); + *softmax_bwd_pd_, *dst_memory_p, *diff_dst_memory_p, + *diff_src_memory_p); dev_ctx_.SetBlob(prim_key, softmax_bwd_p); } else { is_reusing_ = true; @@ -159,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { auto softmax_p = handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p); + // We cannot use softmax_dst_memory_p to get prim desc as + // it contains flattened dims (2D) while output tensor can + // have 2,3,4+ dims + auto output_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(output->dims()), + mkldnn::memory::format::blocked); + output->set_mkldnn_prim_desc(output_mem_pd); + std::vector pipeline{ *(static_cast(softmax_p.get()))}; stream(stream::kind::eager).submit(pipeline).wait(); diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index c39f94637a1abb5bfce9a5428419282f2b870c91..aef5b7d4311adfedb3db157f17506c3a2c76fbf6 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { memory::format input_format = input0.format(); - if (src_tz.size() == 1 && (input_format == memory::format::nchw || - input_format == memory::format::nhwc)) { - input_format = memory::format::x; - } - if (src_tz.size() == 2 && (input_format == memory::format::nchw || - input_format == memory::format::nhwc)) { - input_format = memory::format::nc; - } - for (int i = 0; i < N; i++) { PADDLE_ENFORCE(in_vars[i]->IsType(), "all inputs must be all LoDTensors"); @@ -115,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { memory::desc(dst_tz, memory::data_type::f32, memory::format::any); auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd); - + auto dst_mem_pd = sum_pd.dst_primitive_desc(); std::shared_ptr dst_mem; if (in_place) { - dst_mem.reset(new memory(sum_pd.dst_primitive_desc())); + dst_mem.reset(new memory(dst_mem_pd)); } else { - dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data)); + dst_mem.reset(new memory(dst_mem_pd, output_data)); } std::vector inputs; for (size_t i = 0; i < srcs_mem.size(); ++i) { @@ -145,107 +136,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { if (in_place) pipeline.push_back(reorder_prim); stream(stream::kind::eager).submit(pipeline).wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(output_format); - } else if (out_var->IsType()) { - // TODO(@mozga-intel) Add MKLDNN SelectedRows support - std::unique_ptr in0; - if (in_place) { - // If is in_place, we store the input[0] to in0 - auto& in_sel0 = in_vars[0]->Get(); - auto& rows = in_sel0.rows(); - in0.reset(new framework::SelectedRows(rows, in_sel0.height())); - in0->mutable_value()->ShareDataWith(in_sel0.value()); - } - - auto get_selected_row = [&](size_t i) -> const SelectedRows& { - if (i == 0 && in0) { - return *in0.get(); - } else { - return in_vars[i]->Get(); - } - }; - auto* out = ctx.Output("Out"); - out->mutable_rows()->clear(); - auto* out_value = out->mutable_value(); - - // Runtime InferShape - size_t first_dim = 0; - for (int i = 0; i < N; i++) { - auto& sel_row = get_selected_row(i); - first_dim += sel_row.rows().size(); - } - - std::vector in_dim; - for (int i = 0; i < N; i++) { - auto& sel_row = get_selected_row(i); - if (sel_row.rows().size() > 0) { - in_dim = framework::vectorize(sel_row.value().dims()); - break; - } - } - - if (in_dim.empty()) { - VLOG(3) << "WARNING: all the inputs are empty"; - in_dim = framework::vectorize(get_selected_row(N - 1).value().dims()); - } else { - in_dim[0] = static_cast(first_dim); - } - - in_dim[0] = static_cast(first_dim); - - out_value->Resize(framework::make_ddim(in_dim)); - - out_value->mutable_data(ctx.GetPlace()); - - // if all the input sparse vars are empty, no need to - // merge these vars. - if (first_dim == 0UL) { - return; - } - - math::SelectedRowsAddTo functor; - int64_t offset = 0; - for (int i = 0; i < N; i++) { - auto& sel_row = get_selected_row(i); - if (sel_row.rows().size() == 0) { - continue; - } - PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); - functor(ctx.template device_context(), sel_row, - offset, out); - offset += sel_row.value().numel(); - } - } else if (out_var->IsType()) { - // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support - auto& out_array = *out_var->GetMutable(); - for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) { - PADDLE_ENFORCE(in_vars[i]->IsType(), - "Only support all inputs are TensorArray"); - auto& in_array = in_vars[i]->Get(); - - for (size_t i = 0; i < in_array.size(); ++i) { - if (in_array[i].numel() != 0) { - if (i >= out_array.size()) { - out_array.resize(i + 1); - } - if (out_array[i].numel() == 0) { - framework::TensorCopy(in_array[i], in_array[i].place(), - ctx.device_context(), &out_array[i]); - out_array[i].set_lod(in_array[i].lod()); - } else { - PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); - auto in = EigenVector::Flatten(in_array[i]); - auto result = EigenVector::Flatten(out_array[i]); - result.device(*ctx.template device_context() - .eigen_device()) = result + in; - } - } - } - } - } else { - PADDLE_THROW("Unexpected branch, output variable type is %s", - framework::ToTypeName(out_var->Type())); + output->set_mkldnn_prim_desc(dst_mem_pd); + } else { // Fallback to naive version + // TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support + SumKernel reference_kernel; + reference_kernel.Compute(ctx); } } }; diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index e6df7028f540d0928e2bb0763bd4cfef12059665..e41bfb80dfc0452955f7978f74ccfea184886b69 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -52,7 +52,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { mkldnn_engine, key); auto transpose_src_memory_p = handler.AcquireSrcMemory( - input->format(), platform::to_void_cast(input_data)); + input->get_mkldnn_prim_desc(), platform::to_void_cast(input_data)); auto transpose_dst_memory_p = handler.AcquireDstMemory(output, ctx.GetPlace()); auto transpose_p = handler.AcquireTranspose(transpose_dst_memory_p, @@ -61,6 +61,15 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector pipeline; pipeline.push_back(*transpose_p); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + + // Transpose did change logical dimensions of Tensor, but reorder does not. + // Reorder does change only physical layout eg. format , strides + // so we need to create new primitive descriptor with changed logical layout + // so it match output shape + auto output_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(output->dims()), + mkldnn::memory::format::blocked); + output->set_mkldnn_prim_desc(output_mem_pd); } }; @@ -102,8 +111,9 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx, mkldnn_engine, key); - auto transpose_src_memory_p = handler.AcquireSrcMemory( - out_grad->format(), platform::to_void_cast(out_grad_data)); + auto transpose_src_memory_p = + handler.AcquireSrcMemory(out_grad->get_mkldnn_prim_desc(), + platform::to_void_cast(out_grad_data)); auto transpose_dst_memory_p = handler.AcquireDstMemory(x_grad, ctx.GetPlace()); auto transpose_p = handler.AcquireTranspose(transpose_dst_memory_p, @@ -112,6 +122,15 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { std::vector pipeline; pipeline.push_back(*transpose_p); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + + // Transpose did change logical dimensions of Tensor, but reorder does not. + // Reorder does change only physical layout eg. format , strides + // so we need to create new primitive descriptor with changed logical layout + // so it match output shape + auto x_grad_mem_pd = paddle::platform::create_prim_desc_from_dims( + paddle::framework::vectorize2int(x_grad->dims()), + mkldnn::memory::format::blocked); + x_grad->set_mkldnn_prim_desc(x_grad_mem_pd); } }; diff --git a/paddle/fluid/operators/ngraph/CMakeLists.txt b/paddle/fluid/operators/ngraph/CMakeLists.txt index 6b256ef02666c21ec1db3f6922b56bb23363b4a0..7559d29ce233dfcebf8b3118b4c700c35fe15d32 100644 --- a/paddle/fluid/operators/ngraph/CMakeLists.txt +++ b/paddle/fluid/operators/ngraph/CMakeLists.txt @@ -2,4 +2,5 @@ if(WITH_NGRAPH) cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph) cc_library(ngraph_engine SRCS ngraph_engine.cc DEPS ngraph_bridge framework_proto) op_library(ngraph_engine_op DEPS ngraph_engine op_registry op_info device_context) + add_subdirectory(ops) endif() diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc index 38e65524e870834710ff29f722c69eadf67d9dbe..dafc31b546e3ca6d8dc8d5634dd51cff9fe5bfb7 100644 --- a/paddle/fluid/operators/ngraph/ngraph_bridge.cc +++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc @@ -14,46 +14,27 @@ limitations under the License. */ #include #include +#include #include #include "ngraph/ngraph.hpp" #include "paddle/fluid/operators/ngraph/ngraph_bridge.h" #include "paddle/fluid/operators/ngraph/ngraph_ops.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { namespace operators { -namespace NG_OPS = paddle::operators::ngraphs; -std::map&, - std::shared_ptr>>)>> - NgraphBridge::NG_NODE_MAP = { - {"accuracy", NG_OPS::BuildAccuracyNode}, - {"conv2d", NG_OPS::BuildConv2dNode}, - {"conv2d_grad", NG_OPS::BuildConv2dGradNode}, - {"elementwise_add", NG_OPS::BuildElementwiseAddNode}, - {"elementwise_add_grad", NG_OPS::BuildElementwiseAddGradNode}, - {"fill_constant", NG_OPS::BuildFillConstantNode}, - {"mean", NG_OPS::BuildMeanNode}, - {"mean_grad", NG_OPS::BuildMeanGradNode}, - {"mul", NG_OPS::BuildMulNode}, - {"mul_grad", NG_OPS::BuildMulGradNode}, - {"pool2d", NG_OPS::BuildPool2dNode}, - {"pool2d_grad", NG_OPS::BuildPool2dGradNode}, - {"softmax", NG_OPS::BuildSoftmaxNode}, - {"softmax_grad", NG_OPS::BuildSoftmaxGradNode}, - {"scale", NG_OPS::BuildScaleNode}, - {"relu", NG_OPS::BuildUnaryNode}, - {"tanh", NG_OPS::BuildUnaryNode}, - {"top_k", NG_OPS::BuildTopKNode}}; +bool NgraphBridge::isRegister(const std::string& str) { + return ops::NgraphSingleton::Lookup(str); +} void NgraphBridge::BuildNgNode( const std::shared_ptr& op) { auto& op_type = op->Type(); - NG_NODE_MAP[op_type](op, ngb_node_map_); + ops::NgraphSingleton::BuildNode(ngb_node_map_, op, op_type); } } // namespace operators diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.h b/paddle/fluid/operators/ngraph/ngraph_bridge.h index c57988f8f6322e76678c572aa21ff5b17b9e3c22..b609c284959238689eaf35c87d1bc4e4330b5c1f 100644 --- a/paddle/fluid/operators/ngraph/ngraph_bridge.h +++ b/paddle/fluid/operators/ngraph/ngraph_bridge.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include #include #include @@ -28,13 +29,6 @@ namespace operators { class NgraphBridge { public: - static std::map< - std::string, - std::function&, - std::shared_ptr>>)>> - NG_NODE_MAP; - explicit NgraphBridge( std::shared_ptr< std::unordered_map>> @@ -43,6 +37,8 @@ class NgraphBridge { void BuildNgNode(const std::shared_ptr& op); + static bool isRegister(const std::string& str); + private: std::shared_ptr< std::unordered_map>> diff --git a/paddle/fluid/operators/ngraph/ngraph_engine.cc b/paddle/fluid/operators/ngraph/ngraph_engine.cc index bec4b514a218715134d2366dd7efd7cf5b377b68..41037d9039bb53038af80eafa269ee9246dc9980 100644 --- a/paddle/fluid/operators/ngraph/ngraph_engine.cc +++ b/paddle/fluid/operators/ngraph/ngraph_engine.cc @@ -16,7 +16,10 @@ limitations under the License. */ #include #include +#include #include +#include +#include #include #include "paddle/fluid/framework/block_desc.h" @@ -88,14 +91,12 @@ static std::vector> NgraphOpIntervals( int pivot = left; while (pivot < right) { auto op_type = ops.at(pivot)->Type(); - if (NgraphBridge::NG_NODE_MAP.find(op_type) == - NgraphBridge::NG_NODE_MAP.end()) { + if (NgraphBridge::isRegister(op_type)) { ++pivot; } else { int start = pivot, end = start; while (pivot < right && - (NgraphBridge::NG_NODE_MAP.find(ops.at(pivot)->Type()) != - NgraphBridge::NG_NODE_MAP.end())) { + (!NgraphBridge::isRegister(ops.at(pivot)->Type()))) { ++pivot; ++end; } @@ -485,7 +486,8 @@ void NgraphEngine::Run(const framework::Scope& scope, } } - backend_->call(backend_->compile(ngraph_function_), t_out, t_in); + auto handle = backend_->compile(ngraph_function_); + handle->call_with_validate(t_out, t_in); } // NgraphEngine::Run } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/ngraph/ngraph_ops.h b/paddle/fluid/operators/ngraph/ngraph_ops.h deleted file mode 100644 index fb574f1bc1160c79f5802f11c00716eccad7f48d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/ngraph/ngraph_ops.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file contains the list of the ngraph operators for Paddle. - * - * ATTENTION: It requires some C++11 features, for lower version C++ or C, we - * might release another API. - */ - -#pragma once - -#include "ops/accuracy_op.h" -#include "ops/binary_unary_op.h" -#include "ops/conv2d_op.h" -#include "ops/elementwise_add_op.h" -#include "ops/fill_constant_op.h" -#include "ops/mean_op.h" -#include "ops/mul_op.h" -#include "ops/pool2d_op.h" -#include "ops/scale_op.h" -#include "ops/softmax_op.h" -#include "ops/top_k_op.h" diff --git a/paddle/fluid/operators/ngraph/ops/CMakeLists.txt b/paddle/fluid/operators/ngraph/ops/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7dee3308b74a70a2daf35055d3ac80a14de99ac1 --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/CMakeLists.txt @@ -0,0 +1,8 @@ +file(GLOB LIST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h") +set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/operators/ngraph/ngraph_ops.h) +file(APPEND ${pass_file} "\#pragma once\n") +file(WRITE ${pass_file} "// Generated by the /paddle/fluid/operators/ngraph/ops/CMakeLists.txt. DO NOT EDIT!\n\n") + +foreach(OPS_NAME ${LIST_OPS}) + file(APPEND ${pass_file} "\#include \"paddle/fluid/operators/ngraph/ops/${OPS_NAME}\"\n") +endforeach(OPS_NAME) diff --git a/paddle/fluid/operators/ngraph/ops/accuracy_op.h b/paddle/fluid/operators/ngraph/ops/accuracy_op.h index bf37ce48d8c2ce3b97fac154be9d1dfb08421f97..0da57517a733985ce1208732f13b08cd7bb8ca30 100644 --- a/paddle/fluid/operators/ngraph/ops/accuracy_op.h +++ b/paddle/fluid/operators/ngraph/ops/accuracy_op.h @@ -14,9 +14,12 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -63,3 +66,5 @@ void BuildAccuracyNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(accuracy, BuildAccuracyNode); diff --git a/paddle/fluid/operators/ngraph/ops/activation_op.h b/paddle/fluid/operators/ngraph/ops/activation_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a66ec65a336f807f554157628888633db22ebfec --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/activation_op.h @@ -0,0 +1,58 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildReluGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto out = platform::GetInputNode(op, "Out", ngb_node_map); + auto dout = platform::GetInputNode(op, "Out@GRAD", ngb_node_map); + auto relu_grad = std::make_shared(out, dout); + platform::SetOutputNode(op, "X@GRAD", relu_grad, ngb_node_map); +} + +void BuildTanhGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto out = platform::GetInputNode(op, "Out", ngb_node_map); + auto dout = platform::GetInputNode(op, "Out@GRAD", ngb_node_map); + auto shape = out->get_shape(); + auto node_const = + ngraph::op::Constant::create(ngraph::element::f32, shape, {1}); + auto result = dout * (node_const - out * out); + platform::SetOutputNode(op, "X@GRAD", result, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(relu_grad, BuildReluGradNode); +REGISTER_NG_OP(tanh_grad, BuildTanhGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/adam_op.h b/paddle/fluid/operators/ngraph/ops/adam_op.h new file mode 100644 index 0000000000000000000000000000000000000000..beba5d3d237d4dea578651f440b65a15251d5ad2 --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/adam_op.h @@ -0,0 +1,79 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildAdamNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = framework::AttrReader(op->Attrs()); + auto beta1pow = platform::GetInputNode(op, "Beta1Pow", ngb_node_map); + auto beta2pow = platform::GetInputNode(op, "Beta2Pow", ngb_node_map); + auto grad = platform::GetInputNode(op, "Grad", ngb_node_map); + auto learning_rate = platform::GetInputNode(op, "LearningRate", ngb_node_map); + auto moment1 = platform::GetInputNode(op, "Moment1", ngb_node_map); + auto moment2 = platform::GetInputNode(op, "Moment2", ngb_node_map); + auto param = platform::GetInputNode(op, "Param", ngb_node_map); + + auto epsilon = op_attrs.Get("epsilon"); + auto beta2 = op_attrs.Get("beta2"); + auto beta1 = op_attrs.Get("beta1"); + + auto moment1_shape = moment1->get_shape(); + auto grad_shape = grad->get_shape(); + + auto moment1out = std::make_shared( + ElementwiseScalar(beta1, moment1), + ElementwiseScalar(1. - beta1, grad)); + + auto grad_square = std::make_shared(grad, grad); + auto moment2out = std::make_shared( + ElementwiseScalar(beta2, moment2), + ElementwiseScalar(1. - beta2, grad_square)); + auto node_sqrt = std::make_shared( + ElementwiseScalar(1., beta2pow)); + auto lr = std::make_shared( + node_sqrt, ElementwiseScalar(1., beta1pow)); + auto updated_lr = std::make_shared(learning_rate, lr); + + auto moment2_sqrt = std::make_shared(moment2out); + auto param_grad = std::make_shared( + moment1out, ElementwiseScalar(epsilon, moment2_sqrt)); + auto delta = ElementwiseScalar(updated_lr, param_grad); + auto param_out = std::make_shared(param, delta); + + platform::SetOutputNode(op, "Moment1Out", moment1out, ngb_node_map); + platform::SetOutputNode(op, "Moment2Out", moment2out, ngb_node_map); + platform::SetOutputNode(op, "ParamOut", param_out, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(adam, BuildAdamNode); diff --git a/paddle/fluid/operators/ngraph/ops/batch_norm_op.h b/paddle/fluid/operators/ngraph/ops/batch_norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..01fe78cdb24652429f713d09ea2abb8c73bbddf5 --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/batch_norm_op.h @@ -0,0 +1,163 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/elementwise_node.h" +#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildBatchNormNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto& data_layout = op_attrs.Get("data_layout"); + + auto bias = paddle::platform::GetInputNode(op, "Bias", ngb_node_map); + auto mean = paddle::platform::GetInputNode(op, "Mean", ngb_node_map); + auto variance = paddle::platform::GetInputNode(op, "Variance", ngb_node_map); + auto scale = paddle::platform::GetInputNode(op, "Scale", ngb_node_map); + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + + const bool is_test = op_attrs.Get("is_test"); + const float epsilon = op_attrs.Get("epsilon"); + const float momentum = op_attrs.Get("momentum"); + + PADDLE_ENFORCE( + data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC", + "The BatchNorm operator only supports NHWC/NCHW/NC data format"); + + if (data_layout == "NHWC") { + x = paddle::platform::Nhwc2Nchw(x); + } + + std::shared_ptr mean_out, saved_mean, saved_variance, + variance_out, y; + + if (!is_test) { + auto BN = std::make_shared(epsilon, scale, + bias, x); + y = std::make_shared(BN, 0); + saved_mean = std::make_shared(BN, 1); + saved_variance = std::make_shared(BN, 2); + + mean_out = std::make_shared( + paddle::operators::ngraphs::ElementwiseScalar( + momentum, mean), + paddle::operators::ngraphs::ElementwiseScalar( + 1. - momentum, saved_mean)); + variance_out = std::make_shared( + paddle::operators::ngraphs::ElementwiseScalar( + momentum, variance), + paddle::operators::ngraphs::ElementwiseScalar( + 1. - momentum, saved_variance)); + + if (data_layout == "NHWC") { + y = paddle::platform::Nchw2Nhwc(y); + } + + paddle::platform::SetOutputNode(op, "MeanOut", mean_out, ngb_node_map); + paddle::platform::SetOutputNode(op, "VarianceOut", variance_out, + ngb_node_map); + paddle::platform::SetOutputNode(op, "SavedMean", saved_mean, ngb_node_map); + paddle::platform::SetOutputNode(op, "SavedVariance", saved_variance, + ngb_node_map); + paddle::platform::SetOutputNode(op, "Y", y, ngb_node_map); + } else { + y = std::make_shared(epsilon, scale, bias, + x, mean, variance); + paddle::platform::SetOutputNode(op, "Y", y, ngb_node_map); + } +} + +void BuildBatchNormGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto& data_layout = op_attrs.Get("data_layout"); + + auto bias = paddle::platform::GetInputNode(op, "Bias", ngb_node_map); + auto saved_mean = + paddle::platform::GetInputNode(op, "SavedMean", ngb_node_map); + auto saved_variance = + paddle::platform::GetInputNode(op, "SavedVariance", ngb_node_map); + auto scale = paddle::platform::GetInputNode(op, "Scale", ngb_node_map); + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto dy = paddle::platform::GetInputNode(op, "Y@GRAD", ngb_node_map); + auto x_shape = x->get_shape(); + auto dy_shape = dy->get_shape(); + + PADDLE_ENFORCE(x_shape.size() == 2 || x_shape.size() == 4, + "BN grap input size needs to be 2 or 4"); + PADDLE_ENFORCE_EQ(x_shape.size(), dy_shape.size(), + "BN grap input and delta size needs to be equal"); + PADDLE_ENFORCE( + data_layout == "NHWC" || data_layout == "NCHW" || data_layout == "NC", + "The BatchNorm operator only supports NHWC/NCHW/NC data format"); + + if (x_shape.size() == 2) { + x = std::make_shared( + x, ngraph::AxisVector{0, 1}, + ngraph::Shape{x_shape.at(0), x_shape.at(1), 1, 1}); + dy = std::make_shared( + dy, ngraph::AxisVector{0, 1}, + ngraph::Shape{dy_shape.at(0), dy_shape.at(1), 1, 1}); + } + + if (data_layout == "NHWC") { + x = paddle::platform::Nhwc2Nchw(dy); + dy = paddle::platform::Nhwc2Nchw(dy); + } + const float epsilon = op_attrs.Get("epsilon"); + + auto bn_bprop = std::make_shared( + epsilon, scale, bias, x, saved_mean, saved_variance, dy); + + std::shared_ptr dx = + std::make_shared(bn_bprop, 0); + auto dscale = std::make_shared(bn_bprop, 1); + auto dbias = std::make_shared(bn_bprop, 2); + paddle::platform::SetOutputNode(op, "Bias@GRAD", dbias, ngb_node_map); + paddle::platform::SetOutputNode(op, "Scale@GRAD", dscale, ngb_node_map); + if (x_shape.size() == 2) { + paddle::platform::SetOutputNode( + op, "X@GRAD", paddle::platform::NgReshaper(dx, x_shape), ngb_node_map); + } else { + if (data_layout == "NHWC") { + dx = paddle::platform::Nchw2Nhwc(dx); + } + paddle::platform::SetOutputNode(op, "X@GRAD", dx, ngb_node_map); + } +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(batch_norm, BuildBatchNormNode); +REGISTER_NG_OP(batch_norm_grad, BuildBatchNormGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/binary_unary_op.h b/paddle/fluid/operators/ngraph/ops/binary_unary_op.h index 0c0d25d0cd1ae536618057ce80388b8eeb81c68a..2d11775849a778262dcd3e36ff35d8851fb350f1 100644 --- a/paddle/fluid/operators/ngraph/ops/binary_unary_op.h +++ b/paddle/fluid/operators/ngraph/ops/binary_unary_op.h @@ -14,8 +14,11 @@ limitations under the License. */ #pragma once +#include #include +#include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -47,3 +50,7 @@ static void BuildUnaryNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(relu, BuildUnaryNode); +REGISTER_NG_OP(tanh, BuildUnaryNode); +REGISTER_NG_OP(sigmoid, BuildUnaryNode); diff --git a/paddle/fluid/operators/ngraph/ops/concat_op.h b/paddle/fluid/operators/ngraph/ops/concat_op.h new file mode 100644 index 0000000000000000000000000000000000000000..27d796851501b9158e1ce7f6415b4d5373e88e2d --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/concat_op.h @@ -0,0 +1,50 @@ +/*Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildConcatNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + std::vector> args; + for (auto& var_name_item : op->Inputs()) { + for (auto& var_name : var_name_item.second) { + auto& node0 = ngb_node_map->at(var_name); + args.push_back(node0); + } + } + auto op_attrs = framework::AttrReader(op->Attrs()); + const size_t axis = op_attrs.Get("axis"); + auto out = std::make_shared(args, axis); + platform::SetOutputNode(op, "Out", out, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(concat, BuildConcatNode); diff --git a/paddle/fluid/operators/ngraph/ops/conv2d_op.h b/paddle/fluid/operators/ngraph/ops/conv2d_op.h index 46fb2703f51482afa0546f08b8fc7b2c98e281bc..be766ebeb4796be102c917296238b8ab14710131 100644 --- a/paddle/fluid/operators/ngraph/ops/conv2d_op.h +++ b/paddle/fluid/operators/ngraph/ops/conv2d_op.h @@ -14,9 +14,12 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -233,3 +236,6 @@ void BuildConv2dGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(conv2d, BuildConv2dNode); +REGISTER_NG_OP(conv2d_grad, BuildConv2dGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h new file mode 100644 index 0000000000000000000000000000000000000000..be36b9d21ef6ebe5c11d783462e7dc564afe2aba --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h @@ -0,0 +1,151 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildCrossEntropyNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map); + auto label_shape = label->get_shape(); + auto x_shape = x->get_shape(); + auto label_rank = label_shape.size(); + auto x_rank = x_shape.size(); + std::shared_ptr x_2d = x, label_2d = label; + auto label_2d_shape = label_shape, x_2d_shape = x_shape; + + if (label_rank > 2) { + label_2d_shape = paddle::platform::FlattenTo2d(label_shape, label_rank - 1); + label_2d = paddle::platform::NgReshaper(label, label_2d_shape); + } + if (x_rank > 2) { + x_2d_shape = paddle::platform::FlattenTo2d(x_shape, x_rank - 1); + x_2d = paddle::platform::NgReshaper(x, x_2d_shape); + } + + auto batch_size = x_2d_shape.at(0); + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + const bool is_soft_label = op_attrs.Get("soft_label"); + + std::shared_ptr node_1_hot = label_2d; + if (!is_soft_label) { + auto label_1d = paddle::platform::NgReshaper( + label_2d, ngraph::Shape{label_2d_shape.at(0)}); + node_1_hot = std::make_shared(label_1d, x_2d_shape, 1); + } + if (x->get_element_type() != node_1_hot->get_element_type()) { + node_1_hot = std::make_shared(node_1_hot, + x->get_element_type()); + } + + auto node_log = std::make_shared(x_2d); + auto high_clip = ngraph::op::Constant::create(node_log->get_element_type(), + node_log->get_shape(), {1e20}); + auto low_clip = ngraph::op::Constant::create(node_log->get_element_type(), + node_log->get_shape(), {-1e20}); + auto node_min = std::make_shared(node_log, high_clip); + auto node_max = std::make_shared(node_min, low_clip); + auto node_mul = node_1_hot * node_log; + auto node_sum = + std::make_shared(node_mul, ngraph::AxisSet{1}); + auto node_neg = std::make_shared(node_sum); + auto xe = + paddle::platform::NgReshaper(node_neg, ngraph::Shape{batch_size, 1}); + + if (!is_soft_label) { + auto ignore_index = op_attrs.Get("ignore_index"); + auto ignore_node = ngraph::op::Constant::create( + label->get_element_type(), label_2d_shape, {ignore_index}); + auto not_equal_node = + std::make_shared(label_2d, ignore_node); + auto mask = std::make_shared(not_equal_node, + xe->get_element_type()); + xe = xe * mask; + } + + paddle::platform::SetOutputNode(op, "Y", xe, ngb_node_map); +} + +void BuildCrossEntropyGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + const bool is_soft_label = op_attrs.Get("soft_label"); + + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto label = paddle::platform::GetInputNode(op, "Label", ngb_node_map); + auto dy = paddle::platform::GetInputNode(op, "Y@GRAD", ngb_node_map); + auto x_shape = x->get_shape(); + auto rank = x_shape.size(); + + std::shared_ptr mask; + if (!is_soft_label) { + auto label_shape = label->get_shape(); + label_shape.pop_back(); + label = paddle::platform::NgReshaper(label, label_shape); + + auto ignore_index = op_attrs.Get("ignore_index"); + auto ignore_node = ngraph::op::Constant::create( + label->get_element_type(), label_shape, {ignore_index}); + auto not_equal_node = + std::make_shared(label, ignore_node); + mask = std::make_shared(not_equal_node, + x->get_element_type()); + mask = std::make_shared(mask, x_shape, + ngraph::AxisSet{rank - 1}); + + label = std::make_shared(label, x_shape, rank - 1); + } + + auto dy_shape = dy->get_shape(); + dy_shape.pop_back(); + auto dy_reshape = paddle::platform::NgReshaper(dy, dy_shape); + auto dy_bcast = std::make_shared( + dy_reshape, x_shape, ngraph::AxisSet{rank - 1}); + if (x->get_element_type() != label->get_element_type()) { + label = std::make_shared(label, x->get_element_type()); + } + + auto xe_grad = -label * dy_bcast / x; + + if (!is_soft_label) { + xe_grad = xe_grad * mask; + } + + paddle::platform::SetOutputNode(op, "X@GRAD", xe_grad, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(cross_entropy, BuildCrossEntropyNode); +REGISTER_NG_OP(cross_entropy_grad, BuildCrossEntropyGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h b/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h index 868df51e16a9714a750bac64dadc3441de79165e..d7485a706a193a52113cb993a3604c444b4303c0 100644 --- a/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h +++ b/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h @@ -14,11 +14,14 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" #include "paddle/fluid/operators/ngraph/ops/elementwise_node.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -85,3 +88,6 @@ void BuildElementwiseAddGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(elementwise_add, BuildElementwiseAddNode); +REGISTER_NG_OP(elementwise_add_grad, BuildElementwiseAddGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h index 406a4314f89810df192280cc97de245553d5520f..42c2df5259242b7ae28613ab12c237834febc574 100644 --- a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h +++ b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h @@ -14,9 +14,12 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -46,8 +49,6 @@ void BuildFillConstantNode( ng_dtype = ngraph::element::i64; } else if (data_type == paddle::framework::proto::VarType::INT32) { ng_dtype = ngraph::element::i32; - } else if (data_type == paddle::framework::proto::VarType::BOOL) { - ng_dtype = ngraph::element::boolean; } else { PADDLE_THROW("unsupported data type: %s", data_type); } @@ -57,3 +58,5 @@ void BuildFillConstantNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(fill_constant, BuildFillConstantNode); diff --git a/paddle/fluid/operators/ngraph/ops/mean_op.h b/paddle/fluid/operators/ngraph/ops/mean_op.h index 4c44bc4c112f401c2707f7babd49a33f238a768f..86e697d260eb0f26428258b5faea958a7319948c 100644 --- a/paddle/fluid/operators/ngraph/ops/mean_op.h +++ b/paddle/fluid/operators/ngraph/ops/mean_op.h @@ -15,10 +15,13 @@ limitations under the License. */ #pragma once #include +#include #include +#include #include "ngraph/ngraph.hpp" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -64,3 +67,6 @@ void BuildMeanGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(mean, BuildMeanNode); +REGISTER_NG_OP(mean_grad, BuildMeanGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/momentum_op.h b/paddle/fluid/operators/ngraph/ops/momentum_op.h new file mode 100644 index 0000000000000000000000000000000000000000..84bddacba89d2921bca4915af7f64dcfbfdd42db --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/momentum_op.h @@ -0,0 +1,106 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildMomentumNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto param = paddle::platform::GetInputNode(op, "Param", ngb_node_map); + auto grad = paddle::platform::GetInputNode(op, "Grad", ngb_node_map); + auto velocity = paddle::platform::GetInputNode(op, "Velocity", ngb_node_map); + auto learning_rate = + paddle::platform::GetInputNode(op, "LearningRate", ngb_node_map); + + auto mu = op_attrs.Get("mu"); + bool use_nesterov = op_attrs.Get("use_nesterov"); + + auto param_shape = param->get_shape(); + auto velocity_shape = velocity->get_shape(); + auto grad_shape = grad->get_shape(); + auto lr_shape = learning_rate->get_shape(); + + auto shape_velocity = ngraph::Shape{velocity_shape}; + auto mu_create = + ngraph::op::Constant::create(ngraph::element::f32, shape_velocity, {mu}); + + auto vel_mul = std::make_shared(velocity, mu_create); + auto vel_out = std::make_shared(vel_mul, grad); + + ngraph::NodeVector result; + if (use_nesterov) { + auto mul_res = std::make_shared(vel_out, mu_create); + auto add_res = std::make_shared(grad, mul_res); + + auto add_2d = paddle::platform::FlattenTo2d(add_res->get_shape(), 0); + auto vel_reshape = paddle::platform::NgReshaper(vel_out, add_2d); + + auto lr_bcast = std::make_shared( + learning_rate, vel_reshape->get_shape(), + ngraph::AxisSet{vel_reshape->get_shape().size() - 1}); + + auto lr_1d = paddle::platform::FlattenTo1d(lr_bcast->get_shape(), 0); + auto lr_reshape = std::make_shared( + lr_bcast, ngraph::AxisVector{0, 1}, lr_1d); + + lr_reshape = std::make_shared( + lr_reshape, ngraph::AxisVector{0}, param->get_shape()); + + auto mul_res1 = std::make_shared(add_res, lr_reshape); + auto res = std::make_shared(param, mul_res1); + paddle::platform::SetOutputNode(op, "ParamOut", res, ngb_node_map); + } else { + auto vel_2d = paddle::platform::FlattenTo2d(vel_out->get_shape(), 0); + auto vel_reshape = paddle::platform::NgReshaper(vel_out, vel_2d); + + auto lr_bcast = std::make_shared( + learning_rate, vel_reshape->get_shape(), + ngraph::AxisSet{vel_reshape->get_shape().size() - 1}); + + auto lr_1d = paddle::platform::FlattenTo1d(lr_bcast->get_shape(), 0); + auto lr_reshape = std::make_shared( + lr_bcast, ngraph::AxisVector{0, 1}, lr_1d); + + lr_reshape = std::make_shared( + lr_reshape, ngraph::AxisVector{0}, param->get_shape()); + + auto mul_result = + std::make_shared(lr_reshape, vel_out); + + auto res = std::make_shared(param, mul_result); + paddle::platform::SetOutputNode(op, "ParamOut", res, ngb_node_map); + } + paddle::platform::SetOutputNode(op, "VelocityOut", vel_out, ngb_node_map); +} + +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(momentum, BuildMomentumNode); diff --git a/paddle/fluid/operators/ngraph/ops/mul_op.h b/paddle/fluid/operators/ngraph/ops/mul_op.h index 4a6cbebe245f891c6c33b2116330a41d89d50e25..d13665864b8950436298b7cf685c803593007803 100644 --- a/paddle/fluid/operators/ngraph/ops/mul_op.h +++ b/paddle/fluid/operators/ngraph/ops/mul_op.h @@ -14,8 +14,11 @@ limitations under the License. */ #pragma once +#include #include +#include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -130,3 +133,6 @@ static void BuildMulGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(mul, BuildMulNode); +REGISTER_NG_OP(mul_grad, BuildMulGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/op_bridge.h b/paddle/fluid/operators/ngraph/ops/op_bridge.h new file mode 100644 index 0000000000000000000000000000000000000000..93df0ad8062745380d9cd4ca5027bef1425083bf --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/op_bridge.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include +#include + +#include "ngraph/node.hpp" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/ngraph/ngraph_bridge.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace ops { + +class NgraphSingleton { + NgraphSingleton() = default; + NgraphSingleton(NgraphSingleton const&) = delete; + void operator=(NgraphSingleton const) = delete; + + ~NgraphSingleton() = default; + + static std::map< + std::string, + std::function&, + std::shared_ptr>>)>> + ng_node_maps_; + + public: + template + static void Register(TF&& tf, const std::string& name) { + ng_node_maps_[name] = tf; + } + + static bool Lookup(const std::string& name) { + auto it = ng_node_maps_.find(name); + if (it == ng_node_maps_.end()) { + return true; + } + return false; + } + + static void BuildNode( + const std::shared_ptr>>& ng_maps, + const std::shared_ptr& op, + const std::string& name) { + ng_node_maps_[name](op, ng_maps); + } +}; + +std::map&, + std::shared_ptr>>)>> + NgraphSingleton::ng_node_maps_; + +} // namespace ops +} // namespace operators +} // namespace paddle + +#define REGISTER_NG_OP(op_type__, Converter__) \ + struct ng_##op_type__##_converter { \ + ng_##op_type__##_converter() { \ + paddle::operators::ops::NgraphSingleton::Register( \ + paddle::operators::ngraphs::Converter__, #op_type__); \ + } \ + }; \ + ng_##op_type__##_converter ng_##op_type__##_converter__; diff --git a/paddle/fluid/operators/ngraph/ops/pool2d_op.h b/paddle/fluid/operators/ngraph/ops/pool2d_op.h index 836c9d6c185b305d3dd4c9e9d30e23abb0c1431c..c7b9c9316171a448d16ed68339f5754d25f3cabd 100644 --- a/paddle/fluid/operators/ngraph/ops/pool2d_op.h +++ b/paddle/fluid/operators/ngraph/ops/pool2d_op.h @@ -14,10 +14,13 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -172,3 +175,6 @@ void BuildPool2dGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(pool2d, BuildPool2dNode); +REGISTER_NG_OP(pool2d_grad, BuildPool2dGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/scale_op.h b/paddle/fluid/operators/ngraph/ops/scale_op.h index 91a57d0be606373e985a30b7ac9c73648062d8e4..1461b85b16ece79548f3ca95be811fb31136c610 100644 --- a/paddle/fluid/operators/ngraph/ops/scale_op.h +++ b/paddle/fluid/operators/ngraph/ops/scale_op.h @@ -14,9 +14,12 @@ limitations under the License. */ #pragma once +#include #include +#include #include "ngraph/ngraph.hpp" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -37,3 +40,5 @@ void BuildScaleNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(scale, BuildScaleNode); diff --git a/paddle/fluid/operators/ngraph/ops/softmax_op.h b/paddle/fluid/operators/ngraph/ops/softmax_op.h index fc6395c08bc6b00990679c5327c3152a980be821..7d5720c460c4194ce06670a715b8d7ff4435bb2a 100644 --- a/paddle/fluid/operators/ngraph/ops/softmax_op.h +++ b/paddle/fluid/operators/ngraph/ops/softmax_op.h @@ -14,10 +14,13 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include "ngraph/ngraph.hpp" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -72,3 +75,6 @@ void BuildSoftmaxGradNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(softmax, BuildSoftmaxNode); +REGISTER_NG_OP(softmax_grad, BuildSoftmaxGradNode); diff --git a/paddle/fluid/operators/ngraph/ops/sum_op.h b/paddle/fluid/operators/ngraph/ops/sum_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ab8cdb8f4d847c0acb60b39d07dc83f085b60bbd --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/sum_op.h @@ -0,0 +1,58 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildSumNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + std::vector op_inputs; + for (auto& var_name_item : op->Inputs()) { + for (auto& var_name : var_name_item.second) { + op_inputs.push_back(var_name); + if (ngb_node_map->find(var_name) == ngb_node_map->end()) { + PADDLE_THROW("op % input varname %s is not found in var_node_map", + op->Type(), var_name); + } + } + } + std::shared_ptr& sum = ngb_node_map->at(op_inputs[0]); + for (size_t k = 1; k < op_inputs.size(); ++k) { + std::shared_ptr& nodek = ngb_node_map->at(op_inputs[k]); + if (nodek->get_element_type() != sum->get_element_type()) { + nodek = + std::make_shared(nodek, sum->get_element_type()); + } + sum = sum + nodek; + } + platform::SetOutputNode(op, "Out", sum, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle + +REGISTER_NG_OP(sum, BuildSumNode); diff --git a/paddle/fluid/operators/ngraph/ops/top_k_op.h b/paddle/fluid/operators/ngraph/ops/top_k_op.h index 852ecd7139a3c7046e78265ca021b2ce286c63c0..cdc26f6afd58700c3a1f57fa955d60bc8925d2d1 100644 --- a/paddle/fluid/operators/ngraph/ops/top_k_op.h +++ b/paddle/fluid/operators/ngraph/ops/top_k_op.h @@ -14,8 +14,11 @@ limitations under the License. */ #pragma once +#include #include +#include #include "ngraph/ngraph.hpp" +#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/platform/ngraph_helper.h" namespace paddle { @@ -42,3 +45,5 @@ void BuildTopKNode( } // namespace ngraphs } // namespace operators } // namespace paddle + +REGISTER_NG_OP(top_k, BuildTopKNode); diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h index 98bae5e1d329005f9463fd7bb0751c44952dea88..c9c9f530fe846c1713ad176e05a377996d04470b 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.h +++ b/paddle/fluid/operators/optimizers/sgd_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/jit/kernels.h" namespace paddle { namespace operators { @@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel { if (param_var->IsType()) { const auto *param = ctx.Input("Param"); auto *param_out = ctx.Output("ParamOut"); - // Actually, all tensors are LoDTensor except SelectedRows. if (grad_var->IsType()) { - param_out->mutable_data(ctx.GetPlace()); const auto *grad = ctx.Input("Grad"); - - auto p = framework::EigenVector::Flatten(*param); - auto g = framework::EigenVector::Flatten(*grad); - auto o = framework::EigenVector::Flatten(*param_out); - auto *lr = learning_rate->data(); - - o = p - lr[0] * g; + auto sz = param_out->numel(); + PADDLE_ENFORCE_EQ(param->numel(), sz); + PADDLE_ENFORCE_EQ(grad->numel(), sz); + + jit::sgd_attr_t attr(1, sz, 1, sz, 1); + const T *lr = learning_rate->data(); + const T *param_data = param->data(); + const T *grad_data = grad->data(); + int64_t rows_idx = 0; + T *out_data = param_out->mutable_data(ctx.GetPlace()); + + auto sgd = + jit::Get, platform::CPUPlace>(attr); + sgd(lr, param_data, grad_data, &rows_idx, out_data, &attr); } else if (grad_var->IsType()) { // TODO(qijun): In Sparse SGD operator, in-place update is enforced. // This manual optimization brings difficulty to track data dependency. // It's better to find a more elegant solution. PADDLE_ENFORCE_EQ(param, param_out); const auto *grad = ctx.Input("Grad"); + auto &grad_rows = grad->rows(); // for distributed training, a sparse var may be empty, // just skip updating. - if (grad->rows().size() == 0) { + if (grad_rows.size() == 0) { return; } - auto grad_height = grad->height(); auto out_dims = param_out->dims(); - PADDLE_ENFORCE_EQ(grad_height, out_dims[0]); - + PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]); auto &grad_value = grad->value(); - auto &grad_rows = grad->rows(); - - size_t grad_row_numel = grad_value.numel() / grad_rows.size(); - PADDLE_ENFORCE_EQ(static_cast(grad_row_numel), - param_out->numel() / grad_height); - - auto *grad_data = grad_value.data(); - auto *out_data = param_out->data(); - auto *lr = learning_rate->data(); - for (size_t i = 0; i < grad_rows.size(); i++) { - PADDLE_ENFORCE(grad_rows[i] < grad_height, - "Input rows index should less than height"); - for (size_t j = 0; j < grad_row_numel; j++) { - out_data[grad_rows[i] * grad_row_numel + j] -= - lr[0] * grad_data[i * grad_row_numel + j]; - } - } + const T *param_data = param->data(); + const T *grad_data = grad_value.data(); + const T *lr = learning_rate->data(); + const int64_t *rows_data = grad_rows.data(); + T *out_data = param_out->mutable_data(ctx.GetPlace()); + + jit::sgd_attr_t attr; + attr.param_height = out_dims[0]; + attr.param_width = param_out->numel() / attr.param_height; + attr.grad_height = grad_rows.size(); // note: it is not grad->height() + attr.grad_width = grad_value.numel() / attr.grad_height; + attr.selected_rows_size = grad_rows.size(); + PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width); + + auto sgd = + jit::Get, platform::CPUPlace>(attr); + sgd(lr, param_data, grad_data, rows_data, out_data, &attr); } else { PADDLE_THROW("Unsupported Variable Type of Grad"); } diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index fc3636e0b24765f681d3260b07fe854309774a40..0a0ece162cc63696974383d8ed49fdd10204c331 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -168,9 +168,10 @@ void Pool2dOpMaker::Make() { "be ignored."); // TODO(Chengduo): Add checker. // (Currently, // TypedAttrChecker don't support vector type.) - AddAttr("global_pooling", - "(bool, default false) Whether to use the global pooling. " - "If global_pooling = true, ksize and paddings will be ignored.") + AddAttr( + "global_pooling", + "(bool, default false) Whether to use the global pooling. " + "If global_pooling = true, kernel size and paddings will be ignored.") .SetDefault(false); AddAttr>("strides", "(vector, default {1, 1}), strides(height, " @@ -182,7 +183,7 @@ void Pool2dOpMaker::Make() { "paddings", "(vector, default {0,0}), paddings(height, width) of pooling " "operator." - "If global_pooling = true, paddings and ksize will be ignored.") + "If global_pooling = true, paddings and kernel size will be ignored.") .SetDefault({0, 0}); AddAttr( "exclusive", @@ -204,7 +205,7 @@ void Pool2dOpMaker::Make() { .SetDefault(false); AddAttr( "ceil_mode", - "(bool, default false) Wether to use the ceil function to calculate " + "(bool, default false) Whether to use the ceil function to calculate " "output height and width. False is the default. If it is set to False, " "the floor function will be used.") .SetDefault(false); @@ -262,28 +263,37 @@ Example: For exclusive = false: $$ hstart = i * strides[0] - paddings[0] + $$ + $$ hend = hstart + ksize[0] + $$ + $$ wstart = j * strides[1] - paddings[1] + $$ + $$ wend = wstart + ksize[1] + $$ + $$ Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{ksize[0] * ksize[1]} $$ + For exclusive = true: $$ hstart = max(0, i * strides[0] - paddings[0]) + $$ + $$ hend = min(H, hstart + ksize[0]) + $$ + $$ wstart = max(0, j * strides[1] - paddings[1]) + $$ + $$ wend = min(W, wstart + ksize[1]) + $$ + $$ Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} $$ - For adaptive = true: - $$ - hstart = floor(i * H_{in} / H_{out}) - hend = ceil((i + 1) * H_{in} / H_{out}) - wstart = floor(j * W_{in} / W_{out}) - wend = ceil((j + 1) * W_{in} / W_{out}) - Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} - $$ )DOC"); } @@ -324,7 +334,7 @@ void Pool3dOpMaker::Make() { AddAttr( "global_pooling", "(bool, default false) Whether to use the global pooling. " - "If global_pooling = true, ksize and paddings wille be ignored.") + "If global_pooling = true, kernel size and paddings will be ignored.") .SetDefault(false); AddAttr>( "strides", @@ -359,7 +369,7 @@ void Pool3dOpMaker::Make() { .SetDefault(false); AddAttr( "ceil_mode", - "(bool, default false) Wether to use the ceil function to calculate " + "(bool, default false) Whether to use the ceil function to calculate " "output height and width. False is the default. If it is set to False, " "the floor function will be used.") .SetDefault(false); @@ -392,48 +402,68 @@ Example: Output: Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ For ceil_mode = false: - $$ - D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ - H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 - $$ + $$ + D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + $$ + $$ + H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[2]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 + $$ For ceil_mode = true: - $$ - D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 \\ - H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 - $$ + $$ + D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 + $$ + $$ + H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 + $$ + For exclusive = false: - $$ - dstart = i * strides[0] - paddings[0] - dend = dstart + ksize[0] - hstart = j * strides[1] - paddings[1] - hend = hstart + ksize[1] - wstart = k * strides[2] - paddings[2] - wend = wstart + ksize[2] - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]} - $$ + $$ + dstart = i * strides[0] - paddings[0] + $$ + $$ + dend = dstart + ksize[0] + $$ + $$ + hstart = j * strides[1] - paddings[1] + $$ + $$ + hend = hstart + ksize[1] + $$ + $$ + wstart = k * strides[2] - paddings[2] + $$ + $$ + wend = wstart + ksize[2] + $$ + $$ + Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]} + $$ + For exclusive = true: - $$ - dstart = max(0, i * strides[0] - paddings[0]) - dend = min(D, dstart + ksize[0]) - hstart = max(0, j * strides[1] - paddings[1]) - hend = min(H, hstart + ksize[1]) - wstart = max(0, k * strides[2] - paddings[2]) - wend = min(W, wstart + ksize[2]) - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - $$ - - For adaptive = true: - $$ - dstart = floor(i * D_{in} / D_{out}) - dend = ceil((i + 1) * D_{in} / D_{out}) - hstart = floor(j * H_{in} / H_{out}) - hend = ceil((j + 1) * H_{in} / H_{out}) - wstart = floor(k * W_{in} / W_{out}) - wend = ceil((k + 1) * W_{in} / W_{out}) - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - $$ + $$ + dstart = max(0, i * strides[0] - paddings[0]) + $$ + $$ + dend = min(D, dstart + ksize[0]) + $$ + $$ + hend = min(H, hstart + ksize[1]) + $$ + $$ + wstart = max(0, k * strides[2] - paddings[2]) + $$ + $$ + wend = min(W, wstart + ksize[2]) + $$ + $$ + Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + $$ )DOC"); } diff --git a/paddle/fluid/operators/random_crop_op.h b/paddle/fluid/operators/random_crop_op.h index d68ba9d661698bb0d33b139f5748daec2ead6595..ee034b270527376fc268b8a868f90db52c51848a 100644 --- a/paddle/fluid/operators/random_crop_op.h +++ b/paddle/fluid/operators/random_crop_op.h @@ -121,7 +121,7 @@ struct RandomCropFunctor { HOSTDEVICE void operator()(size_t ins_idx) { typename Random::Engine engine(seed_); engine.discard(ins_idx * (rank_ - num_batchsize_dims_)); - size_t offsets[9]; + size_t offsets[9] = {}; for (int i = num_batchsize_dims_; i < rank_; ++i) { typename Random::template UniformIntDist dist( 0, x_dims_[i] - out_dims_[i]); diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index defc29b91f81cb851fec24c5cd9d62dc72c54147..52e96c4fb3a058057f5acd5e30b7a0e869aefacc 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/reader/buffered_reader.h" +#include #include #include "paddle/fluid/framework/data_type.h" diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 8fe638ac2fdc6e0baed7d6cd3c57b72f23164129..846b2ed77e46d82fbeda8faaeed99cddf23c8824 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -85,9 +85,7 @@ class ReadOp : public framework::OperatorBase { std::vector ins; // For profiling - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto& ctx = *pool.Get(dev_place); - platform::RecordEvent record_event(Type(), &ctx); + platform::RecordEvent record_event(Type()); reader->ReadNext(&ins); if (ins.empty()) { diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index a1e02a3fd0e7902e89890f8d3b13159172571f5c..88c968a0eaae8a2ac6f14ede9348c837bcd92d76 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -157,11 +157,13 @@ class RecurrentBase : public framework::OperatorBase { const std::vector &src_vars, framework::Scope *dst_scope, const std::vector &dst_vars, - Callback callback) { + Callback callback, + bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size()); for (size_t i = 0; i < dst_vars.size(); ++i) { VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i]; - AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback); + AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback, + is_backward); } } @@ -173,11 +175,13 @@ class RecurrentBase : public framework::OperatorBase { const std::vector &src_vars, const framework::Scope &dst_scope, const std::vector &dst_vars, - Callback callback) { + Callback callback, + bool is_backward = false) { PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size()); for (size_t i = 0; i < dst_vars.size(); ++i) { VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i]; - AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback); + AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback, + is_backward); } } @@ -194,9 +198,13 @@ class RecurrentBase : public framework::OperatorBase { static void AccessTensor(const framework::Scope &src_scope, const std::string &src_var_name, framework::Scope *dst_scope, - const std::string &dst_var_name, Callback callback) { + const std::string &dst_var_name, Callback callback, + bool is_backward = false) { auto *src_var = src_scope.FindVar(src_var_name); - PADDLE_ENFORCE(src_var != nullptr); + if (is_backward && src_var == nullptr) { + return; + } + PADDLE_ENFORCE(src_var != nullptr, "%s is not found.", src_var_name); auto &src_tensor = src_var->Get(); auto *dst_var = dst_scope->Var(dst_var_name); @@ -208,12 +216,16 @@ class RecurrentBase : public framework::OperatorBase { static void AccessTensor(const framework::Scope &src_scope, const std::string &src_var_name, const framework::Scope &dst_scope, - const std::string &dst_var_name, Callback callback) { + const std::string &dst_var_name, Callback callback, + bool is_backward = false) { + auto *dst_var = dst_scope.FindVar(dst_var_name); + if (is_backward && dst_var == nullptr) { + return; + } auto *src_var = src_scope.FindVar(src_var_name); - PADDLE_ENFORCE(src_var != nullptr); + PADDLE_ENFORCE(src_var != nullptr, "%s is not found.", src_var_name); auto &src_tensor = src_var->Get(); - auto *dst_var = dst_scope.FindVar(dst_var_name); - PADDLE_ENFORCE(dst_var != nullptr); + PADDLE_ENFORCE(dst_var != nullptr, "%s is not found.", dst_var_name); auto *dst_tensor = dst_var->GetMutable(); callback(src_tensor, dst_tensor); } @@ -345,7 +357,8 @@ class RecurrentGradOp : public RecurrentBase { auto dims = framework::vectorize(inside->dims()); dims.erase(dims.begin()); inside->Resize(framework::make_ddim(dims)); - }); + }, + true /*is_backward*/); auto og_set = List2Set(Inputs(kOutputGrads)); if (VLOG_IS_ON(10)) { @@ -454,7 +467,8 @@ class RecurrentGradOp : public RecurrentBase { auto dst = outside->Slice(seq_offset, seq_offset + 1); framework::TensorCopy(inside, place, dev_ctx, &dst); - }); + }, + true /*is_backward*/); VLOG(5) << "Link outside gradient finished "; if (step_id + 1 == seq_len) { // at_end @@ -467,7 +481,8 @@ class RecurrentGradOp : public RecurrentBase { outside->Resize(inside.dims()); outside->mutable_data(place, inside.type()); framework::TensorCopy(inside, place, dev_ctx, outside); - }); + }, + true /*is_backward*/); VLOG(5) << "Link initialize state gradient finished "; } scopes.Next(); @@ -608,10 +623,8 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { std::vector input{kInputs, kInitialStates}; std::vector output{kOutputs}; for (auto &s : input) { + // NOTE(zcd): In some case, some of kInputs doesn't have gradient. PADDLE_ENFORCE(ctx->HasInputs(s)); - PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)), - "Cannot find the gradient variable %s", - framework::GradVarName(s)); } for (auto &s : output) { PADDLE_ENFORCE(ctx->HasInputs(s)); diff --git a/paddle/fluid/operators/requantize_op.cc b/paddle/fluid/operators/requantize_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..08ba1470aaddf146fe3685ff6c3cd9f3d7e16d75 --- /dev/null +++ b/paddle/fluid/operators/requantize_op.cc @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/fluid/operators/requantize_op.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +namespace paddle { +namespace operators { + +framework::OpKernelType ReQuantOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + framework::LibraryType library_ = framework::LibraryType::kMKLDNN; + framework::DataLayout layout_ = framework::DataLayout::kMKLDNN; + + return framework::OpKernelType(ctx.Input("Input")->type(), + ctx.GetPlace(), layout_, library_); +} + +void ReQuantOpMaker::Make() { + AddInput("Input", "input data"); + AddOutput("Output", "output data"); + AddAttr("Scale_in", "scale in data").SetDefault({1.0f}); + AddAttr("Scale_out", "scale out data").SetDefault({1.0f}); + AddComment( + R"DOC(This op will re-quantize data from INT8 with scale_in to INT8 with scale_out)DOC"); +} + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR(requantize, ops::ReQuantOp, ops::ReQuantOpMaker, + paddle::framework::DefaultGradOpDescMaker); diff --git a/paddle/fluid/operators/requantize_op.h b/paddle/fluid/operators/requantize_op.h new file mode 100644 index 0000000000000000000000000000000000000000..c2b154db11dc713fdce1b9ef2f2616428bc09202 --- /dev/null +++ b/paddle/fluid/operators/requantize_op.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::OpKernelType; +using framework::Tensor; + +class ReQuantOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + ctx->SetOutputDim("Output", ctx->GetInputDim("Input")); + ctx->ShareLoD("Input", /*->*/ "Output"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class ReQuantOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index eda54f76b898cdf893347d31cadb86dea892a4ce..37f69426b62fedf8cbeca68105fb86fb4ea72eab 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -56,6 +56,9 @@ class ReshapeOp : public framework::OperatorWithKernel { static framework::DDim ValidateShape(const std::vector shape, const framework::DDim &in_dims) { const int64_t in_size = framework::product(in_dims); + auto in_dims_vec = framework::vectorize(in_dims); + bool all_positive = std::all_of(in_dims_vec.cbegin(), in_dims_vec.cend(), + [](int64_t i) { return i > 0; }); // only one dimension can be set to -1, whose size will be automatically // infered. const int64_t unk_dim_val = -1; @@ -88,7 +91,7 @@ class ReshapeOp : public framework::OperatorWithKernel { } if (unk_dim_idx != -1) { - if (in_size > 0) { + if (all_positive) { // in_size < 0 and is un-determinate in compile time, skip the check, // for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8], // capacity = -24, in_size = -8, output_shape[0] = 0 diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 10b1b0c899d833d70fa6afe51998fe210899e3c3..d283bddbe9f974ac6835ee91d5a7851453687b80 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -109,23 +109,23 @@ from future subsequences in a computationally efficient manner to improve unidirectional recurrent neural networks. The row convolution operator is different from the 1D sequence convolution, and is computed as follows: -Given an input sequence $in$ of length $t$ and input dimension $d$, -and a filter ($W$) of size $context \times d$, +Given an input sequence $X$ of length $t$ and input dimension $D$, +and a filter ($W$) of size $context \times D$, the output sequence is convolved as: $$ -out_{i, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :} +out_{i} = \\sum_{j=i}^{i + context - 1} X_{j} \\cdot W_{j-i} $$ In the above equation: * $Out_{i}$: The i-th row of output variable with shape [1, D]. -* $\\tau$: Future context size. +* $context$: Future context size. * $X_{j}$: The j-th row of input variable with shape [1, D]. -* $W_{i-j}$: The (i-j)-th row of parameters with shape [1, D]. +* $W_{j-i}$: The (j-i)-th row of parameters with shape [1, D]. More details about row_conv please refer to the design document diff --git a/paddle/fluid/operators/sample_logits_op.cc b/paddle/fluid/operators/sample_logits_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a7f7fb26b17c77e6fe87646d3cac20c02c49b52c --- /dev/null +++ b/paddle/fluid/operators/sample_logits_op.cc @@ -0,0 +1,225 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/sample_logits_op.h" +#include "paddle/fluid/operators/math/sample_prob.h" + +namespace paddle { +namespace operators { + +class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Logits", + "(Tensor, default: Tensor), The unscaled log probabilities " + "which is a 2-D tensor with shape [N x K]. N is the batch_size, " + "and K is the class number."); + AddInput("Labels", + "(Tensor) The ground truth which is a 2-D tensor. Labels is a " + "Tensor with shape [N x NT], where NT is the number of" + "true labels for each example."); + AddInput("CustomizedSamples", + "(Tensor, default: Tensor), A 2-D tensor with shape [N, " + "NT + S]," + " where N is the batch size, NT is the number of true labels " + "and S is the number of negtive sample for each example." + "The first NT elements of each row should be the same with true " + "labels, " + "followed by S custom negtive samples. This tensor" + "is only used when use_customized_samples is true.") + .AsDispensable(); + AddInput( + "CustomizedProbabilities", + "(Tensor, default: Tensor), A 2-D tensor with shape [N, NT + S]." + "The tensor has the same shape with CustomSamples," + "and each element represents probability of element in CustomSamples. " + "This " + "tensor is only used when use_customized_samples is true.") + .AsDispensable(); + AddOutput("Samples", + "(Tensor, default: Tensor), A 2-D tensor with shape [N, " + "NT + S]." + "The outputs value of sampler, including NT true lables and S " + "negetive samples " + "for each example. This will be used in" + "backward calculation.") + .AsIntermediate(); + AddOutput( + "Probabilities", + "(Tensor, default: Tensor), A 2-D tensor with shape [N, NT + S]." + "The probabilites of sampled positive and negtive labels.") + .AsIntermediate(); + AddOutput("SampledLogits", + "(Tensor, default: Tensor), A 2-D tensor with shape" + "[N, NT + S]. The outputs value of sampled logits, which will be" + "used in backward propagation.") + .AsIntermediate(); + AddOutput( + "SampledLabels", + "(Tensor, default: Tensor), A 2-D tensor. The sampled labels" + "with shape [N, NT]. The tonsor contains hard labels as input to " + " softmax op, that is 0, 1, ..., NT-1 because of the first NT elements" + " of Sampels are positive lables."); + AddAttr( + "use_customized_samples", + "An indicator whether to use customized samples with probabilities, if " + "True" + "the operator will use customized samples and customized probabilities" + "otherwise, the operator will generate them by itself.") + .SetDefault(false); + AddAttr( + "uniq", + "An indicator whether to sample non-repetitive negtive labels, if True" + "the operator will sample negtive labels without replacement." + "Otherwise, the operator will sample negtive labels with replacement.") + .SetDefault(true); + AddAttr( + "remove_accidental_hits", + "An indicator whether to remove accidental hits when samples hits true" + "labels, the removal is implemented by subtracting the corresponding" + "logits by float_max to subpress their softmax to be zero.") + .SetDefault(true); + AddAttr("num_samples", "The number of negative samples."); + AddAttr("seed", "Random seed for generating samples").SetDefault(0); + + AddComment(R"DOC( + """ + Computes sampled output training logits and labels suitable for implementing + sampled softmax. + """ + +)DOC"); + } +}; + +class SampleLogitsOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Logits"), + "Input(Logits) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Labels"), + "Input(Labels) should be not null."); + + PADDLE_ENFORCE(ctx->HasOutput("Samples"), + "Output(Samples) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("Probabilities"), + "Output(Probabilities) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("SampledLogits"), + "Output(SampledLogits) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("SampledLabels"), + "Output(SampledLabels) should be not null."); + + auto logits_dims = ctx->GetInputDim("Logits"); + auto labels_dims = ctx->GetInputDim("Labels"); + + PADDLE_ENFORCE_EQ( + logits_dims.size(), 2UL, + "The logits of softmax_with_cross_entropy should be a 2-D tensor."); + PADDLE_ENFORCE_EQ(labels_dims.size(), 2UL, + "The labels should be a 2-D tensor."); + + const int num_samples = ctx->Attrs().Get("num_samples"); + const int num_sampled_classes = labels_dims[1] + num_samples; + ctx->SetOutputDim("Samples", {logits_dims[0], num_sampled_classes}); + ctx->SetOutputDim("Probabilities", {logits_dims[0], num_sampled_classes}); + ctx->SetOutputDim("SampledLogits", {logits_dims[0], num_sampled_classes}); + ctx->SetOutputDim("SampledLabels", {logits_dims[0], labels_dims[1]}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Logits")); + framework::OpKernelType kt = + framework::OpKernelType(data_type, ctx.device_context()); + return kt; + } +}; + +// UNDERSTAND: InferShape for Grad +class SampleLogitsOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Logits"), + "Input(Logits) should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Labels"), + "Input(Labels) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("Samples"), + "Input(Samples) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("SampledLogits"), + "Input(SampledLogits) should be not null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("SampledLogits")), + "Input(SampledLogits@Grad) should not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Logits")), + "Output(Logits@Grad) should be not null."); + + auto logit_dims = ctx->GetInputDim("Logits"); + auto label_dims = ctx->GetInputDim("Labels"); + PADDLE_ENFORCE_EQ(label_dims.size(), 2UL, + "The label should be a 2-D tensor."); + PADDLE_ENFORCE_EQ(logit_dims.size(), 2UL, + "The logits should be a 2-D tensor."); + + ctx->SetOutputDim(framework::GradVarName("Logits"), + ctx->GetInputDim("Logits")); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar( + ctx.InputVar(framework::GradVarName("SampledLogits"))); + framework::OpKernelType kt = + framework::OpKernelType(data_type, ctx.device_context()); + return kt; + } +}; + +// UNDERSTAND: what's the rule for making a GradMaker TODO +class SampleLogitsGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto* grad_op = new framework::OpDesc(); + grad_op->SetType("sample_logits_grad"); + grad_op->SetInput("Logits", Input("Logits")); + grad_op->SetInput("Labels", Input("Labels")); + grad_op->SetInput("Samples", Output("Samples")); + grad_op->SetInput("SampledLogits", Output("SampledLogits")); + grad_op->SetInput(framework::GradVarName("SampledLogits"), + OutputGrad("SampledLogits")); + grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(sample_logits, ops::SampleLogitsOp, ops::SampleLogitsOpMaker, + ops::SampleLogitsGradMaker); +REGISTER_OPERATOR(sample_logits_grad, ops::SampleLogitsOpGrad); +REGISTER_OP_CPU_KERNEL(sample_logits, ops::SampleLogitsKernel, + ops::SampleLogitsKernel); +REGISTER_OP_CPU_KERNEL(sample_logits_grad, ops::SampleLogitsGradKernel, + ops::SampleLogitsGradKernel); diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..fb49793b730f72d66dc846f233bd95ebdab37c52 --- /dev/null +++ b/paddle/fluid/operators/sample_logits_op.cu @@ -0,0 +1,257 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/sample_prob.h" +#include "paddle/fluid/operators/math/softmax.h" +#include "paddle/fluid/operators/sample_logits_op.h" + +namespace paddle { +namespace operators { + +// UNDERSTAND: something like take_along_axis in numpy. +template +__global__ void GPUTakeAlongD1(size_t size, const int batch_size, + const int array_slice_size, + const int idx_slice_size, const T* p_array, + const int64_t* p_index, T* p_value) { + const auto value_slice_size = idx_slice_size; + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = blockDim.x * gridDim.x; + + for (; idx < size; idx += step_size) { + int i = idx / idx_slice_size; + auto array_index = p_index[idx]; + p_value[idx] = p_array[i * array_slice_size + array_index]; + } +} + +// UNDERSTAND: something like put_along_axis in numpy but if there is duplicate +// indices, scatter is done in += way. +template +__global__ void GPUPutAlongD1(size_t size, const int batch_size, + const int array_slice_size, + const int idx_slice_size, T* p_array, + const int64_t* p_index, const T* p_value) { + const auto value_slice_size = idx_slice_size; + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = blockDim.x * gridDim.x; + + // size == batch_size + for (; idx < size; idx += step_size) { + int i = idx; + for (int j = 0; j < idx_slice_size; ++j) { + auto array_index = p_index[i * idx_slice_size + j]; + p_array[i * array_slice_size + array_index] += + p_value[i * idx_slice_size + j]; + } + } +} + +// UNDERSTAND: set label as 0,1,...,num_true-1 +template +__global__ void GPUSetLabel(size_t size, const int num_true, int64_t* p_array) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = blockDim.x * gridDim.x; + + for (; idx < size; idx += step_size) { + p_array[idx] = idx % num_true; + } +} + +// UNDERSTAND: compute accidentdal hits from samples and minus corresponding +// logits by a float max, here 1e20 +template +__global__ void gpu_compute_remove_accidental_hits(const int size, + const int num_true, + const int idx_slice_size, + const int64_t* p_index, + T* p_value) { + const auto value_slice_size = idx_slice_size; + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = blockDim.x * gridDim.x; + + for (; idx < size; idx += step_size) { + int i = idx / idx_slice_size; + if (idx % idx_slice_size < num_true) continue; + for (int j = 0; j < num_true; ++j) { + const auto true_idx = i * idx_slice_size + j; + if (p_index[true_idx] == p_index[idx]) { + p_value[idx] -= 1e20; + break; + } + } + } +} + +template +class SampleLogitsCUDAKernel : public framework::OpKernel { + public: + using Tensor = framework::Tensor; + void Compute(const framework::ExecutionContext& context) const override { + // get necessary inputs + const Tensor* logits = context.Input("Logits"); + const Tensor* labels = context.Input("Labels"); + VLOG(3) << "Enter SampleLogitsCUDAKernel"; + + // get necessary outputs + Tensor* samples = context.Output("Samples"); + Tensor* probabilities = context.Output("Probabilities"); + Tensor* sampled_logits = context.Output("SampledLogits"); + Tensor* sampled_labels = context.Output("SampledLabels"); + + // shapes + const auto batch_size = logits->dims()[0]; + const auto num_classes = logits->dims()[1]; + const auto labels_dim = labels->dims(); + const auto num_true = labels_dim[1]; + const auto samples_dim = samples->dims(); + + // attrs + const auto num_samples = context.Attr("num_samples"); + const bool use_customized_samples = + context.Attr("use_customized_samples"); + const bool uniq = context.Attr("uniq"); + const bool remove_accidental_hits = + context.Attr("remove_accidental_hits"); + + // device contexts + auto& dev_ctx = context.cuda_device_context(); + + // UNDERSTAND: allocate memories for temporaries + sampled_logits->mutable_data(samples_dim, context.GetPlace()); + math::SetConstant set_zero; + set_zero(dev_ctx, sampled_logits, static_cast(0)); + + auto sampled_labels_data = + sampled_labels->mutable_data(labels_dim, context.GetPlace()); + int threads = 512; + size_t size = batch_size * num_true; + int grid = (size + threads - 1) / threads; + GPUSetLabel< + T><<>>( + size, num_true, sampled_labels_data); + + if (use_customized_samples) { + const Tensor* customized_samples = + context.Input("CustomizedSamples"); + const Tensor* customized_probabilities = + context.Input("CustomizedProbabilities"); + samples->ShareDataWith(*customized_samples); + probabilities->ShareDataWith(*customized_probabilities); + } else { + samples->mutable_data(context.GetPlace()); + probabilities->mutable_data(samples_dim, context.GetPlace()); + // UNDERSTAND: sampling + const auto seed = context.Attr("seed"); + auto sampler_with_prob = math::GPUSampleWithProb(); + sampler_with_prob(context.cuda_device_context(), seed, num_classes, uniq, + num_samples, labels, samples, probabilities); + } + + // UNDERSTAND: gather sampled logits and remove accidental hits if needed + const auto num_take = samples->dims()[1]; + const auto array_dims = logits->dims(); + const auto idx_dims = samples->dims(); + + const T* p_array = logits->data(); + const int64_t* p_index = samples->data(); + T* p_value = sampled_logits->data(); + + // src slice size + const auto array_slice_size = array_dims[1]; + // index slice size + const auto idx_slice_size = idx_dims[1]; + + size = batch_size * num_take; + grid = (size + threads - 1) / threads; + GPUTakeAlongD1< + T><<>>( + size, batch_size, array_slice_size, idx_slice_size, p_array, p_index, + p_value); + + if (remove_accidental_hits) { + const size_t size = batch_size * (num_true + num_samples); + int grid = (size + threads - 1) / threads; + gpu_compute_remove_accidental_hits< + T><<>>( + size, num_true, idx_slice_size, p_index, p_value); + } + + // subtracted sampled logits with logQ(y|x) + auto probs = EigenMatrix::From(*probabilities); + auto smp_logits = EigenMatrix::From(*sampled_logits); + smp_logits.device(*dev_ctx.eigen_device()) = + (smp_logits - probs.log().unaryExpr(TolerableValue())) + .unaryExpr(TolerableValue()); + } +}; + +template +class SampleLogitsGradCUDAKernel : public framework::OpKernel { + public: + using Tensor = framework::Tensor; + void Compute(const framework::ExecutionContext& context) const override { + auto logits_grad = context.Output(framework::GradVarName("Logits")); + const Tensor* samples = context.Input("Samples"); + const Tensor* sampled_logits_grad = + context.Input(framework::GradVarName("SampledLogits")); + logits_grad->mutable_data(context.GetPlace()); + + auto& dev_ctx = context.cuda_device_context(); + math::SetConstant set_zero; + set_zero(dev_ctx, logits_grad, static_cast(0)); + + // UNDERSTAND: scatter it back to logit_grad + const auto batch_size = samples->dims()[0]; + const auto num_put = samples->dims()[1]; + const auto array_dims = logits_grad->dims(); + const auto idx_dims = samples->dims(); + + T* p_array = logits_grad->data(); + const int64_t* p_index = samples->data(); + const T* p_value = sampled_logits_grad->data(); + + // src slice size + const auto array_slice_size = array_dims[1]; + // index slice size + const auto idx_slice_size = idx_dims[1]; + + int threads = 128; + const size_t size = batch_size; + int grid = (size + threads - 1) / threads; + + GPUPutAlongD1< + T><<>>( + size, batch_size, array_slice_size, idx_slice_size, p_array, p_index, + p_value); + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OP_CUDA_KERNEL(sample_logits, ops::SampleLogitsCUDAKernel, + ops::SampleLogitsCUDAKernel); +REGISTER_OP_CUDA_KERNEL(sample_logits_grad, + ops::SampleLogitsGradCUDAKernel, + ops::SampleLogitsGradCUDAKernel); diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b55a24863cc09d5f80e07aedbbb5b3d9ac99e69e --- /dev/null +++ b/paddle/fluid/operators/sample_logits_op.h @@ -0,0 +1,245 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/sample_prob.h" +#include "paddle/fluid/operators/math/softmax.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + +template +struct TolerableValue { + HOSTDEVICE T operator()(const T& x) const { + PADDLE_ASSERT(std::is_floating_point::value); + const T kApproInf = 1e20; + if (x == INFINITY) return kApproInf; + if (x == -INFINITY) return -kApproInf; + return x; + } +}; + +// UNDERSTAND: something like take_along_axis in numpy. +template +static void CPUTakeAlongD1(const platform::DeviceContext& ctx, + const framework::Tensor& array, + const framework::Tensor& index, + framework::Tensor* value) { + PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace())); + // UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K) + PADDLE_ENFORCE(index.dims().size() == 2 && array.dims().size() == 2 && + index.dims()[0] == array.dims()[0] && + index.dims() == value->dims()); + + const auto batch_size = index.dims()[0]; + const auto num_take = index.dims()[1]; + const auto array_dims = array.dims(); + const auto idx_dims = index.dims(); + + // UNDERSTAND: no allocations here + const T* p_array = array.data(); + const int64_t* p_index = index.data(); + T* p_value = value->data(); + + // src slice size + const auto array_slice_size = array_dims[1]; + + // index slice size + const auto idx_slice_size = idx_dims[1]; + const auto value_slice_size = idx_slice_size; + + for (int i = 0; i < batch_size; ++i) { + for (int j = 0; j < num_take; ++j) { + auto array_index = p_index[i * idx_slice_size + j]; + p_value[i * value_slice_size + j] = + p_array[i * array_slice_size + array_index]; + } + } +} + +// UNDERSTAND: something like put_along_axis in numpy but if there is duplicate +// indices, scatter is done in += way. +template +static void CPUPutAlongD1(const platform::DeviceContext& ctx, + framework::Tensor* array, + const framework::Tensor& index, + const framework::Tensor& value) { + PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace())); + // UNDERSTAND: check shape src(B, C), index(B, K), out should also be (B, K) + PADDLE_ENFORCE(index.dims().size() == 2 && array->dims().size() == 2 && + index.dims()[0] == array->dims()[0] && + index.dims() == value.dims()); + const auto batch_size = index.dims()[0]; + const auto num_put = index.dims()[1]; + auto array_dims = array->dims(); + auto idx_dims = index.dims(); + + // UNDERSTAND: no allocations here + T* p_array = array->data(); + const int64_t* p_index = index.data(); + const T* p_value = value.data(); + + // slice sizes + const auto array_slice_size = array_dims[1]; + const auto idx_slice_size = idx_dims[1]; + const auto value_slice_size = idx_slice_size; + + for (int i = 0; i < batch_size; ++i) { + for (int j = 0; j < num_put; ++j) { + auto array_index = p_index[i * idx_slice_size + j]; + p_array[i * array_slice_size + array_index] += + p_value[i * value_slice_size + j]; + } + } +} + +// UNDERSTAND: compute accidentdal hits from samples and minus corresponding +// logits by a float max, here 1e20 +template +static void compute_remove_accidental_hits(const platform::DeviceContext& ctx, + framework::Tensor* sampled_logits, + const framework::Tensor& samples, + const int num_true) { + const auto batch_size = sampled_logits->dims()[0]; + const auto num_sampled_classes = sampled_logits->dims()[1]; + T* sampled_logits_data = sampled_logits->data(); + const auto samples_data = samples.data(); + + std::unordered_set tmp_true_labels; + for (int i = 0; i < batch_size; ++i) { + tmp_true_labels.clear(); + tmp_true_labels.insert(samples_data + i * num_sampled_classes, + samples_data + i * num_sampled_classes + num_true); + for (int j = num_true; j < num_sampled_classes; ++j) { + const auto idx = i * num_sampled_classes + j; + if (tmp_true_labels.find(samples_data[idx]) != tmp_true_labels.end()) + sampled_logits_data[idx] -= 1e20; + } + } +} + +template +class SampleLogitsKernel : public framework::OpKernel { + public: + using Tensor = framework::Tensor; + void Compute(const framework::ExecutionContext& context) const override { + PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()), + "This kernel only runs on CPU."); + VLOG(3) << "Enter SampleLogitsKernel"; + // get necessary inputs + const Tensor* logits = context.Input("Logits"); + const Tensor* labels = context.Input("Labels"); + + // get necessary outputs + Tensor* samples = context.Output("Samples"); + Tensor* probabilities = context.Output("Probabilities"); + Tensor* sampled_logits = context.Output("SampledLogits"); + Tensor* sampled_labels = context.Output("SampledLabels"); + + // shapes + const auto batch_size = logits->dims()[0]; + const auto num_classes = logits->dims()[1]; + const auto labels_dim = labels->dims(); + const auto num_true = labels_dim[1]; + const auto samples_dim = samples->dims(); + + // attrs + const auto num_samples = context.Attr("num_samples"); + const bool use_customized_samples = + context.Attr("use_customized_samples"); + const bool remove_accidental_hits = + context.Attr("remove_accidental_hits"); + + // device contexts + auto& dev_ctx = + context.template device_context(); + + // UNDERSTAND: allocate memories for temporaries + sampled_logits->mutable_data(samples_dim, context.GetPlace()); + auto sampled_labels_data = + sampled_labels->mutable_data(labels_dim, context.GetPlace()); + for (int i = 0; i < batch_size; ++i) { + for (int j = 0; j < num_true; ++j) { + sampled_labels_data[i * num_true + j] = j; + } + } + + if (use_customized_samples) { + const Tensor* customized_samples = + context.Input("CustomizedSamples"); + const Tensor* customized_probabilities = + context.Input("CustomizedProbabilities"); + samples->ShareDataWith(*customized_samples); + probabilities->ShareDataWith(*customized_probabilities); + } else { + samples->mutable_data(context.GetPlace()); + probabilities->mutable_data(samples_dim, context.GetPlace()); + // UNDERSTAND: sampling + const auto seed = context.Attr("seed"); + auto sampler_with_prob = + math::SampleWithProb(); + sampler_with_prob(dev_ctx, math::LogUniformSampler(num_classes, seed), + num_samples, labels, samples, probabilities); + } + + // UNDERSTAND: gather sampled logits and remove accidental hits if needed + CPUTakeAlongD1(dev_ctx, *logits, *samples, sampled_logits); + if (remove_accidental_hits) { + compute_remove_accidental_hits(dev_ctx, sampled_logits, *samples, + num_true); + } + + // subtracted sampled logits with logQ(y|x) + auto probs = EigenMatrix::From(*probabilities); + auto smp_logits = EigenMatrix::From(*sampled_logits); + smp_logits.device(*dev_ctx.eigen_device()) = + (smp_logits - probs.log().unaryExpr(TolerableValue())) + .unaryExpr(TolerableValue()); + } +}; + +template +class SampleLogitsGradKernel : public framework::OpKernel { + public: + using Tensor = framework::Tensor; + void Compute(const framework::ExecutionContext& context) const override { + auto logits_grad = context.Output(framework::GradVarName("Logits")); + const Tensor* samples = context.Input("Samples"); + const Tensor* sampled_logits_grad = + context.Input(framework::GradVarName("SampledLogits")); + logits_grad->mutable_data(context.GetPlace()); + + auto& dev_ctx = + context.template device_context(); + math::SetConstant set_zero; + set_zero(dev_ctx, logits_grad, static_cast(0)); + + // UNDERSTAND: scatter it back to logit_grad + CPUPutAlongD1(dev_ctx, logits_grad, *samples, *sampled_logits_grad); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc index 1eebadc2c980ddf1cbaaefef1568dd401d0c77ed..d3dcd1f96a986d2450c8af780a12183f7dfc66d5 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc @@ -22,6 +22,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { + if (ctx->IsRuntime()) { + return; + } PADDLE_ENFORCE( ctx->HasInput("X"), "Input(X) of SequecceEnumerate operator should not be null."); @@ -31,11 +34,11 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { const auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ( - x_dims.size(), 2UL, + x_dims.size(), 2, "Input(X) of SequenceEnumerate operator's rank should be 2."); - PADDLE_ENFORCE_EQ( - x_dims[1], 1UL, - "Input(X) of SequenceEnumerate operator's 2nd dimension should be 1."); + PADDLE_ENFORCE_EQ(x_dims[1], 1, + "Input(X) of SequenceEnumerate operator's 2nd " + "dimension should be 1."); const auto win_size = ctx->Attrs().Get("win_size"); ctx->SetOutputDim("Out", {x_dims[0], win_size}); diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu index 28821e7129c1601f1214b0b56696fbf526a2123f..d5deb7582c7c00f3102ea568a716b715611212ce 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu @@ -65,6 +65,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { auto lod0 = in_lod[0]; auto in_len = in->numel(); auto in_data = in->data(); + out->Resize({in_dims[0], win_size}); auto out_data = out->mutable_data(context.GetPlace()); // Copy LoD to GPU const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace()); @@ -72,6 +73,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>( in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data); + out->set_lod(in->lod()); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h index dc18d9b2071303377505155476b87ed029eaf986..18da69993b2ad5879dd4678ec0d4b06d7e30cb0a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h @@ -39,6 +39,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { // Generate enumerate sequence set auto lod0 = in_lod[0]; auto in_data = in->data(); + out->Resize({in_dims[0], win_size}); auto out_data = out->mutable_data(context.GetPlace()); for (size_t i = 0; i < lod0.size() - 1; ++i) { for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) { @@ -49,6 +50,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { } } } + out->set_lod(in->lod()); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index 619c40dbd10ad6b538f2d4e3567966b222fc5e2d..0401c22c92e1a9be35c2ff6b2c7e95924afe3f1b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -64,8 +64,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); auto lod = in->lod(); - PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); - PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(), + PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), (size_t)in->numel(), "The actual size mismatches with the LoD information."); auto tokens = ctx.Attr>("tokens"); auto in_len = in->numel(); @@ -85,10 +84,9 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { num_erased.begin() + 1); // Copy LoD to GPU - auto lod0 = lod[0]; - auto lod_len = lod0.size(); - const size_t* dev_in_lod_ptr = lod0.CUDAData(ctx.GetPlace()); - + auto last_lod = lod[lod.size() - 1]; + auto lod_len = last_lod.size(); + const size_t* dev_in_lod_ptr = last_lod.CUDAData(ctx.GetPlace()); // Calc output LoD thrust::device_vector dev_out_lod(lod_len); size_t* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data()); @@ -96,13 +94,16 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { PADDLE_CUDA_NUM_THREADS, 0, stream>>>( num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr); // Set LoD for output - std::vector out_lod0(dev_out_lod.begin(), dev_out_lod.end()); + std::vector out_last_lod(dev_out_lod.begin(), dev_out_lod.end()); framework::LoD out_lod; - out_lod.push_back(out_lod0); + for (size_t i = 0; i < lod.size() - 1; ++i) { + out_lod.push_back(lod[i]); + } + out_lod.push_back(out_last_lod); out->set_lod(out_lod); // Set output - out->Resize({static_cast(out_lod0.back()), 1}); + out->Resize({static_cast(out_last_lod.back()), 1}); auto out_dat = out->mutable_data(ctx.GetPlace()); SetOutput<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_dat, in_len, diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h index 265390528a15aa060900276f98128d754fc907fe..af5a64dce5d2484ad9006f0c30e8851746794f38 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h @@ -28,19 +28,18 @@ class SequenceEraseKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); auto lod = in->lod(); - PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); - PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(), + PADDLE_ENFORCE_EQ(lod[lod.size() - 1].back(), (size_t)in->numel(), "The actual size mismatches with the LoD information."); auto tokens = ctx.Attr>("tokens"); auto in_len = in->numel(); auto in_dat = in->data(); - auto lod0 = lod[0]; + auto last_lod = lod[lod.size() - 1]; std::vector num_erased(in_len + 1, 0); - std::vector out_lod0(1, 0); - for (size_t i = 0; i < lod0.size() - 1; ++i) { + std::vector out_last_lod(1, 0); + for (size_t i = 0; i < last_lod.size() - 1; ++i) { size_t num_out = 0; - for (auto j = lod0[i] + 1; j <= lod0[i + 1]; ++j) { + for (auto j = last_lod[i] + 1; j <= last_lod[i + 1]; ++j) { num_erased[j] = num_erased[j - 1]; if (std::find(tokens.begin(), tokens.end(), in_dat[j - 1]) != tokens.end()) { @@ -49,7 +48,7 @@ class SequenceEraseKernel : public framework::OpKernel { num_out += 1; } } - out_lod0.push_back(out_lod0.back() + num_out); + out_last_lod.push_back(out_last_lod.back() + num_out); } auto out_len = in_len - num_erased[in_len]; @@ -62,7 +61,10 @@ class SequenceEraseKernel : public framework::OpKernel { } } framework::LoD out_lod; - out_lod.push_back(out_lod0); + for (size_t i = 0; i < lod.size() - 1; ++i) { + out_lod.push_back(lod[i]); + } + out_lod.push_back(out_last_lod); out->set_lod(out_lod); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc index 27e0201bd70df59c58eaa7567d5bb69eb1b721b4..f6c42415301bc8d6f3509bfba2ff356265643bad 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc @@ -48,10 +48,10 @@ class SequenceExpandOp : public framework::OperatorWithKernel { auto& x_lod = x_var->Get().lod(); auto& y_lod = y_var->Get().lod(); - PADDLE_ENFORCE_LE(x_lod.size(), 1, + PADDLE_ENFORCE_LE(x_lod.size(), 1UL, "Level number of Input(X)'s lod should not be " "greater than 1."); - PADDLE_ENFORCE_GT(y_lod.size(), 0, + PADDLE_ENFORCE_GT(y_lod.size(), 0UL, "Level number of Input(Y)'s lod should be " "greater than 0."); PADDLE_ENFORCE( @@ -69,7 +69,8 @@ class SequenceExpandOp : public framework::OperatorWithKernel { "size of Input(X)'s first level lod should be equal to " "size of Input(Y)'s referred level lod."); } else { - PADDLE_ENFORCE_EQ(x_dims[0], y_lod[ref_level].size() - 1, + PADDLE_ENFORCE_EQ(x_dims[0], + static_cast(y_lod[ref_level].size()) - 1, "When Input(X)'s lod is null, the dims[0] of " "Input(X) should match the " "size of Input(Y)'s referred level lod."); diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu index afc08c7b3f6596efd3b6e0b74c17aa3c9268c47d..888d1a12e6751eeb91f0af04b50cf6d5bea74162 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -88,6 +89,49 @@ void GetOutputOffset(const framework::Vector& x_lod, } } +template +static int ExpandByMemoryCopy(const platform::CUDADeviceContext& context, + const LoDTensor& x, LoDTensor* out, + const framework::Vector& x_lod, + const framework::Vector& ref_lod, + bool do_copy) { + auto out_data = out->data(); + auto x_data = x.data(); + + auto& gpu_place = boost::get(context.GetPlace()); + + int x_item_length = x.numel() / x.dims()[0]; + int out_offset = 0; + int num_copys = 0; + for (size_t i = 1; i < ref_lod.size(); ++i) { + int repeat_num = ref_lod[i] - ref_lod[i - 1]; + int x_start = x_lod[i - 1]; + int x_end = x_lod[i]; + int x_seq_len = x_end - x_start; + if (repeat_num > 0) { + if (do_copy) { + int out_start = out_offset; + if (out->lod().size() == 1) { + out_start = out->lod()[0][out_offset]; + } + for (int j = 0; j < repeat_num; j++) { + for (int k = 0; k < x_seq_len; k++) { + memory::Copy( + gpu_place, + out_data + (out_start + j * x_seq_len + k) * x_item_length, + gpu_place, x_data + (x_start + k) * x_item_length, + sizeof(T) * x_item_length, context.stream()); + } + } + } else { + num_copys += repeat_num * x_seq_len; + } + } + out_offset += repeat_num; + } + return num_copys; +} + template struct SequenceExpandFunctor { void operator()( @@ -95,22 +139,40 @@ struct SequenceExpandFunctor { const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ LoDTensor* out) { - int x_item_length = x.numel() / x.dims()[0]; - framework::Vector out_offset(x_lod.size()); - GetOutputOffset(x_lod, ref_lod, &out_offset); - - int thread_x = std::min(32, std::max(static_cast(ref_lod.size()), 16)); - int thread_y = 16; - int thread_z = 1024 / thread_x / thread_y; - int block_x = static_cast(ref_lod.size()); - dim3 block_size(thread_x, thread_y, thread_z); - dim3 grid_size(block_x, 1); + int num_copys = + ExpandByMemoryCopy(context, x, out, x_lod, ref_lod, false); + // Sometimes direct copies will be faster, this maybe need deeply analysis. + if (num_copys < 5) { + ExpandByMemoryCopy(context, x, out, x_lod, ref_lod, true); + } else { + int x_item_length = x.numel() / x.dims()[0]; + size_t x_lod_size = x_lod.size(); + framework::Vector out_offset(x_lod_size * 2 + ref_lod.size()); + GetOutputOffset(x_lod, ref_lod, &out_offset); + + for (size_t i = 0; i < x_lod_size; ++i) { + out_offset[x_lod_size + i] = x_lod[i]; + } + for (size_t i = 0; i < ref_lod.size(); ++i) { + out_offset[2 * x_lod_size + i] = ref_lod[i]; + } - sequence_expand_kernel<<>>( - x.data(), x_lod.CUDAData(context.GetPlace()), - ref_lod.CUDAData(context.GetPlace()), - out_offset.CUDAData(context.GetPlace()), x_lod.size(), x_item_length, - out->mutable_data(context.GetPlace())); + const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace()); + const size_t* x_lod_data = out_offset_data + x_lod_size; + const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size; + + int thread_x = + std::min(32, std::max(static_cast(ref_lod.size()), 16)); + int thread_y = 16; + int thread_z = 1024 / thread_x / thread_y; + int block_x = static_cast(ref_lod.size()); + dim3 block_size(thread_x, thread_y, thread_z); + dim3 grid_size(block_x, 1); + + sequence_expand_kernel<<>>( + x.data(), x_lod_data, ref_lod_data, out_offset_data, x_lod_size, + x_item_length, out->mutable_data(context.GetPlace())); + } } }; diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc index 1be9fe47af71d31ce2e0eba807ea4a43601f8aca..efc497fa47d1d954bbd1e214b43f5de4c76b0714 100644 --- a/paddle/fluid/operators/shape_op.cc +++ b/paddle/fluid/operators/shape_op.cc @@ -35,14 +35,15 @@ class ShapeOp : public framework::OperatorWithKernel { class ShapeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Input", "(Tensor), The input tensor."); - AddOutput("Out", - "(Tensor), The shape of input tensor, the data type of the shape" - " is int32_t, will be on the same device with the input Tensor."); + AddInput("Input", "(LoDTensor), The input tensor."); + AddOutput( + "Out", + "(LoDTensor), The shape of input tensor, the data type of the shape" + " is int32_t, will be on the same device with the input Tensor."); AddComment(R"DOC( -Shape Operator +Shape Operator. -Get the shape of input tensor. Only support CPU input Tensor now. +Return the shape of the input. )DOC"); } }; diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc index 0397c7791e1768393ff642743d2f7085b25fb551..7754d2bfebdbc81e25432641b2eb4315386f75ff 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc @@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker .SetDefault(false); AddAttr( "numeric_stable_mode", - "(bool, default: false), A flag to indicate whether to use more " + "(bool, default: true), A flag to indicate whether to use more " "numerically stable algorithm. This flag is only valid when " "soft_label is false and GPU is used.") - .SetDefault(false); + .SetDefault(true); AddAttr( "ignore_index", "(int, default -100), Specifies a target value that is ignored and" diff --git a/paddle/fluid/operators/spectral_norm_op.cc b/paddle/fluid/operators/spectral_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..357d055756523cd83bf0e4b30719155b32c65974 --- /dev/null +++ b/paddle/fluid/operators/spectral_norm_op.cc @@ -0,0 +1,197 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/spectral_norm_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class SpectralNormOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(Weight) of SpectralNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("U"), + "Input(U) of SpectralNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("V"), + "Input(V) of SpectralNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SpectralNormOp should not be null."); + + auto dim_weight = ctx->GetInputDim("Weight"); + auto rank_weight = dim_weight.size(); + PADDLE_ENFORCE(rank_weight >= 2 && rank_weight <= 5, + "The rank of Input(Weights) can only be 2, 3," + "4, 5 for fc, conv1d, conv2d, conv3d layers."); + + int dim = ctx->Attrs().Get("dim"); + int power_iters = ctx->Attrs().Get("power_iters"); + PADDLE_ENFORCE(dim == 0 || dim == 1, "Attr(dim) can only be 0 or 1"); + PADDLE_ENFORCE(power_iters >= 0, + "Attr(power_iters) should be larger equal then 0"); + + int h = dim_weight[dim]; + int w = 1; + for (int i = 0; i < rank_weight; i++) { + if (i != dim) { + w *= dim_weight[i]; + } + } + auto dim_u = ctx->GetInputDim("U"); + auto dim_v = ctx->GetInputDim("V"); + PADDLE_ENFORCE_EQ(dim_u[0], h, + "Input(U) dims[0] should be equal to " + "Input(Weight) dims[Attr(dim)]"); + PADDLE_ENFORCE_EQ( + dim_v[0], w, + "Input(V) dims[0] should be equal to " + "the product of Input(Weight) dims except dims[Attr(dim)]"); + + ctx->SetOutputDim("Out", dim_weight); + ctx->ShareLoD("Weight", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("Weight")->type(), + ctx.GetPlace()); + } +}; + +class SpectralNormOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Weight", + "The input weight tensor of spectral_norm operator, " + "This can be a 2-D, 3-D, 4-D, 5-D tensor which is the " + "weights of fc, conv1d, conv2d, conv3d layer."); + AddInput("U", + "The weight_u tensor of spectral_norm operator, " + "This can be a 1-D tensor in shape [H, 1]," + "H is the 1st dimentions of Weight after reshape" + "corresponding by Attr(dim). As for Attr(dim) = 1" + "in conv2d layer with weight shape [M, C, K1, K2]" + "Weight will be reshape to [C, M*K1*K2], U will" + "be in shape [C, 1]."); + AddInput("V", + "The weight_v tensor of spectral_norm operator, " + "This can be a 1-D tensor in shape [W, 1], " + "W is the 2nd dimentions of Weight after reshape " + "corresponding by Attr(dim). As for Attr(dim) = 1 " + "in conv2d layer with weight shape [M, C, K1, K2] " + "Weight will be reshape to [C, M*K1*K2], V will " + "be in shape [M*K1*K2, 1]."); + AddOutput("Out", + "The output weight tensor of spectral_norm operator, " + "This tensor is in same shape with Input(Weight)."); + + AddAttr("dim", + "The index of dimension which should be permuted " + "to the first before reshaping Input(Weight) to " + "matrix, it should be set as 0 if Input(Weight) is " + "the weight of fc layer, and should be set as 1 if " + "Input(Weight) is the weight of conv layer, " + "default 0.") + .SetDefault(0); + AddAttr("power_iters", + "number of power iterations to calculate " + "spectral norm, default 1.") + .SetDefault(1); + AddAttr("eps", + "epsilon for numerical stability in " + "calculating norms") + .SetDefault(1e-12); + + AddComment(R"DOC( + This layer calculates the spectral normalization value of weight of + fc, conv1d, conv2d, conv3d layers which should be 2-D, 3-D, 4-D, 5-D + tensor. + + Spectral normalization stabilizes the training of critic in GANs + (Generative Adversarial Networks). This layer rescaling weight tensor + with spectral normalize value. + + For spectral normalization calculations, we rescaling weight + tensor with :math:`\sigma`, while :math:`\sigma{\mathbf{W}}` is + + $$\sigma(\mathbf{W}) = \max_{\mathbf{h}: \mathbf{h} \ne 0} \\frac{\|\mathbf{W} \mathbf{h}\|_2}{\|\mathbf{h}\|_2}$$ + + We calculate :math:`\sigma{\mathbf{W}}` through power iterations as + + $$ + \mathbf{v} = \mathbf{W}^{T} \mathbf{u} + $$ + $$ + \mathbf{v} = \\frac{\mathbf{v}}{\|\mathbf{v}\|_2} + $$ + $$ + \mathbf{u} = \mathbf{W}^{T} \mathbf{v} + $$ + $$ + \mathbf{u} = \\frac{\mathbf{u}}{\|\mathbf{u}\|_2} + $$ + + And :math:`\sigma` should be + + $$\sigma{\mathbf{W}} = \mathbf{u}^{T} \mathbf{W} \mathbf{v}$$ + + For details of spectral normalization, please refer to paper: + `Spectral Normalization `_ . + )DOC"); + } +}; + +class SpectralNormOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("U"), "Input(U) should not be null"); + PADDLE_ENFORCE(ctx->HasInput("V"), "Input(V) should not be null"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should not be null"); + auto dim_x = ctx->GetInputDim("Weight"); + if (ctx->HasOutput(framework::GradVarName("Weight"))) { + ctx->SetOutputDim(framework::GradVarName("Weight"), dim_x); + } + } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(ctx.Input("Weight")->type(), + ctx.GetPlace()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(spectral_norm, ops::SpectralNormOp, ops::SpectralNormOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(spectral_norm_grad, ops::SpectralNormOpGrad); +REGISTER_OP_CPU_KERNEL( + spectral_norm, + ops::SpectralNormKernel, + ops::SpectralNormKernel); +REGISTER_OP_CPU_KERNEL( + spectral_norm_grad, + ops::SpectralNormGradKernel, + ops::SpectralNormGradKernel); diff --git a/paddle/fluid/operators/spectral_norm_op.cu b/paddle/fluid/operators/spectral_norm_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..ea90e3b4c122b00d5bfe13617e48a9bbe0ee8395 --- /dev/null +++ b/paddle/fluid/operators/spectral_norm_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/operators/spectral_norm_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + spectral_norm, + ops::SpectralNormKernel, + ops::SpectralNormKernel); +REGISTER_OP_CUDA_KERNEL( + spectral_norm_grad, + ops::SpectralNormGradKernel, + ops::SpectralNormGradKernel); diff --git a/paddle/fluid/operators/spectral_norm_op.h b/paddle/fluid/operators/spectral_norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..eb48e3b7840e18efe809540dd697f243a0a63a52 --- /dev/null +++ b/paddle/fluid/operators/spectral_norm_op.h @@ -0,0 +1,273 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +using EigenTensor = framework::EigenTensor; +using Tensor = framework::Tensor; + +using Array1 = Eigen::DSizes; +using Array2 = Eigen::DSizes; +using IndexPair = Eigen::IndexPair; + +template +static inline void TransCompute(const int rank, const Tensor& in, Tensor* out, + const std::vector& perm, + const DeviceContext& dev_ctx) { + if (rank <= 1 || rank > 5) { + PADDLE_THROW("Invalid weight rank."); + } + + switch (rank) { + case 2: + math::Transpose trans2; + trans2(dev_ctx, in, out, perm); + break; + case 3: + math::Transpose trans3; + trans3(dev_ctx, in, out, perm); + break; + case 4: + math::Transpose trans4; + trans4(dev_ctx, in, out, perm); + break; + case 5: + math::Transpose trans5; + trans5(dev_ctx, in, out, perm); + break; + default: + break; + } +} + +template +static inline void CalcMatrixSigmaAndNormWeight( + Tensor* sigma, Tensor* u, Tensor* v, Tensor* weight, const int power_iters, + const float eps, const framework::ExecutionContext& ctx) { + auto& place = *ctx.template device_context().eigen_device(); + auto blas = math::GetBlas(ctx); + auto sigma_t = EigenTensor::From(*sigma); + auto weight_t = EigenTensor::From(*weight); + auto u_t = EigenTensor::From(*u); + auto v_t = EigenTensor::From(*v); + + const int h = weight->dims()[0]; + const int w = weight->dims()[1]; + + for (int i = 0; i < power_iters; i++) { + // V = W^T * U / ||W^T * U||_2 + blas.MatMul(*weight, true, *u, false, T(1), v, T(0)); + auto v_t_norm = + v_t.square().sum().sqrt().eval().reshape(Array1(1)).broadcast( + Array1(w)); + v_t.device(place) = v_t / (v_t_norm + v_t_norm.constant(eps)); + // U = W^T * V / ||W^T * V||_2 + blas.MatMul(*weight, false, *v, false, T(1), u, T(0)); + auto u_t_norm = + u_t.square().sum().sqrt().eval().reshape(Array1(1)).broadcast( + Array1(h)); + u_t.device(place) = u_t / (u_t_norm + u_t_norm.constant(eps)); + } + Tensor weight_v; + weight_v.mutable_data({h, 1}, ctx.GetPlace()); + blas.MatMul(*weight, false, *v, false, T(1), &weight_v, T(0)); + auto weight_v_t = EigenTensor::From(weight_v); + sigma_t.device(place) = (u_t * weight_v_t) + .sum() + .eval() + .reshape(Array2(1, 1)) + .broadcast(Array2(h, w)); + weight_t.device(place) = weight_t / sigma_t; +} + +template +class SpectralNormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = ctx.template device_context(); + auto weight = ctx.Input("Weight"); + auto u = ctx.Input("U"); + auto v = ctx.Input("V"); + auto out = ctx.Output("Out"); + + int dim = ctx.Attr("dim"); + int power_iters = ctx.Attr("power_iters"); + float eps = ctx.Attr("eps"); + + const int h = u->dims()[0]; + const int w = v->dims()[0]; + + Tensor weight_mat; + auto dims = weight->dims(); + const int rank = dims.size(); + std::vector real_dims; + if (dim != 0) { + std::vector perm; + perm.push_back(dim); + real_dims.push_back(dims[dim]); + for (int i = 0; i < rank; i++) { + if (i != dim) { + perm.push_back(i); + real_dims.push_back(dims[i]); + } + } + weight_mat.mutable_data(framework::make_ddim(real_dims), + ctx.GetPlace()); + TransCompute(rank, *weight, &weight_mat, perm, dev_ctx); + } else { + for (int i = 0; i < rank; i++) { + real_dims.push_back(i); + } + TensorCopySync(*weight, ctx.GetPlace(), &weight_mat); + } + weight_mat = weight_mat.Resize({h, w}); + + Tensor sigma; + sigma.mutable_data(weight_mat.dims(), ctx.GetPlace()); + Tensor uu, vv; + TensorCopySync(*u, ctx.GetPlace(), &uu); + TensorCopySync(*v, ctx.GetPlace(), &vv); + CalcMatrixSigmaAndNormWeight( + &sigma, &(uu.Resize({h, 1})), &(vv.Resize({w, 1})), &weight_mat, + power_iters, eps, ctx); + + if (dim != 0) { + std::vector perm; + for (int i = 0; i < rank; i++) { + if (i < dim) { + perm.push_back(i + 1); + } else if (i == dim) { + perm.push_back(0); + } else { + perm.push_back(i); + } + } + out->mutable_data(dims, ctx.GetPlace()); + TransCompute( + rank, weight_mat.Resize(framework::make_ddim(real_dims)), out, perm, + dev_ctx); + } else { + TensorCopySync(weight_mat.Resize(dims), ctx.GetPlace(), out); + } + } +}; + +template +class SpectralNormGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& place = *ctx.template device_context().eigen_device(); + auto& dev_ctx = ctx.template device_context(); + auto blas = math::GetBlas(ctx); + auto weight = ctx.Input("Weight"); + auto u = ctx.Input("U"); + auto v = ctx.Input("V"); + auto out_grad = ctx.Input(framework::GradVarName("Out")); + auto weight_grad = ctx.Output(framework::GradVarName("Weight")); + + int dim = ctx.Attr("dim"); + int power_iters = ctx.Attr("power_iters"); + float eps = ctx.Attr("eps"); + + const int h = u->dims()[0]; + const int w = v->dims()[0]; + + Tensor weight_mat, out_grad_mat; + auto dims = weight->dims(); + const int rank = dims.size(); + std::vector real_dims; + if (dim != 0) { + std::vector perm; + perm.push_back(dim); + real_dims.push_back(dims[dim]); + for (int i = 0; i < rank; i++) { + if (i != dim) { + perm.push_back(i); + real_dims.push_back(dims[i]); + } + } + weight_mat.mutable_data(framework::make_ddim(real_dims), + ctx.GetPlace()); + out_grad_mat.mutable_data(framework::make_ddim(real_dims), + ctx.GetPlace()); + TransCompute(rank, *weight, &weight_mat, perm, dev_ctx); + TransCompute(rank, *out_grad, &out_grad_mat, perm, + dev_ctx); + } else { + for (int i = 0; i < rank; i++) { + real_dims.push_back(i); + } + TensorCopySync(*weight, ctx.GetPlace(), &weight_mat); + TensorCopySync(*out_grad, ctx.GetPlace(), &out_grad_mat); + } + weight_mat = weight_mat.Resize({h, w}); + out_grad_mat = out_grad_mat.Resize({h, w}); + + Tensor sigma; + sigma.mutable_data(weight_mat.dims(), ctx.GetPlace()); + Tensor uu, vv; + TensorCopySync(*u, ctx.GetPlace(), &uu); + TensorCopySync(*v, ctx.GetPlace(), &vv); + CalcMatrixSigmaAndNormWeight( + &sigma, &(uu.Resize({h, 1})), &(vv.Resize({w, 1})), &weight_mat, + power_iters, eps, ctx); + + Tensor uv; + uv.mutable_data({h, w}, ctx.GetPlace()); + blas.MatMul(uu.Resize({h, 1}), false, vv.Resize({w, 1}), false, T(1), &uv, + T(0)); + + Tensor weight_grad_mat; + weight_grad_mat.mutable_data({h, w}, ctx.GetPlace()); + auto weight_grad_mat_t = EigenTensor::From(weight_grad_mat); + auto weight_mat_t = EigenTensor::From(weight_mat); + auto out_grad_mat_t = EigenTensor::From(out_grad_mat); + auto sigma_t = EigenTensor::From(sigma); + auto uv_t = EigenTensor::From(uv); + weight_mat_t.device(place) = + weight_mat_t.sum().eval().reshape(Array2(1, 1)).broadcast(Array2(h, w)); + weight_grad_mat_t.device(place) = + out_grad_mat_t * (out_grad_mat_t.constant(1.0) - uv_t * weight_mat_t) / + sigma_t; + + if (dim != 0) { + std::vector perm; + for (int i = 0; i < rank; i++) { + if (i < dim) { + perm.push_back(i + 1); + } else if (i == dim) { + perm.push_back(0); + } else { + perm.push_back(i); + } + } + weight_grad->mutable_data(dims, ctx.GetPlace()); + TransCompute( + rank, weight_grad_mat.Resize(framework::make_ddim(real_dims)), + weight_grad, perm, dev_ctx); + } else { + TensorCopySync(weight_grad_mat.Resize(dims), ctx.GetPlace(), weight_grad); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index c8ee13875c5ae772de3c09f97fded8f70c5698e6..640644a94690d9682a5e6b1aa788a9ebdc5d2a54 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -117,11 +117,11 @@ class TeacherStudentSigmoidLossOpMaker "[N x 1]. The teacher student sigmoid loss."); AddAttr( "soft_max_up_bound", - "fp32, if input > soft_max_up_bound, will be bound, default 15.0") + "fp32, if input > soft_max_up_bound, input will be bound, default 15.0") .SetDefault(15.0); - AddAttr( - "soft_max_lower_bound", - "fp32, if input < soft_max_lower_bound, will be bound, default -15.0") + AddAttr("soft_max_lower_bound", + "fp32, if input < soft_max_lower_bound, input will be " + "bound, default -15.0") .SetDefault(-15.0); AddComment(R"DOC( TeacherStudentSigmoidLoss Operator. @@ -134,7 +134,7 @@ we add another label(z') to original. label = {-2, -1, [0, 2]} when z' is not exist, clk = 0 : label = -2; when z' is not exist, clk = 1 : label = -1; - when z' is exist , clk = 0 : label = 0 + z'; + when z' is exist , clk = 0 : label = 0 + z'; when z' is exist , clk = 1 : label = 1 + z'; )DOC"); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index fbb2ac3fe8c5de9b0be593df225677c6a7a89e9c..9220d35707b286d76ab4824e3f1080453f60bfe6 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -36,7 +36,7 @@ cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce) -cc_library(place SRCS place.cc DEPS enforce boost lib_any) +cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) @@ -82,13 +82,18 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_ cc_test(init_test SRCS init_test.cc DEPS device_context) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) +nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) cc_library(timer SRCS timer.cc) cc_test(timer_test SRCS timer_test.cc DEPS timer) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) -cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) +if(WITH_GPU) + nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce) +else() + cc_library(profiler SRCS profiler.cc DEPS device_tracer enforce) +endif() cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) diff --git a/paddle/fluid/platform/cudnn_desc.h b/paddle/fluid/platform/cudnn_desc.h new file mode 100644 index 0000000000000000000000000000000000000000..1062b403f289610a6dec28dead9177d387f0d4e0 --- /dev/null +++ b/paddle/fluid/platform/cudnn_desc.h @@ -0,0 +1,124 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/platform/cudnn_helper.h" + +namespace paddle { +namespace platform { +using framework::Tensor; + +template +cudnnDataType_t ToCudnnDataType(const T& t) { + auto type = framework::ToDataType(t); + return ToCudnnDataType(type); +} + +template <> +cudnnDataType_t ToCudnnDataType(const framework::proto::VarType::Type& t) { + cudnnDataType_t type = CUDNN_DATA_FLOAT; + switch (t) { + case framework::proto::VarType::FP16: + type = CUDNN_DATA_HALF; + break; + case framework::proto::VarType::FP32: + type = CUDNN_DATA_FLOAT; + break; + case framework::proto::VarType::FP64: + type = CUDNN_DATA_DOUBLE; + break; + default: + break; + } + return type; +} + +class ActivationDescriptor { + public: + using T = cudnnActivationStruct; + struct Deleter { + void operator()(T* t) { + if (t != nullptr) { + PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t)); + t = nullptr; + } + } + }; + ActivationDescriptor() { + T* raw_ptr; + PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr)); + desc_.reset(raw_ptr); + } + template + void set(cudnnActivationMode_t mode, const T& coef) { + CUDNN_ENFORCE(dynload::cudnnSetActivationDescriptor( + desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast(coef))); + } + + T* desc() { return desc_.get(); } + T* desc() const { return desc_.get(); } + + private: + std::unique_ptr desc_; +}; + +class TensorDescriptor { + public: + using T = cudnnTensorStruct; + struct Deleter { + void operator()(T* t) { + if (t != nullptr) { + PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t)); + t = nullptr; + } + } + }; + TensorDescriptor() { + T* raw_ptr; + PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr)); + desc_.reset(raw_ptr); + } + T* desc() { return desc_.get(); } + T* desc() const { return desc_.get(); } + void set(const Tensor& tensor, const int groups = 1) { + auto dims = framework::vectorize2int(tensor.dims()); + std::vector strides(dims.size()); + strides[dims.size() - 1] = 1; + for (int i = dims.size() - 2; i >= 0; i--) { + strides[i] = dims[i + 1] * strides[i + 1]; + } + std::vector dims_with_group(dims.begin(), dims.end()); + if (groups > 1) { + dims_with_group[1] = dims_with_group[1] / groups; + } + PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( + desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(), + dims_with_group.data(), strides.data())); + } + + private: + std::unique_ptr desc_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cudnn_desc_test.cc b/paddle/fluid/platform/cudnn_desc_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..a60102a54899b25c89d8c131220dde21f77bba70 --- /dev/null +++ b/paddle/fluid/platform/cudnn_desc_test.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/cudnn_desc.h" +#include + +namespace paddle { +namespace platform { + +TEST(TensorDescriptor, Empty) { + ActivationDescriptor a; + TensorDescriptor t; + TensorDescriptor t1; + TensorDescriptor *t11 = new TensorDescriptor(); + delete t11; + std::unique_ptr tt(new TensorDescriptor()); +} + +TEST(TensorDescriptor, Normal) { + framework::Tensor tt; + tt.Resize({2, 3, 4}); + tt.mutable_data(platform::CPUPlace()); + + TensorDescriptor desc; + desc.set(tt); + EXPECT_TRUE(desc.desc() != nullptr); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 2493fb71c019f9923012afa4a46cb3e95479f860..920b43b2b1990af58b73888bf7a652d57c20563c 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -291,7 +291,7 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) if (dynload::HasCUDNN()) { auto local_cudnn_version = cudnn_dso_ver / 100; auto compile_cudnn_version = CUDNN_VERSION / 100; - if (local_cudnn_version < compile_cudnn_version) { + if (local_cudnn_version < static_cast(compile_cudnn_version)) { LOG_FIRST_N(WARNING, 1) << "WARNING: device: " << place_.device << ". The installed Paddle is compiled with CUDNN " @@ -394,7 +394,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, int tid = platform::get_cur_thread_id(); - std::lock_guard lock(*p_mutex_.get()); + std::lock_guard lock(*p_mutex_); // Find KeyBlob for current thread auto map_it = pMap->find(tid); @@ -427,7 +427,7 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( int tid = platform::get_cur_thread_id(); - std::lock_guard lock(*p_mutex_.get()); + std::lock_guard lock(*p_mutex_); // Find KeyBlob for current thread firstly auto map_it = pMap->find(tid); diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc index 0a4563ead65b1e45adca1d1a1fce066a1a55d932..0179daa55715be9787bc7cc8a693319024d404b7 100644 --- a/paddle/fluid/platform/device_tracer.cc +++ b/paddle/fluid/platform/device_tracer.cc @@ -14,17 +14,23 @@ limitations under the License. */ #include "paddle/fluid/platform/device_tracer.h" #include +#include #include +#include #include #include // NOLINT #include +#include #include #include // NOLINT +#include +#include #include #include "glog/logging.h" #include "google/protobuf/text_format.h" #include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/string/printf.h" namespace paddle { @@ -33,17 +39,31 @@ namespace { // Tracking the nested block stacks of each thread. thread_local std::deque block_id_stack; // Tracking the nested event stacks. -thread_local std::deque annotation_stack; +thread_local std::deque annotation_stack; + +std::map system_thread_id_map; std::once_flag tracer_once_flag; DeviceTracer *tracer = nullptr; + +void PrintCuptiHint() { + static bool showed = false; + if (showed) return; + showed = true; + LOG(WARNING) << "Invalid timestamp occured. Please try increasing the " + "FLAGS_multiple_of_cupti_buffer_size."; +} + } // namespace #ifdef PADDLE_WITH_CUPTI namespace { -// TODO(panyx0718): Revisit the buffer size here. -uint64_t kBufSize = 32 * 1024; +// The experimental best performance is +// the same size with CUPTI device buffer size(8M) +uint64_t kBufSize = 1024 * 1024 * 8; uint64_t kAlignSize = 8; +std::unordered_map runtime_cbid_str, + driver_cbid_str; #define ALIGN_BUFFER(buffer, align) \ (((uintptr_t)(buffer) & ((align)-1)) \ @@ -92,15 +112,33 @@ std::string MemcpyKind(CUpti_ActivityMemcpyKind kind) { return "MEMCPY"; } +std::string DriverKind(CUpti_CallbackId cbid) { + auto iter = driver_cbid_str.find(cbid); + if (iter == driver_cbid_str.end()) + return "Driver API " + std::to_string(cbid); + return iter->second; +} + +std::string RuntimeKind(CUpti_CallbackId cbid) { + auto iter = runtime_cbid_str.find(cbid); + if (iter == runtime_cbid_str.end()) + return "Runtime API " + std::to_string(cbid); + return iter->second; +} + void EnableActivity() { // Device activity record is created when CUDA initializes, so we // want to enable it before cuInit() or any CUDA runtime call. CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY)); - CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); - CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE)); - CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET)); - CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD)); + CUPTI_CALL( + dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); + // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER)); + CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); // We don't track these activities for now. + CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET)); + // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD)); + // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE)); // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT)); // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER)); // CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME)); @@ -110,16 +148,17 @@ void EnableActivity() { void DisableActivity() { CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY)); - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_KERNEL)); - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DEVICE)); + CUPTI_CALL( + dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL)); + // CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DEVICE)); // Disable all other activity record kinds. - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONTEXT)); + // CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONTEXT)); CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DRIVER)); CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME)); CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMSET)); - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_NAME)); - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MARKER)); - CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_OVERHEAD)); + // CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_NAME)); + // CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MARKER)); + // CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_OVERHEAD)); } void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, @@ -132,6 +171,11 @@ void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) { + static std::thread::id cupti_thread_id(0); + if (cupti_thread_id == std::thread::id(0)) + cupti_thread_id = std::this_thread::get_id(); + PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id, + "Only one thread is allowed to call bufferCompleted()"); CUptiResult status; CUpti_Activity *record = NULL; if (validSize > 0) { @@ -168,6 +212,31 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, memcpy->correlationId, memcpy->bytes); break; } + case CUPTI_ACTIVITY_KIND_MEMSET: { + auto *memset = + reinterpret_cast(record); + tracer->AddKernelRecords("MEMSET", memset->start, memset->end, + memset->deviceId, memset->streamId, + memset->correlationId); + break; + } + case CUPTI_ACTIVITY_KIND_DRIVER: { + auto *api = reinterpret_cast(record); + if (api->start != 0 && api->end != 0) + // -1 device id represents CUDA api call + tracer->AddCPURecords( + DriverKind(api->cbid), api->start, api->end, -1, + GetThreadIdFromSystemThreadId(api->threadId)); + break; + } + case CUPTI_ACTIVITY_KIND_RUNTIME: { + auto *api = reinterpret_cast(record); + if (api->start != 0 && api->end != 0) + tracer->AddCPURecords( + RuntimeKind(api->cbid), api->start, api->end, -1, + GetThreadIdFromSystemThreadId(api->threadId)); + break; + } default: { break; } } } else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) { @@ -183,21 +252,35 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, dynload::cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); if (dropped != 0) { fprintf(stderr, "Dropped %u activity records\n", (unsigned int)dropped); + PrintCuptiHint(); } } free(buffer); } + +void initCuptiCbidStr(); + } // namespace #endif // PADDLE_WITH_CUPTI class DeviceTracerImpl : public DeviceTracer { public: - DeviceTracerImpl() : enabled_(false) {} + DeviceTracerImpl() : enabled_(false) { +#ifdef PADDLE_WITH_CUPTI + initCuptiCbidStr(); +#endif + } - void AddAnnotation(uint64_t id, const std::string &anno) { - std::lock_guard l(trace_mu_); - correlations_[id] = anno; + void AddAnnotation(uint32_t id, Event *event) { + thread_local std::forward_list> + *local_correlations_pairs = nullptr; + if (local_correlations_pairs == nullptr) { + std::lock_guard l(trace_mu_); + correlations_pairs.emplace_front(); + local_correlations_pairs = &correlations_pairs.front(); + } + local_correlations_pairs->push_front(std::make_pair(id, event)); } void AddCPURecords(const std::string &anno, uint64_t start_ns, @@ -206,8 +289,13 @@ class DeviceTracerImpl : public DeviceTracer { VLOG(1) << "Empty timeline annotation."; return; } - std::lock_guard l(trace_mu_); - cpu_records_.push_back( + thread_local std::forward_list *local_cpu_records_ = nullptr; + if (local_cpu_records_ == nullptr) { + std::lock_guard l(trace_mu_); + cpu_records_.emplace_front(); + local_cpu_records_ = &cpu_records_.front(); + } + local_cpu_records_->push_front( CPURecord{anno, start_ns, end_ns, device_id, thread_id}); } @@ -215,25 +303,27 @@ class DeviceTracerImpl : public DeviceTracer { uint64_t end_ns, int64_t device_id, int64_t stream_id, uint32_t correlation_id, uint64_t bytes) { // 0 means timestamp information could not be collected for the kernel. - if (start_ns == 0 || end_ns == 0) { + if (start_ns == 0 || end_ns == 0 || start_ns == end_ns) { VLOG(3) << name << " cannot be traced"; + PrintCuptiHint(); return; } - std::lock_guard l(trace_mu_); - mem_records_.push_back(MemRecord{name, start_ns, end_ns, device_id, - stream_id, correlation_id, bytes}); + // NOTE(liangdun): lock is not needed, only one thread call this function. + mem_records_.push_front(MemRecord{name, start_ns, end_ns, device_id, + stream_id, correlation_id, bytes}); } void AddKernelRecords(std::string name, uint64_t start, uint64_t end, int64_t device_id, int64_t stream_id, uint32_t correlation_id) { // 0 means timestamp information could not be collected for the kernel. - if (start == 0 || end == 0) { + if (start == 0 || end == 0 || start == end) { VLOG(3) << correlation_id << " cannot be traced"; + PrintCuptiHint(); return; } - std::lock_guard l(trace_mu_); - kernel_records_.push_back( + // NOTE(liangdun): lock is not needed, only one thread call this function. + kernel_records_.push_front( KernelRecord{name, start, end, device_id, stream_id, correlation_id}); } @@ -263,25 +353,82 @@ class DeviceTracerImpl : public DeviceTracer { } else if (ret != CUPTI_SUCCESS) { fprintf(stderr, "Failed to create CUPTI subscriber.\n"); } - CUPTI_CALL( - dynload::cuptiEnableCallback(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API, - CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)); + const std::vector cbids { + CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020, + CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020, + CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020, + CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020, + CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020, + CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 +#if CUDA_VERSION >= 9000 + , + CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernel_v9000, + CUPTI_RUNTIME_TRACE_CBID_cudaLaunchCooperativeKernelMultiDevice_v9000 +#endif + }; + for (auto cbid : cbids) + CUPTI_CALL(dynload::cuptiEnableCallback( + 1, subscriber_, CUPTI_CB_DOMAIN_RUNTIME_API, cbid)); CUPTI_CALL(dynload::cuptiGetTimestamp(&start_ns_)); #endif // PADDLE_WITH_CUPTI enabled_ = true; } + void Reset() { +#ifdef PADDLE_WITH_CUPTI + CUPTI_CALL( + dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED)); +#endif + std::lock_guard l(trace_mu_); + kernel_records_.clear(); + mem_records_.clear(); + correlations_.clear(); + for (auto &tmp : correlations_pairs) tmp.clear(); + for (auto &tmp : cpu_records_) tmp.clear(); + } + + void GenEventKernelCudaElapsedTime() { +#ifdef PADDLE_WITH_CUPTI + if (correlations_.empty()) + for (auto &tmp : correlations_pairs) + for (auto &pair : tmp) correlations_[pair.first] = pair.second; + for (const KernelRecord &r : kernel_records_) { + auto c = correlations_.find(r.correlation_id); + if (c != correlations_.end() && c->second != nullptr) { + Event *e = c->second; + e->AddCudaElapsedTime(r.start_ns, r.end_ns); + } + } + for (const auto &r : mem_records_) { + auto c = correlations_.find(r.correlation_id); + if (c != correlations_.end() && c->second != nullptr) { + Event *e = c->second; + e->AddCudaElapsedTime(r.start_ns, r.end_ns); + } + } +#endif + } + proto::Profile GenProfile(const std::string &profile_path) { + int miss = 0, find = 0; std::lock_guard l(trace_mu_); proto::Profile profile_pb; profile_pb.set_start_ns(start_ns_); profile_pb.set_end_ns(end_ns_); + if (correlations_.empty()) + for (auto &tmp : correlations_pairs) + for (auto &pair : tmp) correlations_[pair.first] = pair.second; for (const KernelRecord &r : kernel_records_) { auto *event = profile_pb.add_events(); event->set_type(proto::Event::GPUKernel); - if (correlations_.find(r.correlation_id) != correlations_.end()) { - event->set_name(correlations_.at(r.correlation_id)); + auto c = correlations_.find(r.correlation_id); + if (c != correlations_.end() && c->second != nullptr) { + event->set_name(c->second->name()); + event->set_detail_info(r.name); + find++; } else { + VLOG(10) << "Missing Kernel Event: " + r.name; + miss++; event->set_name(r.name); } event->set_start_ns(r.start_ns); @@ -289,31 +436,41 @@ class DeviceTracerImpl : public DeviceTracer { event->set_sub_device_id(r.stream_id); event->set_device_id(r.device_id); } - - for (const CPURecord &r : cpu_records_) { - auto *event = profile_pb.add_events(); - event->set_type(proto::Event::CPU); - event->set_name(r.name); - event->set_start_ns(r.start_ns); - event->set_end_ns(r.end_ns); - event->set_sub_device_id(r.thread_id); - event->set_device_id(r.device_id); - } + VLOG(1) << "KernelRecord event miss: " << miss << " find: " << find; + for (auto &tmp : cpu_records_) + for (const CPURecord &r : tmp) { + auto *event = profile_pb.add_events(); + event->set_type(proto::Event::CPU); + event->set_name(r.name); + event->set_start_ns(r.start_ns); + event->set_end_ns(r.end_ns); + event->set_sub_device_id(r.thread_id); + event->set_device_id(r.device_id); + } + miss = find = 0; for (const MemRecord &r : mem_records_) { auto *event = profile_pb.add_events(); event->set_type(proto::Event::GPUKernel); - event->set_name(r.name); + auto c = correlations_.find(r.correlation_id); + if (c != correlations_.end() && c->second != nullptr) { + event->set_name(c->second->name()); + event->set_detail_info(r.name); + find++; + } else { + miss++; + event->set_name(r.name); + } event->set_start_ns(r.start_ns); event->set_end_ns(r.end_ns); event->set_sub_device_id(r.stream_id); event->set_device_id(r.device_id); event->mutable_memcopy()->set_bytes(r.bytes); } + VLOG(1) << "MemRecord event miss: " << miss << " find: " << find; std::ofstream profile_f; - profile_f.open(profile_path, std::ios::out | std::ios::trunc); - std::string profile_str; - profile_pb.SerializeToString(&profile_str); - profile_f << profile_str; + profile_f.open(profile_path, + std::ios::out | std::ios::trunc | std::ios::binary); + profile_pb.SerializeToOstream(&profile_f); profile_f.close(); return profile_pb; } @@ -321,12 +478,13 @@ class DeviceTracerImpl : public DeviceTracer { void Disable() { #ifdef PADDLE_WITH_CUPTI // flush might cause additional calls to DeviceTracker. - dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED); + CUPTI_CALL( + dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED)); #endif // PADDLE_WITH_CUPTI std::lock_guard l(trace_mu_); #ifdef PADDLE_WITH_CUPTI DisableActivity(); - dynload::cuptiUnsubscribe(subscriber_); + CUPTI_CALL(dynload::cuptiUnsubscribe(subscriber_)); CUPTI_CALL(dynload::cuptiGetTimestamp(&end_ns_)); #endif // PADDLE_WITH_CUPTI enabled_ = false; @@ -337,18 +495,10 @@ class DeviceTracerImpl : public DeviceTracer { static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata) { auto *cbInfo = reinterpret_cast(cbdata); - DeviceTracer *tracer = reinterpret_cast(userdata); - - if ((domain == CUPTI_CB_DOMAIN_DRIVER_API) && - (cbid == CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)) { - if (cbInfo->callbackSite == CUPTI_API_ENTER) { - const std::string anno = !annotation_stack.empty() - ? annotation_stack.back() - : cbInfo->symbolName; - tracer->AddAnnotation(cbInfo->correlationId, anno); - } - } else { - VLOG(1) << "Unhandled API Callback for " << domain << " " << cbid; + DeviceTracerImpl *tracer = reinterpret_cast(userdata); + if (cbInfo->callbackSite == CUPTI_API_ENTER) { + Event *event = CurAnnotation(); + tracer->AddAnnotation(cbInfo->correlationId, event); } } CUpti_SubscriberHandle subscriber_; @@ -357,10 +507,12 @@ class DeviceTracerImpl : public DeviceTracer { bool enabled_; uint64_t start_ns_; uint64_t end_ns_; - std::vector kernel_records_; - std::vector mem_records_; - std::vector cpu_records_; - std::unordered_map correlations_; + std::forward_list kernel_records_; + std::forward_list mem_records_; + std::forward_list> cpu_records_; + std::forward_list>> + correlations_pairs; + std::unordered_map correlations_; }; void CreateTracer(DeviceTracer **t) { *t = new DeviceTracerImpl(); } @@ -370,21 +522,106 @@ DeviceTracer *GetDeviceTracer() { return tracer; } -void SetCurAnnotation(const std::string &anno) { - annotation_stack.push_back(anno); -} +void SetCurAnnotation(Event *event) { annotation_stack.push_back(event); } void ClearCurAnnotation() { annotation_stack.pop_back(); } -std::string CurAnnotation() { - if (annotation_stack.empty()) return ""; +Event *CurAnnotation() { + if (annotation_stack.empty()) return nullptr; return annotation_stack.back(); } +std::string CurAnnotationName() { + if (annotation_stack.empty()) return ""; + return annotation_stack.back()->name(); +} void SetCurBlock(int block_id) { block_id_stack.push_back(block_id); } void ClearCurBlock() { block_id_stack.pop_back(); } int BlockDepth() { return block_id_stack.size(); } + +uint32_t GetCurSystemThreadId() { + std::stringstream ss; + ss << std::this_thread::get_id(); + uint32_t id = static_cast(std::stoull(ss.str())); + return id; +} + +void RecoreCurThreadId(int32_t id) { + auto gid = GetCurSystemThreadId(); + VLOG(1) << "RecoreCurThreadId: " << gid << " -> " << id; + system_thread_id_map[gid] = id; +} + +int32_t GetThreadIdFromSystemThreadId(uint32_t id) { + auto it = system_thread_id_map.find(id); + if (it != system_thread_id_map.end()) return it->second; + // return origin id if no event is recorded in this thread. + return static_cast(id); +} + +#ifdef PADDLE_WITH_CUPTI +namespace { + +void initCuptiCbidStr() { + static bool called = false; + if (called) return; + called = true; +#define REGISTER_RUNTIME_CBID_STR(cbid) \ + runtime_cbid_str[CUPTI_RUNTIME_TRACE_CBID_##cbid] = #cbid + + REGISTER_RUNTIME_CBID_STR(cudaBindTexture_v3020); + REGISTER_RUNTIME_CBID_STR(cudaConfigureCall_v3020); + REGISTER_RUNTIME_CBID_STR(cudaDeviceGetAttribute_v5000); + REGISTER_RUNTIME_CBID_STR(cudaDeviceGetStreamPriorityRange_v5050); + REGISTER_RUNTIME_CBID_STR(cudaDeviceSynchronize_v3020); + REGISTER_RUNTIME_CBID_STR(cudaDriverGetVersion_v3020); + REGISTER_RUNTIME_CBID_STR(cudaEventCreateWithFlags_v3020); + REGISTER_RUNTIME_CBID_STR(cudaEventDestroy_v3020); + REGISTER_RUNTIME_CBID_STR(cudaEventDestroy_v3020); + REGISTER_RUNTIME_CBID_STR(cudaEventQuery_v3020); + REGISTER_RUNTIME_CBID_STR(cudaEventRecord_v3020); + REGISTER_RUNTIME_CBID_STR(cudaFreeHost_v3020); + REGISTER_RUNTIME_CBID_STR(cudaFree_v3020); + REGISTER_RUNTIME_CBID_STR(cudaFuncGetAttributes_v3020); + REGISTER_RUNTIME_CBID_STR(cudaGetDeviceCount_v3020); + REGISTER_RUNTIME_CBID_STR(cudaGetDeviceProperties_v3020); + REGISTER_RUNTIME_CBID_STR(cudaGetDevice_v3020); + REGISTER_RUNTIME_CBID_STR(cudaGetErrorString_v3020); + REGISTER_RUNTIME_CBID_STR(cudaGetLastError_v3020); + REGISTER_RUNTIME_CBID_STR(cudaHostAlloc_v3020); + REGISTER_RUNTIME_CBID_STR(cudaHostGetDevicePointer_v3020); + REGISTER_RUNTIME_CBID_STR(cudaLaunchKernel_v7000); + REGISTER_RUNTIME_CBID_STR(cudaMallocHost_v3020); + REGISTER_RUNTIME_CBID_STR(cudaMalloc_v3020); + REGISTER_RUNTIME_CBID_STR(cudaMemcpyAsync_v3020); + REGISTER_RUNTIME_CBID_STR(cudaMemcpy_v3020); + REGISTER_RUNTIME_CBID_STR(cudaMemsetAsync_v3020); + REGISTER_RUNTIME_CBID_STR(cudaMemset_v3020); + REGISTER_RUNTIME_CBID_STR( + cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_v7000); + REGISTER_RUNTIME_CBID_STR(cudaPeekAtLastError_v3020); + REGISTER_RUNTIME_CBID_STR(cudaRuntimeGetVersion_v3020); + REGISTER_RUNTIME_CBID_STR(cudaSetDevice_v3020); + REGISTER_RUNTIME_CBID_STR(cudaStreamCreate_v3020); + REGISTER_RUNTIME_CBID_STR(cudaStreamCreateWithFlags_v5000); + REGISTER_RUNTIME_CBID_STR(cudaStreamCreateWithPriority_v5050); + REGISTER_RUNTIME_CBID_STR(cudaStreamDestroy_v5050); + REGISTER_RUNTIME_CBID_STR(cudaStreamSynchronize_v3020); + REGISTER_RUNTIME_CBID_STR(cudaStreamWaitEvent_v3020); + REGISTER_RUNTIME_CBID_STR(cudaUnbindTexture_v3020); + REGISTER_RUNTIME_CBID_STR(cudaSetupArgument_v3020); + REGISTER_RUNTIME_CBID_STR(cudaLaunch_v3020); +#if CUDA_VERSION >= 9000 + REGISTER_RUNTIME_CBID_STR(cudaLaunchCooperativeKernel_v9000); + REGISTER_RUNTIME_CBID_STR(cudaLaunchCooperativeKernelMultiDevice_v9000); +#endif + +#undef REGISTER_RUNTIME_CBID_STR +} +} // namespace +#endif // PADDLE_WITH_CUPTI + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index bf0786be2d0fafbf4b610d16ef587ac219399203..d4418d836d66e329af8ed3f5ec05f49d47146b3e 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/platform/dynload/cupti.h" +#include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.pb.h" @@ -68,11 +69,13 @@ class DeviceTracer { virtual void Enable() = 0; // Needs to be called once after use. virtual void Disable() = 0; + // Needs to be called once before reuse. + virtual void Reset() = 0; // Add a pair to correlate internal cuda id with high level - // annotation (string). So cuda statistics can be represented by + // annotation event(with string). So cuda statistics can be represented by // human-readable annotations. - virtual void AddAnnotation(uint64_t id, const std::string& anno) = 0; + virtual void AddAnnotation(uint32_t id, Event* event) = 0; virtual void AddMemRecords(const std::string& name, uint64_t start_ns, uint64_t end_ns, int64_t device_id, @@ -92,6 +95,9 @@ class DeviceTracer { // Generate a proto after done (Disabled). virtual proto::Profile GenProfile(const std::string& profile_path) = 0; + // generate kernel elapsed time into Event + virtual void GenEventKernelCudaElapsedTime() = 0; + virtual bool IsEnabled() = 0; }; @@ -99,14 +105,19 @@ class DeviceTracer { DeviceTracer* GetDeviceTracer(); // Set a name for the cuda kernel operation being launched by the thread. -void SetCurAnnotation(const std::string& anno); +void SetCurAnnotation(Event* event); // Clear the name after the operation is done. void ClearCurAnnotation(); // Current name of the operation being run in the thread. -std::string CurAnnotation(); +std::string CurAnnotationName(); +Event* CurAnnotation(); void SetCurBlock(int block_id); void ClearCurBlock(); int BlockDepth(); + +// Set current thread id, so we can map the system thread id to thread id. +void RecoreCurThreadId(int32_t id); +int32_t GetThreadIdFromSystemThreadId(uint32_t id); } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 2f4f8101e4b957634d68fb0d64649ff8afba7c54..3008c166938d7db190e8f716ca925fda5ccebc25 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -99,6 +99,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(cudnnDestroy); \ __macro(cudnnSetStream); \ __macro(cudnnActivationForward); \ + __macro(cudnnActivationBackward); \ __macro(cudnnConvolutionForward); \ __macro(cudnnConvolutionBackwardBias); \ __macro(cudnnGetConvolutionForwardWorkspaceSize); \ diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index a260cda49138580b209e647af459e9392d9f18f1..a5b846f500f3677188b170dda76c65047d628064 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -86,6 +86,8 @@ extern void* mklml_dso_handle; __macro(vdPowx); \ __macro(vsInv); \ __macro(vdInv); \ + __macro(vmsErf); \ + __macro(vmdErf); \ __macro(MKL_Set_Num_Threads) MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 142d38f0609d963ce3ff45c595b8432b0e5edd21..bdb1d1bd3bf47ea89984587ae84d2aa84be232a4 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -31,7 +31,10 @@ limitations under the License. */ #include #include #include +#include +#include +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include "glog/logging.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/port.h" @@ -233,9 +236,11 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) { #endif // __APPLE__ and windows #endif // PADDLE_WITH_CUDA -#define PADDLE_THROW(...) \ - throw ::paddle::platform::EnforceNotMet( \ - ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__) +#define PADDLE_THROW(...) \ + do { \ + throw ::paddle::platform::EnforceNotMet( \ + ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \ + } while (0) #define PADDLE_ENFORCE(COND, ...) \ do { \ @@ -270,23 +275,71 @@ inline void throw_on_error(ncclResult_t stat, const std::string& msg) { * extra messages is also supported, for example: * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) */ -#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ - do { \ - if (UNLIKELY(nullptr == (__VAL))) { \ - PADDLE_THROW(#__VAL " should not be null\n%s", \ - paddle::string::Sprintf("" __VA_ARGS__)); \ - } \ +#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ + do { \ + if (UNLIKELY(nullptr == (__VAL))) { \ + PADDLE_THROW(#__VAL " should not be null\n%s", \ + ::paddle::string::Sprintf(__VA_ARGS__)); \ + } \ } while (0) -#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ +namespace details { +template +inline constexpr bool IsArithmetic() { + return std::is_arithmetic::value; +} + +template +struct TypeConverterImpl { + using Type1 = typename std::common_type::type; + using Type2 = Type1; +}; + +template +struct TypeConverterImpl { + using Type1 = T1; + using Type2 = T2; +}; + +template +struct TypeConverter { + private: + static constexpr bool kIsArithmetic = + IsArithmetic() && IsArithmetic(); + + public: + using Type1 = typename TypeConverterImpl::Type1; + using Type2 = typename TypeConverterImpl::Type2; +}; + +template +using CommonType1 = typename std::add_lvalue_reference< + typename std::add_const::Type1>::type>::type; + +template +using CommonType2 = typename std::add_lvalue_reference< + typename std::add_const::Type2>::type>::type; +} // namespace details + +#define __PADDLE_BINARY_COMPARE(__VAL1, __VAL2, __CMP, __INV_CMP, ...) \ do { \ - if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \ + auto __val1 = (__VAL1); \ + auto __val2 = (__VAL2); \ + using __TYPE1__ = decltype(__val1); \ + using __TYPE2__ = decltype(__val2); \ + using __COMMON_TYPE1__ = \ + ::paddle::platform::details::CommonType1<__TYPE1__, __TYPE2__>; \ + using __COMMON_TYPE2__ = \ + ::paddle::platform::details::CommonType2<__TYPE1__, __TYPE2__>; \ + bool __is_not_error = (static_cast<__COMMON_TYPE1__>(__val1))__CMP( \ + static_cast<__COMMON_TYPE2__>(__val2)); \ + if (UNLIKELY(!__is_not_error)) { \ PADDLE_THROW("Enforce failed. Expected %s " #__CMP \ " %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \ - #__VAL0, #__VAL1, #__VAL0, \ - paddle::string::to_string(__VAL0), #__VAL1, \ - paddle::string::to_string(__VAL1), \ - paddle::string::Sprintf("" __VA_ARGS__)); \ + #__VAL1, #__VAL2, #__VAL1, \ + ::paddle::string::to_string(__val1), #__VAL2, \ + ::paddle::string::to_string(__val2), \ + ::paddle::string::Sprintf(__VA_ARGS__)); \ } \ } while (0) diff --git a/paddle/fluid/platform/enforce_test.cc b/paddle/fluid/platform/enforce_test.cc index 1091badae54a809c4a9da6d0398bcbb538420af0..adcc95367f11dfa2722226e5a0386bedfa6e746e 100644 --- a/paddle/fluid/platform/enforce_test.cc +++ b/paddle/fluid/platform/enforce_test.cc @@ -118,59 +118,58 @@ TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GT(1, 2UL); + PADDLE_ENFORCE_GT(1, 2); } catch (paddle::platform::EnforceNotMet error) { caught_exception = true; - EXPECT_TRUE(HasPrefix( - StringPiece(error.what()), - "Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2.")); + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), + "Enforce failed. Expected 1 > 2, but received 1:1 <= 2:2.")); } EXPECT_TRUE(caught_exception); } TEST(ENFORCE_GE, OK) { - PADDLE_ENFORCE_GE(2, 2UL); - PADDLE_ENFORCE_GE(3, 2UL); + PADDLE_ENFORCE_GE(2, 2); PADDLE_ENFORCE_GE(3, 2); - PADDLE_ENFORCE_GE(3.21, 2UL); + PADDLE_ENFORCE_GE(3.21, 2.0); } TEST(ENFORCE_GE, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GE(1, 2UL); + PADDLE_ENFORCE_GE(1, 2); } catch (paddle::platform::EnforceNotMet error) { caught_exception = true; - EXPECT_TRUE(HasPrefix( - StringPiece(error.what()), - "Enforce failed. Expected 1 >= 2UL, but received 1:1 < 2UL:2.")); + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), + "Enforce failed. Expected 1 >= 2, but received 1:1 < 2:2.")); } EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LE, OK) { PADDLE_ENFORCE_LE(1, 1); - PADDLE_ENFORCE_LE(1, 1UL); - PADDLE_ENFORCE_LE(2, 3UL); - PADDLE_ENFORCE_LE(2UL, 3); - PADDLE_ENFORCE_LE(2UL, 3.2); + PADDLE_ENFORCE_LE(1UL, 1UL); + PADDLE_ENFORCE_LE(2, 3); + PADDLE_ENFORCE_LE(2UL, 3UL); + PADDLE_ENFORCE_LE(2.0, 3.2); } TEST(ENFORCE_LE, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GT(1, 2UL); + PADDLE_ENFORCE_GT(1, 2); } catch (paddle::platform::EnforceNotMet error) { caught_exception = true; - EXPECT_TRUE(HasPrefix( - StringPiece(error.what()), - "Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2.")); + EXPECT_TRUE( + HasPrefix(StringPiece(error.what()), + "Enforce failed. Expected 1 > 2, but received 1:1 <= 2:2.")); } EXPECT_TRUE(caught_exception); } TEST(ENFORCE_LT, OK) { PADDLE_ENFORCE_LT(3, 10); - PADDLE_ENFORCE_LT(2, 3UL); - PADDLE_ENFORCE_LT(2UL, 3); + PADDLE_ENFORCE_LT(2UL, 3UL); + PADDLE_ENFORCE_LT(2, 3); } TEST(ENFORCE_LT, FAIL) { bool caught_exception = false; @@ -235,7 +234,13 @@ TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { TEST(ENFORCE_USER_DEFINED_CLASS, NE) { Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; - ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); + bool caught_exception = false; + try { + PADDLE_ENFORCE_EQ(a, b); + } catch (paddle::platform::EnforceNotMet&) { + caught_exception = true; + } + EXPECT_TRUE(caught_exception); } TEST(EOF_EXCEPTION, THROW_EOF) { diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h new file mode 100644 index 0000000000000000000000000000000000000000..2dcf966754cbed2670acb9c3548c23355be5503c --- /dev/null +++ b/paddle/fluid/platform/event.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#ifdef PADDLE_WITH_CUDA +#include +#endif + +namespace paddle { +namespace platform { + +enum EventType { kMark, kPushRange, kPopRange }; + +class Event { + public: + // The DeviceContext is used to get the cuda stream. + // If CPU profiling mode, can pass nullptr. + Event(EventType type, std::string name, uint32_t thread_id); + + const EventType& type() const; + std::string name() const { return name_; } + uint32_t thread_id() const { return thread_id_; } + +#ifdef PADDLE_WITH_CUDA +#ifndef PADDLE_WITH_CUPTI + cudaEvent_t event() const { return event_; } + int device() const { return device_; } +#endif +#endif + + double CpuElapsedMs(const Event& e) const; + double CudaElapsedMs(const Event& e) const; + + private: + EventType type_; + std::string name_; + uint32_t thread_id_; + int64_t cpu_ns_; +#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_CUPTI + int64_t gpu_ns_ = 0; + + public: + void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) { + gpu_ns_ += end_ns - start_ns; + } + + private: +#else + cudaEvent_t event_ = nullptr; + int device_ = -1; +#endif +#endif +}; +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac86b38a61c9d8e3e946d9fb3f46d8feba7c034d..4dcf7e79043af008cb2067d90d12d629c5c2d0d9 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/string/split.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cuda_device_guard.h" +#include "paddle/fluid/platform/dynload/cupti.h" #endif #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" @@ -30,6 +31,9 @@ limitations under the License. */ DEFINE_int32(paddle_num_threads, 1, "Number of threads for each paddle instance."); +DEFINE_int32(multiple_of_cupti_buffer_size, 1, + "Multiple of the CUPTI device buffer size. If the timestamps have " + "been dropped when you are profiling, try increasing this value."); namespace paddle { namespace framework { @@ -78,7 +82,32 @@ void InitP2P(std::vector devices) { #endif } +void InitCupti() { +#ifdef PADDLE_WITH_CUPTI + if (FLAGS_multiple_of_cupti_buffer_size == 1) return; + size_t attrValue = 0, attrValueSize = sizeof(size_t); +#define MULTIPLY_ATTR_VALUE(attr) \ + { \ + PADDLE_ENFORCE(!platform::dynload::cuptiActivityGetAttribute( \ + attr, &attrValueSize, &attrValue)); \ + attrValue *= FLAGS_multiple_of_cupti_buffer_size; \ + LOG(WARNING) << "Set " #attr " " << attrValue << " byte"; \ + PADDLE_ENFORCE(!platform::dynload::cuptiActivitySetAttribute( \ + attr, &attrValueSize, &attrValue)); \ + } + MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE); + MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE_CDP); +#if CUDA_VERSION >= 9000 + MULTIPLY_ATTR_VALUE(CUPTI_ACTIVITY_ATTR_PROFILING_SEMAPHORE_POOL_SIZE); +#endif +#undef MULTIPLY_ATTR_VALUE +#endif +} + void InitDevices(bool init_p2p) { + // CUPTI attribute should be set before any CUDA context is created (see CUPTI + // documentation about CUpti_ActivityAttribute). + InitCupti(); /*Init all available devices by default */ std::vector devices; #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 269280d604a13a62046fb7811d34b7c69b61b50f..4fa6774f028bef901f6e11f2d3dafe52a10a548e 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -39,6 +39,45 @@ class MKLDNNHandler { return this->AcquireMemory(md, ptr, "@user_src_mem_p"); } + // TODO(jczaja): extract common part and make AcquireMemory + std::shared_ptr AcquireSrcMemory( + const mkldnn::memory::primitive_desc& mpd, void* ptr) { + auto local_key = key_ + "@user_src_mem_p"; + auto mem_p = + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), + " find mem primitive in device context"); + if (mem_p == nullptr) { + mem_p = std::make_shared(mpd, ptr); + dev_ctx_.SetBlob(local_key, mem_p); + } else { + mem_p->set_data_handle(ptr); + // Mark that reusing happenned. All primitives from operator instance + // should be reused or none of them. So we check consistency + is_reusing_ = true; + } + return mem_p; + } + + std::shared_ptr AcquireWeightsMemory( + const mkldnn::memory::primitive_desc& mpd, void* ptr) { + auto local_key = key_ + "@user_weights_mem_p"; + auto mem_p = + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), + " find mem primitive in device context"); + if (mem_p == nullptr) { + mem_p = std::make_shared(mpd, ptr); + dev_ctx_.SetBlob(local_key, mem_p); + } else { + mem_p->set_data_handle(ptr); + // Mark that reusing happenned. All primitives from operator instance + // should be reused or none of them. So we check consistency + is_reusing_ = true; + } + return mem_p; + } + std::shared_ptr AcquireWeightsMemory( const mkldnn::memory::desc& md, void* ptr, user_function custom_func = {}) { @@ -232,7 +271,6 @@ class MKLDNNHandler { AppendKey(key, suffix); } - protected: static void AppendKeyDims(std::string* key, const mkldnn::memory::dims& dims) { for (unsigned int i = 0; i < dims.size(); i++) { @@ -250,6 +288,7 @@ class MKLDNNHandler { key->append(s); } + protected: static std::string dims2str(const mkldnn::memory::dims& operand_dims) { std::string dstr = ""; for (size_t i = 0; i < operand_dims.size(); ++i) { @@ -263,6 +302,9 @@ class MKLDNNHandler { mkldnn::engine engine_; std::string key_; bool is_reusing_; + + public: + static constexpr int MaxKeyLength = 256; }; class TransposeMKLDNNHandler : public MKLDNNHandler { @@ -273,37 +315,7 @@ class TransposeMKLDNNHandler : public MKLDNNHandler { mkldnn::engine engine, const std::string& base_key) : platform::MKLDNNHandler(dev_ctx, engine, base_key), dims_(dims), - axis_(axis), - logical_axis_(dims.size(), 0) {} - - std::shared_ptr AcquireSrcMemory( - const mkldnn::memory::format& fmt, void* ptr) { - auto local_key = key_ + "@user_src_mem_p"; - auto mem_p = - std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); - PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), - " find mem primitive in device context"); - if (mem_p == nullptr) { - // Make memory descriptor using input format, unless it - // cannot be trusted (nchw) then make up memory fmt manually - for (size_t i = 0; i < logical_axis_.size(); ++i) { - logical_axis_[i] = i; - } - auto src_md = fmt != mkldnn::memory::format::nchw - ? platform::MKLDNNMemDesc( - dims_, platform::MKLDNNGetDataType(), fmt) - : Axis2MemoryDesc(dims_, logical_axis_); - mem_p = std::make_shared( - mkldnn::memory::primitive_desc{src_md, engine_}, ptr); - dev_ctx_.SetBlob(local_key, mem_p); - } else { - mem_p->set_data_handle(ptr); - // Mark that reusing happenned. All primitives from operator instance - // should be reused or none of them. So we check consistency - is_reusing_ = true; - } - return mem_p; - } + axis_(axis) {} std::shared_ptr AcquireDstMemory(framework::Tensor* output, platform::Place place) { @@ -388,7 +400,6 @@ class TransposeMKLDNNHandler : public MKLDNNHandler { private: std::vector dims_; std::vector axis_; - std::vector logical_axis_; }; template @@ -548,9 +559,8 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler { PADDLE_ENFORCE((conv_p != nullptr) || (is_reusing_ == false), "Fail to find convolution primitive in device context"); if (conv_p == nullptr) { - conv_p = std::make_shared(*conv_pd_, *(src_memory_p), - *(weights_memory_p.get()), - *(dst_memory_p.get())); + conv_p = std::make_shared(*conv_pd_, *src_memory_p, + *weights_memory_p, *dst_memory_p); dev_ctx_.SetBlob(prim_key, conv_p); } else { @@ -570,9 +580,9 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler { PADDLE_ENFORCE((conv_p != nullptr) || (is_reusing_ == false), "Fail to find convolution primitive in device context"); if (conv_p == nullptr) { - conv_p = std::make_shared( - *conv_pd_, *(src_memory_p), *(weights_memory_p.get()), - *(bias_memory_p.get()), *(dst_memory_p.get())); + conv_p = std::make_shared(*conv_pd_, *src_memory_p, + *weights_memory_p, *bias_memory_p, + *dst_memory_p); dev_ctx_.SetBlob(prim_key, conv_p); } else { diff --git a/paddle/fluid/platform/mkldnn_utils.h b/paddle/fluid/platform/mkldnn_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..8c511f97d12cfe299ad5629eff1871e8d156c850 --- /dev/null +++ b/paddle/fluid/platform/mkldnn_utils.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include + +namespace paddle { +namespace platform { + +inline mkldnn::memory::primitive_desc create_prim_desc_from_dims( + const std::vector& ltz, mkldnn::memory::format fmt, + mkldnn::memory::data_type data_type = mkldnn::memory::data_type::f32) { + mkldnn_memory_desc_t mem_fmt; + + mem_fmt.primitive_kind = mkldnn_memory; + mem_fmt.ndims = ltz.size(); + for (unsigned int i = 0; i < ltz.size(); ++i) { + mem_fmt.dims[i] = ltz[i]; // logical dimensions (nchw format, + // regardless physical layout) + } + mem_fmt.data_type = static_cast(data_type); + mem_fmt.format = static_cast(fmt); + + unsigned int total_stride = 1; + for (int i = ltz.size() - 1; i >= 0; --i) { + mem_fmt.layout_desc.blocking.padding_dims[i] = + ltz[i]; // logical dimensions (nchw format, regardless physical + // layout) + mem_fmt.layout_desc.blocking.block_dims[i] = 1; + mem_fmt.layout_desc.blocking.offset_padding_to_data[i] = 0; // no offset + mem_fmt.layout_desc.blocking.strides[0][i] = total_stride; + mem_fmt.layout_desc.blocking.strides[1][i] = 1; + total_stride *= ltz[i]; + } + mem_fmt.layout_desc.blocking.offset_padding = 0; // no initial offset + + auto& pool = platform::DeviceContextPool::Instance(); + auto place = paddle::platform::CPUPlace(); + auto* dev_ctx = dynamic_cast(pool.Get(place)); + auto& cpu_engine = dev_ctx->GetEngine(); + return mkldnn::memory::primitive_desc(mem_fmt, cpu_engine); +} + +inline mkldnn::memory::primitive_desc create_prim_desc_from_format( + const std::vector& ltz, const mkldnn::memory::format format, + const mkldnn::memory::data_type data_type) { + auto md = mkldnn::memory::desc({ltz}, data_type, format); + auto& pool = platform::DeviceContextPool::Instance(); + auto place = paddle::platform::CPUPlace(); + auto dev_ctx = dynamic_cast(pool.Get(place)); + PADDLE_ENFORCE_NOT_NULL(dev_ctx, "Could not get valid device"); + auto& cpu_engine = dev_ctx->GetEngine(); + return mkldnn::memory::primitive_desc(md, cpu_engine); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/ngraph_helper.h b/paddle/fluid/platform/ngraph_helper.h index b84315995a9d8a65668f57eef67f6dab8c20f9b3..e74f57a79a66ea8fe8c9b972a9a2ec9d722731eb 100644 --- a/paddle/fluid/platform/ngraph_helper.h +++ b/paddle/fluid/platform/ngraph_helper.h @@ -23,6 +23,33 @@ limitations under the License. */ namespace paddle { namespace platform { +std::shared_ptr Nhwc2Nchw(std::shared_ptr in) { + auto in_shape = in->get_shape(); + in_shape[0] = in->get_shape()[0]; + in_shape[1] = in->get_shape()[3]; + in_shape[2] = in->get_shape()[1]; + in_shape[3] = in->get_shape()[2]; + ngraph::AxisVector axis_vec = {0, 3, 1, 2}; + return std::make_shared(in, axis_vec, in_shape); +} + +std::shared_ptr Nchw2Nhwc(std::shared_ptr in) { + auto in_shape = in->get_shape(); + in_shape[0] = in->get_shape()[0]; + in_shape[1] = in->get_shape()[2]; + in_shape[2] = in->get_shape()[3]; + in_shape[3] = in->get_shape()[1]; + ngraph::AxisVector axis_vec = {0, 2, 3, 1}; + return std::make_shared(in, axis_vec, in_shape); +} + +ngraph::Shape FlattenTo1d(ngraph::Shape sh, int num) { + auto x1 = std::accumulate(std::begin(sh), std::end(sh) + num, 1, + std::multiplies()); + size_t x1_l = (size_t)x1; + return ngraph::Shape{x1_l}; +} + ngraph::Shape FlattenTo2d(ngraph::Shape sh, int num) { auto x1 = std::accumulate(std::begin(sh), std::begin(sh) + num, 1, std::multiplies()); diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 85977366e61c676fc5d2d3c5d22dd2f606543684..9a285a6b533dcb48013e3b3e4d34dc27186173ac 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/profiler.h" + #include #include #include @@ -27,7 +29,6 @@ limitations under the License. */ #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/port.h" -#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/string/printf.h" DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not."); @@ -66,12 +67,13 @@ struct EventList { ((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign); template - void Record(Args&&... args) { + Event* Record(Args&&... args) { if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) { event_blocks.emplace_front(); event_blocks.front().reserve(kNumBlock); } event_blocks.front().emplace_back(std::forward(args)...); + return &event_blocks.front().back(); } std::vector Reduce() { @@ -98,21 +100,8 @@ inline uint64_t GetTimeInNsec() { .count(); } -Event::Event(EventType type, std::string name, uint32_t thread_id, - const DeviceContext* dev_ctx) - : type_(type), name_(name), thread_id_(thread_id), has_cuda_(false) { -#ifdef PADDLE_WITH_CUDA - has_cuda_ = dev_ctx ? platform::is_gpu_place(dev_ctx->GetPlace()) : false; - if (has_cuda_) { - auto* cuda_dev_ctx = static_cast(dev_ctx); - PADDLE_ENFORCE(cudaSetDevice( - boost::get(cuda_dev_ctx->GetPlace()).device)); - PADDLE_ENFORCE(cudaGetDevice(&device_)); - PADDLE_ENFORCE(cudaEventCreate(&event_)); - auto stream = cuda_dev_ctx->stream(); - PADDLE_ENFORCE(cudaEventRecord(event_, stream)); - } -#endif +Event::Event(EventType type, std::string name, uint32_t thread_id) + : type_(type), name_(name), thread_id_(thread_id) { cpu_ns_ = GetTimeInNsec(); } @@ -123,89 +112,70 @@ double Event::CpuElapsedMs(const Event& e) const { } double Event::CudaElapsedMs(const Event& e) const { -#ifdef PADDLE_WITH_CUDA - if (!has_cuda_) return 0.0; - PADDLE_ENFORCE(e.has_cuda() && has_cuda()); - PADDLE_ENFORCE(e.device() == device()); - PADDLE_ENFORCE(cudaEventSynchronize(event_)); - PADDLE_ENFORCE(cudaEventSynchronize(e.event())); - float ms; - PADDLE_ENFORCE(cudaEventElapsedTime(&ms, event_, e.event())); - return ms; +#ifdef PADDLE_WITH_CUPTI + return gpu_ns_ / 1000000.0; #else - PADDLE_THROW("CUDA is not enabled"); + LOG_FIRST_N(WARNING, 1) << "CUDA CUPTI is not enabled"; + return 0; #endif } -#ifdef PADDLE_WITH_CUDA -static void ForEachDevice(std::function func) { - auto original_device = GetCurrentDeviceId(); - int count = GetCUDADeviceCount(); - for (int i = 0; i < count; i++) { - SetDeviceId(i); - func(i); - } - SetDeviceId(original_device); -} -#endif - inline EventList& GetEventList() { if (!g_event_list) { std::lock_guard guard(g_all_event_lists_mutex); g_event_list = std::make_shared(); g_thread_id = g_next_thread_id++; g_all_event_lists.emplace_front(g_event_list); + RecoreCurThreadId(g_thread_id); } return *g_event_list; } -void Mark(const std::string& name, const DeviceContext* dev_ctx) { - GetEventList().Record(EventType::kMark, name, g_thread_id, dev_ctx); +void Mark(const std::string& name) { + GetEventList().Record(EventType::kMark, name, g_thread_id); } -void PushEvent(const std::string& name, const DeviceContext* dev_ctx) { - GetEventList().Record(EventType::kPushRange, name, g_thread_id, dev_ctx); +Event* PushEvent(const std::string& name) { + return GetEventList().Record(EventType::kPushRange, name, g_thread_id); } -void PopEvent(const std::string& name, const DeviceContext* dev_ctx) { - GetEventList().Record(EventType::kPopRange, name, g_thread_id, dev_ctx); +void PopEvent(const std::string& name) { + GetEventList().Record(EventType::kPopRange, name, g_thread_id); } -RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx) +RecordEvent::RecordEvent(const std::string& name) : is_enabled_(false), start_ns_(PosixInNsec()) { if (g_state == ProfilerState::kDisabled) return; - std::lock_guard l(profiler_mu); + // lock is not needed, the code below is thread-safe is_enabled_ = true; - dev_ctx_ = dev_ctx; name_ = name; - PushEvent(name_, dev_ctx_); + Event* e = PushEvent(name_); // Maybe need the same push/pop behavior. - SetCurAnnotation(name_); + SetCurAnnotation(e); } RecordEvent::~RecordEvent() { if (g_state == ProfilerState::kDisabled || !is_enabled_) return; - std::lock_guard l(profiler_mu); + // lock is not needed, the code below is thread-safe DeviceTracer* tracer = GetDeviceTracer(); if (tracer) { - tracer->AddCPURecords(CurAnnotation(), start_ns_, PosixInNsec(), + tracer->AddCPURecords(CurAnnotationName(), start_ns_, PosixInNsec(), BlockDepth(), g_thread_id); } ClearCurAnnotation(); - PopEvent(name_, dev_ctx_); + PopEvent(name_); } -RecordRPCEvent::RecordRPCEvent(const std::string& name, - const DeviceContext* dev_ctx) { +RecordRPCEvent::RecordRPCEvent(const std::string& name) { if (FLAGS_enable_rpc_profiler) { - event_.reset(new platform::RecordEvent(name, dev_ctx)); + event_.reset(new platform::RecordEvent(name)); } } RecordBlock::RecordBlock(int block_id) : is_enabled_(false), start_ns_(PosixInNsec()) { - std::lock_guard l(profiler_mu); + // lock is not needed, the code below is thread-safe if (g_state == ProfilerState::kDisabled) return; is_enabled_ = true; SetCurBlock(block_id); @@ -213,7 +183,7 @@ RecordBlock::RecordBlock(int block_id) } RecordBlock::~RecordBlock() { - std::lock_guard l(profiler_mu); + // lock is not needed, the code below is thread-safe if (g_state == ProfilerState::kDisabled || !is_enabled_) return; DeviceTracer* tracer = GetDeviceTracer(); if (tracer) { @@ -225,11 +195,21 @@ RecordBlock::~RecordBlock() { ClearCurBlock(); } +void SynchronizeAllDevice() { +#ifdef PADDLE_WITH_CUDA + int count = GetCUDADeviceCount(); + for (int i = 0; i < count; i++) { + SetDeviceId(i); + PADDLE_ENFORCE(cudaDeviceSynchronize()); + } +#endif +} + void EnableProfiler(ProfilerState state) { PADDLE_ENFORCE(state != ProfilerState::kDisabled, "Can't enable profiling, since the input state is ", "ProfilerState::kDisabled"); - + SynchronizeAllDevice(); std::lock_guard l(profiler_mu); if (state == g_state) { return; @@ -238,23 +218,20 @@ void EnableProfiler(ProfilerState state) { should_send_profile_state = true; GetDeviceTracer()->Enable(); #ifdef PADDLE_WITH_CUDA - if (g_state == ProfilerState::kCUDA) { + if (g_state == ProfilerState::kCUDA || g_state == ProfilerState::kAll || + g_state == ProfilerState::kCPU) { // Generate some dummy events first to reduce the startup overhead. - for (int i = 0; i < 5; i++) { - ForEachDevice([](int d) { - DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(d)); - Mark("_cuda_startup_", dev_ctx); - dev_ctx->Wait(); - delete dev_ctx; - }); - } + DummyKernelAndEvent(); + GetDeviceTracer()->Reset(); } #endif // Mark the profiling start. - Mark("_start_profiler_", nullptr); + Mark("_start_profiler_"); } void ResetProfiler() { + SynchronizeAllDevice(); + GetDeviceTracer()->Reset(); std::lock_guard guard(g_all_event_lists_mutex); for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); ++it) { @@ -277,9 +254,11 @@ struct EventItem { std::string name; int calls; double total_time; - double min_time; double max_time; double ave_time; + double min_time; + double cpu_time; + double gpu_time; float ratio; }; @@ -313,8 +292,12 @@ void PrintProfiler(const std::vector>& events_table, // Output events table std::cout.setf(std::ios::left); std::cout << std::setw(name_width) << "Event" << std::setw(data_width) - << "Calls" << std::setw(data_width) << "Total" - << std::setw(data_width) << "Min." << std::setw(data_width) + << "Calls" << std::setw(data_width) << "Total"; + if (g_state == ProfilerState::kAll) { + std::cout << std::setw(data_width * 2) << "CPU Time (Ratio)" + << std::setw(data_width * 2) << "GPU Time (Ratio)"; + } + std::cout << std::setw(data_width) << "Min." << std::setw(data_width) << "Max." << std::setw(data_width) << "Ave." << std::setw(data_width) << "Ratio." << std::endl; for (size_t i = 0; i < events_table.size(); ++i) { @@ -322,8 +305,18 @@ void PrintProfiler(const std::vector>& events_table, const EventItem& event_item = events_table[i][j]; std::cout << std::setw(name_width) << event_item.name << std::setw(data_width) << event_item.calls - << std::setw(data_width) << event_item.total_time - << std::setw(data_width) << event_item.min_time + << std::setw(data_width) << event_item.total_time; + if (g_state == ProfilerState::kAll) { + std::cout << std::setw(data_width * 2) + << string::Sprintf( + "%f (%f)", event_item.cpu_time, + (event_item.cpu_time / event_item.total_time)) + << std::setw(data_width * 2) + << string::Sprintf( + "%f (%f)", event_item.gpu_time, + (event_item.gpu_time / event_item.total_time)); + } + std::cout << std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.max_time << std::setw(data_width) << event_item.ave_time << std::setw(data_width) << event_item.ratio << std::endl; @@ -372,6 +365,18 @@ void ParseEvents(const std::vector>& events, return a.ave_time > b.ave_time; }; break; + case EventSortingKey::kGPUTime: + sorted_domain = "average time"; + sorted_func = [](const EventItem& a, const EventItem& b) { + return a.gpu_time > b.gpu_time; + }; + break; + case EventSortingKey::kCPUTime: + sorted_domain = "average time"; + sorted_func = [](const EventItem& a, const EventItem& b) { + return a.cpu_time > b.cpu_time; + }; + break; default: sorted_domain = "event first end time"; } @@ -410,10 +415,17 @@ void ParseEvents(const std::vector>& events, } if (rit != pushed_events.rend()) { - double event_time = (g_state == ProfilerState::kCUDA || - g_state == ProfilerState::kAll) - ? rit->CudaElapsedMs((*analyze_events)[i][j]) - : rit->CpuElapsedMs((*analyze_events)[i][j]); + double event_time = 0; + double gpu_time = rit->CudaElapsedMs((*analyze_events)[i][j]); + double cpu_time = rit->CpuElapsedMs((*analyze_events)[i][j]); + if (g_state == ProfilerState::kCUDA) { + event_time = gpu_time; + } else if (g_state == ProfilerState::kCPU) { + event_time = cpu_time; + } else { + event_time = gpu_time + cpu_time; + } + total += event_time; std::string event_name; @@ -430,7 +442,7 @@ void ParseEvents(const std::vector>& events, event_idx[event_name] = event_items.size(); EventItem event_item = {event_name, 1, event_time, event_time, event_time, event_time, - 0.}; + gpu_time, cpu_time, 0.}; event_items.push_back(event_item); } else { int index = event_idx[event_name]; @@ -443,6 +455,8 @@ void ParseEvents(const std::vector>& events, // max time event_items[index].max_time = std::max(event_time, event_items[index].max_time); + event_items[index].gpu_time += gpu_time; + event_items[index].cpu_time += cpu_time; } // remove the push marker from the list @@ -481,20 +495,23 @@ void ParseEvents(const std::vector>& events, void DisableProfiler(EventSortingKey sorted_key, const std::string& profile_path) { + SynchronizeAllDevice(); std::lock_guard l(profiler_mu); if (g_state == ProfilerState::kDisabled) return; // Mark the profiling stop. - Mark("_stop_profiler_", nullptr); + Mark("_stop_profiler_"); - std::vector> all_events = GetAllEvents(); - ParseEvents(all_events, true, sorted_key); - ParseEvents(all_events, false, sorted_key); - ResetProfiler(); DeviceTracer* tracer = GetDeviceTracer(); if (tracer->IsEnabled()) { tracer->Disable(); tracer->GenProfile(profile_path); + tracer->GenEventKernelCudaElapsedTime(); } + + std::vector> all_events = GetAllEvents(); + ParseEvents(all_events, true, sorted_key); + ParseEvents(all_events, false, sorted_key); + ResetProfiler(); g_state = ProfilerState::kDisabled; should_send_profile_state = true; } diff --git a/paddle/fluid/platform/profiler.cu b/paddle/fluid/platform/profiler.cu new file mode 100644 index 0000000000000000000000000000000000000000..aed276b16e95f954539d3fadac65309314ed34f1 --- /dev/null +++ b/paddle/fluid/platform/profiler.cu @@ -0,0 +1,50 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace platform { + +__global__ void DummyKernel(int *a) { a[0] = 0; } + +static void ForEachDevice(std::function func) { + auto original_device = platform::GetCurrentDeviceId(); + int count = platform::GetCUDADeviceCount(); + for (int i = 0; i < count; i++) { + platform::SetDeviceId(i); + func(i); + } + platform::SetDeviceId(original_device); +} + +void DummyKernelAndEvent() { + for (int i = 0; i < 5; i++) { + ForEachDevice([](int d) { + platform::SetDeviceId(d); + cudaStream_t stream; + PADDLE_ENFORCE(cudaStreamCreate(&stream)); + Mark("_cuda_startup_"); + int *ptr; + PADDLE_ENFORCE(cudaMalloc(&ptr, sizeof(int))); + DummyKernel<<<1, 1, 0, stream>>>(ptr); + PADDLE_ENFORCE(cudaStreamSynchronize(stream)); + PADDLE_ENFORCE(cudaFree(ptr)); + }); + } +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index f5d3490634f3199a23986ec3ae13d9fe3577ac35..aec0ae34292d62905de0e1f459b2b6db4554ebb7 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -17,44 +17,13 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace platform { - -enum EventType { kMark, kPushRange, kPopRange }; - -class Event { - public: - // The DeviceContext is used to get the cuda stream. - // If CPU profiling mode, can pass nullptr. - Event(EventType type, std::string name, uint32_t thread_id, - const DeviceContext* dev_ctx); - - const EventType& type() const; - std::string name() const { return name_; } - uint32_t thread_id() const { return thread_id_; } - bool has_cuda() const { return has_cuda_; } - +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/event.h" #ifdef PADDLE_WITH_CUDA - cudaEvent_t event() const { return event_; } - int device() const { return device_; } +#include "paddle/fluid/platform/gpu_info.h" #endif - - double CpuElapsedMs(const Event& e) const; - double CudaElapsedMs(const Event& e) const; - - private: - EventType type_; - std::string name_; - uint32_t thread_id_; - int64_t cpu_ns_; - bool has_cuda_; -#ifdef PADDLE_WITH_CUDA - cudaEvent_t event_ = nullptr; - int device_ = -1; -#endif -}; +namespace paddle { +namespace platform { enum ProfilerState { kDisabled, // disabled state @@ -63,22 +32,19 @@ enum ProfilerState { kAll, // Profile both CPU and GPU. (Currently experimental). }; -void Mark(const std::string& name, const DeviceContext* dev_ctx); +void Mark(const std::string& name); -void PushEvent(const std::string& name, const DeviceContext* dev_ctx); +Event* PushEvent(const std::string& name); -void PopEvent(const std::string& name, const DeviceContext* dev_ctx); +void PopEvent(const std::string& name); struct RecordEvent { - // dev_ctx can be set to nullptr if device is cpu. - RecordEvent(const std::string& name, const DeviceContext* dev_ctx); + explicit RecordEvent(const std::string& name); ~RecordEvent(); bool is_enabled_; uint64_t start_ns_; - // The device context is used by Event to get the current cuda stream. - const DeviceContext* dev_ctx_; // Event name std::string name_; // Need to distinguish name by op type, block_id, program_id and perhaps @@ -88,8 +54,7 @@ struct RecordEvent { class RecordRPCEvent { public: - // dev_ctx can be set to nullptr if device is cpu. - RecordRPCEvent(const std::string& name, const DeviceContext* dev_ctx); + explicit RecordRPCEvent(const std::string& name); ~RecordRPCEvent() {} private: @@ -111,7 +76,16 @@ struct RecordBlock { std::vector> GetAllEvents(); // Candidate keys to sort the profiling report -enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve }; +enum EventSortingKey { + kDefault, + kCalls, + kTotal, + kMin, + kMax, + kAve, + kCPUTime, + kGPUTime +}; // Enable the profiling function. void EnableProfiler(ProfilerState state); @@ -132,5 +106,9 @@ bool ShouldSendProfileState(); void SetProfileListener(); int64_t ListenerId(); +#ifdef PADDLE_WITH_CUDA +void DummyKernelAndEvent(); +#endif + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/profiler.proto b/paddle/fluid/platform/profiler.proto index 7b42aa785ec6ad5731e3adee1e9f189127a826a1..e761d7b266e92fd5d47b5b6073ffc8bea1dc877d 100644 --- a/paddle/fluid/platform/profiler.proto +++ b/paddle/fluid/platform/profiler.proto @@ -31,6 +31,7 @@ message Event { optional int64 sub_device_id = 6; optional MemCopy memcopy = 7; + optional string detail_info = 9; } message Profile { diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index 61f467814ba4a24c8b73f1bc614cda0ab8c4debd..a851488e72d27dfcbd04546d9b531d26257f611c 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -23,76 +23,48 @@ TEST(Event, CpuElapsedTime) { using paddle::platform::Event; using paddle::platform::EventType; - Event start_event(EventType::kPushRange, "test", 0, nullptr); - EXPECT_TRUE(start_event.has_cuda() == false); + Event start_event(EventType::kPushRange, "test", 0); int counter = 0; while (counter != 1000) { counter++; } - Event stop_event(EventType::kPopRange, "test", 0, nullptr); + Event stop_event(EventType::kPopRange, "test", 0); EXPECT_GT(start_event.CpuElapsedMs(stop_event), 0); } -#ifdef PADDLE_WITH_CUDA -TEST(Event, CudaElapsedTime) { - using paddle::platform::DeviceContext; - using paddle::platform::CUDADeviceContext; - using paddle::platform::CUDAPlace; - using paddle::platform::Event; - using paddle::platform::EventType; - - DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(0)); - Event start_event(EventType::kPushRange, "test", 0, dev_ctx); - EXPECT_TRUE(start_event.has_cuda() == true); - int counter = 0; - while (counter != 1000) { - counter++; - } - Event stop_event(EventType::kPopRange, "test", 0, dev_ctx); - EXPECT_GT(start_event.CudaElapsedMs(stop_event), 0); -} -#endif - TEST(RecordEvent, RecordEvent) { - using paddle::platform::DeviceContext; using paddle::platform::Event; using paddle::platform::EventType; using paddle::platform::RecordEvent; + using paddle::platform::PushEvent; + using paddle::platform::PopEvent; using paddle::platform::ProfilerState; using paddle::platform::EventSortingKey; ProfilerState state = ProfilerState::kCPU; - DeviceContext* dev_ctx = nullptr; -#ifdef PADDLE_WITH_CUDA - using paddle::platform::CUDADeviceContext; - using paddle::platform::CUDAPlace; - state = ProfilerState::kCUDA; - dev_ctx = - new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace(0)); -#endif EnableProfiler(state); /* Usage 1: - * PushEvent(evt_name, dev_ctx); + * PushEvent(evt_name); * ... * code to be analyzed * ... - * PopEvent(evt_name, dev_ctx); + * PopEvent(evt_name); */ LOG(INFO) << "Usage 1: PushEvent & PopEvent"; for (int loop = 0; loop < 3; ++loop) { for (int i = 1; i < 5; ++i) { std::string name = "op_" + std::to_string(i); - PushEvent(name, dev_ctx); + PushEvent(name); int counter = 1; while (counter != i * 1000) counter++; - PopEvent(name, dev_ctx); + PopEvent(name); } } /* Usage 2: * { - * RecordEvent record_event(name, dev_ctx); + * RecordEvent record_event(name); * ... * code to be analyzed * ... @@ -101,7 +73,7 @@ TEST(RecordEvent, RecordEvent) { LOG(INFO) << "Usage 2: RecordEvent"; for (int i = 1; i < 5; ++i) { std::string name = "evs_op_" + std::to_string(i); - RecordEvent record_event(name, dev_ctx); + RecordEvent record_event(name); int counter = 1; while (counter != i * 1000) counter++; } @@ -123,20 +95,20 @@ TEST(RecordEvent, RecordEvent) { LOG(INFO) << "Usage 3: nested RecordEvent"; for (int i = 1; i < 5; ++i) { std::string name = "ano_evs_op_" + std::to_string(i); - RecordEvent record_event(name, dev_ctx); + RecordEvent record_event(name); int counter = 1; while (counter != i * 100) counter++; { std::string nested_name = "nested_ano_evs_op_" + std::to_string(i); - RecordEvent nested_record_event(nested_name, dev_ctx); + RecordEvent nested_record_event(nested_name); int nested_counter = 1; while (nested_counter != i * 100) nested_counter++; } } // Bad Usage: - PushEvent("event_without_pop", dev_ctx); - PopEvent("event_without_push", dev_ctx); + PushEvent("event_without_pop"); + PopEvent("event_without_push"); std::vector> events = paddle::platform::GetAllEvents(); int cuda_startup_count = 0; diff --git a/paddle/fluid/platform/temporary_allocator_test.cc b/paddle/fluid/platform/temporary_allocator_test.cc index 3879cd540017ea22b0cf4eee794a172e56716b74..6dae84f016e5db8007b4a4b4df2b5ed7f5cb4f19 100644 --- a/paddle/fluid/platform/temporary_allocator_test.cc +++ b/paddle/fluid/platform/temporary_allocator_test.cc @@ -141,7 +141,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = static_cast(pool.Get(cpu_place)); - framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx); + framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr); int numel = memory_size / sizeof(float); framework::Tensor tensor = @@ -156,7 +156,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = static_cast(pool.Get(gpu_place)); - framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx); + framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr); int numel = memory_size / sizeof(float); framework::Tensor tensor = ctx.AllocateTmpTensor( @@ -179,7 +179,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = static_cast(pool.Get(cpu_place)); - framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx); + framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr); int numel = memory_size / sizeof(float); framework::Tensor out_side_tensor; @@ -200,7 +200,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto* dev_ctx = static_cast(pool.Get(gpu_place)); - framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx); + framework::ExecutionContext ctx(op, scope, *dev_ctx, run_ctx, nullptr); size_t memory_size = 500; int numel = memory_size / sizeof(float); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 31c3bfa43ffec22059a602e9ff09a33188d72c91..aeabed19abfda3c857f54e5ada54d52bf95e2602 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -34,8 +34,8 @@ void BindTracer(pybind11::module* m) { framework::BlockDesc* block, const platform::CPUPlace expected_place, const bool stop_gradient = false) { - self.Trace(op, inputs, outputs, block, expected_place, - stop_gradient); + return self.Trace(op, inputs, outputs, block, expected_place, + stop_gradient); }) .def("trace", [](imperative::Tracer& self, imperative::OpBase* op, @@ -44,8 +44,8 @@ void BindTracer(pybind11::module* m) { framework::BlockDesc* block, const platform::CUDAPlace expected_place, const bool stop_gradient = false) { - self.Trace(op, inputs, outputs, block, expected_place, - stop_gradient); + return self.Trace(op, inputs, outputs, block, expected_place, + stop_gradient); }) .def("py_trace", &imperative::Tracer::PyTrace, pybind11::return_value_policy::take_ownership); diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h index f947b743f99d5d4994b1a87f89fd6815357d8125..8c48b2a7153c566930a074bd0bab1f054c13c2d5 100644 --- a/paddle/fluid/pybind/imperative.h +++ b/paddle/fluid/pybind/imperative.h @@ -33,7 +33,7 @@ class Layer : public imperative::Layer { } }; -class PyOpBase : public imperative::OpBase { +class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase { public: using imperative::OpBase::OpBase; // Inherit constructors }; diff --git a/paddle/fluid/pybind/ir.cc b/paddle/fluid/pybind/ir.cc index 24059140ab20e24917b93a5f60936b1087797ff9..68f74a8531fff0c49c8a62d12f5cde7af77faf8a 100644 --- a/paddle/fluid/pybind/ir.cc +++ b/paddle/fluid/pybind/ir.cc @@ -13,10 +13,13 @@ // limitations under the License. #include "paddle/fluid/pybind/ir.h" +#include +#include #include #include #include #include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/op_desc.h" @@ -27,6 +30,10 @@ namespace py = pybind11; using paddle::framework::ir::Graph; using paddle::framework::ir::Node; using paddle::framework::ir::GraphSafeRemoveNodes; +using paddle::framework::ir::HasCircle; +using paddle::framework::ir::GraphNum; +using paddle::framework::ir::TopologySortOperations; +using paddle::framework::ir::BuildOperationAdjList; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; using paddle::framework::VarDesc; @@ -36,6 +43,12 @@ namespace paddle { namespace pybind { void BindGraph(py::module *m) { m->def("graph_safe_remove_nodes", GraphSafeRemoveNodes); + m->def("has_circle", HasCircle); + m->def("graph_num", GraphNum); + m->def("topology_sort", TopologySortOperations, + return_value_policy::reference); + m->def("build_adjacency_list", BuildOperationAdjList, + return_value_policy::reference); py::class_>( *m, "Graph", "The graph is a Directed Acyclic Single Static Assignment Graph, see " @@ -46,7 +59,6 @@ void BindGraph(py::module *m) { .def("get_float", &Graph::Get) .def("get_double", &Graph::Get) .def("get_string", &Graph::Get) - .def("get_program", &Graph::Get) .def("get_marked_nodes", &Graph::Get>) .def("set", [](Graph &self, const std::string &attr_name, int attr) { return self.Set(attr_name, new int(attr)); }) @@ -63,11 +75,6 @@ void BindGraph(py::module *m) { [](Graph &self, const std::string &attr_name, double attr) { return self.Set(attr_name, new double(attr)); }) - .def("set", - [](Graph &self, const std::string &attr_name, - const ProgramDesc &attr) { - return self.Set(attr_name, new ProgramDesc(attr)); - }) .def("set", [](Graph &self, const std::string &attr_name, const std::unordered_set &attr) { @@ -95,7 +102,8 @@ void BindGraph(py::module *m) { [](Graph &self, Node &node) { return self.RemoveNode(&node); }) .def("retrieve_node", &Graph::RetrieveNode, return_value_policy::reference) - .def("resolve_hazard", &Graph::ResolveHazard); + .def("resolve_hazard", &Graph::ResolveHazard) + .def("origin_program_desc", &Graph::OriginProgram); } void BindNode(py::module *m) { @@ -108,45 +116,45 @@ void BindNode(py::module *m) { .def("is_op", &Node::IsOp) .def("is_var", &Node::IsVar) .def("is_ctrl_var", &Node::IsCtrlVar) - .def("inputs_remove", + .def("clear_inputs", [](Node &self) { self.inputs.clear(); }) + .def("remove_input", [](Node &self, int node_id) { - for (auto it = self.inputs.begin(); it != self.inputs.end(); - it++) { - if ((*it)->id() == node_id) { - self.inputs.erase(it); - } + auto pos = std::find_if( + self.inputs.begin(), self.inputs.end(), + [&node_id](const Node *n) { return n->id() == node_id; }); + if (pos != self.inputs.end()) { + self.inputs.erase(pos); } }) - .def("inputs_remove", + .def("remove_input", [](Node &self, Node &node) { - for (auto it = self.inputs.begin(); it != self.inputs.end(); - it++) { - if (*it == &node) { - self.inputs.erase(it); - } + auto pos = + std::find(self.inputs.begin(), self.inputs.end(), &node); + if (pos != self.inputs.end()) { + self.inputs.erase(pos); } }) - .def("inputs_append", + .def("append_input", [](Node &self, Node &node) { self.inputs.push_back(&node); }) - .def("outputs_remove", + .def("clear_outputs", [](Node &self) { self.outputs.clear(); }) + .def("remove_output", [](Node &self, int node_id) { - for (auto it = self.outputs.begin(); it != self.outputs.end(); - it++) { - if ((*it)->id() == node_id) { - self.outputs.erase(it); - } + auto pos = std::find_if( + self.outputs.begin(), self.outputs.end(), + [&node_id](const Node *n) { return n->id() == node_id; }); + if (pos != self.outputs.end()) { + self.outputs.erase(pos); } }) - .def("outputs_remove", + .def("remove_output", [](Node &self, Node &node) { - for (auto it = self.outputs.begin(); it != self.outputs.end(); - it++) { - if (*it == &node) { - self.outputs.erase(it); - } + auto pos = + std::find(self.outputs.begin(), self.outputs.end(), &node); + if (pos != self.outputs.end()) { + self.outputs.erase(pos); } }) - .def("outputs_append", + .def("append_output", [](Node &self, Node &node) { self.outputs.push_back(&node); }) .def_readwrite("inputs", &Node::inputs) .def_readwrite("outputs", &Node::outputs); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 351513712cc4297bf7fbe67878aeba162ef66e4d..cf59ff6d3b97a4be5d87f1185acc6173b5d501b2 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -86,6 +86,14 @@ bool IsCompiledWithCUDA() { #endif } +bool IsCompiledWithMKLDNN() { +#ifndef PADDLE_WITH_MKLDNN + return false; +#else + return true; +#endif +} + bool IsCompiledWithBrpc() { #ifndef PADDLE_WITH_DISTRIBUTE return false; @@ -106,6 +114,11 @@ bool IsCompiledWithDIST() { #endif } +template +static inline bool IsSamePlace(const PlaceType1 &p1, const PlaceType2 &p2) { + return paddle::platform::Place(p1) == paddle::platform::Place(p2); +} + PYBIND11_MODULE(core, m) { // Not used, just make sure cpu_info.cc is linked. paddle::platform::CpuTotalPhysicalMemory(); @@ -164,6 +177,23 @@ PYBIND11_MODULE(core, m) { py::return_value_policy::take_ownership) .def("value", [](const imperative::VarBase &self) { return self.var_; }, py::return_value_policy::reference) + .def_property("name", + [](const imperative::VarBase &self) { return self.name_; }, + [](imperative::VarBase &self, const std::string &name) { + self.name_ = name; + }) + .def_property("block", + [](const imperative::VarBase &self) { return self.block_; }, + [](imperative::VarBase &self, framework::BlockDesc *block) { + self.block_ = block; + }, + py::return_value_policy::reference) + .def_property( + "persistable", + [](const imperative::VarBase &self) { return self.persistable_; }, + [](imperative::VarBase &self, const bool persistable) { + self.persistable_ = persistable; + }) .def_property( "desc", [](const imperative::VarBase &self) { return self.var_desc_; }, @@ -180,6 +210,10 @@ PYBIND11_MODULE(core, m) { py::class_(m, "OpBase", R"DOC()DOC") .def(py::init<>()) + .def("register_backward_hooks", + [](imperative::OpBase &self, const py::object &callable) { + self.RegisterBackwardHooks(callable); + }) .def_property( "desc", [](const imperative::OpBase &self) { return self.op_desc_; }, [](imperative::OpBase &self, framework::OpDesc *op_desc) { @@ -188,6 +222,16 @@ PYBIND11_MODULE(core, m) { } }, py::return_value_policy::reference) + .def_property("_trace_id", + [](const imperative::OpBase &self) { + pybind11::gil_scoped_release release; + return self.trace_id_; + }, + [](imperative::OpBase &self, int trace_id) { + pybind11::gil_scoped_release release; + self.trace_id_ = trace_id; + }, + py::return_value_policy::reference) .def_property( "forward_id", [](const imperative::OpBase &self) { return self.forward_id_; }, @@ -373,7 +417,13 @@ PYBIND11_MODULE(core, m) { PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()), "the provided lod info is invalid"); self.set_lod(new_lod); - }) + }, + py::arg("lod"), R"DOC( + Set LoD of the LoDTensor. + + Args: + lod (List[List[int]]): the lod to be set. + )DOC") .def("set_recursive_sequence_lengths", [](LoDTensor &self, const std::vector> &recursive_sequence_lengths) { @@ -389,7 +439,17 @@ PYBIND11_MODULE(core, m) { CheckLoD(new_offset_lod, vectorize(self.dims()).front()), "the provided recursive_sequence_lengths info is invalid"); self.set_lod(new_offset_lod); - }) + }, + py::arg("recursive_sequence_lengths"), R"DOC( + Set LoD of the LoDTensor according to recursive sequence length. + + For example, if recursive_sequence_lengths=[[2, 3]], meaning that + there are two sequences with length 2 and 3 respectively, the + corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]]. + + Args: + recursive_sequence_lengths (List[List[int]]): sequence lengths. + )DOC") .def("lod", [](LoDTensor &self) -> std::vector> { // output the offset-based lod info @@ -398,7 +458,13 @@ PYBIND11_MODULE(core, m) { new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); return new_lod; - }) + }, + R"DOC( + Return the LoD of the LoDTensor. + + Returns: + out (List[List[int]]): the lod of the LoDTensor. + )DOC") // Set above comments of set_lod. .def("recursive_sequence_lengths", [](LoDTensor &self) -> std::vector> { @@ -408,12 +474,25 @@ PYBIND11_MODULE(core, m) { new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); return new_lod; - }) - .def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool { - // Check that the lod info is valid and match the outermost - // dimension of the LoDTensor data - return CheckLoD(self.lod(), vectorize(self.dims()).front()); - }); + }, + R"DOC( + Return the sequence length of the LoDTensor corresponding to LoD. + + Returns: + out (List[List[int]): the sequence lengths. + )DOC") + .def("has_valid_recursive_sequence_lengths", + [](LoDTensor &self) -> bool { + // Check that the lod info is valid and match the outermost + // dimension of the LoDTensor data + return CheckLoD(self.lod(), vectorize(self.dims()).front()); + }, + R"DOC( + Check whether the lod of the LoDTensor is valid. + + Returns: + out (bool): whether the lod is valid. + )DOC"); py::class_(m, "SelectedRows") .def("__init__", @@ -549,11 +628,45 @@ All parameter, weight, gradient are variables in Paddle. [](Scope &self, const std::string &name) -> Variable * { return self.Var(name); }, + py::arg("name"), + R"DOC( + Find or create variable named :code:`name` in the current scope. + + If the variable named :code:`name` does not exist in the + current scope, the variable would be created. Otherwise, + return the existing variable. + + Args: + name (str): the variable name. + + Returns: + out (core.Variable): the found or created variable. + )DOC", + py::return_value_policy::reference) + .def("find_var", &Scope::FindVar, py::arg("name"), + R"DOC( + Find variable named :code:`name` in the current scope or + its parent scope. Return None if not found. + + Args: + name (str): the variable name. + + Returns: + out (core.Variable|None): the found variable or None. + )DOC", py::return_value_policy::reference) - .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, + R"DOC( + Create a new sub-scope of the current scope. + + Returns: + out (core._Scope): the created sub-scope. + )DOC", py::return_value_policy::reference) - .def("drop_kids", &Scope::DropKids); + .def("drop_kids", &Scope::DropKids, + R"DOC( + Delete all sub-scopes of the current scope. + )DOC"); m.def("Scope", []() -> Scope * { @@ -561,6 +674,12 @@ All parameter, weight, gradient are variables in Paddle. ScopePool::Instance().Insert(std::unique_ptr(s)); return s; }, + R"DOC( + Create a new scope. + + Returns: + out (core._Scope): the created scope. + )DOC", py::return_value_policy::reference); //! @note: Be careful! PyBind will return std::string as an unicode, not @@ -657,23 +776,45 @@ All parameter, weight, gradient are variables in Paddle. PADDLE_THROW("Cannot use CUDAPlace in CPU only version"); #endif }) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", + &IsSamePlace) .def("__str__", string::to_string); py::class_(m, "CPUPlace") .def(py::init<>()) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", + &IsSamePlace) .def("__str__", string::to_string); py::class_(m, "CUDAPinnedPlace") .def("__init__", - [](platform::CUDAPinnedPlace &) { + [](platform::CUDAPinnedPlace &self) { #ifndef PADDLE_WITH_CUDA PADDLE_THROW("Cannot use CUDAPinnedPlace in CPU only version"); #endif + new (&self) platform::CUDAPinnedPlace(); }) + .def("_equals", &IsSamePlace) + .def("_equals", + &IsSamePlace) + .def("_equals", + &IsSamePlace) + .def("_equals", + &IsSamePlace) .def("__str__", string::to_string); py::class_(m, "Place") .def(py::init<>()) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) + .def("_equals", &IsSamePlace) .def("is_gpu_place", [](platform::Place &self) { return platform::is_gpu_place(self); }) .def("gpu_device_id", @@ -746,6 +887,7 @@ All parameter, weight, gradient are variables in Paddle. [](bool init_p2p) { framework::InitDevices(init_p2p); }); m.def("is_compiled_with_cuda", IsCompiledWithCUDA); + m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN); m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_dist", IsCompiledWithDIST); #ifdef PADDLE_WITH_CUDA @@ -789,11 +931,13 @@ All parameter, weight, gradient are variables in Paddle. self[i].ShareDataWith(t); self[i].set_lod(t.lod()); }) - .def("append", [](LoDTensorArray &self, const LoDTensor &t) { - self.emplace_back(); - self.back().ShareDataWith(t); - self.back().set_lod(t.lod()); - }); + .def("append", + [](LoDTensorArray &self, const LoDTensor &t) { + self.emplace_back(); + self.back().ShareDataWith(t); + self.back().set_lod(t.lod()); + }, + py::arg("tensor"), "Append a LoDensor to LoDTensorArray."); m.def("IsInplace", [](std::string op) -> bool { return operators::IsInplace(op); }); @@ -829,8 +973,7 @@ All parameter, weight, gradient are variables in Paddle. m.def("disable_profiler", platform::DisableProfiler); m.def("is_profiler_enabled", platform::IsProfileEnabled); m.def("reset_profiler", platform::ResetProfiler); - m.def("get_pass", [](const py::bytes &binary_str) { - std::string pass_type(binary_str); + m.def("get_pass", [](const std::string &pass_type) { auto pass = framework::ir::PassRegistry::Instance().Get(pass_type); return std::shared_ptr(std::move(pass)); }); @@ -838,10 +981,9 @@ All parameter, weight, gradient are variables in Paddle. py::class_> pass(m, "Pass"); pass.def(py::init()) .def("has", &ir::Pass::Has) - .def("set", - [](ir::Pass &self, const std::string &attr_name, - const ProgramDesc &attr) { - return self.Set(attr_name, new ProgramDesc(attr)); + .def("set_not_owned", + [](ir::Pass &self, const std::string &attr_name, ProgramDesc &attr) { + self.SetNotOwned(attr_name, &attr); }) .def( "set", @@ -850,7 +992,6 @@ All parameter, weight, gradient are variables in Paddle. }) .def("set", [](ir::Pass &self, const std::string &name, int val) { self.Set(name, new int(val)); }) - .def("get_program", &ir::Pass::Get) .def("type", &ir::Pass::Type) .def("apply", [](ir::Pass &self, std::shared_ptr graph) { std::unique_ptr origin_graph(graph.get()); @@ -875,6 +1016,7 @@ All parameter, weight, gradient are variables in Paddle. [](ir::PassBuilder &self, size_t idx) { self.RemovePass(idx); }); // -- python binds for parallel executor. + py::class_ pe(m, "ParallelExecutor"); py::class_ exec_strategy(pe, "ExecutionStrategy", R"DOC( ExecutionStrategy allows the user to more preciously control how to run @@ -1112,9 +1254,9 @@ All parameter, weight, gradient are variables in Paddle. cannot be updated after being finalized.)DOC"); pe.def(py::init &, - const std::unordered_set &, const ProgramDesc &, - const std::string &, Scope *, std::vector &, - const ExecutionStrategy &, const BuildStrategy &>()) + const std::unordered_set &, const std::string &, + Scope *, std::vector &, const ExecutionStrategy &, + const BuildStrategy &, ir::Graph *>()) // NOTE: even we return a vec* to Python use reference policy. // We still cannot get local_scope from this vector, since the element // of vec will be freed by Python GC. We can only return Scope* diff --git a/paddle/fluid/train/demo/README.md b/paddle/fluid/train/demo/README.md index 191da20669e185d819ec5eed55427461cc0b10e4..bd53ab4b0c023b2591d792b504ab496a42d2835d 100644 --- a/paddle/fluid/train/demo/README.md +++ b/paddle/fluid/train/demo/README.md @@ -9,7 +9,6 @@ PADDLE_LIB=/paddle/lib/dir cmake .. -DFLUID_INSTALL_DIR=$PADDLE_LIB \ -DCMAKE_BUILD_TYPE=Release \ - -DWITH_FLUID_ONLY=ON \ -DWITH_GPU=OFF \ -DWITH_STYLE_CHECK=OFF \ -DWITH_MKL=OFF \ diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index a0757b53f37b29de0b3802c345b1ad9db69f16e9..1087f5672459506cc7b824127cd822c0df7ba566 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -73,7 +73,7 @@ int main() { PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); // init all parameters - executor.Run(*startup_program.get(), &scope, 0); + executor.Run(*startup_program, &scope, 0); // prepare data auto x_var = scope.Var("x"); @@ -101,7 +101,7 @@ int main() { clock_t t1 = clock(); for (int i = 0; i < 10; ++i) { - executor.Run(*train_program.get(), &scope, 0, false, true); + executor.Run(*train_program, &scope, 0, false, true); std::cout << "step: " << i << " loss: " << loss_var->Get().data()[0] << std::endl; diff --git a/paddle/fluid/train/test_train_recognize_digits.cc b/paddle/fluid/train/test_train_recognize_digits.cc index e8731dd51ad698e53b7f10cc781c52134f2d17a8..a7846da8c191ac96e9ad7fb5b3184518e32120b2 100644 --- a/paddle/fluid/train/test_train_recognize_digits.cc +++ b/paddle/fluid/train/test_train_recognize_digits.cc @@ -74,7 +74,7 @@ void Train() { float first_loss = 0.0; float last_loss = 0.0; for (int i = 0; i < 100; ++i) { - executor.Run(*train_program.get(), &scope, 0, false, true); + executor.Run(*train_program, &scope, 0, false, true); if (i == 0) { first_loss = loss_var->Get().data()[0]; } else if (i == 99) { diff --git a/paddle/scripts/README.md b/paddle/scripts/README.md index 6c608fce3cdad38f3109e563be3ffbe2f73e5390..1db262f06d97665ee09b8e1d3485982b6b1b33d6 100644 --- a/paddle/scripts/README.md +++ b/paddle/scripts/README.md @@ -66,12 +66,10 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | | `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | -| `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. | | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | | `WITH_STYLE_CHECK` | ON | Check the code style when building. | | `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu | | `RUN_TEST` | OFF | Run unit test immediently after the build. | -| `WITH_DOC` | OFF | Build docs after build binaries. | | `WOBOQ` | OFF | Generate WOBOQ code viewer under `build/woboq_out` | ## Docker Images diff --git a/paddle/scripts/cpplint.py b/paddle/scripts/cpplint.py deleted file mode 100644 index dff4339ea33b72e22104a56183e3302067dc583d..0000000000000000000000000000000000000000 --- a/paddle/scripts/cpplint.py +++ /dev/null @@ -1,6425 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2009 Google Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""Does google-lint on c++ files. - -The goal of this script is to identify places in the code that *may* -be in non-compliance with google style. It does not attempt to fix -up these problems -- the point is to educate. It does also not -attempt to find all problems, or to ensure that everything it does -find is legitimately a problem. - -In particular, we can get very confused by /* and // inside strings! -We do a small hack, which is to ignore //'s with "'s after them on the -same line, but it is far from perfect (in either direction). - -EDIT(yuyang18): Add #pragma once as include guard. -EDIT(yuyang18): Add NOLINTNEXTLINES_ to suppress multiline lint. -""" - -import codecs -import copy -import getopt -import math # for log -import os -import re -import sre_compile -import string -import sys -import unicodedata - -_USAGE = """ -Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] - [--counting=total|toplevel|detailed] [--root=subdir] - [--linelength=digits] - [--write-success=success_status_file] - [file] ... - - The style guidelines this tries to follow are those in - http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml - - Every problem is given a confidence score from 1-5, with 5 meaning we are - certain of the problem, and 1 meaning it could be a legitimate construct. - This will miss some errors, and is not a substitute for a code review. - - To suppress false-positive errors of a certain category, add a - 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) - suppresses errors of all categories on that line. - - The files passed in will be linted; at least one file must be provided. - Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the - extensions with the --extensions flag. - - Flags: - - output=vs7 - By default, the output is formatted to ease emacs parsing. Visual Studio - compatible output (vs7) may also be used. Other formats are unsupported. - - verbose=# - Specify a number 0-5 to restrict errors to certain verbosity levels. - - filter=-x,+y,... - Specify a comma-separated list of category-filters to apply: only - error messages whose category names pass the filters will be printed. - (Category names are printed with the message and look like - "[whitespace/indent]".) Filters are evaluated left to right. - "-FOO" and "FOO" means "do not print categories that start with FOO". - "+FOO" means "do print categories that start with FOO". - - Examples: --filter=-whitespace,+whitespace/braces - --filter=whitespace,runtime/printf,+runtime/printf_format - --filter=-,+build/include_what_you_use - - To see a list of all the categories used in cpplint, pass no arg: - --filter= - - counting=total|toplevel|detailed - The total number of errors found is always printed. If - 'toplevel' is provided, then the count of errors in each of - the top-level categories like 'build' and 'whitespace' will - also be printed. If 'detailed' is provided, then a count - is provided for each category like 'build/class'. - - root=subdir - The root directory used for deriving header guard CPP variable. - By default, the header guard CPP variable is calculated as the relative - path to the directory that contains .git, .hg, or .svn. When this flag - is specified, the relative path is calculated from the specified - directory. If the specified directory does not exist, this flag is - ignored. - - Examples: - Assuming that src/.git exists, the header guard CPP variables for - src/chrome/browser/ui/browser.h are: - - No flag => CHROME_BROWSER_UI_BROWSER_H_ - --root=chrome => BROWSER_UI_BROWSER_H_ - --root=chrome/browser => UI_BROWSER_H_ - - linelength=digits - This is the allowed line length for the project. The default value is - 80 characters. - - Examples: - --linelength=120 - - extensions=extension,extension,... - The allowed file extensions that cpplint will check - - Examples: - --extensions=hpp,cpp - - cpplint.py supports per-directory configurations specified in CPPLINT.cfg - files. CPPLINT.cfg file can contain a number of key=value pairs. - Currently the following options are supported: - - set noparent - filter=+filter1,-filter2,... - exclude_files=regex - linelength=80 - - "set noparent" option prevents cpplint from traversing directory tree - upwards looking for more .cfg files in parent directories. This option - is usually placed in the top-level project directory. - - The "filter" option is similar in function to --filter flag. It specifies - message filters in addition to the |_DEFAULT_FILTERS| and those specified - through --filter command-line flag. - - "exclude_files" allows to specify a regular expression to be matched against - a file name. If the expression matches, the file is skipped and not run - through liner. - - "linelength" allows to specify the allowed line length for the project. - - CPPLINT.cfg has an effect on files in the same directory and all - sub-directories, unless overridden by a nested configuration file. - - Example file: - filter=-build/include_order,+build/include_alpha - exclude_files=.*\.cc - - The above example disables build/include_order warning and enables - build/include_alpha as well as excludes all .cc from being - processed by linter, in the current directory (where the .cfg - file is located) and all sub-directories. -""" - -# We categorize each error message we print. Here are the categories. -# We want an explicit list so we can list them all in cpplint --filter=. -# If you add a new error message with a new category, add it to the list -# here! cpplint_unittest.py should tell you if you forget to do this. -_ERROR_CATEGORIES = [ - 'build/class', - 'build/c++11', - 'build/deprecated', - 'build/endif_comment', - 'build/explicit_make_pair', - 'build/forward_decl', - 'build/header_guard', - 'build/include', - 'build/include_alpha', - 'build/include_order', - 'build/include_what_you_use', - 'build/namespaces', - 'build/printf_format', - 'build/storage_class', - 'legal/copyright', - 'readability/alt_tokens', - 'readability/braces', - 'readability/casting', - 'readability/check', - 'readability/constructors', - 'readability/fn_size', - 'readability/function', - 'readability/inheritance', - 'readability/multiline_comment', - 'readability/multiline_string', - 'readability/namespace', - 'readability/nolint', - 'readability/nul', - 'readability/strings', - 'readability/todo', - 'readability/utf8', - 'runtime/arrays', - 'runtime/casting', - 'runtime/explicit', - 'runtime/int', - 'runtime/init', - 'runtime/invalid_increment', - 'runtime/member_string_references', - 'runtime/memset', - 'runtime/indentation_namespace', - 'runtime/operator', - 'runtime/printf', - 'runtime/printf_format', - 'runtime/references', - 'runtime/string', - 'runtime/threadsafe_fn', - 'runtime/vlog', - 'whitespace/blank_line', - 'whitespace/braces', - 'whitespace/comma', - 'whitespace/comments', - 'whitespace/empty_conditional_body', - 'whitespace/empty_loop_body', - 'whitespace/end_of_line', - 'whitespace/ending_newline', - 'whitespace/forcolon', - 'whitespace/indent', - 'whitespace/line_length', - 'whitespace/newline', - 'whitespace/operators', - 'whitespace/parens', - 'whitespace/semicolon', - 'whitespace/tab', - 'whitespace/todo', -] - -# These error categories are no longer enforced by cpplint, but for backwards- -# compatibility they may still appear in NOLINT comments. -_LEGACY_ERROR_CATEGORIES = ['readability/streams', ] - -# The default state of the category filter. This is overridden by the --filter= -# flag. By default all errors are on, so only add here categories that should be -# off by default (i.e., categories that must be enabled by the --filter= flags). -# All entries here should start with a '-' or '+', as in the --filter= flag. -_DEFAULT_FILTERS = ['-build/include_alpha'] - -# We used to check for high-bit characters, but after much discussion we -# decided those were OK, as long as they were in UTF-8 and didn't represent -# hard-coded international strings, which belong in a separate i18n file. - -# C++ headers -_CPP_HEADERS = frozenset([ - # Legacy - 'algobase.h', - 'algo.h', - 'alloc.h', - 'builtinbuf.h', - 'bvector.h', - 'complex.h', - 'defalloc.h', - 'deque.h', - 'editbuf.h', - 'fstream.h', - 'function.h', - 'hash_map', - 'hash_map.h', - 'hash_set', - 'hash_set.h', - 'hashtable.h', - 'heap.h', - 'indstream.h', - 'iomanip.h', - 'iostream.h', - 'istream.h', - 'iterator.h', - 'list.h', - 'map.h', - 'multimap.h', - 'multiset.h', - 'ostream.h', - 'pair.h', - 'parsestream.h', - 'pfstream.h', - 'procbuf.h', - 'pthread_alloc', - 'pthread_alloc.h', - 'rope', - 'rope.h', - 'ropeimpl.h', - 'set.h', - 'slist', - 'slist.h', - 'stack.h', - 'stdiostream.h', - 'stl_alloc.h', - 'stl_relops.h', - 'streambuf.h', - 'stream.h', - 'strfile.h', - 'strstream.h', - 'tempbuf.h', - 'tree.h', - 'type_traits.h', - 'vector.h', - # 17.6.1.2 C++ library headers - 'algorithm', - 'array', - 'atomic', - 'bitset', - 'chrono', - 'codecvt', - 'complex', - 'condition_variable', - 'deque', - 'exception', - 'forward_list', - 'fstream', - 'functional', - 'future', - 'initializer_list', - 'iomanip', - 'ios', - 'iosfwd', - 'iostream', - 'istream', - 'iterator', - 'limits', - 'list', - 'locale', - 'map', - 'memory', - 'mutex', - 'new', - 'numeric', - 'ostream', - 'queue', - 'random', - 'ratio', - 'regex', - 'set', - 'sstream', - 'stack', - 'stdexcept', - 'streambuf', - 'string', - 'strstream', - 'system_error', - 'thread', - 'tuple', - 'typeindex', - 'typeinfo', - 'type_traits', - 'unordered_map', - 'unordered_set', - 'utility', - 'valarray', - 'vector', - # 17.6.1.2 C++ headers for C library facilities - 'cassert', - 'ccomplex', - 'cctype', - 'cerrno', - 'cfenv', - 'cfloat', - 'cinttypes', - 'ciso646', - 'climits', - 'clocale', - 'cmath', - 'csetjmp', - 'csignal', - 'cstdalign', - 'cstdarg', - 'cstdbool', - 'cstddef', - 'cstdint', - 'cstdio', - 'cstdlib', - 'cstring', - 'ctgmath', - 'ctime', - 'cuchar', - 'cwchar', - 'cwctype', -]) - -# These headers are excluded from [build/include] and [build/include_order] -# checks: -# - Anything not following google file name conventions (containing an -# uppercase character, such as Python.h or nsStringAPI.h, for example). -# - Lua headers. -_THIRD_PARTY_HEADERS_PATTERN = re.compile( - r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') - -# Assertion macros. These are defined in base/logging.h and -# testing/base/gunit.h. Note that the _M versions need to come first -# for substring matching to work. -_CHECK_MACROS = [ - 'DCHECK', - 'CHECK', - 'EXPECT_TRUE_M', - 'EXPECT_TRUE', - 'ASSERT_TRUE_M', - 'ASSERT_TRUE', - 'EXPECT_FALSE_M', - 'EXPECT_FALSE', - 'ASSERT_FALSE_M', - 'ASSERT_FALSE', -] - -# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE -_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) - -for op, replacement in [('==', 'EQ'), ('!=', 'NE'), ('>=', 'GE'), ('>', 'GT'), - ('<=', 'LE'), ('<', 'LT')]: - _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement - _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement - _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement - _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement - _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement - _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement - -for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), ('>=', 'LT'), - ('>', 'LE'), ('<=', 'GT'), ('<', 'GE')]: - _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement - _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement - _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement - _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement - -# Alternative tokens and their replacements. For full list, see section 2.5 -# Alternative tokens [lex.digraph] in the C++ standard. -# -# Digraphs (such as '%:') are not included here since it's a mess to -# match those on a word boundary. -_ALT_TOKEN_REPLACEMENT = { - 'and': '&&', - 'bitor': '|', - 'or': '||', - 'xor': '^', - 'compl': '~', - 'bitand': '&', - 'and_eq': '&=', - 'or_eq': '|=', - 'xor_eq': '^=', - 'not': '!', - 'not_eq': '!=' -} - -# Compile regular expression that matches all the above keywords. The "[ =()]" -# bit is meant to avoid matching these keywords outside of boolean expressions. -# -# False positives include C-style multi-line comments and multi-line strings -# but those have always been troublesome for cpplint. -_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(r'[ =()](' + ('|'.join( - _ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') - -# These constants define types of headers for use with -# _IncludeState.CheckNextIncludeOrder(). -_C_SYS_HEADER = 1 -_CPP_SYS_HEADER = 2 -_LIKELY_MY_HEADER = 3 -_POSSIBLE_MY_HEADER = 4 -_OTHER_HEADER = 5 - -# These constants define the current inline assembly state -_NO_ASM = 0 # Outside of inline assembly block -_INSIDE_ASM = 1 # Inside inline assembly block -_END_ASM = 2 # Last line of inline assembly block -_BLOCK_ASM = 3 # The whole block is an inline assembly block - -# Match start of assembly blocks -_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' - r'(?:\s+(volatile|__volatile__))?' - r'\s*[{(]') - -_regexp_compile_cache = {} - -# {str, set(int)}: a map from error categories to sets of linenumbers -# on which those errors are expected and should be suppressed. -_error_suppressions = {} - -# The root directory used for deriving header guard CPP variable. -# This is set by --root flag. -_root = None - -# The allowed line length of files. -# This is set by --linelength flag. -_line_length = 80 - -# The allowed extensions for file names -# This is set by --extensions flag. -_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh']) - -_write_success = None - - -def ParseNolintSuppressions(filename, raw_line, linenum, error): - """Updates the global list of error-suppressions. - - Parses any NOLINT comments on the current line, updating the global - error_suppressions store. Reports an error if the NOLINT comment - was malformed. - - Args: - filename: str, the name of the input file. - raw_line: str, the line of input text, with comments. - linenum: int, the number of the current line. - error: function, an error handler. - """ - matched = Search(r'\bNOLINT(NEXTLINE(S_\d+)?)?\b(\([^)]+\))?', raw_line) - if matched: - if matched.group(1): - lines = matched.group(2) - if lines: - lines = int(lines[2:]) - suppressed_line = [linenum + i for i in xrange(lines)] - else: - suppressed_line = linenum + 1 - else: - suppressed_line = linenum - category = matched.group(3) - if category in (None, '(*)'): # => "suppress all" - if isinstance(suppressed_line, int): - _error_suppressions.setdefault(None, set()).add(suppressed_line) - else: - for _line in suppressed_line: - _error_suppressions.setdefault(None, set()).add(_line) - else: - if category.startswith('(') and category.endswith(')'): - category = category[1:-1] - if category in _ERROR_CATEGORIES: - if isinstance(suppressed_line, int): - _error_suppressions.setdefault( - category, set()).add(suppressed_line) - else: - for _line in suppressed_line: - _error_suppressions.setdefault(category, - set()).add(_line) - elif category not in _LEGACY_ERROR_CATEGORIES: - error(filename, linenum, 'readability/nolint', 5, - 'Unknown NOLINT error category: %s' % category) - - -def ResetNolintSuppressions(): - """Resets the set of NOLINT suppressions to empty.""" - _error_suppressions.clear() - - -def IsErrorSuppressedByNolint(category, linenum): - """Returns true if the specified error category is suppressed on this line. - - Consults the global error_suppressions map populated by - ParseNolintSuppressions/ResetNolintSuppressions. - - Args: - category: str, the category of the error. - linenum: int, the current line number. - Returns: - bool, True iff the error should be suppressed due to a NOLINT comment. - """ - return (linenum in _error_suppressions.get(category, set()) or - linenum in _error_suppressions.get(None, set())) - - -def Match(pattern, s): - """Matches the string with the pattern, caching the compiled regexp.""" - # The regexp compilation caching is inlined in both Match and Search for - # performance reasons; factoring it out into a separate function turns out - # to be noticeably expensive. - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].match(s) - - -def ReplaceAll(pattern, rep, s): - """Replaces instances of pattern in a string with a replacement. - - The compiled regex is kept in a cache shared by Match and Search. - - Args: - pattern: regex pattern - rep: replacement text - s: search string - - Returns: - string with replacements made (or original string if no replacements) - """ - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].sub(rep, s) - - -def Search(pattern, s): - """Searches the string for the pattern, caching the compiled regexp.""" - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].search(s) - - -class _IncludeState(object): - """Tracks line numbers for includes, and the order in which includes appear. - - include_list contains list of lists of (header, line number) pairs. - It's a lists of lists rather than just one flat list to make it - easier to update across preprocessor boundaries. - - Call CheckNextIncludeOrder() once for each header in the file, passing - in the type constants defined above. Calls in an illegal order will - raise an _IncludeError with an appropriate error message. - - """ - # self._section will move monotonically through this set. If it ever - # needs to move backwards, CheckNextIncludeOrder will raise an error. - _INITIAL_SECTION = 0 - _MY_H_SECTION = 1 - _C_SECTION = 2 - _CPP_SECTION = 3 - _OTHER_H_SECTION = 4 - - _TYPE_NAMES = { - _C_SYS_HEADER: 'C system header', - _CPP_SYS_HEADER: 'C++ system header', - _LIKELY_MY_HEADER: 'header this file implements', - _POSSIBLE_MY_HEADER: 'header this file may implement', - _OTHER_HEADER: 'other header', - } - _SECTION_NAMES = { - _INITIAL_SECTION: "... nothing. (This can't be an error.)", - _MY_H_SECTION: 'a header this file implements', - _C_SECTION: 'C system header', - _CPP_SECTION: 'C++ system header', - _OTHER_H_SECTION: 'other header', - } - - def __init__(self): - self.include_list = [[]] - self.ResetSection('') - - def FindHeader(self, header): - """Check if a header has already been included. - - Args: - header: header to check. - Returns: - Line number of previous occurrence, or -1 if the header has not - been seen before. - """ - for section_list in self.include_list: - for f in section_list: - if f[0] == header: - return f[1] - return -1 - - def ResetSection(self, directive): - """Reset section checking for preprocessor directive. - - Args: - directive: preprocessor directive (e.g. "if", "else"). - """ - # The name of the current section. - self._section = self._INITIAL_SECTION - # The path of last found header. - self._last_header = '' - - # Update list of includes. Note that we never pop from the - # include list. - if directive in ('if', 'ifdef', 'ifndef'): - self.include_list.append([]) - elif directive in ('else', 'elif'): - self.include_list[-1] = [] - - def SetLastHeader(self, header_path): - self._last_header = header_path - - def CanonicalizeAlphabeticalOrder(self, header_path): - """Returns a path canonicalized for alphabetical comparison. - - - replaces "-" with "_" so they both cmp the same. - - removes '-inl' since we don't require them to be after the main header. - - lowercase everything, just in case. - - Args: - header_path: Path to be canonicalized. - - Returns: - Canonicalized path. - """ - return header_path.replace('-inl.h', '.h').replace('-', '_').lower() - - def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): - """Check if a header is in alphabetical order with the previous header. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - header_path: Canonicalized header to be checked. - - Returns: - Returns true if the header is in alphabetical order. - """ - # If previous section is different from current section, _last_header will - # be reset to empty string, so it's always less than current header. - # - # If previous line was a blank line, assume that the headers are - # intentionally sorted the way they are. - if (self._last_header > header_path and - Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): - return False - return True - - def CheckNextIncludeOrder(self, header_type): - """Returns a non-empty error message if the next header is out of order. - - This function also updates the internal state to be ready to check - the next include. - - Args: - header_type: One of the _XXX_HEADER constants defined above. - - Returns: - The empty string if the header is in the right order, or an - error message describing what's wrong. - - """ - error_message = ('Found %s after %s' % ( - self._TYPE_NAMES[header_type], self._SECTION_NAMES[self._section])) - - last_section = self._section - - if header_type == _C_SYS_HEADER: - if self._section <= self._C_SECTION: - self._section = self._C_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _CPP_SYS_HEADER: - if self._section <= self._CPP_SECTION: - self._section = self._CPP_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _LIKELY_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - self._section = self._OTHER_H_SECTION - elif header_type == _POSSIBLE_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - # This will always be the fallback because we're not sure - # enough that the header is associated with this file. - self._section = self._OTHER_H_SECTION - else: - assert header_type == _OTHER_HEADER - self._section = self._OTHER_H_SECTION - - if last_section != self._section: - self._last_header = '' - - return '' - - -class _CppLintState(object): - """Maintains module-wide state..""" - - def __init__(self): - self.verbose_level = 1 # global setting. - self.error_count = 0 # global count of reported errors - # filters to apply when emitting error messages - self.filters = _DEFAULT_FILTERS[:] - # backup of filter list. Used to restore the state after each file. - self._filters_backup = self.filters[:] - self.counting = 'total' # In what way are we counting errors? - self.errors_by_category = {} # string to int dict storing error counts - - # output format: - # "emacs" - format that emacs can parse (default) - # "vs7" - format that Microsoft Visual Studio 7 can parse - self.output_format = 'emacs' - - def SetOutputFormat(self, output_format): - """Sets the output format for errors.""" - self.output_format = output_format - - def SetVerboseLevel(self, level): - """Sets the module's verbosity, and returns the previous setting.""" - last_verbose_level = self.verbose_level - self.verbose_level = level - return last_verbose_level - - def SetCountingStyle(self, counting_style): - """Sets the module's counting options.""" - self.counting = counting_style - - def SetFilters(self, filters): - """Sets the error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "+whitespace/indent"). - Each filter should start with + or -; else we die. - - Raises: - ValueError: The comma-separated filters did not all start with '+' or '-'. - E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" - """ - # Default filters always have less priority than the flag ones. - self.filters = _DEFAULT_FILTERS[:] - self.AddFilters(filters) - - def AddFilters(self, filters): - """ Adds more filters to the existing list of error-message filters. """ - for filt in filters.split(','): - clean_filt = filt.strip() - if clean_filt: - self.filters.append(clean_filt) - for filt in self.filters: - if not (filt.startswith('+') or filt.startswith('-')): - raise ValueError( - 'Every filter in --filters must start with + or -' - ' (%s does not)' % filt) - - def BackupFilters(self): - """ Saves the current filter list to backup storage.""" - self._filters_backup = self.filters[:] - - def RestoreFilters(self): - """ Restores filters previously backed up.""" - self.filters = self._filters_backup[:] - - def ResetErrorCounts(self): - """Sets the module's error statistic back to zero.""" - self.error_count = 0 - self.errors_by_category = {} - - def IncrementErrorCount(self, category): - """Bumps the module's error statistic.""" - self.error_count += 1 - if self.counting in ('toplevel', 'detailed'): - if self.counting != 'detailed': - category = category.split('/')[0] - if category not in self.errors_by_category: - self.errors_by_category[category] = 0 - self.errors_by_category[category] += 1 - - def PrintErrorCounts(self): - """Print a summary of errors by category, and the total.""" - for category, count in self.errors_by_category.iteritems(): - sys.stdout.write('Category \'%s\' errors found: %d\n' % - (category, count)) - sys.stdout.write('Total errors found: %d\n' % self.error_count) - - -_cpplint_state = _CppLintState() - - -def _OutputFormat(): - """Gets the module's output format.""" - return _cpplint_state.output_format - - -def _SetOutputFormat(output_format): - """Sets the module's output format.""" - _cpplint_state.SetOutputFormat(output_format) - - -def _VerboseLevel(): - """Returns the module's verbosity setting.""" - return _cpplint_state.verbose_level - - -def _SetVerboseLevel(level): - """Sets the module's verbosity, and returns the previous setting.""" - return _cpplint_state.SetVerboseLevel(level) - - -def _SetCountingStyle(level): - """Sets the module's counting options.""" - _cpplint_state.SetCountingStyle(level) - - -def _Filters(): - """Returns the module's list of output filters, as a list.""" - return _cpplint_state.filters - - -def _SetFilters(filters): - """Sets the module's error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.SetFilters(filters) - - -def _AddFilters(filters): - """Adds more filter overrides. - - Unlike _SetFilters, this function does not reset the current list of filters - available. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.AddFilters(filters) - - -def _BackupFilters(): - """ Saves the current filter list to backup storage.""" - _cpplint_state.BackupFilters() - - -def _RestoreFilters(): - """ Restores filters previously backed up.""" - _cpplint_state.RestoreFilters() - - -class _FunctionState(object): - """Tracks current function name and the number of lines in its body.""" - - _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. - _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. - - def __init__(self): - self.in_a_function = False - self.lines_in_function = 0 - self.current_function = '' - - def Begin(self, function_name): - """Start analyzing function body. - - Args: - function_name: The name of the function being tracked. - """ - self.in_a_function = True - self.lines_in_function = 0 - self.current_function = function_name - - def Count(self): - """Count line in current function body.""" - if self.in_a_function: - self.lines_in_function += 1 - - def Check(self, error, filename, linenum): - """Report if too many lines in function body. - - Args: - error: The function to call with any errors found. - filename: The name of the current file. - linenum: The number of the line to check. - """ - if Match(r'T(EST|est)', self.current_function): - base_trigger = self._TEST_TRIGGER - else: - base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2**_VerboseLevel() - - if self.lines_in_function > trigger: - error_level = int( - math.log(self.lines_in_function / base_trigger, 2)) - # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... - if error_level > 5: - error_level = 5 - error(filename, linenum, 'readability/fn_size', error_level, - 'Small and focused functions are preferred:' - ' %s has %d non-comment lines' - ' (error triggered by exceeding %d lines).' % ( - self.current_function, self.lines_in_function, trigger)) - - def End(self): - """Stop analyzing function body.""" - self.in_a_function = False - - -class _IncludeError(Exception): - """Indicates a problem with the include order in a file.""" - pass - - -class FileInfo(object): - """Provides utility functions for filenames. - - FileInfo provides easy access to the components of a file's path - relative to the project root. - """ - - def __init__(self, filename): - self._filename = filename - - def FullName(self): - """Make Windows paths like Unix.""" - return os.path.abspath(self._filename).replace('\\', '/') - - def RepositoryName(self): - """FullName after removing the local path to the repository. - - If we have a real absolute path name here we can try to do something smart: - detecting the root of the checkout and truncating /path/to/checkout from - the name so that we get header guards that don't include things like - "C:\Documents and Settings\..." or "/home/username/..." in them and thus - people on different computers who have checked the source out to different - locations won't see bogus errors. - """ - fullname = self.FullName() - - if os.path.exists(fullname): - project_dir = os.path.dirname(fullname) - - if os.path.exists(os.path.join(project_dir, ".svn")): - # If there's a .svn file in the current directory, we recursively look - # up the directory tree for the top of the SVN checkout - root_dir = project_dir - one_up_dir = os.path.dirname(root_dir) - while os.path.exists(os.path.join(one_up_dir, ".svn")): - root_dir = os.path.dirname(root_dir) - one_up_dir = os.path.dirname(one_up_dir) - - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by - # searching up from the current path. - root_dir = os.path.dirname(fullname) - while (root_dir != os.path.dirname(root_dir) and - not os.path.exists(os.path.join(root_dir, ".git")) and - not os.path.exists(os.path.join(root_dir, ".hg")) and - not os.path.exists(os.path.join(root_dir, ".svn"))): - root_dir = os.path.dirname(root_dir) - - if (os.path.exists(os.path.join(root_dir, ".git")) or - os.path.exists(os.path.join(root_dir, ".hg")) or - os.path.exists(os.path.join(root_dir, ".svn"))): - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Don't know what to do; header guard warnings may be wrong... - return fullname - - def Split(self): - """Splits the file into the directory, basename, and extension. - - For 'chrome/browser/browser.cc', Split() would - return ('chrome/browser', 'browser', '.cc') - - Returns: - A tuple of (directory, basename, extension). - """ - - googlename = self.RepositoryName() - project, rest = os.path.split(googlename) - return (project, ) + os.path.splitext(rest) - - def BaseName(self): - """File base name - text after the final slash, before the final period.""" - return self.Split()[1] - - def Extension(self): - """File extension - text following the final period.""" - return self.Split()[2] - - def NoExtension(self): - """File has no source file extension.""" - return '/'.join(self.Split()[0:2]) - - def IsSource(self): - """File has a source file extension.""" - return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx') - - -def _ShouldPrintError(category, confidence, linenum): - """If confidence >= verbose, category passes filter and is not suppressed.""" - - # There are three ways we might decide not to print an error message: - # a "NOLINT(category)" comment appears in the source, - # the verbosity level isn't high enough, or the filters filter it out. - if IsErrorSuppressedByNolint(category, linenum): - return False - - if confidence < _cpplint_state.verbose_level: - return False - - is_filtered = False - for one_filter in _Filters(): - if one_filter.startswith('-'): - if category.startswith(one_filter[1:]): - is_filtered = True - elif one_filter.startswith('+'): - if category.startswith(one_filter[1:]): - is_filtered = False - else: - assert False # should have been checked for in SetFilter. - if is_filtered: - return False - - return True - - -def Error(filename, linenum, category, confidence, message): - """Logs the fact we've found a lint error. - - We log where the error was found, and also our confidence in the error, - that is, how certain we are this is a legitimate style regression, and - not a misidentification or a use that's sometimes justified. - - False positives can be suppressed by the use of - "cpplint(category)" comments on the offending line. These are - parsed into _error_suppressions. - - Args: - filename: The name of the file containing the error. - linenum: The number of the line containing the error. - category: A string used to describe the "category" this bug - falls under: "whitespace", say, or "runtime". Categories - may have a hierarchy separated by slashes: "whitespace/indent". - confidence: A number from 1-5 representing a confidence score for - the error, with 5 meaning that we are certain of the problem, - and 1 meaning that it could be a legitimate construct. - message: The error message. - """ - if _ShouldPrintError(category, confidence, linenum): - _cpplint_state.IncrementErrorCount(category) - if _cpplint_state.output_format == 'vs7': - sys.stderr.write('%s(%s): %s [%s] [%d]\n' % - (filename, linenum, message, category, confidence)) - elif _cpplint_state.output_format == 'eclipse': - sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % - (filename, linenum, message, category, confidence)) - else: - sys.stderr.write('%s:%s: %s [%s] [%d]\n' % - (filename, linenum, message, category, confidence)) - - -# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( - r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') -# Match a single C style comment on the same line. -_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' -# Matches multi-line C style comments. -# This RE is a little bit more complicated than one might expect, because we -# have to take care of space removals tools so we can handle comments inside -# statements better. -# The current rule is: We only clear spaces from both sides when we're at the -# end of the line. Otherwise, we try to remove spaces from the right side, -# if this doesn't work we try on left side but only if there's a non-character -# on the right. -_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( - r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + _RE_PATTERN_C_COMMENTS + - r'\s+|' + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + - _RE_PATTERN_C_COMMENTS + r')') - - -def IsCppString(line): - """Does line terminate so, that the next symbol is in string constant. - - This function does not consider single-line nor multi-line comments. - - Args: - line: is a partial line of code starting from the 0..n. - - Returns: - True, if next character appended to 'line' is inside a - string constant. - """ - - line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" - return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 - - -def CleanseRawStrings(raw_lines): - """Removes C++11 raw strings from lines. - - Before: - static const char kData[] = R"( - multi-line string - )"; - - After: - static const char kData[] = "" - (replaced by blank line) - ""; - - Args: - raw_lines: list of raw lines. - - Returns: - list of lines with C++11 raw strings replaced by empty strings. - """ - - delimiter = None - lines_without_raw_strings = [] - for line in raw_lines: - if delimiter: - # Inside a raw string, look for the end - end = line.find(delimiter) - if end >= 0: - # Found the end of the string, match leading space for this - # line and resume copying the original lines, and also insert - # a "" on the last line. - leading_space = Match(r'^(\s*)\S', line) - line = leading_space.group(1) + '""' + line[end + len( - delimiter):] - delimiter = None - else: - # Haven't found the end yet, append a blank line. - line = '""' - - # Look for beginning of a raw string, and replace them with - # empty strings. This is done in a loop to handle multiple raw - # strings on the same line. - while delimiter is None: - # Look for beginning of a raw string. - # See 2.14.15 [lex.string] for syntax. - matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', - line) - if matched: - delimiter = ')' + matched.group(2) + '"' - - end = matched.group(3).find(delimiter) - if end >= 0: - # Raw string ended on same line - line = (matched.group(1) + '""' + - matched.group(3)[end + len(delimiter):]) - delimiter = None - else: - # Start of a multi-line raw string - line = matched.group(1) + '""' - else: - break - - lines_without_raw_strings.append(line) - - # TODO(unknown): if delimiter is not None here, we might want to - # emit a warning for unterminated string. - return lines_without_raw_strings - - -def FindNextMultiLineCommentStart(lines, lineix): - """Find the beginning marker for a multiline comment.""" - while lineix < len(lines): - if lines[lineix].strip().startswith('/*'): - # Only return this marker if the comment goes beyond this line - if lines[lineix].strip().find('*/', 2) < 0: - return lineix - lineix += 1 - return len(lines) - - -def FindNextMultiLineCommentEnd(lines, lineix): - """We are inside a comment, find the end marker.""" - while lineix < len(lines): - if lines[lineix].strip().endswith('*/'): - return lineix - lineix += 1 - return len(lines) - - -def RemoveMultiLineCommentsFromRange(lines, begin, end): - """Clears a range of lines for multi-line comments.""" - # Having // dummy comments makes the lines non-empty, so we will not get - # unnecessary blank line warnings later in the code. - for i in range(begin, end): - lines[i] = '/**/' - - -def RemoveMultiLineComments(filename, lines, error): - """Removes multiline (c-style) comments from lines.""" - lineix = 0 - while lineix < len(lines): - lineix_begin = FindNextMultiLineCommentStart(lines, lineix) - if lineix_begin >= len(lines): - return - lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) - if lineix_end >= len(lines): - error(filename, lineix_begin + 1, 'readability/multiline_comment', - 5, 'Could not find end of multi-line comment') - return - RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) - lineix = lineix_end + 1 - - -def CleanseComments(line): - """Removes //-comments and single-line C-style /* */ comments. - - Args: - line: A line of C++ source. - - Returns: - The line with single-line comments removed. - """ - commentpos = line.find('//') - if commentpos != -1 and not IsCppString(line[:commentpos]): - line = line[:commentpos].rstrip() - # get rid of /* ... */ - return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) - - -class CleansedLines(object): - """Holds 4 copies of all lines with different preprocessing applied to them. - - 1) elided member contains lines without strings and comments. - 2) lines member contains lines without comments. - 3) raw_lines member contains all the lines without processing. - 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw - strings removed. - All these members are of , and of the same length. - """ - - def __init__(self, lines): - self.elided = [] - self.lines = [] - self.raw_lines = lines - self.num_lines = len(lines) - self.lines_without_raw_strings = CleanseRawStrings(lines) - for linenum in range(len(self.lines_without_raw_strings)): - self.lines.append( - CleanseComments(self.lines_without_raw_strings[linenum])) - elided = self._CollapseStrings(self.lines_without_raw_strings[ - linenum]) - self.elided.append(CleanseComments(elided)) - - def NumLines(self): - """Returns the number of lines represented.""" - return self.num_lines - - @staticmethod - def _CollapseStrings(elided): - """Collapses strings and chars on a line to simple "" or '' blocks. - - We nix strings first so we're not fooled by text like '"http://"' - - Args: - elided: The line being processed. - - Returns: - The line with collapsed strings. - """ - if _RE_PATTERN_INCLUDE.match(elided): - return elided - - # Remove escaped characters first to make quote/single quote collapsing - # basic. Things that look like escaped characters shouldn't occur - # outside of strings and chars. - elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) - - # Replace quoted strings and digit separators. Both single quotes - # and double quotes are processed in the same loop, otherwise - # nested quotes wouldn't work. - collapsed = '' - while True: - # Find the first quote character - match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) - if not match: - collapsed += elided - break - head, quote, tail = match.groups() - - if quote == '"': - # Collapse double quoted strings - second_quote = tail.find('"') - if second_quote >= 0: - collapsed += head + '""' - elided = tail[second_quote + 1:] - else: - # Unmatched double quote, don't bother processing the rest - # of the line since this is probably a multiline string. - collapsed += elided - break - else: - # Found single quote, check nearby text to eliminate digit separators. - # - # There is no special handling for floating point here, because - # the integer/fractional/exponent parts would all be parsed - # correctly as long as there are digits on both sides of the - # separator. So we are fine as long as we don't see something - # like "0.'3" (gcc 4.9.0 will not allow this literal). - if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): - match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', - "'" + tail) - collapsed += head + match_literal.group(1).replace("'", '') - elided = match_literal.group(2) - else: - second_quote = tail.find('\'') - if second_quote >= 0: - collapsed += head + "''" - elided = tail[second_quote + 1:] - else: - # Unmatched single quote - collapsed += elided - break - - return collapsed - - -def FindEndOfExpressionInLine(line, startpos, stack): - """Find the position just after the end of current parenthesized expression. - - Args: - line: a CleansedLines line. - startpos: start searching at this position. - stack: nesting stack at startpos. - - Returns: - On finding matching end: (index just after matching end, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at end of this line) - """ - for i in xrange(startpos, len(line)): - char = line[i] - if char in '([{': - # Found start of parenthesized expression, push to expression stack - stack.append(char) - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator - if stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - elif i > 0 and Search(r'\boperator\s*$', line[0:i]): - # operator<, don't add to stack - continue - else: - # Tentative start of template argument list - stack.append('<') - elif char in ')]}': - # Found end of parenthesized expression. - # - # If we are currently expecting a matching '>', the pending '<' - # must have been an operator. Remove them from expression stack. - while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - if ((stack[-1] == '(' and char == ')') or - (stack[-1] == '[' and char == ']') or - (stack[-1] == '{' and char == '}')): - stack.pop() - if not stack: - return (i + 1, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == '>': - # Found potential end of template argument list. - - # Ignore "->" and operator functions - if (i > 0 and (line[i - 1] == '-' or Search(r'\boperator\s*$', - line[0:i - 1]))): - continue - - # Pop the stack if there is a matching '<'. Otherwise, ignore - # this '>' since it must be an operator. - if stack: - if stack[-1] == '<': - stack.pop() - if not stack: - return (i + 1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '>', the matching '<' must have been an operator, since - # template argument list should not contain statements. - while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - - # Did not find end of expression or unbalanced parentheses on this line - return (-1, stack) - - -def CloseExpression(clean_lines, linenum, pos): - """If input points to ( or { or [ or <, finds the position that closes it. - - If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the - linenum/pos that correspond to the closing of the expression. - - TODO(unknown): cpplint spends a fair bit of time matching parentheses. - Ideally we would want to index all opening and closing parentheses once - and have CloseExpression be just a simple lookup, but due to preprocessor - tricks, this is not so easy. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *past* the closing brace, or - (line, len(lines), -1) if we never find a close. Note we ignore - strings and comments when matching; and the line we return is the - 'cleansed' line at linenum. - """ - - line = clean_lines.elided[linenum] - if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): - return (line, clean_lines.NumLines(), -1) - - # Check first line - (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) - if end_pos > -1: - return (line, linenum, end_pos) - - # Continue scanning forward - while stack and linenum < clean_lines.NumLines() - 1: - linenum += 1 - line = clean_lines.elided[linenum] - (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) - if end_pos > -1: - return (line, linenum, end_pos) - - # Did not find end of expression before end of file, give up - return (line, clean_lines.NumLines(), -1) - - -def FindStartOfExpressionInLine(line, endpos, stack): - """Find position at the matching start of current expression. - - This is almost the reverse of FindEndOfExpressionInLine, but note - that the input position and returned position differs by 1. - - Args: - line: a CleansedLines line. - endpos: start searching at this position. - stack: nesting stack at endpos. - - Returns: - On finding matching start: (index at matching start, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at beginning of this line) - """ - i = endpos - while i >= 0: - char = line[i] - if char in ')]}': - # Found end of expression, push to expression stack - stack.append(char) - elif char == '>': - # Found potential end of template argument list. - # - # Ignore it if it's a "->" or ">=" or "operator>" - if (i > 0 and - (line[i - 1] == '-' or Match(r'\s>=\s', line[i - 1:]) or - Search(r'\boperator\s*$', line[0:i]))): - i -= 1 - else: - stack.append('>') - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator - i -= 1 - else: - # If there is a matching '>', we can pop the expression stack. - # Otherwise, ignore this '<' since it must be an operator. - if stack and stack[-1] == '>': - stack.pop() - if not stack: - return (i, None) - elif char in '([{': - # Found start of expression. - # - # If there are any unmatched '>' on the stack, they must be - # operators. Remove those. - while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - if ((char == '(' and stack[-1] == ')') or - (char == '[' and stack[-1] == ']') or - (char == '{' and stack[-1] == '}')): - stack.pop() - if not stack: - return (i, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '<', the matching '>' must have been an operator, since - # template argument list should not contain statements. - while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - - i -= 1 - - return (-1, stack) - - -def ReverseCloseExpression(clean_lines, linenum, pos): - """If input points to ) or } or ] or >, finds the position that opens it. - - If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the - linenum/pos that correspond to the opening of the expression. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *at* the opening brace, or - (line, 0, -1) if we never find the matching opening brace. Note - we ignore strings and comments when matching; and the line we - return is the 'cleansed' line at linenum. - """ - line = clean_lines.elided[linenum] - if line[pos] not in ')}]>': - return (line, 0, -1) - - # Check last line - (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) - if start_pos > -1: - return (line, linenum, start_pos) - - # Continue scanning backward - while stack and linenum > 0: - linenum -= 1 - line = clean_lines.elided[linenum] - (start_pos, stack) = FindStartOfExpressionInLine(line, - len(line) - 1, stack) - if start_pos > -1: - return (line, linenum, start_pos) - - # Did not find start of expression before beginning of file, give up - return (line, 0, -1) - - -def CheckForCopyright(filename, lines, error): - """Logs an error if no Copyright message appears at the top of the file.""" - - # We'll say it should occur by line 10. Don't forget there's a - # dummy line at the front. - for line in xrange(1, min(len(lines), 11)): - if re.search(r'Copyright', lines[line], re.I): break - else: # means no copyright line was found - error(filename, 0, 'legal/copyright', 5, 'No copyright message found. ' - 'You should have a line: "Copyright [year] "') - - -def GetIndentLevel(line): - """Return the number of leading spaces in line. - - Args: - line: A string to check. - - Returns: - An integer count of leading spaces, possibly zero. - """ - indent = Match(r'^( *)\S', line) - if indent: - return len(indent.group(1)) - else: - return 0 - - -def GetHeaderGuardCPPVariable(filename): - """Returns the CPP variable that should be used as a header guard. - - Args: - filename: The name of a C++ header file. - - Returns: - The CPP variable that should be used as a header guard in the - named file. - - """ - filename = os.path.basename(filename) - return re.sub(r'[^a-zA-Z0-9]', '_', filename).upper() + '_' - - -def CheckForHeaderGuard(filename, clean_lines, error): - """Checks that the file contains a header guard. - - Logs an error if no #ifndef header guard is present. For other - headers, checks that the full pathname is used. - - Args: - filename: The name of the C++ header file. - clean_lines: A CleansedLines instance containing the file. - error: The function to call with any errors found. - """ - - # Don't check for header guards if there are error suppression - # comments somewhere in this file. - # - # Because this is silencing a warning for a nonexistent line, we - # only support the very specific NOLINT(build/header_guard) syntax, - # and not the general NOLINT or NOLINT(*) syntax. - raw_lines = clean_lines.lines_without_raw_strings - for i in raw_lines: - if Search(r'//\s*NOLINT\(build/header_guard\)', i): - return - - cppvar = GetHeaderGuardCPPVariable(filename) - - ifndef = '' - ifndef_linenum = 0 - define = '' - endif = '' - endif_linenum = 0 - pragma_linenum = -1 - for linenum, line in enumerate(raw_lines): - linesplit = line.split() - if len(linesplit) >= 2: - if linesplit[0] == '#pragma' and linesplit[1] == 'once': - pragma_linenum = linenum - # find the first occurrence of #ifndef and #define, save arg - if not ifndef and linesplit[0] == '#ifndef': - # set ifndef to the header guard presented on the #ifndef line. - ifndef = linesplit[1] - ifndef_linenum = linenum - if not define and linesplit[0] == '#define': - define = linesplit[1] - # find the last occurrence of #endif, save entire line - if line.startswith('#endif'): - endif = line - endif_linenum = linenum - if pragma_linenum != -1: - return # short path for pragma once - if not ifndef or not define or ifndef != define: - error(filename, 0, 'build/header_guard', 5, - 'No #ifndef header guard found, suggested CPP variable is: %s' % - cppvar) - return - - # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ - # for backward compatibility. - if ifndef != cppvar: - error_level = 0 - if ifndef != cppvar + '_': - error_level = 5 - - ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], - ifndef_linenum, error) - error(filename, ifndef_linenum, 'build/header_guard', error_level, - '#ifndef header guard has wrong style, please use: %s' % cppvar) - - # Check for "//" comments on endif line. - ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, - error) - match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) - if match: - if match.group(1) == '_': - # Issue low severity warning for deprecated double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - '#endif line should be "#endif // %s"' % cppvar) - return - - # Didn't find the corresponding "//" comment. If this file does not - # contain any "//" comments at all, it could be that the compiler - # only wants "/**/" comments, look for those instead. - no_single_line_comments = True - for i in xrange(1, len(raw_lines) - 1): - line = raw_lines[i] - if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', - line): - no_single_line_comments = False - break - - if no_single_line_comments: - match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) - if match: - if match.group(1) == '_': - # Low severity warning for double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - '#endif line should be "#endif /* %s */"' % cppvar) - return - - # Didn't find anything - error(filename, endif_linenum, 'build/header_guard', 5, - '#endif line should be "#endif // %s"' % cppvar) - - -def CheckHeaderFileIncluded(filename, include_state, error): - """Logs an error if a .cc file does not include its header.""" - - # Do not check test files - if filename.endswith('_test.cc') or filename.endswith('_unittest.cc'): - return - - fileinfo = FileInfo(filename) - headerfile = filename[0:len(filename) - 2] + 'h' - if not os.path.exists(headerfile): - return - headername = FileInfo(headerfile).RepositoryName() - first_include = 0 - for section_list in include_state.include_list: - for f in section_list: - if headername in f[0] or f[0] in headername: - return - if not first_include: - first_include = f[1] - - error(filename, first_include, 'build/include', 5, - '%s should include its header file %s' % (fileinfo.RepositoryName(), - headername)) - - -def CheckForBadCharacters(filename, lines, error): - """Logs an error for each line containing bad characters. - - Two kinds of bad characters: - - 1. Unicode replacement characters: These indicate that either the file - contained invalid UTF-8 (likely) or Unicode replacement characters (which - it shouldn't). Note that it's possible for this to throw off line - numbering if the invalid UTF-8 occurred adjacent to a newline. - - 2. NUL bytes. These are problematic for some tools. - - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - for linenum, line in enumerate(lines): - if u'\ufffd' in line: - error( - filename, linenum, 'readability/utf8', 5, - 'Line contains invalid UTF-8 (or Unicode replacement character).' - ) - if '\0' in line: - error(filename, linenum, 'readability/nul', 5, - 'Line contains NUL byte.') - - -def CheckForNewlineAtEOF(filename, lines, error): - """Logs an error if there is no newline char at the end of the file. - - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - - # The array lines() was created by adding two newlines to the - # original file (go figure), then splitting on \n. - # To verify that the file ends in \n, we just have to make sure the - # last-but-two element of lines() exists and is empty. - if len(lines) < 3 or lines[-2]: - error(filename, - len(lines) - 2, 'whitespace/ending_newline', 5, - 'Could not find a newline character at the end of the file.') - - -def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): - """Logs an error if we see /* ... */ or "..." that extend past one line. - - /* ... */ comments are legit inside macros, for one line. - Otherwise, we prefer // comments, so it's ok to warn about the - other. Likewise, it's ok for strings to extend across multiple - lines, as long as a line continuation character (backslash) - terminates each line. Although not currently prohibited by the C++ - style guide, it's ugly and unnecessary. We don't do well with either - in this lint program, so we warn about both. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remove all \\ (escaped backslashes) from the line. They are OK, and the - # second (escaped) slash may trigger later \" detection erroneously. - line = line.replace('\\\\', '') - - if line.count('/*') > line.count('*/'): - error(filename, linenum, 'readability/multiline_comment', 5, - 'Complex multi-line /*...*/-style comment found. ' - 'Lint may give bogus warnings. ' - 'Consider replacing these with //-style comments, ' - 'with #if 0...#endif, ' - 'or with more clearly structured multi-line comments.') - - if (line.count('"') - line.count('\\"')) % 2: - error(filename, linenum, 'readability/multiline_string', 5, - 'Multi-line string ("...") found. This lint script doesn\'t ' - 'do well with such strings, and may give bogus warnings. ' - 'Use C++11 raw strings or concatenation instead.') - - -# (non-threadsafe name, thread-safe alternative, validation pattern) -# -# The validation pattern is used to eliminate false positives such as: -# _rand(); // false positive due to substring match. -# ->rand(); // some member function rand(). -# ACMRandom rand(seed); // some variable named rand. -# ISAACRandom rand(); // another variable named rand. -# -# Basically we require the return value of these functions to be used -# in some expression context on the same line by matching on some -# operator before the function name. This eliminates constructors and -# member function calls. -_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' -_THREADING_LIST = ( - ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), - ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), - ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), - ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), - ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), - ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), - ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), - ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), - ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), - ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), - ('strtok(', 'strtok_r(', _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), - ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), ) - - -def CheckPosixThreading(filename, clean_lines, linenum, error): - """Checks for calls to thread-unsafe functions. - - Much code has been originally written without consideration of - multi-threading. Also, engineers are relying on their old experience; - they have learned posix before threading extensions were added. These - tests guide the engineers to use thread-safe functions (when using - posix directly). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: - # Additional pattern matching check to confirm that this is the - # function we are looking for - if Search(pattern, line): - error(filename, linenum, 'runtime/threadsafe_fn', 2, - 'Consider using ' + multithread_safe_func + '...) instead of ' - + single_thread_func + '...) for improved thread safety.') - - -def CheckVlogArguments(filename, clean_lines, linenum, error): - """Checks that VLOG() is only used for defining a logging level. - - For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and - VLOG(FATAL) are not. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): - error(filename, linenum, 'runtime/vlog', 5, - 'VLOG() should be used with numeric verbosity level. ' - 'Use LOG() if you want symbolic severity levels.') - - -# Matches invalid increment: *count++, which moves pointer instead of -# incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);') - - -def CheckInvalidIncrement(filename, clean_lines, linenum, error): - """Checks for invalid increment *count++. - - For example following function: - void increment_counter(int* count) { - *count++; - } - is invalid, because it effectively does count++, moving pointer, and should - be replaced with ++*count, (*count)++ or *count += 1. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if _RE_PATTERN_INVALID_INCREMENT.match(line): - error( - filename, linenum, 'runtime/invalid_increment', 5, - 'Changing pointer instead of value (or unused value of operator*).') - - -def IsMacroDefinition(clean_lines, linenum): - if Search(r'^#define', clean_lines[linenum]): - return True - - if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): - return True - - return False - - -def IsForwardClassDeclaration(clean_lines, linenum): - return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) - - -class _BlockInfo(object): - """Stores information about a generic block of code.""" - - def __init__(self, seen_open_brace): - self.seen_open_brace = seen_open_brace - self.open_parentheses = 0 - self.inline_asm = _NO_ASM - self.check_namespace_indentation = False - - def CheckBegin(self, filename, clean_lines, linenum, error): - """Run checks that applies to text up to the opening brace. - - This is mostly for checking the text after the class identifier - and the "{", usually where the base class is specified. For other - blocks, there isn't much to check, so we always pass. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - def CheckEnd(self, filename, clean_lines, linenum, error): - """Run checks that applies to text after the closing brace. - - This is mostly used for checking end of namespace comments. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass - - def IsBlockInfo(self): - """Returns true if this block is a _BlockInfo. - - This is convenient for verifying that an object is an instance of - a _BlockInfo, but not an instance of any of the derived classes. - - Returns: - True for this class, False for derived classes. - """ - return self.__class__ == _BlockInfo - - -class _ExternCInfo(_BlockInfo): - """Stores information about an 'extern "C"' block.""" - - def __init__(self): - _BlockInfo.__init__(self, True) - - -class _ClassInfo(_BlockInfo): - """Stores information about a class.""" - - def __init__(self, name, class_or_struct, clean_lines, linenum): - _BlockInfo.__init__(self, False) - self.name = name - self.starting_linenum = linenum - self.is_derived = False - self.check_namespace_indentation = True - if class_or_struct == 'struct': - self.access = 'public' - self.is_struct = True - else: - self.access = 'private' - self.is_struct = False - - # Remember initial indentation level for this class. Using raw_lines here - # instead of elided to account for leading comments. - self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) - - # Try to find the end of the class. This will be confused by things like: - # class A { - # } *x = { ... - # - # But it's still good enough for CheckSectionSpacing. - self.last_line = 0 - depth = 0 - for i in range(linenum, clean_lines.NumLines()): - line = clean_lines.elided[i] - depth += line.count('{') - line.count('}') - if not depth: - self.last_line = i - break - - def CheckBegin(self, filename, clean_lines, linenum, error): - # Look for a bare ':' - if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): - self.is_derived = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - # If there is a DISALLOW macro, it should appear near the end of - # the class. - seen_last_thing_in_class = False - for i in xrange(linenum - 1, self.starting_linenum, -1): - match = Search( - r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' - + self.name + r'\)', clean_lines.elided[i]) - if match: - if seen_last_thing_in_class: - error(filename, i, 'readability/constructors', 3, - match.group(1) + - ' should be the last thing in the class') - break - - if not Match(r'^\s*$', clean_lines.elided[i]): - seen_last_thing_in_class = True - - # Check that closing brace is aligned with beginning of the class. - # Only do this if the closing brace is indented by only whitespaces. - # This means we will not check single-line class definitions. - indent = Match(r'^( *)\}', clean_lines.elided[linenum]) - if indent and len(indent.group(1)) != self.class_indent: - if self.is_struct: - parent = 'struct ' + self.name - else: - parent = 'class ' + self.name - error(filename, linenum, 'whitespace/indent', 3, - 'Closing brace should be aligned with beginning of %s' % - parent) - - -class _NamespaceInfo(_BlockInfo): - """Stores information about a namespace.""" - - def __init__(self, name, linenum): - _BlockInfo.__init__(self, False) - self.name = name or '' - self.starting_linenum = linenum - self.check_namespace_indentation = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - """Check end of namespace comments.""" - line = clean_lines.raw_lines[linenum] - - # Check how many lines is enclosed in this namespace. Don't issue - # warning for missing namespace comments if there aren't enough - # lines. However, do apply checks if there is already an end of - # namespace comment and it's incorrect. - # - # TODO(unknown): We always want to check end of namespace comments - # if a namespace is large, but sometimes we also want to apply the - # check if a short namespace contained nontrivial things (something - # other than forward declarations). There is currently no logic on - # deciding what these nontrivial things are, so this check is - # triggered by namespace size only, which works most of the time. - if (linenum - self.starting_linenum < 10 and - not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)): - return - - # Look for matching comment at end of namespace. - # - # Note that we accept C style "/* */" comments for terminating - # namespaces, so that code that terminate namespaces inside - # preprocessor macros can be cpplint clean. - # - # We also accept stuff like "// end of namespace ." with the - # period at the end. - # - # Besides these, we don't accept anything else, otherwise we might - # get false negatives when existing comment is a substring of the - # expected namespace. - if self.name: - # Named namespace - if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + - re.escape(self.name) + r'[\*/\.\\\s]*$'), line): - error(filename, linenum, 'readability/namespace', 5, - 'Namespace should be terminated with "// namespace %s"' % - self.name) - else: - # Anonymous namespace - if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): - # If "// namespace anonymous" or "// anonymous namespace (more text)", - # mention "// anonymous namespace" as an acceptable form - if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b', - line): - error( - filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"' - ' or "// anonymous namespace"') - else: - error( - filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"' - ) - - -class _PreprocessorInfo(object): - """Stores checkpoints of nesting stacks when #if/#else is seen.""" - - def __init__(self, stack_before_if): - # The entire nesting stack before #if - self.stack_before_if = stack_before_if - - # The entire nesting stack up to #else - self.stack_before_else = [] - - # Whether we have already seen #else or #elif - self.seen_else = False - - -class NestingState(object): - """Holds states related to parsing braces.""" - - def __init__(self): - # Stack for tracking all braces. An object is pushed whenever we - # see a "{", and popped when we see a "}". Only 3 types of - # objects are possible: - # - _ClassInfo: a class or struct. - # - _NamespaceInfo: a namespace. - # - _BlockInfo: some other type of block. - self.stack = [] - - # Top of the previous stack before each Update(). - # - # Because the nesting_stack is updated at the end of each line, we - # had to do some convoluted checks to find out what is the current - # scope at the beginning of the line. This check is simplified by - # saving the previous top of nesting stack. - # - # We could save the full stack, but we only need the top. Copying - # the full nesting stack would slow down cpplint by ~10%. - self.previous_stack_top = [] - - # Stack of _PreprocessorInfo objects. - self.pp_stack = [] - - def SeenOpenBrace(self): - """Check if we have seen the opening brace for the innermost block. - - Returns: - True if we have seen the opening brace, False if the innermost - block is still expecting an opening brace. - """ - return (not self.stack) or self.stack[-1].seen_open_brace - - def InNamespaceBody(self): - """Check if we are currently one level inside a namespace body. - - Returns: - True if top of the stack is a namespace block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _NamespaceInfo) - - def InExternC(self): - """Check if we are currently one level inside an 'extern "C"' block. - - Returns: - True if top of the stack is an extern block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ExternCInfo) - - def InClassDeclaration(self): - """Check if we are currently one level inside a class or struct declaration. - - Returns: - True if top of the stack is a class/struct, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ClassInfo) - - def InAsmBlock(self): - """Check if we are currently one level inside an inline ASM block. - - Returns: - True if the top of the stack is a block containing inline ASM. - """ - return self.stack and self.stack[-1].inline_asm != _NO_ASM - - def InTemplateArgumentList(self, clean_lines, linenum, pos): - """Check if current position is inside template argument list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: position just after the suspected template argument. - Returns: - True if (linenum, pos) is inside template arguments. - """ - while linenum < clean_lines.NumLines(): - # Find the earliest character that might indicate a template argument - line = clean_lines.elided[linenum] - match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) - if not match: - linenum += 1 - pos = 0 - continue - token = match.group(1) - pos += len(match.group(0)) - - # These things do not look like template argument list: - # class Suspect { - # class Suspect x; } - if token in ('{', '}', ';'): return False - - # These things look like template argument list: - # template - # template - # template - # template - if token in ('>', '=', '[', ']', '.'): return True - - # Check if token is an unmatched '<'. - # If not, move on to the next character. - if token != '<': - pos += 1 - if pos >= len(line): - linenum += 1 - pos = 0 - continue - - # We can't be sure if we just find a single '<', and need to - # find the matching '>'. - (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, - pos - 1) - if end_pos < 0: - # Not sure if template argument list or syntax error in file - return False - linenum = end_line - pos = end_pos - return False - - def UpdatePreprocessor(self, line): - """Update preprocessor stack. - - We need to handle preprocessors due to classes like this: - #ifdef SWIG - struct ResultDetailsPageElementExtensionPoint { - #else - struct ResultDetailsPageElementExtensionPoint : public Extension { - #endif - - We make the following assumptions (good enough for most files): - - Preprocessor condition evaluates to true from #if up to first - #else/#elif/#endif. - - - Preprocessor condition evaluates to false from #else/#elif up - to #endif. We still perform lint checks on these lines, but - these do not affect nesting stack. - - Args: - line: current line to check. - """ - if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): - # Beginning of #if block, save the nesting stack here. The saved - # stack will allow us to restore the parsing state in the #else case. - self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif Match(r'^\s*#\s*(else|elif)\b', line): - # Beginning of #else block - if self.pp_stack: - if not self.pp_stack[-1].seen_else: - # This is the first #else or #elif block. Remember the - # whole nesting stack up to this point. This is what we - # keep after the #endif. - self.pp_stack[-1].seen_else = True - self.pp_stack[-1].stack_before_else = copy.deepcopy( - self.stack) - - # Restore the stack to how it was before the #if - self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) - else: - # TODO(unknown): unexpected #else, issue warning? - pass - elif Match(r'^\s*#\s*endif\b', line): - # End of #if or #else blocks. - if self.pp_stack: - # If we saw an #else, we will need to restore the nesting - # stack to its former state before the #else, otherwise we - # will just continue from where we left off. - if self.pp_stack[-1].seen_else: - # Here we can just use a shallow copy since we are the last - # reference to it. - self.stack = self.pp_stack[-1].stack_before_else - # Drop the corresponding #if - self.pp_stack.pop() - else: - # TODO(unknown): unexpected #endif, issue warning? - pass - - # TODO(unknown): Update() is too long, but we will refactor later. - def Update(self, filename, clean_lines, linenum, error): - """Update nesting state with current line. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remember top of the previous nesting stack. - # - # The stack is always pushed/popped and not modified in place, so - # we can just do a shallow copy instead of copy.deepcopy. Using - # deepcopy would slow down cpplint by ~28%. - if self.stack: - self.previous_stack_top = self.stack[-1] - else: - self.previous_stack_top = None - - # Update pp_stack - self.UpdatePreprocessor(line) - - # Count parentheses. This is to avoid adding struct arguments to - # the nesting stack. - if self.stack: - inner_block = self.stack[-1] - depth_change = line.count('(') - line.count(')') - inner_block.open_parentheses += depth_change - - # Also check if we are starting or ending an inline assembly block. - if inner_block.inline_asm in (_NO_ASM, _END_ASM): - if (depth_change != 0 and inner_block.open_parentheses == 1 and - _MATCH_ASM.match(line)): - # Enter assembly block - inner_block.inline_asm = _INSIDE_ASM - else: - # Not entering assembly block. If previous line was _END_ASM, - # we will now shift to _NO_ASM state. - inner_block.inline_asm = _NO_ASM - elif (inner_block.inline_asm == _INSIDE_ASM and - inner_block.open_parentheses == 0): - # Exit assembly block - inner_block.inline_asm = _END_ASM - - # Consume namespace declaration at the beginning of the line. Do - # this in a loop so that we catch same line declarations like this: - # namespace proto2 { namespace bridge { class MessageSet; } } - while True: - # Match start of namespace. The "\b\s*" below catches namespace - # declarations even if it weren't followed by a whitespace, this - # is so that we don't confuse our namespace checker. The - # missing spaces will be flagged by CheckSpacing. - namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', - line) - if not namespace_decl_match: - break - - new_namespace = _NamespaceInfo( - namespace_decl_match.group(1), linenum) - self.stack.append(new_namespace) - - line = namespace_decl_match.group(2) - if line.find('{') != -1: - new_namespace.seen_open_brace = True - line = line[line.find('{') + 1:] - - # Look for a class declaration in whatever is left of the line - # after parsing namespaces. The regexp accounts for decorated classes - # such as in: - # class LOCKABLE API Object { - # }; - class_decl_match = Match( - r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?' - r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))' - r'(.*)$', line) - if (class_decl_match and - (not self.stack or self.stack[-1].open_parentheses == 0)): - # We do not want to accept classes that are actually template arguments: - # template , - # template class Ignore3> - # void Function() {}; - # - # To avoid template argument cases, we scan forward and look for - # an unmatched '>'. If we see one, assume we are inside a - # template argument list. - end_declaration = len(class_decl_match.group(1)) - if not self.InTemplateArgumentList(clean_lines, linenum, - end_declaration): - self.stack.append( - _ClassInfo( - class_decl_match.group(3), - class_decl_match.group(2), clean_lines, linenum)) - line = class_decl_match.group(4) - - # If we have not yet seen the opening brace for the innermost block, - # run checks here. - if not self.SeenOpenBrace(): - self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) - - # Update access control if we are inside a class/struct - if self.stack and isinstance(self.stack[-1], _ClassInfo): - classinfo = self.stack[-1] - access_match = Match( - r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' - r':(?:[^:]|$)', line) - if access_match: - classinfo.access = access_match.group(2) - - # Check that access keywords are indented +1 space. Skip this - # check if the keywords are not preceded by whitespaces. - indent = access_match.group(1) - if (len(indent) != classinfo.class_indent + 1 and - Match(r'^\s*$', indent)): - if classinfo.is_struct: - parent = 'struct ' + classinfo.name - else: - parent = 'class ' + classinfo.name - slots = '' - if access_match.group(3): - slots = access_match.group(3) - error(filename, linenum, 'whitespace/indent', 3, - '%s%s: should be indented +1 space inside %s' % ( - access_match.group(2), slots, parent)) - - # Consume braces or semicolons from what's left of the line - while True: - # Match first brace, semicolon, or closed parenthesis. - matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) - if not matched: - break - - token = matched.group(1) - if token == '{': - # If namespace or class hasn't seen a opening brace yet, mark - # namespace/class head as complete. Push a new block onto the - # stack otherwise. - if not self.SeenOpenBrace(): - self.stack[-1].seen_open_brace = True - elif Match(r'^extern\s*"[^"]*"\s*\{', line): - self.stack.append(_ExternCInfo()) - else: - self.stack.append(_BlockInfo(True)) - if _MATCH_ASM.match(line): - self.stack[-1].inline_asm = _BLOCK_ASM - - elif token == ';' or token == ')': - # If we haven't seen an opening brace yet, but we already saw - # a semicolon, this is probably a forward declaration. Pop - # the stack for these. - # - # Similarly, if we haven't seen an opening brace yet, but we - # already saw a closing parenthesis, then these are probably - # function arguments with extra "class" or "struct" keywords. - # Also pop these stack for these. - if not self.SeenOpenBrace(): - self.stack.pop() - else: # token == '}' - # Perform end of block checks and pop the stack. - if self.stack: - self.stack[-1].CheckEnd(filename, clean_lines, linenum, - error) - self.stack.pop() - line = matched.group(2) - - def InnermostClass(self): - """Get class info on the top of the stack. - - Returns: - A _ClassInfo object if we are inside a class, or None otherwise. - """ - for i in range(len(self.stack), 0, -1): - classinfo = self.stack[i - 1] - if isinstance(classinfo, _ClassInfo): - return classinfo - return None - - def CheckCompletedBlocks(self, filename, error): - """Checks that all classes and namespaces have been completely parsed. - - Call this when all lines in a file have been processed. - Args: - filename: The name of the current file. - error: The function to call with any errors found. - """ - # Note: This test can result in false positives if #ifdef constructs - # get in the way of brace matching. See the testBuildClass test in - # cpplint_unittest.py for an example of this. - for obj in self.stack: - if isinstance(obj, _ClassInfo): - error(filename, obj.starting_linenum, 'build/class', 5, - 'Failed to find complete declaration of class %s' % - obj.name) - elif isinstance(obj, _NamespaceInfo): - error(filename, obj.starting_linenum, 'build/namespaces', 5, - 'Failed to find complete declaration of namespace %s' % - obj.name) - - -def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state, - error): - r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. - - Complain about several constructs which gcc-2 accepts, but which are - not standard C++. Warning about these in lint is one way to ease the - transition to new compilers. - - put storage class first (e.g. "static const" instead of "const static"). - - "%lld" instead of %qd" in printf-type functions. - - "%1$d" is non-standard in printf-type functions. - - "\%" is an undefined character escape sequence. - - text after #endif is not allowed. - - invalid inner-style forward declaration. - - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', - line): - error( - filename, linenum, 'build/deprecated', 3, - '>? and ))?' - # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' - error(filename, linenum, 'runtime/member_string_references', 2, - 'const string& members are dangerous. It is much better to use ' - 'alternatives, such as pointers or simple constants.') - - # Everything else in this function operates on class declarations. - # Return early if the top of the nesting stack is not a class, or if - # the class head is not completed yet. - classinfo = nesting_state.InnermostClass() - if not classinfo or not classinfo.seen_open_brace: - return - - # The class may have been declared with namespace or classname qualifiers. - # The constructor and destructor will not have those qualifiers. - base_classname = classinfo.name.split('::')[-1] - - # Look for single-argument constructors that aren't marked explicit. - # Technically a valid construct, but against style. Also look for - # non-single-argument constructors which are also technically valid, but - # strongly suggest something is wrong. - explicit_constructor_match = Match( - r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*' - r'\(((?:[^()]|\([^()]*\))*)\)' % re.escape(base_classname), line) - - if explicit_constructor_match: - is_marked_explicit = explicit_constructor_match.group(1) - - if not explicit_constructor_match.group(2): - constructor_args = [] - else: - constructor_args = explicit_constructor_match.group(2).split(',') - - # collapse arguments so that commas in template parameter lists and function - # argument parameter lists don't split arguments in two - i = 0 - while i < len(constructor_args): - constructor_arg = constructor_args[i] - while (constructor_arg.count('<') > constructor_arg.count('>') or - constructor_arg.count('(') > constructor_arg.count(')')): - constructor_arg += ',' + constructor_args[i + 1] - del constructor_args[i + 1] - constructor_args[i] = constructor_arg - i += 1 - - defaulted_args = [arg for arg in constructor_args if '=' in arg] - noarg_constructor = ( - not constructor_args or # empty arg list - # 'void' arg specifier - (len(constructor_args) == 1 and - constructor_args[0].strip() == 'void')) - onearg_constructor = ( - ( - len(constructor_args) == 1 and # exactly one arg - not noarg_constructor) or - # all but at most one arg defaulted - (len(constructor_args) >= 1 and not noarg_constructor and - len(defaulted_args) >= len(constructor_args) - 1)) - initializer_list_constructor = bool( - onearg_constructor and - Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) - copy_constructor = bool( - onearg_constructor and - Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' % - re.escape(base_classname), constructor_args[0].strip())) - - if (not is_marked_explicit and onearg_constructor and - not initializer_list_constructor and not copy_constructor): - if defaulted_args: - error(filename, linenum, 'runtime/explicit', 5, - 'Constructors callable with one argument ' - 'should be marked explicit.') - else: - error( - filename, linenum, 'runtime/explicit', 5, - 'Single-parameter constructors should be marked explicit.') - elif is_marked_explicit and not onearg_constructor: - if noarg_constructor: - error( - filename, linenum, 'runtime/explicit', 5, - 'Zero-parameter constructors should not be marked explicit.') - else: - error(filename, linenum, 'runtime/explicit', 0, - 'Constructors that require multiple arguments ' - 'should not be marked explicit.') - - -def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): - """Checks for the correctness of various spacing around function calls. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Since function calls often occur inside if/for/while/switch - # expressions - which have their own, more liberal conventions - we - # first see if we should be looking inside such an expression for a - # function call, to which we can apply more strict standards. - fncall = line # if there's no control flow construct, look at whole line - for pattern in (r'\bif\s*\((.*)\)\s*{', r'\bfor\s*\((.*)\)\s*{', - r'\bwhile\s*\((.*)\)\s*[{;]', r'\bswitch\s*\((.*)\)\s*{'): - match = Search(pattern, line) - if match: - fncall = match.group(1) # look inside the parens for function calls - break - - # Except in if/for/while/switch, there should never be space - # immediately inside parens (eg "f( 3, 4 )"). We make an exception - # for nested parens ( (a+b) + c ). Likewise, there should never be - # a space before a ( when it's a function argument. I assume it's a - # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore - # pointers and references to arrays and functions coz they're too tricky: - # we use a very simple way to recognize these: - # " (something)(maybe-something)" or - # " (something)(maybe-something," or - # " (something)[something]" - # Note that we assume the contents of [] to be short enough that - # they'll never need to wrap. - if ( # Ignore control structures. - not Search( - r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', - fncall) and - # Ignore pointers/references to functions. - not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and - # Ignore pointers/references to arrays. - not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): - if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space after ( in function call') - elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space after (') - if (Search(r'\w\s+\(', fncall) and - not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and - not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and - not Search(r'\bcase\s+\(', fncall)): - # TODO(unknown): Space after an operator function seem to be a common - # error, silence those for now by restricting them to highest verbosity. - if Search(r'\boperator_*\b', line): - error(filename, linenum, 'whitespace/parens', 0, - 'Extra space before ( in function call') - else: - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space before ( in function call') - # If the ) is followed only by a newline or a { + newline, assume it's - # part of a control statement (if/while/etc), and don't complain - if Search(r'[^)]\s+\)\s*[^{\s]', fncall): - # If the closing parenthesis is preceded by only whitespaces, - # try to give a more descriptive error message. - if Search(r'^\s+\)', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Closing ) should be moved to the previous line') - else: - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space before )') - - -def IsBlankLine(line): - """Returns true if the given line is blank. - - We consider a line to be blank if the line is empty or consists of - only white spaces. - - Args: - line: A line of a string. - - Returns: - True, if the given line is blank. - """ - return not line or line.isspace() - - -def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error): - is_namespace_indent_item = ( - len(nesting_state.stack) > 1 and - nesting_state.stack[-1].check_namespace_indentation and - isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and - nesting_state.previous_stack_top == nesting_state.stack[-2]) - - if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - clean_lines.elided, line): - CheckItemIndentationInNamespace(filename, clean_lines.elided, line, - error) - - -def CheckForFunctionLengths(filename, clean_lines, linenum, function_state, - error): - """Reports for long function bodies. - - For an overview why this is done, see: - http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions - - Uses a simplistic algorithm assuming other style guidelines - (especially spacing) are followed. - Only checks unindented functions, so class members are unchecked. - Trivial bodies are unchecked, so constructors with huge initializer lists - may be missed. - Blank/comment lines are not counted so as to avoid encouraging the removal - of vertical space and comments just to get through a lint check. - NOLINT *on the last line of a function* disables this check. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - function_state: Current function name and lines in body so far. - error: The function to call with any errors found. - """ - lines = clean_lines.lines - line = lines[linenum] - joined_line = '' - - starting_func = False - regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... - match_result = Match(regexp, line) - if match_result: - # If the name is all caps and underscores, figure it's a macro and - # ignore it, unless it's TEST or TEST_F. - function_name = match_result.group(1).split()[-1] - if function_name == 'TEST' or function_name == 'TEST_F' or ( - not Match(r'[A-Z_]+$', function_name)): - starting_func = True - - if starting_func: - body_found = False - for start_linenum in xrange(linenum, clean_lines.NumLines()): - start_line = lines[start_linenum] - joined_line += ' ' + start_line.lstrip() - if Search(r'(;|})', - start_line): # Declarations and trivial functions - body_found = True - break # ... ignore - elif Search(r'{', start_line): - body_found = True - function = Search(r'((\w|:)*)\(', line).group(1) - if Match(r'TEST', function): # Handle TEST... macros - parameter_regexp = Search(r'(\(.*\))', joined_line) - if parameter_regexp: # Ignore bad syntax - function += parameter_regexp.group(1) - else: - function += '()' - function_state.Begin(function) - break - if not body_found: - # No body for the function (or evidence of a non-function) was found. - error(filename, linenum, 'readability/fn_size', 5, - 'Lint failed to find start of function body.') - elif Match(r'^\}\s*$', line): # function end - function_state.Check(error, filename, linenum) - function_state.End() - elif not Match(r'^\s*$', line): - function_state.Count() # Count non-blank/non-comment lines. - - -_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') - - -def CheckComment(line, filename, linenum, next_line_start, error): - """Checks for common mistakes in comments. - - Args: - line: The line in question. - filename: The name of the current file. - linenum: The number of the line to check. - next_line_start: The first non-whitespace column of the next line. - error: The function to call with any errors found. - """ - commentpos = line.find('//') - if commentpos != -1: - # Check if the // may be in quotes. If so, ignore it - # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison - if (line.count('"', 0, commentpos) - line.count('\\"', 0, commentpos) - ) % 2 == 0: # not in quotes - # Allow one space for new scopes, two spaces otherwise: - if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) - and ((commentpos >= 1 and - line[commentpos - 1] not in string.whitespace) or - (commentpos >= 2 and - line[commentpos - 2] not in string.whitespace))): - error(filename, linenum, 'whitespace/comments', 2, - 'At least two spaces is best between code and comments') - - # Checks for common mistakes in TODO comments. - comment = line[commentpos:] - match = _RE_PATTERN_TODO.match(comment) - if match: - # One whitespace is correct; zero whitespace is handled elsewhere. - leading_whitespace = match.group(1) - if len(leading_whitespace) > 1: - error(filename, linenum, 'whitespace/todo', 2, - 'Too many spaces before TODO') - - username = match.group(2) - if not username: - error(filename, linenum, 'readability/todo', 2, - 'Missing username in TODO; it should look like ' - '"// TODO(my_username): Stuff."') - - middle_whitespace = match.group(3) - # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison - if middle_whitespace != ' ' and middle_whitespace != '': - error(filename, linenum, 'whitespace/todo', 2, - 'TODO(my_username) should be followed by a space') - - # If the comment contains an alphanumeric character, there - # should be a space somewhere between it and the // unless - # it's a /// or //! Doxygen comment. - if (Match(r'//[^ ]*\w', comment) and - not Match(r'(///|//\!)(\s+|$)', comment)): - error(filename, linenum, 'whitespace/comments', 4, - 'Should have a space between // and comment') - - -def CheckAccess(filename, clean_lines, linenum, nesting_state, error): - """Checks for improper use of DISALLOW* macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] # get rid of comments and strings - - matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|' - r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line) - if not matched: - return - if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo): - if nesting_state.stack[-1].access != 'private': - error(filename, linenum, 'readability/constructors', 3, - '%s must be in the private: section' % matched.group(1)) - - else: - # Found DISALLOW* macro outside a class declaration, or perhaps it - # was used inside a function when it should have been part of the - # class declaration. We could issue a warning here, but it - # probably resulted in a compiler error already. - pass - - -def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for the correctness of various spacing issues in the code. - - Things we check for: spaces around operators, spaces after - if/for/while/switch, no spaces around parens in function calls, two - spaces between code and comment, don't start a block with a blank - line, don't end a function with a blank line, don't add a blank line - after public/protected/private, don't have too many blank lines in a row. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw = clean_lines.lines_without_raw_strings - line = raw[linenum] - - # Before nixing comments, check if the line is blank for no good - # reason. This includes the first line after a block is opened, and - # blank lines at the end of a function (ie, right before a line like '}' - # - # Skip all the blank line checks if we are immediately inside a - # namespace body. In other words, don't issue blank line warnings - # for this block: - # namespace { - # - # } - # - # A warning about missing end of namespace comments will be issued instead. - # - # Also skip blank line checks for 'extern "C"' blocks, which are formatted - # like namespaces. - if (IsBlankLine(line) and not nesting_state.InNamespaceBody() and - not nesting_state.InExternC()): - elided = clean_lines.elided - prev_line = elided[linenum - 1] - prevbrace = prev_line.rfind('{') - # TODO(unknown): Don't complain if line before blank line, and line after, - # both start with alnums and are indented the same amount. - # This ignores whitespace at the start of a namespace block - # because those are not usually indented. - if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: - # OK, we have a blank line at the start of a code block. Before we - # complain, we check if it is an exception to the rule: The previous - # non-empty line has the parameters of a function header that are indented - # 4 spaces (because they did not fit in a 80 column line when placed on - # the same line as the function name). We also check for the case where - # the previous line is indented 6 spaces, which may happen when the - # initializers of a constructor do not fit into a 80 column line. - exception = False - if Match(r' {6}\w', prev_line): # Initializer list? - # We are looking for the opening column of initializer list, which - # should be indented 4 spaces to cause 6 space indentation afterwards. - search_position = linenum - 2 - while (search_position >= 0 and - Match(r' {6}\w', elided[search_position])): - search_position -= 1 - exception = (search_position >= 0 and - elided[search_position][:5] == ' :') - else: - # Search for the function arguments or an initializer list. We use a - # simple heuristic here: If the line is indented 4 spaces; and we have a - # closing paren, without the opening paren, followed by an opening brace - # or colon (for initializer lists) we assume that it is the last line of - # a function header. If we have a colon indented 4 spaces, it is an - # initializer list. - exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', - prev_line) or Match(r' {4}:', prev_line)) - - if not exception: - error(filename, linenum, 'whitespace/blank_line', 2, - 'Redundant blank line at the start of a code block ' - 'should be deleted.') - # Ignore blank lines at the end of a block in a long if-else - # chain, like this: - # if (condition1) { - # // Something followed by a blank line - # - # } else if (condition2) { - # // Something else - # } - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - if (next_line and Match(r'\s*}', next_line) and - next_line.find('} else ') == -1): - error(filename, linenum, 'whitespace/blank_line', 3, - 'Redundant blank line at the end of a code block ' - 'should be deleted.') - - matched = Match(r'\s*(public|protected|private):', prev_line) - if matched: - error(filename, linenum, 'whitespace/blank_line', 3, - 'Do not leave a blank line after "%s:"' % matched.group(1)) - - # Next, check comments - next_line_start = 0 - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - next_line_start = len(next_line) - len(next_line.lstrip()) - CheckComment(line, filename, linenum, next_line_start, error) - - # get rid of comments and strings - line = clean_lines.elided[linenum] - - # You shouldn't have spaces before your brackets, except maybe after - # 'delete []' or 'return []() {};' - if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line): - error(filename, linenum, 'whitespace/braces', 5, 'Extra space before [') - - # In range-based for, we wanted spaces before and after the colon, but - # not around "::" tokens that might appear. - if (Search(r'for *\(.*[^:]:[^: ]', line) or - Search(r'for *\(.*[^: ]:[^:]', line)): - error(filename, linenum, 'whitespace/forcolon', 2, - 'Missing space around colon in range-based for loop') - - -def CheckOperatorSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around operators. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Don't try to do spacing checks for operator methods. Do this by - # replacing the troublesome characters with something else, - # preserving column position for all other characters. - # - # The replacement is done repeatedly to avoid false positives from - # operators that call operators. - while True: - match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) - if match: - line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) - else: - break - - # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". - # Otherwise not. Note we only check for non-spaces on *both* sides; - # sometimes people put non-spaces on one side when aligning ='s among - # many lines (not that this is behavior that I approve of...) - if ((Search(r'[\w.]=', line) or - Search(r'=[\w.]', line)) and not Search(r'\b(if|while|for) ', line) - # Operators taken from [lex.operators] in C++11 standard. - and - not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) and - not Search(r'operator=', line)): - error(filename, linenum, 'whitespace/operators', 4, - 'Missing spaces around =') - - # It's ok not to have spaces around binary operators like + - * /, but if - # there's too little whitespace, we get concerned. It's hard to tell, - # though, so we punt on this one for now. TODO. - - # You should always have whitespace around binary operators. - # - # Check <= and >= first to avoid false positives with < and >, then - # check non-include lines for spacing around < and >. - # - # If the operator is followed by a comma, assume it's be used in a - # macro context and don't do any checks. This avoids false - # positives. - # - # Note that && is not included here. Those are checked separately - # in CheckRValueReference - match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around %s' % match.group(1)) - elif not Match(r'#.*include', line): - # Look for < that is not surrounded by spaces. This is only - # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a - # space. This is done to avoid some false positives with shifts. - match = Match(r'^(.*[^\s<])<[^\s=<,]', line) - if match: - (_, _, end_pos) = CloseExpression(clean_lines, linenum, - len(match.group(1))) - if end_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <') - - # Look for > that is not surrounded by spaces. Similar to the - # above, we only trigger if both sides are missing spaces to avoid - # false positives with shifts. - match = Match(r'^(.*[^-\s>])>[^\s=>,]', line) - if match: - (_, _, start_pos) = ReverseCloseExpression(clean_lines, linenum, - len(match.group(1))) - if start_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >') - - # We allow no-spaces around << when used like this: 10<<20, but - # not otherwise (particularly, not when used as streams) - # - # We also allow operators following an opening parenthesis, since - # those tend to be macros that deal with operators. - match = Search(r'(operator|[^\s(<])(?:L|UL|ULL|l|ul|ull)?<<([^\s,=<])', - line) - if (match and - not (match.group(1).isdigit() and match.group(2).isdigit()) and - not (match.group(1) == 'operator' and match.group(2) == ';')): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <<') - - # We allow no-spaces around >> for almost anything. This is because - # C++11 allows ">>" to close nested templates, which accounts for - # most cases when ">>" is not followed by a space. - # - # We still warn on ">>" followed by alpha character, because that is - # likely due to ">>" being used for right shifts, e.g.: - # value >> alpha - # - # When ">>" is used to close templates, the alphanumeric letter that - # follows would be part of an identifier, and there should still be - # a space separating the template type and the identifier. - # type> alpha - match = Search(r'>>[a-zA-Z_]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >>') - - # There shouldn't be space around unary operators - match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) - if match: - error(filename, linenum, 'whitespace/operators', 4, - 'Extra space for operator %s' % match.group(1)) - - -def CheckParenthesisSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around parentheses. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # No spaces after an if, while, switch, or for - match = Search(r' (if\(|for\(|while\(|switch\()', line) - if match: - error(filename, linenum, 'whitespace/parens', 5, - 'Missing space before ( in %s' % match.group(1)) - - # For if/for/while/switch, the left and right parens should be - # consistent about how many spaces are inside the parens, and - # there should either be zero or one spaces inside the parens. - # We don't want: "if ( foo)" or "if ( foo )". - # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. - match = Search(r'\b(if|for|while|switch)\s*' - r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', line) - if match: - if len(match.group(2)) != len(match.group(4)): - if not (match.group(3) == ';' and - len(match.group(2)) == 1 + len(match.group(4)) or - not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): - error(filename, linenum, 'whitespace/parens', 5, - 'Mismatching spaces inside () in %s' % match.group(1)) - if len(match.group(2)) not in [0, 1]: - error(filename, linenum, 'whitespace/parens', 5, - 'Should have zero or one spaces inside ( and ) in %s' % - match.group(1)) - - -def CheckCommaSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing near commas and semicolons. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - raw = clean_lines.lines_without_raw_strings - line = clean_lines.elided[linenum] - - # You should always have a space after a comma (either as fn arg or operator) - # - # This does not apply when the non-space character following the - # comma is another comma, since the only time when that happens is - # for empty macro arguments. - # - # We run this check in two passes: first pass on elided lines to - # verify that lines contain missing whitespaces, second pass on raw - # lines to confirm that those missing whitespaces are not due to - # elided comments. - if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and - Search(r',[^,\s]', raw[linenum])): - error(filename, linenum, 'whitespace/comma', 3, 'Missing space after ,') - - # You should always have a space after a semicolon - # except for few corner cases - # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more - # space after ; - if Search(r';[^\s};\\)/]', line): - error(filename, linenum, 'whitespace/semicolon', 3, - 'Missing space after ;') - - -def CheckBracesSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing near commas. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Except after an opening paren, or after another opening brace (in case of - # an initializer list, for instance), you should have spaces before your - # braces. And since you should never have braces at the beginning of a line, - # this is an easy test. - match = Match(r'^(.*[^ ({>]){', line) - if match: - # Try a bit harder to check for brace initialization. This - # happens in one of the following forms: - # Constructor() : initializer_list_{} { ... } - # Constructor{}.MemberFunction() - # Type variable{}; - # FunctionCall(type{}, ...); - # LastArgument(..., type{}); - # LOG(INFO) << type{} << " ..."; - # map_of_type[{...}] = ...; - # ternary = expr ? new type{} : nullptr; - # OuterTemplate{}> - # - # We check for the character following the closing brace, and - # silence the warning if it's one of those listed above, i.e. - # "{.;,)<>]:". - # - # To account for nested initializer list, we allow any number of - # closing braces up to "{;,)<". We can't simply silence the - # warning on first sight of closing brace, because that would - # cause false negatives for things that are not initializer lists. - # Silence this: But not this: - # Outer{ if (...) { - # Inner{...} if (...){ // Missing space before { - # }; } - # - # There is a false negative with this approach if people inserted - # spurious semicolons, e.g. "if (cond){};", but we will catch the - # spurious semicolon with a separate check. - (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, - len(match.group(1))) - trailing_text = '' - if endpos > -1: - trailing_text = endline[endpos:] - for offset in xrange(endlinenum + 1, - min(endlinenum + 3, clean_lines.NumLines() - 1)): - trailing_text += clean_lines.elided[offset] - if not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before {') - - # Make sure '} else {' has spaces. - if Search(r'}else', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before else') - - # You shouldn't have a space before a semicolon at the end of the line. - # There's a special case for "for" since the style guide allows space before - # the semicolon there. - if Search(r':\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Semicolon defining empty statement. Use {} instead.') - elif Search(r'^\s*;\s*$', line): - error( - filename, linenum, 'whitespace/semicolon', 5, - 'Line contains only semicolon. If this should be an empty statement, ' - 'use {} instead.') - elif (Search(r'\s+;\s*$', line) and not Search(r'\bfor\b', line)): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Extra space before last semicolon. If this should be an empty ' - 'statement, use {} instead.') - - -def IsDecltype(clean_lines, linenum, column): - """Check if the token ending on (linenum, column) is decltype(). - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is decltype() expression, False otherwise. - """ - (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) - if start_col < 0: - return False - if Search(r'\bdecltype\s*$', text[0:start_col]): - return True - return False - - -def IsTemplateParameterList(clean_lines, linenum, column): - """Check if the token ending on (linenum, column) is the end of template<>. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is end of a template parameter list, False otherwise. - """ - (_, startline, startpos) = ReverseCloseExpression(clean_lines, linenum, - column) - if (startpos > -1 and Search(r'\btemplate\s*$', - clean_lines.elided[startline][0:startpos])): - return True - return False - - -def IsRValueType(typenames, clean_lines, nesting_state, linenum, column): - """Check if the token ending on (linenum, column) is a type. - - Assumes that text to the right of the column is "&&" or a function - name. - - Args: - typenames: set of type names from template-argument-list. - clean_lines: A CleansedLines instance containing the file. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is a type, False if we are not sure. - """ - prefix = clean_lines.elided[linenum][0:column] - - # Get one word to the left. If we failed to do so, this is most - # likely not a type, since it's unlikely that the type name and "&&" - # would be split across multiple lines. - match = Match(r'^(.*)(\b\w+|[>*)&])\s*$', prefix) - if not match: - return False - - # Check text following the token. If it's "&&>" or "&&," or "&&...", it's - # most likely a rvalue reference used inside a template. - suffix = clean_lines.elided[linenum][column:] - if Match(r'&&\s*(?:[>,]|\.\.\.)', suffix): - return True - - # Check for known types and end of templates: - # int&& variable - # vector&& variable - # - # Because this function is called recursively, we also need to - # recognize pointer and reference types: - # int* Function() - # int& Function() - if (match.group(2) in typenames or match.group(2) in [ - 'char', 'char16_t', 'char32_t', 'wchar_t', 'bool', 'short', 'int', - 'long', 'signed', 'unsigned', 'float', 'double', 'void', 'auto', - '>', '*', '&' - ]): - return True - - # If we see a close parenthesis, look for decltype on the other side. - # decltype would unambiguously identify a type, anything else is - # probably a parenthesized expression and not a type. - if match.group(2) == ')': - return IsDecltype(clean_lines, linenum, - len(match.group(1)) + len(match.group(2)) - 1) - - # Check for casts and cv-qualifiers. - # match.group(1) remainder - # -------------- --------- - # const_cast< type&& - # const type&& - # type const&& - if Search(r'\b(?:const_cast\s*<|static_cast\s*<|dynamic_cast\s*<|' - r'reinterpret_cast\s*<|\w+\s)\s*$', match.group(1)): - return True - - # Look for a preceding symbol that might help differentiate the context. - # These are the cases that would be ambiguous: - # match.group(1) remainder - # -------------- --------- - # Call ( expression && - # Declaration ( type&& - # sizeof ( type&& - # if ( expression && - # while ( expression && - # for ( type&& - # for( ; expression && - # statement ; type&& - # block { type&& - # constructor { expression && - start = linenum - line = match.group(1) - match_symbol = None - while start >= 0: - # We want to skip over identifiers and commas to get to a symbol. - # Commas are skipped so that we can find the opening parenthesis - # for function parameter lists. - match_symbol = Match(r'^(.*)([^\w\s,])[\w\s,]*$', line) - if match_symbol: - break - start -= 1 - line = clean_lines.elided[start] - - if not match_symbol: - # Probably the first statement in the file is an rvalue reference - return True - - if match_symbol.group(2) == '}': - # Found closing brace, probably an indicate of this: - # block{} type&& - return True - - if match_symbol.group(2) == ';': - # Found semicolon, probably one of these: - # for(; expression && - # statement; type&& - - # Look for the previous 'for(' in the previous lines. - before_text = match_symbol.group(1) - for i in xrange(start - 1, max(start - 6, 0), -1): - before_text = clean_lines.elided[i] + before_text - if Search(r'for\s*\([^{};]*$', before_text): - # This is the condition inside a for-loop - return False - - # Did not find a for-init-statement before this semicolon, so this - # is probably a new statement and not a condition. - return True - - if match_symbol.group(2) == '{': - # Found opening brace, probably one of these: - # block{ type&& = ... ; } - # constructor{ expression && expression } - - # Look for a closing brace or a semicolon. If we see a semicolon - # first, this is probably a rvalue reference. - line = clean_lines.elided[start][0:len(match_symbol.group(1)) + 1] - end = start - depth = 1 - while True: - for ch in line: - if ch == ';': - return True - elif ch == '{': - depth += 1 - elif ch == '}': - depth -= 1 - if depth == 0: - return False - end += 1 - if end >= clean_lines.NumLines(): - break - line = clean_lines.elided[end] - # Incomplete program? - return False - - if match_symbol.group(2) == '(': - # Opening parenthesis. Need to check what's to the left of the - # parenthesis. Look back one extra line for additional context. - before_text = match_symbol.group(1) - if linenum > 1: - before_text = clean_lines.elided[linenum - 1] + before_text - before_text = match_symbol.group(1) - - # Patterns that are likely to be types: - # [](type&& - # for (type&& - # sizeof(type&& - # operator=(type&& - # - if Search(r'(?:\]|\bfor|\bsizeof|\boperator\s*\S+\s*)\s*$', - before_text): - return True - - # Patterns that are likely to be expressions: - # if (expression && - # while (expression && - # : initializer(expression && - # , initializer(expression && - # ( FunctionCall(expression && - # + FunctionCall(expression && - # + (expression && - # - # The last '+' represents operators such as '+' and '-'. - if Search(r'(?:\bif|\bwhile|[-+=%^(]*>)?\s*$', - match_symbol.group(1)) - if match_func: - # Check for constructors, which don't have return types. - if Search(r'\b(?:explicit|inline)$', match_func.group(1)): - return True - implicit_constructor = Match(r'\s*(\w+)\((?:const\s+)?(\w+)', - prefix) - if (implicit_constructor and implicit_constructor.group(1) == - implicit_constructor.group(2)): - return True - return IsRValueType(typenames, clean_lines, nesting_state, linenum, - len(match_func.group(1))) - - # Nothing before the function name. If this is inside a block scope, - # this is probably a function call. - return not (nesting_state.previous_stack_top and - nesting_state.previous_stack_top.IsBlockInfo()) - - if match_symbol.group(2) == '>': - # Possibly a closing bracket, check that what's on the other side - # looks like the start of a template. - return IsTemplateParameterList(clean_lines, start, - len(match_symbol.group(1))) - - # Some other symbol, usually something like "a=b&&c". This is most - # likely not a type. - return False - - -def IsDeletedOrDefault(clean_lines, linenum): - """Check if current constructor or operator is deleted or default. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if this is a deleted or default constructor. - """ - open_paren = clean_lines.elided[linenum].find('(') - if open_paren < 0: - return False - (close_line, _, close_paren) = CloseExpression(clean_lines, linenum, - open_paren) - if close_paren < 0: - return False - return Match(r'\s*=\s*(?:delete|default)\b', close_line[close_paren:]) - - -def IsRValueAllowed(clean_lines, linenum, typenames): - """Check if RValue reference is allowed on a particular line. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - typenames: set of type names from template-argument-list. - Returns: - True if line is within the region where RValue references are allowed. - """ - # Allow region marked by PUSH/POP macros - for i in xrange(linenum, 0, -1): - line = clean_lines.elided[i] - if Match(r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)', line): - if not line.endswith('PUSH'): - return False - for j in xrange(linenum, clean_lines.NumLines(), 1): - line = clean_lines.elided[j] - if Match(r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)', line): - return line.endswith('POP') - - # Allow operator= - line = clean_lines.elided[linenum] - if Search(r'\boperator\s*=\s*\(', line): - return IsDeletedOrDefault(clean_lines, linenum) - - # Allow constructors - match = Match(r'\s*(?:[\w<>]+::)*([\w<>]+)\s*::\s*([\w<>]+)\s*\(', line) - if match and match.group(1) == match.group(2): - return IsDeletedOrDefault(clean_lines, linenum) - if Search(r'\b(?:explicit|inline)\s+[\w<>]+\s*\(', line): - return IsDeletedOrDefault(clean_lines, linenum) - - if Match(r'\s*[\w<>]+\s*\(', line): - previous_line = 'ReturnType' - if linenum > 0: - previous_line = clean_lines.elided[linenum - 1] - if Match(r'^\s*$', previous_line) or Search(r'[{}:;]\s*$', - previous_line): - return IsDeletedOrDefault(clean_lines, linenum) - - # Reject types not mentioned in template-argument-list - while line: - match = Match(r'^.*?(\w+)\s*&&(.*)$', line) - if not match: - break - if match.group(1) not in typenames: - return False - line = match.group(2) - - # All RValue types that were in template-argument-list should have - # been removed by now. Those were allowed, assuming that they will - # be forwarded. - # - # If there are no remaining RValue types left (i.e. types that were - # not found in template-argument-list), flag those as not allowed. - return line.find('&&') < 0 - - -def GetTemplateArgs(clean_lines, linenum): - """Find list of template arguments associated with this function declaration. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: Line number containing the start of the function declaration, - usually one line after the end of the template-argument-list. - Returns: - Set of type names, or empty set if this does not appear to have - any template parameters. - """ - # Find start of function - func_line = linenum - while func_line > 0: - line = clean_lines.elided[func_line] - if Match(r'^\s*$', line): - return set() - if line.find('(') >= 0: - break - func_line -= 1 - if func_line == 0: - return set() - - # Collapse template-argument-list into a single string - argument_list = '' - match = Match(r'^(\s*template\s*)<', clean_lines.elided[func_line]) - if match: - # template-argument-list on the same line as function name - start_col = len(match.group(1)) - _, end_line, end_col = CloseExpression(clean_lines, func_line, - start_col) - if end_col > -1 and end_line == func_line: - start_col += 1 # Skip the opening bracket - argument_list = clean_lines.elided[func_line][start_col:end_col] - - elif func_line > 1: - # template-argument-list one line before function name - match = Match(r'^(.*)>\s*$', clean_lines.elided[func_line - 1]) - if match: - end_col = len(match.group(1)) - _, start_line, start_col = ReverseCloseExpression( - clean_lines, func_line - 1, end_col) - if start_col > -1: - start_col += 1 # Skip the opening bracket - while start_line < func_line - 1: - argument_list += clean_lines.elided[start_line][start_col:] - start_col = 0 - start_line += 1 - argument_list += clean_lines.elided[func_line - 1][start_col: - end_col] - - if not argument_list: - return set() - - # Extract type names - typenames = set() - while True: - match = Match(r'^[,\s]*(?:typename|class)(?:\.\.\.)?\s+(\w+)(.*)$', - argument_list) - if not match: - break - typenames.add(match.group(1)) - argument_list = match.group(2) - return typenames - - -def CheckRValueReference(filename, clean_lines, linenum, nesting_state, error): - """Check for rvalue references. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Find lines missing spaces around &&. - # TODO(unknown): currently we don't check for rvalue references - # with spaces surrounding the && to avoid false positives with - # boolean expressions. - line = clean_lines.elided[linenum] - match = Match(r'^(.*\S)&&', line) - if not match: - match = Match(r'(.*)&&\S', line) - if (not match) or '(&&)' in line or Search(r'\boperator\s*$', - match.group(1)): - return - - # Either poorly formed && or an rvalue reference, check the context - # to get a more accurate error message. Mostly we want to determine - # if what's to the left of "&&" is a type or not. - typenames = GetTemplateArgs(clean_lines, linenum) - and_pos = len(match.group(1)) - if IsRValueType(typenames, clean_lines, nesting_state, linenum, and_pos): - if not IsRValueAllowed(clean_lines, linenum, typenames): - error(filename, linenum, 'build/c++11', 3, - 'RValue references are an unapproved C++ feature.') - else: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around &&') - - -def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): - """Checks for additional blank line issues related to sections. - - Currently the only thing checked here is blank line before protected/private. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - class_info: A _ClassInfo objects. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Skip checks if the class is small, where small means 25 lines or less. - # 25 lines seems like a good cutoff since that's the usual height of - # terminals, and any class that can't fit in one screen can't really - # be considered "small". - # - # Also skip checks if we are on the first line. This accounts for - # classes that look like - # class Foo { public: ... }; - # - # If we didn't find the end of the class, last_line would be zero, - # and the check will be skipped by the first condition. - if (class_info.last_line - class_info.starting_linenum <= 24 or - linenum <= class_info.starting_linenum): - return - - matched = Match(r'\s*(public|protected|private):', - clean_lines.lines[linenum]) - if matched: - # Issue warning if the line before public/protected/private was - # not a blank line, but don't do this if the previous line contains - # "class" or "struct". This can happen two ways: - # - We are at the beginning of the class. - # - We are forward-declaring an inner class that is semantically - # private, but needed to be public for implementation reasons. - # Also ignores cases where the previous line ends with a backslash as can be - # common when defining classes in C macros. - prev_line = clean_lines.lines[linenum - 1] - if (not IsBlankLine(prev_line) and - not Search(r'\b(class|struct)\b', prev_line) and - not Search(r'\\$', prev_line)): - # Try a bit harder to find the beginning of the class. This is to - # account for multi-line base-specifier lists, e.g.: - # class Derived - # : public Base { - end_class_head = class_info.starting_linenum - for i in range(class_info.starting_linenum, linenum): - if Search(r'\{\s*$', clean_lines.lines[i]): - end_class_head = i - break - if end_class_head < linenum - 1: - error(filename, linenum, 'whitespace/blank_line', 3, - '"%s:" should be preceded by a blank line' % - matched.group(1)) - - -def GetPreviousNonBlankLine(clean_lines, linenum): - """Return the most recent non-blank line and its line number. - - Args: - clean_lines: A CleansedLines instance containing the file contents. - linenum: The number of the line to check. - - Returns: - A tuple with two elements. The first element is the contents of the last - non-blank line before the current line, or the empty string if this is the - first non-blank line. The second is the line number of that line, or -1 - if this is the first non-blank line. - """ - - prevlinenum = linenum - 1 - while prevlinenum >= 0: - prevline = clean_lines.elided[prevlinenum] - if not IsBlankLine(prevline): # if not a blank line... - return (prevline, prevlinenum) - prevlinenum -= 1 - return ('', -1) - - -def CheckBraces(filename, clean_lines, linenum, error): - """Looks for misplaced braces (e.g. at the end of line). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] # get rid of comments and strings - - if Match(r'\s*{\s*$', line): - # We allow an open brace to start a line in the case where someone is using - # braces in a block to explicitly create a new scope, which is commonly used - # to control the lifetime of stack-allocated variables. Braces are also - # used for brace initializers inside function calls. We don't detect this - # perfectly: we just don't complain if the last non-whitespace character on - # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the - # previous line starts a preprocessor block. - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if (not Search(r'[,;:}{(]\s*$', prevline) and - not Match(r'\s*#', prevline)): - error(filename, linenum, 'whitespace/braces', 4, - '{ should almost always be at the end of the previous line') - - # An else clause should be on the same line as the preceding closing brace. - if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if Match(r'\s*}\s*$', prevline): - error(filename, linenum, 'whitespace/newline', 4, - 'An else should appear on the same line as the preceding }') - - # If braces come on one side of an else, they should be on both. - # However, we have to worry about "else if" that spans multiple lines! - if Search(r'else if\s*\(', line): # could be multi-line if - brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) - # find the ( after the if - pos = line.find('else if') - pos = line.find('(', pos) - if pos > 0: - (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) - brace_on_right = endline[endpos:].find('{') != -1 - if brace_on_left != brace_on_right: # must be brace after if - error( - filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both' - ) - elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - - # Likewise, an else should never have the else clause on the same line - if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): - error(filename, linenum, 'whitespace/newline', 4, - 'Else clause should never be on same line as else (use 2 lines)') - - # In the same way, a do/while should never be on one line - if Match(r'\s*do [^\s{]', line): - error(filename, linenum, 'whitespace/newline', 4, - 'do/while clauses should not be on a single line') - - # Check single-line if/else bodies. The style guide says 'curly braces are not - # required for single-line statements'. We additionally allow multi-line, - # single statements, but we reject anything with more than one semicolon in - # it. This means that the first semicolon after the if should be at the end of - # its line, and the line after that should have an indent level equal to or - # lower than the if. We also check for ambiguous if/else nesting without - # braces. - if_else_match = Search(r'\b(if\s*\(|else\b)', line) - if if_else_match and not Match(r'\s*#', line): - if_indent = GetIndentLevel(line) - endline, endlinenum, endpos = line, linenum, if_else_match.end() - if_match = Search(r'\bif\s*\(', line) - if if_match: - # This could be a multiline if condition, so find the end first. - pos = if_match.end() - 1 - (endline, endlinenum, endpos) = CloseExpression(clean_lines, - linenum, pos) - # Check for an opening brace, either directly after the if or on the next - # line. If found, this isn't a single-statement conditional. - if (not Match(r'\s*{', endline[endpos:]) and - not (Match(r'\s*$', endline[endpos:]) and endlinenum < - (len(clean_lines.elided) - 1) and - Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): - while (endlinenum < len(clean_lines.elided) and - ';' not in clean_lines.elided[endlinenum][endpos:]): - endlinenum += 1 - endpos = 0 - if endlinenum < len(clean_lines.elided): - endline = clean_lines.elided[endlinenum] - # We allow a mix of whitespace and closing braces (e.g. for one-liner - # methods) and a single \ after the semicolon (for macros) - endpos = endline.find(';') - if not Match(r';[\s}]*(\\?)$', endline[endpos:]): - # Semicolon isn't the last character, there's something trailing. - # Output a warning if the semicolon is not contained inside - # a lambda expression. - if not Match( - r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', - endline): - error( - filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces' - ) - elif endlinenum < len(clean_lines.elided) - 1: - # Make sure the next line is dedented - next_line = clean_lines.elided[endlinenum + 1] - next_indent = GetIndentLevel(next_line) - # With ambiguous nested if statements, this will error out on the - # if that *doesn't* match the else, regardless of whether it's the - # inner one or outer one. - if (if_match and Match(r'\s*else\b', next_line) and - next_indent != if_indent): - error( - filename, linenum, 'readability/braces', 4, - 'Else clause should be indented at the same level as if. ' - 'Ambiguous nested if/else chains require braces.') - elif next_indent > if_indent: - error( - filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces' - ) - - -def CheckTrailingSemicolon(filename, clean_lines, linenum, error): - """Looks for redundant trailing semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] - - # Block bodies should not be followed by a semicolon. Due to C++11 - # brace initialization, there are more places where semicolons are - # required than not, so we use a whitelist approach to check these - # rather than a blacklist. These are the places where "};" should - # be replaced by just "}": - # 1. Some flavor of block following closing parenthesis: - # for (;;) {}; - # while (...) {}; - # switch (...) {}; - # Function(...) {}; - # if (...) {}; - # if (...) else if (...) {}; - # - # 2. else block: - # if (...) else {}; - # - # 3. const member function: - # Function(...) const {}; - # - # 4. Block following some statement: - # x = 42; - # {}; - # - # 5. Block at the beginning of a function: - # Function(...) { - # {}; - # } - # - # Note that naively checking for the preceding "{" will also match - # braces inside multi-dimensional arrays, but this is fine since - # that expression will not contain semicolons. - # - # 6. Block following another block: - # while (true) {} - # {}; - # - # 7. End of namespaces: - # namespace {}; - # - # These semicolons seems far more common than other kinds of - # redundant semicolons, possibly due to people converting classes - # to namespaces. For now we do not warn for this case. - # - # Try matching case 1 first. - match = Match(r'^(.*\)\s*)\{', line) - if match: - # Matched closing parenthesis (case 1). Check the token before the - # matching opening parenthesis, and don't warn if it looks like a - # macro. This avoids these false positives: - # - macro that defines a base class - # - multi-line macro that defines a base class - # - macro that defines the whole class-head - # - # But we still issue warnings for macros that we know are safe to - # warn, specifically: - # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P - # - TYPED_TEST - # - INTERFACE_DEF - # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: - # - # We implement a whitelist of safe macros instead of a blacklist of - # unsafe macros, even though the latter appears less frequently in - # google code and would have been easier to implement. This is because - # the downside for getting the whitelist wrong means some extra - # semicolons, while the downside for getting the blacklist wrong - # would result in compile errors. - # - # In addition to macros, we also don't want to warn on - # - Compound literals - # - Lambdas - # - alignas specifier with anonymous structs: - closing_brace_pos = match.group(1).rfind(')') - opening_parenthesis = ReverseCloseExpression(clean_lines, linenum, - closing_brace_pos) - if opening_parenthesis[2] > -1: - line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] - macro = Search(r'\b([A-Z_]+)\s*$', line_prefix) - func = Match(r'^(.*\])\s*$', line_prefix) - if ((macro and macro.group(1) not in - ('TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', - 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', - 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or - (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or - Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or - Search(r'\s+=\s*$', line_prefix)): - match = None - if (match and opening_parenthesis[1] > 1 and Search( - r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): - # Multi-line lambda-expression - match = None - - else: - # Try matching cases 2-3. - match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) - if not match: - # Try matching cases 4-6. These are always matched on separate lines. - # - # Note that we can't simply concatenate the previous line to the - # current line and do a single match, otherwise we may output - # duplicate warnings for the blank line case: - # if (cond) { - # // blank line - # } - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and Search(r'[;{}]\s*$', prevline): - match = Match(r'^(\s*)\{', line) - - # Check matching closing brace - if match: - (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, - len(match.group(1))) - if endpos > -1 and Match(r'^\s*;', endline[endpos:]): - # Current {} pair is eligible for semicolon check, and we have found - # the redundant semicolon, output warning here. - # - # Note: because we are scanning forward for opening braces, and - # outputting warnings for the matching closing brace, if there are - # nested blocks with trailing semicolons, we will get the error - # messages in reversed order. - error(filename, endlinenum, 'readability/braces', 4, - "You don't need a ; after a }") - - -def CheckEmptyBlockBody(filename, clean_lines, linenum, error): - """Look for empty loop/conditional body with only a single semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Search for loop keywords at the beginning of the line. Because only - # whitespaces are allowed before the keywords, this will also ignore most - # do-while-loops, since those lines should start with closing brace. - # - # We also check "if" blocks here, since an empty conditional block - # is likely an error. - line = clean_lines.elided[linenum] - matched = Match(r'\s*(for|while|if)\s*\(', line) - if matched: - # Find the end of the conditional expression - (end_line, end_linenum, end_pos) = CloseExpression(clean_lines, linenum, - line.find('(')) - - # Output warning if what follows the condition expression is a semicolon. - # No warning for all other cases, including whitespace or newline, since we - # have a separate check for semicolons preceded by whitespace. - if end_pos >= 0 and Match(r';', end_line[end_pos:]): - if matched.group(1) == 'if': - error(filename, end_linenum, - 'whitespace/empty_conditional_body', 5, - 'Empty conditional bodies should use {}') - else: - error(filename, end_linenum, 'whitespace/empty_loop_body', 5, - 'Empty loop bodies should use {} or continue') - - -def FindCheckMacro(line): - """Find a replaceable CHECK-like macro. - - Args: - line: line to search on. - Returns: - (macro name, start position), or (None, -1) if no replaceable - macro is found. - """ - for macro in _CHECK_MACROS: - i = line.find(macro) - if i >= 0: - # Find opening parenthesis. Do a regular expression match here - # to make sure that we are matching the expected CHECK macro, as - # opposed to some other macro that happens to contain the CHECK - # substring. - matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) - if not matched: - continue - return (macro, len(matched.group(1))) - return (None, -1) - - -def CheckCheck(filename, clean_lines, linenum, error): - """Checks the use of CHECK and EXPECT macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Decide the set of replacement macros that should be suggested - lines = clean_lines.elided - (check_macro, start_pos) = FindCheckMacro(lines[linenum]) - if not check_macro: - return - - # Find end of the boolean expression by matching parentheses - (last_line, end_line, end_pos) = CloseExpression(clean_lines, linenum, - start_pos) - if end_pos < 0: - return - - # If the check macro is followed by something other than a - # semicolon, assume users will log their own custom error messages - # and don't suggest any replacements. - if not Match(r'\s*;', last_line[end_pos:]): - return - - if linenum == end_line: - expression = lines[linenum][start_pos + 1:end_pos - 1] - else: - expression = lines[linenum][start_pos + 1:] - for i in xrange(linenum + 1, end_line): - expression += lines[i] - expression += last_line[0:end_pos - 1] - - # Parse expression so that we can take parentheses into account. - # This avoids false positives for inputs like "CHECK((a < 4) == b)", - # which is not replaceable by CHECK_LE. - lhs = '' - rhs = '' - operator = None - while expression: - matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' - r'==|!=|>=|>|<=|<|\()(.*)$', expression) - if matched: - token = matched.group(1) - if token == '(': - # Parenthesized operand - expression = matched.group(2) - (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) - if end < 0: - return # Unmatched parenthesis - lhs += '(' + expression[0:end] - expression = expression[end:] - elif token in ('&&', '||'): - # Logical and/or operators. This means the expression - # contains more than one term, for example: - # CHECK(42 < a && a < b); - # - # These are not replaceable with CHECK_LE, so bail out early. - return - elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): - # Non-relational operator - lhs += token - expression = matched.group(2) - else: - # Relational operator - operator = token - rhs = matched.group(2) - break - else: - # Unparenthesized operand. Instead of appending to lhs one character - # at a time, we do another regular expression match to consume several - # characters at once if possible. Trivial benchmark shows that this - # is more efficient when the operands are longer than a single - # character, which is generally the case. - matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) - if not matched: - matched = Match(r'^(\s*\S)(.*)$', expression) - if not matched: - break - lhs += matched.group(1) - expression = matched.group(2) - - # Only apply checks if we got all parts of the boolean expression - if not (lhs and operator and rhs): - return - - # Check that rhs do not contain logical operators. We already know - # that lhs is fine since the loop above parses out && and ||. - if rhs.find('&&') > -1 or rhs.find('||') > -1: - return - - # At least one of the operands must be a constant literal. This is - # to avoid suggesting replacements for unprintable things like - # CHECK(variable != iterator) - # - # The following pattern matches decimal, hex integers, strings, and - # characters (in that order). - lhs = lhs.strip() - rhs = rhs.strip() - match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' - if Match(match_constant, lhs) or Match(match_constant, rhs): - # Note: since we know both lhs and rhs, we can provide a more - # descriptive error message like: - # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) - # Instead of: - # Consider using CHECK_EQ instead of CHECK(a == b) - # - # We are still keeping the less descriptive message because if lhs - # or rhs gets long, the error message might become unreadable. - error(filename, linenum, 'readability/check', 2, - 'Consider using %s instead of %s(a %s b)' % - (_CHECK_REPLACEMENT[check_macro][operator], check_macro, - operator)) - - -def CheckAltTokens(filename, clean_lines, linenum, error): - """Check alternative keywords being used in boolean expressions. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Avoid preprocessor lines - if Match(r'^\s*#', line): - return - - # Last ditch effort to avoid multi-line comments. This will not help - # if the comment started before the current line or ended after the - # current line, but it catches most of the false positives. At least, - # it provides a way to workaround this warning for people who use - # multi-line comments in preprocessor macros. - # - # TODO(unknown): remove this once cpplint has better support for - # multi-line comments. - if line.find('/*') >= 0 or line.find('*/') >= 0: - return - - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - error(filename, linenum, 'readability/alt_tokens', 2, - 'Use operator %s instead of %s' % ( - _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) - - -def GetLineWidth(line): - """Determines the width of the line in column positions. - - Args: - line: A string, which may be a Unicode string. - - Returns: - The width of the line in column positions, accounting for Unicode - combining characters and wide characters. - """ - if isinstance(line, unicode): - width = 0 - for uc in unicodedata.normalize('NFC', line): - if unicodedata.east_asian_width(uc) in ('W', 'F'): - width += 2 - elif not unicodedata.combining(uc): - width += 1 - return width - else: - return len(line) - - -def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, - error): - """Checks rules from the 'C++ style rules' section of cppguide.html. - - Most of these rules are hard to test (naming, comment style), but we - do what we can. In particular we check for 2-space indents, line lengths, - tab usage, spaces inside code, etc. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw_lines = clean_lines.lines_without_raw_strings - line = raw_lines[linenum] - - if line.find('\t') != -1: - error(filename, linenum, 'whitespace/tab', 1, - 'Tab found; better to use spaces') - - # One or three blank spaces at the beginning of the line is weird; it's - # hard to reconcile that with 2-space indents. - # NOTE: here are the conditions rob pike used for his tests. Mine aren't - # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces - # if(RLENGTH > 20) complain = 0; - # if(match($0, " +(error|private|public|protected):")) complain = 0; - # if(match(prev, "&& *$")) complain = 0; - # if(match(prev, "\\|\\| *$")) complain = 0; - # if(match(prev, "[\",=><] *$")) complain = 0; - # if(match($0, " <<")) complain = 0; - # if(match(prev, " +for \\(")) complain = 0; - # if(prevodd && match(prevprev, " +for \\(")) complain = 0; - scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$' - classinfo = nesting_state.InnermostClass() - initial_spaces = 0 - cleansed_line = clean_lines.elided[linenum] - while initial_spaces < len(line) and line[initial_spaces] == ' ': - initial_spaces += 1 - if line and line[-1].isspace(): - error(filename, linenum, 'whitespace/end_of_line', 4, - 'Line ends in whitespace. Consider deleting these extra spaces.') - # There are certain situations we allow one space, notably for - # section labels, and also lines containing multi-line raw strings. - elif ((initial_spaces == 1 or initial_spaces == 3) and - not Match(scope_or_label_pattern, cleansed_line) and - not (clean_lines.raw_lines[linenum] != line and - Match(r'^\s*""', line))): - error(filename, linenum, 'whitespace/indent', 3, - 'Weird number of spaces at line-start. ' - 'Are you using a 2-space indent?') - - # Check if the line is a header guard. - is_header_guard = False - if file_extension == 'h': - cppvar = GetHeaderGuardCPPVariable(filename) - if (line.startswith('#ifndef %s' % cppvar) or - line.startswith('#define %s' % cppvar) or - line.startswith('#endif // %s' % cppvar)): - is_header_guard = True - # #include lines and header guards can be long, since there's no clean way to - # split them. - # - # URLs can be long too. It's possible to split these, but it makes them - # harder to cut&paste. - # - # The "$Id:...$" comment may also get very long without it being the - # developers fault. - if (not line.startswith('#include') and not is_header_guard and - not Match(r'^\s*//.*http(s?)://\S*$', line) and - not Match(r'^// \$Id:.*#[0-9]+ \$$', line)): - line_width = GetLineWidth(line) - extended_length = int((_line_length * 1.25)) - if line_width > extended_length: - error(filename, linenum, 'whitespace/line_length', 4, - 'Lines should very rarely be longer than %i characters' % - extended_length) - elif line_width > _line_length: - error(filename, linenum, 'whitespace/line_length', 2, - 'Lines should be <= %i characters long' % _line_length) - - if (cleansed_line.count(';') > 1 and - # for loops are allowed two ;'s (and may run over two lines). - cleansed_line.find('for') == -1 and - (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or - GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and - # It's ok to have many commands in a switch case that fits in 1 line - not ((cleansed_line.find('case ') != -1 or - cleansed_line.find('default:') != -1) and - cleansed_line.find('break;') != -1)): - error(filename, linenum, 'whitespace/newline', 0, - 'More than one command on the same line') - - # Some more style checks - CheckBraces(filename, clean_lines, linenum, error) - CheckTrailingSemicolon(filename, clean_lines, linenum, error) - CheckEmptyBlockBody(filename, clean_lines, linenum, error) - CheckAccess(filename, clean_lines, linenum, nesting_state, error) - CheckSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckOperatorSpacing(filename, clean_lines, linenum, error) - CheckParenthesisSpacing(filename, clean_lines, linenum, error) - CheckCommaSpacing(filename, clean_lines, linenum, error) - CheckBracesSpacing(filename, clean_lines, linenum, error) - CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) - CheckRValueReference(filename, clean_lines, linenum, nesting_state, error) - CheckCheck(filename, clean_lines, linenum, error) - CheckAltTokens(filename, clean_lines, linenum, error) - classinfo = nesting_state.InnermostClass() - if classinfo: - CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) - - -_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') -# Matches the first component of a filename delimited by -s and _s. That is: -# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') - - -def _DropCommonSuffixes(filename): - """Drops common suffixes like _test.cc or -inl.h from filename. - - For example: - >>> _DropCommonSuffixes('foo/foo-inl.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/bar/foo.cc') - 'foo/bar/foo' - >>> _DropCommonSuffixes('foo/foo_internal.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') - 'foo/foo_unusualinternal' - - Args: - filename: The input filename. - - Returns: - The filename with the common suffix removed. - """ - for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', 'inl.h', 'impl.h', - 'internal.h'): - if (filename.endswith(suffix) and len(filename) > len(suffix) and - filename[-len(suffix) - 1] in ('-', '_')): - return filename[:-len(suffix) - 1] - return os.path.splitext(filename)[0] - - -def _IsTestFilename(filename): - """Determines if the given filename has a suffix that identifies it as a test. - - Args: - filename: The input filename. - - Returns: - True if 'filename' looks like a test, False otherwise. - """ - if (filename.endswith('_test.cc') or filename.endswith('_unittest.cc') or - filename.endswith('_regtest.cc')): - return True - else: - return False - - -def _ClassifyInclude(fileinfo, include, is_system): - """Figures out what kind of header 'include' is. - - Args: - fileinfo: The current file cpplint is running over. A FileInfo instance. - include: The path to a #included file. - is_system: True if the #include used <> rather than "". - - Returns: - One of the _XXX_HEADER constants. - - For example: - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) - _C_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) - _CPP_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) - _LIKELY_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), - ... 'bar/foo_other_ext.h', False) - _POSSIBLE_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) - _OTHER_HEADER - """ - # This is a list of all standard c++ header files, except - # those already checked for above. - is_cpp_h = include in _CPP_HEADERS - - if is_system: - if is_cpp_h: - return _CPP_SYS_HEADER - else: - return _C_SYS_HEADER - - # If the target file and the include we're checking share a - # basename when we drop common extensions, and the include - # lives in . , then it's likely to be owned by the target file. - target_dir, target_base = ( - os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) - include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) - if target_base == include_base and ( - include_dir == target_dir or - include_dir == os.path.normpath(target_dir + '/../public')): - return _LIKELY_MY_HEADER - - # If the target and include share some initial basename - # component, it's possible the target is implementing the - # include, so it's allowed to be first, but we'll never - # complain if it's not there. - target_first_component = _RE_FIRST_COMPONENT.match(target_base) - include_first_component = _RE_FIRST_COMPONENT.match(include_base) - if (target_first_component and include_first_component and - target_first_component.group(0) == - include_first_component.group(0)): - return _POSSIBLE_MY_HEADER - - return _OTHER_HEADER - - -def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): - """Check rules that are applicable to #include lines. - - Strings on #include lines are NOT removed from elided line, to make - certain tasks easier. However, to prevent false positives, checks - applicable to #include lines in CheckLanguage must be put here. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - include_state: An _IncludeState instance in which the headers are inserted. - error: The function to call with any errors found. - """ - fileinfo = FileInfo(filename) - line = clean_lines.lines[linenum] - - # "include" should use the new style "foo/bar.h" instead of just "bar.h" - # Only do this check if the included header follows google naming - # conventions. If not, assume that it's a 3rd party API that - # requires special include conventions. - # - # We also make an exception for Lua headers, which follow google - # naming convention but not the include convention. - match = Match(r'#include\s*"([^/]+\.h)"', line) - if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): - error(filename, linenum, 'build/include', 4, - 'Include the directory when naming .h files') - - # we shouldn't include a file more than once. actually, there are a - # handful of instances where doing so is okay, but in general it's - # not. - match = _RE_PATTERN_INCLUDE.search(line) - if match: - include = match.group(2) - is_system = (match.group(1) == '<') - duplicate_line = include_state.FindHeader(include) - if duplicate_line >= 0: - error(filename, linenum, 'build/include', 4, - '"%s" already included at %s:%s' % - (include, filename, duplicate_line)) - elif (include.endswith('.cc') and - os.path.dirname(fileinfo.RepositoryName()) != - os.path.dirname(include)): - error(filename, linenum, 'build/include', 4, - 'Do not include .cc files from other packages') - elif not _THIRD_PARTY_HEADERS_PATTERN.match(include): - include_state.include_list[-1].append((include, linenum)) - - # We want to ensure that headers appear in the right order: - # 1) for foo.cc, foo.h (preferred location) - # 2) c system files - # 3) cpp system files - # 4) for foo.cc, foo.h (deprecated location) - # 5) other google headers - # - # We classify each include statement as one of those 5 types - # using a number of techniques. The include_state object keeps - # track of the highest type seen, and complains if we see a - # lower type after that. - error_message = include_state.CheckNextIncludeOrder( - _ClassifyInclude(fileinfo, include, is_system)) - if error_message: - error(filename, linenum, 'build/include_order', 4, - '%s. Should be: %s.h, c system, c++ system, other.' % - (error_message, fileinfo.BaseName())) - canonical_include = include_state.CanonicalizeAlphabeticalOrder( - include) - if not include_state.IsInAlphabeticalOrder(clean_lines, linenum, - canonical_include): - error(filename, linenum, 'build/include_alpha', 4, - 'Include "%s" not in alphabetical order' % include) - include_state.SetLastHeader(canonical_include) - - -def _GetTextInside(text, start_pattern): - r"""Retrieves all the text between matching open and close parentheses. - - Given a string of lines and a regular expression string, retrieve all the text - following the expression and between opening punctuation symbols like - (, [, or {, and the matching close-punctuation symbol. This properly nested - occurrences of the punctuations, so for the text like - printf(a(), b(c())); - a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. - start_pattern must match string having an open punctuation symbol at the end. - - Args: - text: The lines to extract text. Its comments and strings must be elided. - It can be single line and can span multiple lines. - start_pattern: The regexp string indicating where to start extracting - the text. - Returns: - The extracted text. - None if either the opening string or ending punctuation could not be found. - """ - # TODO(unknown): Audit cpplint.py to see what places could be profitably - # rewritten to use _GetTextInside (and use inferior regexp matching today). - - # Give opening punctuations to get the matching close-punctuations. - matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(matching_punctuation.itervalues()) - - # Find the position to start extracting text. - match = re.search(start_pattern, text, re.M) - if not match: # start_pattern not found in text. - return None - start_position = match.end(0) - - assert start_position > 0, ( - 'start_pattern must ends with an opening punctuation.') - assert text[start_position - 1] in matching_punctuation, ( - 'start_pattern must ends with an opening punctuation.') - # Stack of closing punctuations we expect to have in text after position. - punctuation_stack = [matching_punctuation[text[start_position - 1]]] - position = start_position - while punctuation_stack and position < len(text): - if text[position] == punctuation_stack[-1]: - punctuation_stack.pop() - elif text[position] in closing_punctuation: - # A closing punctuation without matching opening punctuations. - return None - elif text[position] in matching_punctuation: - punctuation_stack.append(matching_punctuation[text[position]]) - position += 1 - if punctuation_stack: - # Opening punctuations left without matching close-punctuations. - return None - # punctuations match. - return text[start_position:position - 1] - - -# Patterns for matching call-by-reference parameters. -# -# Supports nested templates up to 2 levels deep using this messy pattern: -# < (?: < (?: < [^<>]* -# > -# | [^<>] )* -# > -# | [^<>] )* -# > -_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* -_RE_PATTERN_TYPE = ( - r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' - r'(?:\w|' - r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' - r'::)+') -# A call-by-reference parameter ends with '& identifier'. -_RE_PATTERN_REF_PARAM = re.compile( - r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' - r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') -# A call-by-const-reference parameter either ends with 'const& identifier' -# or looks like 'const type& identifier' when 'type' is atomic. -_RE_PATTERN_CONST_REF_PARAM = ( - r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + r'|const\s+' + - _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') - - -def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state, - nesting_state, error): - """Checks rules from the 'C++ language rules' section of cppguide.html. - - Some of these rules are hard to test (function overloading, using - uint32 inappropriately), but we do the best we can. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - include_state: An _IncludeState instance in which the headers are inserted. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # If the line is empty or consists of entirely a comment, no need to - # check it. - line = clean_lines.elided[linenum] - if not line: - return - - match = _RE_PATTERN_INCLUDE.search(line) - if match: - CheckIncludeLine(filename, clean_lines, linenum, include_state, error) - return - - # Reset include state across preprocessor directives. This is meant - # to silence warnings for conditional includes. - match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) - if match: - include_state.ResetSection(match.group(1)) - - # Make Windows paths like Unix. - fullname = os.path.abspath(filename).replace('\\', '/') - - # Perform other checks now that we are sure that this is not an include line - CheckCasts(filename, clean_lines, linenum, error) - CheckGlobalStatic(filename, clean_lines, linenum, error) - CheckPrintf(filename, clean_lines, linenum, error) - - if file_extension == 'h': - # TODO(unknown): check that 1-arg constructors are explicit. - # How to tell it's a constructor? - # (handled in CheckForNonStandardConstructs for now) - # TODO(unknown): check that classes declare or disable copy/assign - # (level 1 error) - pass - - # Check if people are using the verboten C basic types. The only exception - # we regularly allow is "unsigned short port" for port. - if Search(r'\bshort port\b', line): - if not Search(r'\bunsigned short port\b', line): - error(filename, linenum, 'runtime/int', 4, - 'Use "unsigned short" for ports, not "short"') - else: - match = Search(r'\b(short|long(?! +double)|long long)\b', line) - if match: - error(filename, linenum, 'runtime/int', 4, - 'Use int16/int64/etc, rather than the C type %s' % - match.group(1)) - - # Check if some verboten operator overloading is going on - # TODO(unknown): catch out-of-line unary operator&: - # class X {}; - # int operator&(const X& x) { return 42; } // unary operator& - # The trick is it's hard to tell apart from binary operator&: - # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if Search(r'\boperator\s*&\s*\(\s*\)', line): - error(filename, linenum, 'runtime/operator', 4, - 'Unary operator& is dangerous. Do not use it.') - - # Check for suspicious usage of "if" like - # } if (a == b) { - if Search(r'\}\s*if\s*\(', line): - error(filename, linenum, 'readability/braces', 4, - 'Did you mean "else if"? If not, start a new line for "if".') - - # Check for potential format string bugs like printf(foo). - # We constrain the pattern not to pick things like DocidForPrintf(foo). - # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) - # TODO(unknown): Catch the following case. Need to change the calling - # convention of the whole function to process multiple line to handle it. - # printf( - # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); - printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') - if printf_args: - match = Match(r'([\w.\->()]+)$', printf_args) - if match and match.group(1) != '__VA_ARGS__': - function_name = re.search(r'\b((?:string)?printf)\s*\(', line, - re.I).group(1) - error(filename, linenum, 'runtime/printf', 4, - 'Potential format string bug. Do %s("%%s", %s) instead.' % - (function_name, match.group(1))) - - # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) - if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): - error(filename, linenum, 'runtime/memset', 4, - 'Did you mean "memset(%s, 0, %s)"?' % - (match.group(1), match.group(2))) - - if Search(r'\busing namespace\b', line): - error(filename, linenum, 'build/namespaces', 5, - 'Do not use namespace using-directives. ' - 'Use using-declarations instead.') - - # Detect variable-length arrays. - match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) - if (match and match.group(2) != 'return' and match.group(2) != 'delete' and - match.group(3).find(']') == -1): - # Split the size using space and arithmetic operators as delimiters. - # If any of the resulting tokens are not compile time constants then - # report the error. - tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) - is_const = True - skip_next = False - for tok in tokens: - if skip_next: - skip_next = False - continue - - if Search(r'sizeof\(.+\)', tok): continue - if Search(r'arraysize\(\w+\)', tok): continue - - tok = tok.lstrip('(') - tok = tok.rstrip(')') - if not tok: continue - if Match(r'\d+', tok): continue - if Match(r'0[xX][0-9a-fA-F]+', tok): continue - if Match(r'k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue - # A catch all for tricky sizeof cases, including 'sizeof expression', - # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' - # requires skipping the next token because we split on ' ' and '*'. - if tok.startswith('sizeof'): - skip_next = True - continue - is_const = False - break - if not is_const: - error( - filename, linenum, 'runtime/arrays', 1, - 'Do not use variable-length arrays. Use an appropriately named ' - "('k' followed by CamelCase) compile-time constant for the size." - ) - - # Check for use of unnamed namespaces in header files. Registration - # macros are typically OK, so we allow use of "namespace {" on lines - # that end with backslashes. - if (file_extension == 'h' and Search(r'\bnamespace\s*{', line) and - line[-1] != '\\'): - error( - filename, linenum, 'build/namespaces', 4, - 'Do not use unnamed namespaces in header files. See ' - 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' - ' for more information.') - - -def CheckGlobalStatic(filename, clean_lines, linenum, error): - """Check for unsafe global or static objects. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Match two lines at a time to support multiline declarations - if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): - line += clean_lines.elided[linenum + 1].strip() - - # Check for people declaring static/global STL strings at the top level. - # This is dangerous because the C++ language does not guarantee that - # globals with constructors are initialized before the first access. - match = Match(r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)', - line) - - # Remove false positives: - # - String pointers (as opposed to values). - # string *pointer - # const string *pointer - # string const *pointer - # string *const pointer - # - # - Functions and template specializations. - # string Function(... - # string Class::Method(... - # - # - Operators. These are matched separately because operator names - # cross non-word boundaries, and trying to match both operators - # and functions at the same time would decrease accuracy of - # matching identifiers. - # string Class::operator*() - if (match and - not Search(r'\bstring\b(\s+const)?\s*\*\s*(const\s+)?\w', line) and - not Search(r'\boperator\W', line) and not Match( - r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(3))): - error( - filename, linenum, 'runtime/string', 4, - 'For a static/global string constant, use a C style string instead: ' - '"%schar %s[]".' % (match.group(1), match.group(2))) - - if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line): - error(filename, linenum, 'runtime/init', 4, - 'You seem to be initializing a member variable with itself.') - - -def CheckPrintf(filename, clean_lines, linenum, error): - """Check for printf related issues. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # When snprintf is used, the second argument shouldn't be a literal. - match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) - if match and match.group(2) != '0': - # If 2nd arg is zero, snprintf is used to calculate size. - error(filename, linenum, 'runtime/printf', 3, - 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' - 'to snprintf.' % (match.group(1), match.group(2))) - - # Check if some verboten C functions are being used. - if Search(r'\bsprintf\s*\(', line): - error(filename, linenum, 'runtime/printf', 5, - 'Never use sprintf. Use snprintf instead.') - match = Search(r'\b(strcpy|strcat)\s*\(', line) - if match: - error(filename, linenum, 'runtime/printf', 4, - 'Almost always, snprintf is better than %s' % match.group(1)) - - -def IsDerivedFunction(clean_lines, linenum): - """Check if current line contains an inherited function. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains a function with "override" - virt-specifier. - """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) - if match: - # Look for "override" after the matching closing parenthesis - line, _, closing_paren = CloseExpression(clean_lines, i, - len(match.group(1))) - return (closing_paren >= 0 and - Search(r'\boverride\b', line[closing_paren:])) - return False - - -def IsOutOfLineMethodDefinition(clean_lines, linenum): - """Check if current line contains an out-of-line method definition. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains an out-of-line method definition. - """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): - return Match(r'^[^()]*\w+::\w+\(', - clean_lines.elided[i]) is not None - return False - - -def IsInitializerList(clean_lines, linenum): - """Check if current line is inside constructor initializer list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line appears to be inside constructor initializer - list, False otherwise. - """ - for i in xrange(linenum, 1, -1): - line = clean_lines.elided[i] - if i == linenum: - remove_function_body = Match(r'^(.*)\{\s*$', line) - if remove_function_body: - line = remove_function_body.group(1) - - if Search(r'\s:\s*\w+[({]', line): - # A lone colon tend to indicate the start of a constructor - # initializer list. It could also be a ternary operator, which - # also tend to appear in constructor initializer lists as - # opposed to parameter lists. - return True - if Search(r'\}\s*,\s*$', line): - # A closing brace followed by a comma is probably the end of a - # brace-initialized member in constructor initializer list. - return True - if Search(r'[{};]\s*$', line): - # Found one of the following: - # - A closing brace or semicolon, probably the end of the previous - # function. - # - An opening brace, probably the start of current class or namespace. - # - # Current line is probably not inside an initializer list since - # we saw one of those things without seeing the starting colon. - return False - - # Got to the beginning of the file without seeing the start of - # constructor initializer list. - return False - - -def CheckForNonConstReference(filename, clean_lines, linenum, nesting_state, - error): - """Check for non-const references. - - Separate from CheckLanguage since it scans backwards from current - line, instead of scanning forward. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Do nothing if there is no '&' on current line. - line = clean_lines.elided[linenum] - if '&' not in line: - return - - # If a function is inherited, current function doesn't have much of - # a choice, so any non-const references should not be blamed on - # derived function. - if IsDerivedFunction(clean_lines, linenum): - return - - # Don't warn on out-of-line method definitions, as we would warn on the - # in-line declaration, if it isn't marked with 'override'. - if IsOutOfLineMethodDefinition(clean_lines, linenum): - return - - # Long type names may be broken across multiple lines, usually in one - # of these forms: - # LongType - # ::LongTypeContinued &identifier - # LongType:: - # LongTypeContinued &identifier - # LongType< - # ...>::LongTypeContinued &identifier - # - # If we detected a type split across two lines, join the previous - # line to current line so that we can match const references - # accordingly. - # - # Note that this only scans back one line, since scanning back - # arbitrary number of lines would be expensive. If you have a type - # that spans more than 2 lines, please use a typedef. - if linenum > 1: - previous = None - if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): - # previous_line\n + ::current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', - clean_lines.elided[linenum - 1]) - elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): - # previous_line::\n + current_line - previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', - clean_lines.elided[linenum - 1]) - if previous: - line = previous.group(1) + line.lstrip() - else: - # Check for templated parameter that is split across multiple lines - endpos = line.rfind('>') - if endpos > -1: - (_, startline, startpos) = ReverseCloseExpression( - clean_lines, linenum, endpos) - if startpos > -1 and startline < linenum: - # Found the matching < on an earlier line, collect all - # pieces up to current line. - line = '' - for i in xrange(startline, linenum + 1): - line += clean_lines.elided[i].strip() - - # Check for non-const references in function parameters. A single '&' may - # found in the following places: - # inside expression: binary & for bitwise AND - # inside expression: unary & for taking the address of something - # inside declarators: reference parameter - # We will exclude the first two cases by checking that we are not inside a - # function body, including one that was just introduced by a trailing '{'. - # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. - if (nesting_state.previous_stack_top and - not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or - isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): - # Not at toplevel, not within a class, and not within a namespace - return - - # Avoid initializer lists. We only need to scan back from the - # current line for something that starts with ':'. - # - # We don't need to check the current line, since the '&' would - # appear inside the second set of parentheses on the current line as - # opposed to the first set. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 10), -1): - previous_line = clean_lines.elided[i] - if not Search(r'[),]\s*$', previous_line): - break - if Match(r'^\s*:\s+\S', previous_line): - return - - # Avoid preprocessors - if Search(r'\\\s*$', line): - return - - # Avoid constructor initializer lists - if IsInitializerList(clean_lines, linenum): - return - - # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". Do not check - # those function parameters. - # - # We also accept & in static_assert, which looks like a function but - # it's actually a declaration expression. - whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|' - r'operator\s*[<>][<>]|' - r'static_assert|COMPILE_ASSERT' - r')\s*\(') - if Search(whitelisted_functions, line): - return - elif not Search(r'\S+\([^)]*$', line): - # Don't see a whitelisted function on this line. Actually we - # didn't see any function name on this line, so this is likely a - # multi-line parameter list. Try a bit harder to catch this case. - for i in xrange(2): - if (linenum > i and Search(whitelisted_functions, - clean_lines.elided[linenum - i - 1])): - return - - decls = ReplaceAll(r'{[^}]*}', ' ', line) # exclude function body - for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): - if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter): - error(filename, linenum, 'runtime/references', 2, - 'Is this a non-const reference? ' - 'If so, make const or use a pointer: ' + ReplaceAll( - ' *<', '<', parameter)) - - -def CheckCasts(filename, clean_lines, linenum, error): - """Various cast related checks. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Check to see if they're using an conversion function cast. - # I just try to capture the most common basic types, though there are more. - # Parameterless conversion functions, such as bool(), are allowed as they are - # probably a member operator declaration or default constructor. - match = Search(r'(\bnew\s+|\S<\s*(?:const\s+)?)?\b' - r'(int|float|double|bool|char|int32|uint32|int64|uint64)' - r'(\([^)].*)', line) - expecting_function = ExpectingFunctionArgs(clean_lines, linenum) - if match and not expecting_function: - matched_type = match.group(2) - - # matched_new_or_template is used to silence two false positives: - # - New operators - # - Template arguments with function types - # - # For template arguments, we match on types immediately following - # an opening bracket without any spaces. This is a fast way to - # silence the common case where the function type is the first - # template argument. False negative with less-than comparison is - # avoided because those operators are usually followed by a space. - # - # function // bracket + no space = false positive - # value < double(42) // bracket + space = true positive - matched_new_or_template = match.group(1) - - # Avoid arrays by looking for brackets that come after the closing - # parenthesis. - if Match(r'\([^()]+\)\s*\[', match.group(3)): - return - - # Other things to ignore: - # - Function pointers - # - Casts to pointer types - # - Placement new - # - Alias declarations - matched_funcptr = match.group(3) - if (matched_new_or_template is None and not (matched_funcptr and (Match( - r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', - matched_funcptr) or matched_funcptr.startswith('(*)'))) and - not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and - not Search(r'new\(\S+\)\s*' + matched_type, line)): - error(filename, linenum, 'readability/casting', 4, - 'Using deprecated casting style. ' - 'Use static_cast<%s>(...) instead' % matched_type) - - if not expecting_function: - CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', - r'\((int|float|double|bool|char|u?int(16|32|64))\)', - error) - - # This doesn't catch all cases. Consider (const char * const)"hello". - # - # (char *) "foo" should always be a const_cast (reinterpret_cast won't - # compile). - if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', - r'\((char\s?\*+\s?)\)\s*"', error): - pass - else: - # Check pointer casts for other than string constants - CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', - r'\((\w+\s?\*+\s?)\)', error) - - # In addition, we look for people taking the address of a cast. This - # is dangerous -- casts can assign to temporaries, so the pointer doesn't - # point where you think. - # - # Some non-identifier character is required before the '&' for the - # expression to be recognized as a cast. These are casts: - # expression = &static_cast(temporary()); - # function(&(int*)(temporary())); - # - # This is not a cast: - # reference_type&(int* function_param); - match = Search(r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' - r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) - if match: - # Try a better error message when the & is bound to something - # dereferenced by the casted pointer, as opposed to the casted - # pointer itself. - parenthesis_error = False - match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', - line) - if match: - _, y1, x1 = CloseExpression(clean_lines, linenum, - len(match.group(1))) - if x1 >= 0 and clean_lines.elided[y1][x1] == '(': - _, y2, x2 = CloseExpression(clean_lines, y1, x1) - if x2 >= 0: - extended_line = clean_lines.elided[y2][x2:] - if y2 < clean_lines.NumLines() - 1: - extended_line += clean_lines.elided[y2 + 1] - if Match(r'\s*(?:->|\[)', extended_line): - parenthesis_error = True - - if parenthesis_error: - error(filename, linenum, 'readability/casting', 4, - ('Are you taking an address of something dereferenced ' - 'from a cast? Wrapping the dereferenced expression in ' - 'parentheses will make the binding more obvious')) - else: - error(filename, linenum, 'runtime/casting', 4, - ('Are you taking an address of a cast? ' - 'This is dangerous: could be a temp var. ' - 'Take the address before doing the cast, rather than after')) - - -def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): - """Checks for a C-style cast by looking for the pattern. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - cast_type: The string for the C++ cast to recommend. This is either - reinterpret_cast, static_cast, or const_cast, depending. - pattern: The regular expression used to find C-style casts. - error: The function to call with any errors found. - - Returns: - True if an error was emitted. - False otherwise. - """ - line = clean_lines.elided[linenum] - match = Search(pattern, line) - if not match: - return False - - # Exclude lines with keywords that tend to look like casts - context = line[0:match.start(1) - 1] - if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): - return False - - # Try expanding current context to see if we one level of - # parentheses inside a macro. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 5), -1): - context = clean_lines.elided[i] + context - if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): - return False - - # operator++(int) and operator--(int) - if context.endswith(' operator++') or context.endswith(' operator--'): - return False - - # A single unnamed argument for a function tends to look like old - # style cast. If we see those, don't issue warnings for deprecated - # casts, instead issue warnings for unnamed arguments where - # appropriate. - # - # These are things that we want warnings for, since the style guide - # explicitly require all parameters to be named: - # Function(int); - # Function(int) { - # ConstMember(int) const; - # ConstMember(int) const { - # ExceptionMember(int) throw (...); - # ExceptionMember(int) throw (...) { - # PureVirtual(int) = 0; - # [](int) -> bool { - # - # These are functions of some sort, where the compiler would be fine - # if they had named parameters, but people often omit those - # identifiers to reduce clutter: - # (FunctionPointer)(int); - # (FunctionPointer)(int) = value; - # Function((function_pointer_arg)(int)) - # Function((function_pointer_arg)(int), int param) - # ; - # <(FunctionPointerTemplateArgument)(int)>; - remainder = line[match.end(0):] - if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', - remainder): - # Looks like an unnamed parameter. - - # Don't warn on any kind of template arguments. - if Match(r'^\s*>', remainder): - return False - - # Don't warn on assignments to function pointers, but keep warnings for - # unnamed parameters to pure virtual functions. Note that this pattern - # will also pass on assignments of "0" to function pointers, but the - # preferred values for those would be "nullptr" or "NULL". - matched_zero = Match(r'^\s=\s*(\S+)\s*;', remainder) - if matched_zero and matched_zero.group(1) != '0': - return False - - # Don't warn on function pointer declarations. For this we need - # to check what came before the "(type)" string. - if Match(r'.*\)\s*$', line[0:match.start(0)]): - return False - - # Don't warn if the parameter is named with block comments, e.g.: - # Function(int /*unused_param*/); - raw_line = clean_lines.raw_lines[linenum] - if '/*' in raw_line: - return False - - # Passed all filters, issue warning here. - error(filename, linenum, 'readability/function', 3, - 'All parameters should be named in a function') - return True - - # At this point, all that should be left is actual casts. - error(filename, linenum, 'readability/casting', 4, - 'Using C-style cast. Use %s<%s>(...) instead' % - (cast_type, match.group(1))) - - return True - - -def ExpectingFunctionArgs(clean_lines, linenum): - """Checks whether where function type arguments are expected. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - - Returns: - True if the line at 'linenum' is inside something that expects arguments - of function types. - """ - line = clean_lines.elided[linenum] - return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or - (linenum >= 2 and - (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', - clean_lines.elided[linenum - 1]) or - Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', - clean_lines.elided[linenum - 2]) or - Search(r'\bstd::m?function\s*\<\s*$', - clean_lines.elided[linenum - 1])))) - - -_HEADERS_CONTAINING_TEMPLATES = ( - ('', ('deque', )), - ('', ( - 'unary_function', - 'binary_function', - 'plus', - 'minus', - 'multiplies', - 'divides', - 'modulus', - 'negate', - 'equal_to', - 'not_equal_to', - 'greater', - 'less', - 'greater_equal', - 'less_equal', - 'logical_and', - 'logical_or', - 'logical_not', - 'unary_negate', - 'not1', - 'binary_negate', - 'not2', - 'bind1st', - 'bind2nd', - 'pointer_to_unary_function', - 'pointer_to_binary_function', - 'ptr_fun', - 'mem_fun_t', - 'mem_fun', - 'mem_fun1_t', - 'mem_fun1_ref_t', - 'mem_fun_ref_t', - 'const_mem_fun_t', - 'const_mem_fun1_t', - 'const_mem_fun_ref_t', - 'const_mem_fun1_ref_t', - 'mem_fun_ref', )), - ('', ('numeric_limits', )), - ('', ('list', )), - ('', ( - 'map', - 'multimap', )), - ('', ('allocator', )), - ('', ( - 'queue', - 'priority_queue', )), - ('', ( - 'set', - 'multiset', )), - ('', ('stack', )), - ('', ( - 'char_traits', - 'basic_string', )), - ('', ('tuple', )), - ('', ('pair', )), - ('', ('vector', )), - - # gcc extensions. - # Note: std::hash is their hash, ::hash is our hash - ('', ( - 'hash_map', - 'hash_multimap', )), - ('', ( - 'hash_set', - 'hash_multiset', )), - ('', ('slist', )), ) - -_RE_PATTERN_STRING = re.compile(r'\bstring\b') - -_re_pattern_algorithm_header = [] -for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap', - 'transform'): - # Match max(..., ...), max(..., ...), but not foo->max, foo.max or - # type::max(). - _re_pattern_algorithm_header.append( - (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), _template, - '')) - -_re_pattern_templates = [] -for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: - for _template in _templates: - _re_pattern_templates.append( - (re.compile(r'(\<|\b)' + _template + r'\s*\<'), _template + '<>', - _header)) - - -def FilesBelongToSameModule(filename_cc, filename_h): - """Check if these two filenames belong to the same module. - - The concept of a 'module' here is a as follows: - foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the - same 'module' if they are in the same directory. - some/path/public/xyzzy and some/path/internal/xyzzy are also considered - to belong to the same module here. - - If the filename_cc contains a longer path than the filename_h, for example, - '/absolute/path/to/base/sysinfo.cc', and this file would include - 'base/sysinfo.h', this function also produces the prefix needed to open the - header. This is used by the caller of this function to more robustly open the - header file. We don't have access to the real include paths in this context, - so we need this guesswork here. - - Known bugs: tools/base/bar.cc and base/bar.h belong to the same module - according to this implementation. Because of this, this function gives - some false positives. This should be sufficiently rare in practice. - - Args: - filename_cc: is the path for the .cc file - filename_h: is the path for the header path - - Returns: - Tuple with a bool and a string: - bool: True if filename_cc and filename_h belong to the same module. - string: the additional prefix needed to open the header file. - """ - - if not filename_cc.endswith('.cc'): - return (False, '') - filename_cc = filename_cc[:-len('.cc')] - if filename_cc.endswith('_unittest'): - filename_cc = filename_cc[:-len('_unittest')] - elif filename_cc.endswith('_test'): - filename_cc = filename_cc[:-len('_test')] - filename_cc = filename_cc.replace('/public/', '/') - filename_cc = filename_cc.replace('/internal/', '/') - - if not filename_h.endswith('.h'): - return (False, '') - filename_h = filename_h[:-len('.h')] - if filename_h.endswith('-inl'): - filename_h = filename_h[:-len('-inl')] - filename_h = filename_h.replace('/public/', '/') - filename_h = filename_h.replace('/internal/', '/') - - files_belong_to_same_module = filename_cc.endswith(filename_h) - common_path = '' - if files_belong_to_same_module: - common_path = filename_cc[:-len(filename_h)] - return files_belong_to_same_module, common_path - - -def UpdateIncludeState(filename, include_dict, io=codecs): - """Fill up the include_dict with new includes found from the file. - - Args: - filename: the name of the header to read. - include_dict: a dictionary in which the headers are inserted. - io: The io factory to use to read the file. Provided for testability. - - Returns: - True if a header was successfully added. False otherwise. - """ - headerfile = None - try: - headerfile = io.open(filename, 'r', 'utf8', 'replace') - except IOError: - return False - linenum = 0 - for line in headerfile: - linenum += 1 - clean_line = CleanseComments(line) - match = _RE_PATTERN_INCLUDE.search(clean_line) - if match: - include = match.group(2) - include_dict.setdefault(include, linenum) - return True - - -def CheckForIncludeWhatYouUse(filename, - clean_lines, - include_state, - error, - io=codecs): - """Reports for missing stl includes. - - This function will output warnings to make sure you are including the headers - necessary for the stl containers and functions that you use. We only give one - reason to include a header. For example, if you use both equal_to<> and - less<> in a .h file, only one (the latter in the file) of these will be - reported as a reason to include the . - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - include_state: An _IncludeState instance. - error: The function to call with any errors found. - io: The IO factory to use to read the header file. Provided for unittest - injection. - """ - required = {} # A map of header name to linenumber and the template entity. - # Example of required: { '': (1219, 'less<>') } - - for linenum in xrange(clean_lines.NumLines()): - line = clean_lines.elided[linenum] - if not line or line[0] == '#': - continue - - # String is special -- it is a non-templatized type in STL. - matched = _RE_PATTERN_STRING.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[''] = (linenum, 'string') - - for pattern, template, header in _re_pattern_algorithm_header: - if pattern.search(line): - required[header] = (linenum, template) - - # The following function is just a speed up, no semantics are changed. - if not '<' in line: # Reduces the cpu time usage by skipping lines. - continue - - for pattern, template, header in _re_pattern_templates: - if pattern.search(line): - required[header] = (linenum, template) - - # The policy is that if you #include something in foo.h you don't need to - # include it again in foo.cc. Here, we will look at possible includes. - # Let's flatten the include_state include_list and copy it into a dictionary. - include_dict = dict( - [item for sublist in include_state.include_list for item in sublist]) - - # Did we find the header for this file (if any) and successfully load it? - header_found = False - - # Use the absolute path so that matching works properly. - abs_filename = FileInfo(filename).FullName() - - # For Emacs's flymake. - # If cpplint is invoked from Emacs's flymake, a temporary file is generated - # by flymake and that file name might end with '_flymake.cc'. In that case, - # restore original file name here so that the corresponding header file can be - # found. - # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' - # instead of 'foo_flymake.h' - abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) - - # include_dict is modified during iteration, so we iterate over a copy of - # the keys. - header_keys = include_dict.keys() - for header in header_keys: - (same_module, common_path) = FilesBelongToSameModule(abs_filename, - header) - fullpath = common_path + header - if same_module and UpdateIncludeState(fullpath, include_dict, io): - header_found = True - - # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they - # didn't include it in the .h file. - # TODO(unknown): Do a better job of finding .h files so we are confident that - # not having the .h file means there isn't one. - if filename.endswith('.cc') and not header_found: - return - - # All the lines have been processed, report the errors found. - for required_header_unstripped in required: - template = required[required_header_unstripped][1] - if required_header_unstripped.strip('<>"') not in include_dict: - error(filename, required[required_header_unstripped][0], - 'build/include_what_you_use', 4, 'Add #include ' + - required_header_unstripped + ' for ' + template) - - -_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') - - -def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): - """Check that make_pair's template arguments are deduced. - - G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are - specified explicitly, and such use isn't intended in any case. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) - if match: - error( - filename, - linenum, - 'build/explicit_make_pair', - 4, # 4 = high confidence - 'For C++11-compatibility, omit template arguments from make_pair' - ' OR use pair directly OR if appropriate, construct a pair directly') - - -def CheckDefaultLambdaCaptures(filename, clean_lines, linenum, error): - """Check that default lambda captures are not used. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # A lambda introducer specifies a default capture if it starts with "[=" - # or if it starts with "[&" _not_ followed by an identifier. - match = Match(r'^(.*)\[\s*(?:=|&[^\w])', line) - if match: - # Found a potential error, check what comes after the lambda-introducer. - # If it's not open parenthesis (for lambda-declarator) or open brace - # (for compound-statement), it's not a lambda. - line, _, pos = CloseExpression(clean_lines, linenum, - len(match.group(1))) - if pos >= 0 and Match(r'^\s*[{(]', line[pos:]): - error( - filename, - linenum, - 'build/c++11', - 4, # 4 = high confidence - 'Default lambda captures are an unapproved C++ feature.') - - -def CheckRedundantVirtual(filename, clean_lines, linenum, error): - """Check if line contains a redundant "virtual" function-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for "virtual" on current line. - line = clean_lines.elided[linenum] - virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) - if not virtual: return - - # Ignore "virtual" keywords that are near access-specifiers. These - # are only used in class base-specifier and do not apply to member - # functions. - if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or - Match(r'^\s+(public|protected|private)\b', virtual.group(3))): - return - - # Ignore the "virtual" keyword from virtual base classes. Usually - # there is a column on the same line in these cases (virtual base - # classes are rare in google3 because multiple inheritance is rare). - if Match(r'^.*[^:]:[^:].*$', line): return - - # Look for the next opening parenthesis. This is the start of the - # parameter list (possibly on the next line shortly after virtual). - # TODO(unknown): doesn't work if there are virtual functions with - # decltype() or other things that use parentheses, but csearch suggests - # that this is rare. - end_col = -1 - end_line = -1 - start_col = len(virtual.group(2)) - for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): - line = clean_lines.elided[start_line][start_col:] - parameter_list = Match(r'^([^(]*)\(', line) - if parameter_list: - # Match parentheses to find the end of the parameter list - (_, end_line, end_col) = CloseExpression( - clean_lines, start_line, - start_col + len(parameter_list.group(1))) - break - start_col = 0 - - if end_col < 0: - return # Couldn't find end of parameter list, give up - - # Look for "override" or "final" after the parameter list - # (possibly on the next few lines). - for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): - line = clean_lines.elided[i][end_col:] - match = Search(r'\b(override|final)\b', line) - if match: - error(filename, linenum, 'readability/inheritance', 4, - ('"virtual" is redundant since function is ' - 'already declared as "%s"' % match.group(1))) - - # Set end_col to check whole lines after we are done with the - # first line. - end_col = 0 - if Search(r'[^\w]\s*$', line): - break - - -def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): - """Check if line contains a redundant "override" or "final" virt-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for closing parenthesis nearby. We need one to confirm where - # the declarator ends and where the virt-specifier starts to avoid - # false positives. - line = clean_lines.elided[linenum] - declarator_end = line.rfind(')') - if declarator_end >= 0: - fragment = line[declarator_end:] - else: - if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: - fragment = line - else: - return - - # Check that at most one of "override" or "final" is present, not both - if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): - error(filename, linenum, 'readability/inheritance', 4, - ('"override" is redundant since function is ' - 'already declared as "final"')) - - -# Returns true if we are at a new block, and it is directly -# inside of a namespace. -def IsBlockInNameSpace(nesting_state, is_forward_declaration): - """Checks that the new block is directly in a namespace. - - Args: - nesting_state: The _NestingState object that contains info about our state. - is_forward_declaration: If the class is a forward declared class. - Returns: - Whether or not the new block is directly in a namespace. - """ - if is_forward_declaration: - if len(nesting_state.stack) >= 1 and ( - isinstance(nesting_state.stack[-1], _NamespaceInfo)): - return True - else: - return False - - return (len(nesting_state.stack) > 1 and - nesting_state.stack[-1].check_namespace_indentation and - isinstance(nesting_state.stack[-2], _NamespaceInfo)) - - -def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - raw_lines_no_comments, linenum): - """This method determines if we should apply our namespace indentation check. - - Args: - nesting_state: The current nesting state. - is_namespace_indent_item: If we just put a new class on the stack, True. - If the top of the stack is not a class, or we did not recently - add the class, False. - raw_lines_no_comments: The lines without the comments. - linenum: The current line number we are processing. - - Returns: - True if we should apply our namespace indentation check. Currently, it - only works for classes and namespaces inside of a namespace. - """ - - is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, - linenum) - - if not (is_namespace_indent_item or is_forward_declaration): - return False - - # If we are in a macro, we do not want to check the namespace indentation. - if IsMacroDefinition(raw_lines_no_comments, linenum): - return False - - return IsBlockInNameSpace(nesting_state, is_forward_declaration) - - -# Call this method if the line is directly inside of a namespace. -# If the line above is blank (excluding comments) or the start of -# an inner namespace, it cannot be indented. -def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, - error): - line = raw_lines_no_comments[linenum] - if Match(r'^\s+', line): - error(filename, linenum, 'runtime/indentation_namespace', 4, - 'Do not indent within a namespace') - - -def ProcessLine(filename, - file_extension, - clean_lines, - line, - include_state, - function_state, - nesting_state, - error, - extra_check_functions=[]): - """Processes a single line in the file. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - clean_lines: An array of strings, each representing a line of the file, - with comments stripped. - line: Number of line being processed. - include_state: An _IncludeState instance in which the headers are inserted. - function_state: A _FunctionState instance which counts function lines, etc. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[line], line, error) - nesting_state.Update(filename, clean_lines, line, error) - CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error) - if nesting_state.InAsmBlock(): return - CheckForFunctionLengths(filename, clean_lines, line, function_state, error) - CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, nesting_state, - error) - CheckLanguage(filename, clean_lines, line, file_extension, include_state, - nesting_state, error) - CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) - CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state, - error) - CheckVlogArguments(filename, clean_lines, line, error) - CheckPosixThreading(filename, clean_lines, line, error) - CheckInvalidIncrement(filename, clean_lines, line, error) - CheckMakePairUsesDeduction(filename, clean_lines, line, error) - CheckDefaultLambdaCaptures(filename, clean_lines, line, error) - CheckRedundantVirtual(filename, clean_lines, line, error) - CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) - for check_fn in extra_check_functions: - check_fn(filename, clean_lines, line, error) - - -def FlagCxx11Features(filename, clean_lines, linenum, error): - """Flag those c++11 features that we only allow in certain places. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Flag unapproved C++11 headers. - include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - if include and include.group(1) in ( - 'cfenv', - 'condition_variable', - 'fenv.h', - 'future', - 'mutex', - 'thread', - 'chrono', - 'ratio', - 'regex', - 'system_error', ): - error(filename, linenum, 'build/c++11', 5, - ('<%s> is an unapproved C++11 header.') % include.group(1)) - - # The only place where we need to worry about C++11 keywords and library - # features in preprocessor directives is in macro definitions. - if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return - - # These are classes and free functions. The classes are always - # mentioned as std::*, but we only catch the free functions if - # they're not found by ADL. They're alphabetical by header. - for top_name in ( - # type_traits - 'alignment_of', - 'aligned_union', ): - if Search(r'\bstd::%s\b' % top_name, line): - error(filename, linenum, 'build/c++11', 5, ( - 'std::%s is an unapproved C++11 class or function. Send c-style ' - 'an example of where it would make your code more readable, and ' - 'they may let you use it.') % top_name) - - -def ProcessFileData(filename, - file_extension, - lines, - error, - extra_check_functions=[]): - """Performs lint checks and reports any errors to the given error function. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - lines = (['// marker so line numbers and indices both start at 1'] + lines + - ['// marker so line numbers end in a known way']) - - include_state = _IncludeState() - function_state = _FunctionState() - nesting_state = NestingState() - - ResetNolintSuppressions() - - CheckForCopyright(filename, lines, error) - - RemoveMultiLineComments(filename, lines, error) - clean_lines = CleansedLines(lines) - - if file_extension == 'h': - CheckForHeaderGuard(filename, clean_lines, error) - - for line in xrange(clean_lines.NumLines()): - ProcessLine(filename, file_extension, clean_lines, line, include_state, - function_state, nesting_state, error, extra_check_functions) - FlagCxx11Features(filename, clean_lines, line, error) - nesting_state.CheckCompletedBlocks(filename, error) - - CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) - - # Check that the .cc file has included its header if it exists. - if file_extension == 'cc': - CheckHeaderFileIncluded(filename, include_state, error) - - # We check here rather than inside ProcessLine so that we see raw - # lines rather than "cleaned" lines. - CheckForBadCharacters(filename, lines, error) - - CheckForNewlineAtEOF(filename, lines, error) - - -def ProcessConfigOverrides(filename): - """ Loads the configuration files and processes the config overrides. - - Args: - filename: The name of the file being processed by the linter. - - Returns: - False if the current |filename| should not be processed further. - """ - - abs_filename = os.path.abspath(filename) - cfg_filters = [] - keep_looking = True - while keep_looking: - abs_path, base_name = os.path.split(abs_filename) - if not base_name: - break # Reached the root directory. - - cfg_file = os.path.join(abs_path, "CPPLINT.cfg") - abs_filename = abs_path - if not os.path.isfile(cfg_file): - continue - - try: - with open(cfg_file) as file_handle: - for line in file_handle: - line, _, _ = line.partition('#') # Remove comments. - if not line.strip(): - continue - - name, _, val = line.partition('=') - name = name.strip() - val = val.strip() - if name == 'set noparent': - keep_looking = False - elif name == 'filter': - cfg_filters.append(val) - elif name == 'exclude_files': - # When matching exclude_files pattern, use the base_name of - # the current file name or the directory name we are processing. - # For example, if we are checking for lint errors in /foo/bar/baz.cc - # and we found the .cfg file at /foo/CPPLINT.cfg, then the config - # file's "exclude_files" filter is meant to be checked against "bar" - # and not "baz" nor "bar/baz.cc". - if base_name: - pattern = re.compile(val) - if pattern.match(base_name): - sys.stderr.write( - 'Ignoring "%s": file excluded by "%s". ' - 'File path component "%s" matches ' - 'pattern "%s"\n' % - (filename, cfg_file, base_name, val)) - return False - elif name == 'linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - sys.stderr.write('Line length must be numeric.') - else: - sys.stderr.write( - 'Invalid configuration option (%s) in file %s\n' % - (name, cfg_file)) - - except IOError: - sys.stderr.write( - "Skipping config file '%s': Can't open for reading\n" % - cfg_file) - keep_looking = False - - # Apply all the accumulated filters in reverse order (top-level directory - # config options having the least priority). - for filter in reversed(cfg_filters): - _AddFilters(filter) - - return True - - -def ProcessFile(filename, vlevel, extra_check_functions=[]): - """Does google-lint on a single file. - - Args: - filename: The name of the file to parse. - - vlevel: The level of errors to report. Every error of confidence - >= verbose_level will be reported. 0 is a good default. - - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - - _SetVerboseLevel(vlevel) - _BackupFilters() - - if not ProcessConfigOverrides(filename): - _RestoreFilters() - return - - lf_lines = [] - crlf_lines = [] - try: - # Support the UNIX convention of using "-" for stdin. Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. - if filename == '-': - lines = codecs.StreamReaderWriter(sys.stdin, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace').read().split('\n') - else: - lines = codecs.open(filename, 'r', 'utf8', - 'replace').read().split('\n') - - # Remove trailing '\r'. - # The -1 accounts for the extra trailing blank line we get from split() - for linenum in range(len(lines) - 1): - if lines[linenum].endswith('\r'): - lines[linenum] = lines[linenum].rstrip('\r') - crlf_lines.append(linenum + 1) - else: - lf_lines.append(linenum + 1) - - except IOError: - sys.stderr.write("Skipping input '%s': Can't open for reading\n" % - filename) - _RestoreFilters() - return - - # Note, if no dot is found, this will give the entire filename as the ext. - file_extension = filename[filename.rfind('.') + 1:] - - # When reading from stdin, the extension is unknown, so no cpplint tests - # should rely on the extension. - if filename != '-' and file_extension not in _valid_extensions: - sys.stderr.write('Ignoring %s; not a valid file name ' - '(%s)\n' % (filename, ', '.join(_valid_extensions))) - else: - ProcessFileData(filename, file_extension, lines, Error, - extra_check_functions) - - # If end-of-line sequences are a mix of LF and CR-LF, issue - # warnings on the lines with CR. - # - # Don't issue any warnings if all lines are uniformly LF or CR-LF, - # since critique can handle these just fine, and the style guide - # doesn't dictate a particular end of line sequence. - # - # We can't depend on os.linesep to determine what the desired - # end-of-line sequence should be, since that will return the - # server-side end-of-line sequence. - if lf_lines and crlf_lines: - # Warn on every line with CR. An alternative approach might be to - # check whether the file is mostly CRLF or just LF, and warn on the - # minority, we bias toward LF here since most tools prefer LF. - for linenum in crlf_lines: - Error(filename, linenum, 'whitespace/newline', 1, - 'Unexpected \\r (^M) found; better to use only \\n') - - sys.stdout.write('Done processing %s\n' % filename) - _RestoreFilters() - - -def PrintUsage(message): - """Prints a brief usage string and exits, optionally with an error message. - - Args: - message: The optional error message. - """ - sys.stderr.write(_USAGE) - if message: - sys.exit('\nFATAL ERROR: ' + message) - else: - sys.exit(1) - - -def PrintCategories(): - """Prints a list of all the error-categories used by error messages. - - These are the categories used to filter messages via --filter. - """ - sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) - sys.exit(0) - - -def ParseArguments(args): - """Parses the command line arguments. - - This may set the output format and verbosity level as side-effects. - - Args: - args: The command line arguments: - - Returns: - The list of filenames to lint. - """ - try: - (opts, filenames) = getopt.getopt(args, '', [ - 'help', 'output=', 'verbose=', 'counting=', 'filter=', 'root=', - 'linelength=', 'extensions=', 'write-success=' - ]) - except getopt.GetoptError: - PrintUsage('Invalid arguments.') - - verbosity = _VerboseLevel() - output_format = _OutputFormat() - filters = '' - counting_style = '' - - for (opt, val) in opts: - if opt == '--help': - PrintUsage(None) - elif opt == '--output': - if val not in ('emacs', 'vs7', 'eclipse'): - PrintUsage( - 'The only allowed output formats are emacs, vs7 and eclipse.' - ) - output_format = val - elif opt == '--verbose': - verbosity = int(val) - elif opt == '--filter': - filters = val - if not filters: - PrintCategories() - elif opt == '--counting': - if val not in ('total', 'toplevel', 'detailed'): - PrintUsage( - 'Valid counting options are total, toplevel, and detailed') - counting_style = val - elif opt == '--root': - global _root - _root = val - elif opt == '--linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - PrintUsage('Line length must be digits.') - elif opt == '--extensions': - global _valid_extensions - try: - _valid_extensions = set(val.split(',')) - except ValueError: - PrintUsage('Extensions must be comma seperated list.') - elif opt == '--write-success': - global _write_success - _write_success = val - - if not filenames: - PrintUsage('No files were specified.') - - _SetOutputFormat(output_format) - _SetVerboseLevel(verbosity) - _SetFilters(filters) - _SetCountingStyle(counting_style) - - return filenames - - -def main(): - filenames = ParseArguments(sys.argv[1:]) - - # Change stderr to write with replacement characters so we don't die - # if we try to print something containing non-ASCII characters. - sys.stderr = codecs.StreamReaderWriter(sys.stderr, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), 'replace') - - _cpplint_state.ResetErrorCounts() - for filename in filenames: - ProcessFile(filename, _cpplint_state.verbose_level) - _cpplint_state.PrintErrorCounts() - - if _cpplint_state.error_count == 0 and _write_success is not None: - with open(_write_success, 'a'): - os.utime(_write_success, None) - - sys.exit(_cpplint_state.error_count > 0) - - -if __name__ == '__main__': - main() diff --git a/paddle/scripts/fast_install.sh b/paddle/scripts/fast_install.sh index b960d0f00a26196c827053c41a3b35b97e7cdb07..0461944ca8c6c5aeaffcac1eceac097e4d25b6d1 100644 --- a/paddle/scripts/fast_install.sh +++ b/paddle/scripts/fast_install.sh @@ -1,5 +1,37 @@ #!/bin/bash +## purple to echo +function purple(){ + echo -e "\033[35m$1\033[0m" +} + + +## green to echo +function green(){ + echo -e "\033[32m$1\033[0m" +} + +## Error to warning with blink +function bred(){ + echo -e "\033[31m\033[01m\033[05m$1\033[0m" +} + +## Error to warning with blink +function byellow(){ + echo -e "\033[33m\033[01m\033[05m$1\033[0m" +} + + +## Error +function red(){ + echo -e "\033[31m\033[01m$1\033[0m" +} + +## warning +function yellow(){ + echo -e "\033[33m\033[01m$1\033[0m" +} + path='http://paddlepaddle.org/download?url=' #release_version=`curl -s https://pypi.org/project/paddlepaddle/|grep -E "/project/paddlepaddle/"|grep "release"|awk -F '/' '{print $(NF-1)}'|head -1` release_version=1.2.0 @@ -228,36 +260,128 @@ function checkLinuxPaddleVersion(){ done } -function checkLinuxPip(){ +function checkPythonVirtualenv(){ while true do - echo "请输入您要使用的pip目录(您可以另起终端,并使用which pip来查看):" - read -p "" pip_path - if [ "$pip_path" == "" -o ! -f "$pip_path" ];then - echo "检测结果:pip不存在,请重新输入" - continue - fi - python_version=`$pip_path --version|awk -F "[ |)]" '{print $6}'|sed 's#\.##g'` - if [ "$python_version" == "27" ];then - uncode=`python -c "import pip._internal;print(pip._internal.pep425tags.get_supported())"|grep "cp27mu"` - if [[ "$uncode" == "" ]];then - uncode= - else - uncode=u - fi - fi - if [ "$python_version" == "" ];then - echo "检测结果:pip不存在,请重新输入" - else - version_list=`echo "${python_list[@]}" | grep "$python_version" ` - if [ "$version_list" != "" ];then - echo "检测结果:找到python${python_version}版本" - break - else - echo "检测结果:找不到可用的 pip, 我们只支持Python27/35/36/37及其对应的pip, 请重新输入, 或使用ctrl + c退出 " - fi - fi + read -p " + 是否使用python virtualenv虚环境安装(y/n)": check_virtualenv + case $check_virtualenv in + y) + echo "为您使用python虚环境安装" + ;; + n) + break + ;; + *) + continue + ;; + esac + + virtualenv_path=`which virtualenv 2>&1` + if [ "$virtualenv_path" == "" ];then + $python_path -m pip install virtualenv + if [ "$?" != '0' ];then + echo "安装虚拟环境失败,请检查本地环境" + fi + fi + + while true + do + read -p "请输入虚拟环境名字:" virtualenv_name + if [ "$virtualenv_name" == "" ];then + echo "不能为空" + continue + fi + break + done + + virtualenv -p $python_path ${virtualenv_name} + if [ "$?" != 0 ];then + echo "创建虚环境失败,请检查环境" + exit 2 + fi + cd ${virtualenv_name} + source ./bin/activate + + if [ "$?" == 0 ];then + use_virtualenv= + python_path=`which python` + break + else + echo "创建虚环境失败,请检查环境" + exit 2 + fi + done +} + +function checkLinuxPython(){ + python_path=`which python 2>/dev/null` + while true + do + if [ "$python_path" == '' ];then + while true + do + read -p "没有找到默认的python版本,请输入要安装的python路径:" python_path + python_path=`$python_path -V` + if [ "$python_path" != "" ];then + break + else + echo "输入路径有误,未找到pyrhon" + fi done + fi + + python_version=`$python_path -V 2>&1|awk -F '[ .]' '{print $2$3}'` + pip_version=`$python_path -m pip -V|awk -F '[ .]' '{print $2}'` + while true + do + read -p " + 找到python版本$python_version,使用请输入y,选择其他版本请输n(y/n):" check_python + case $check_python in + n) + read -p "请指定您的python路径:" new_python_path + python_V=`$new_python_path -V 2>/dev/null` + if [ "$python_V" != "" ];then + python_path=$new_python_path + python_version=`$python_path -V 2>&1|awk -F '[ .]' '{print $2$3}'` + pip_version=`python -m pip -V|awk -F '[ .]' '{print $2}'` + echo "您的python版本为${python_version}" + break + else + echo 输入有误,未找到python路径 + fi + ;; + y) + break + ;; + *) + echo "输入有误,请重新输入." + continue + ;; + esac + done + + if [ "$pip_version" -lt 9 ];then + echo "您的pip版本小于9.0.1 请升级pip (pip install --upgrade pip)" + exit 0 + fi + + if [ "$python_version" == "27" ];then + uncode=`python -c "import pip._internal;print(pip._internal.pep425tags.get_supported())"|grep "cp27mu"` + if [[ "$uncode" == "" ]];then + uncode= + else + uncode=u + fi + fi + + version_list=`echo "${python_list[@]}" | grep "$python_version" ` + if [ "$version_list" == "" ];then + echo "找不到可用的 pip, 我们只支持Python27/35/36/37及其对应的pip, 请重新输入, 或使用ctrl + c退出 " + else + break + fi + done } function checkLinuxAVX(){ @@ -287,25 +411,36 @@ function PipLinuxInstall(){ wheel_cpu_develop="http://paddle-wheel.bj.bcebos.com/latest-cpu-${AVX}-${math}/paddlepaddle-latest-cp${python_version}-cp${python_version}m${uncode}-linux_x86_64.whl" wheel_gpu_develop="http://paddle-wheel.bj.bcebos.com/latest-gpu-cuda${CUDA}-cudnn${CUDNN}-${AVX}-${math}/paddlepaddle_gpu-latest-cp${python_version}-cp${python_version}m${uncode}-linux_x86_64.whl" - if [[ "$paddle_version" == "2" ]];then if [[ "$GPU" == "gpu" ]];then if [[ ${AVX} == "avx" ]];then rm -rf `echo $wheel_gpu_release|awk -F '/' '{print $NF}'` wget -q $wheel_gpu_release if [ "$?" == "0" ];then - $pip_path install --user -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_release + $python_path -m pip install ${use_virtualenv} -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_release + if [ "$?" == 0 ];then + echo 安装成功 + else + echo 安装失败 + exit 1 + fi else - echo "paddlepaddle whl包下载失败" + echo paddlepaddle whl包下载失败 exit 1 fi else rm -rf `echo $wheel_gpu_release_novax|awk -F '/' '{print $NF}'` wget -q $wheel_gpu_release_novax if [ "$?" == "0" ];then - $pip_path install --user -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_release_noavx + $python_path -m pip install ${use_virtualenv} -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_release_noavx + if [ "$?" == 0 ];then + echo 安装成功 + else + echo 安装失败 + exit 1 + fi else - echo "paddlepaddle whl包下载失败" + echo paddlepaddle whl包下载失败 exit 1 fi fi @@ -313,9 +448,15 @@ function PipLinuxInstall(){ rm -rf `echo $wheel_cpu_release|awk -F '/' '{print $NF}'` wget -q $wheel_cpu_release if [ "$?" == "0" ];then - $pip_path install --user -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_cpu_release + $python_path -m pip install ${use_virtualenv} -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_cpu_release + if [ "$?" == 0 ];then + echo 安装成功 + else + echo 安装失败 + exit 1 + fi else - echo "paddlepaddle whl包下载失败" + echo paddlepaddle whl包下载失败 exit 1 fi fi @@ -324,18 +465,30 @@ function PipLinuxInstall(){ rm -rf `echo $wheel_gpu_develop|awk -F '/' '{print $NF}'` wget -q $wheel_gpu_develop if [ "$?" == "0" ];then - $pip_path install --user -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_develop + $python_path -m pip install ${use_virtualenv} -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_gpu_develop + if [ "$?" == 0 ];then + echo 安装成功 + else + echo 安装失败 + exit 1 + fi else - echo "paddlepaddle whl包下载失败" + echo paddlepaddle whl包下载失败 exit 1 fi else rm -rf `echo $wheel_cpu_develop|awk -F '/' '{print $NF}'` wget -q $wheel_cpu_develop if [ "$?" == "0" ];then - $pip_path install --user -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_cpu_develop + $python_path -m pip install ${use_virtualenv} -i https://mirrors.aliyun.com/pypi/simple --trusted-host=mirrors.aliyun.com $wheel_cpu_develop + if [ "$?" == 0 ];then + echo 安装成功 + else + echo 安装失败 + exit 1 + fi else - echo "paddlepaddle whl包下载失败" + echo paddlepaddle whl包下载失败 exit 1 fi fi @@ -575,95 +728,122 @@ gpu_list=( echo echo "Step 5. 检测pip版本" echo - checkLinuxPip + checkLinuxPython echo checkLinuxAVX + echo + echo "Step 6.是否使用Python的虚拟环境" + use_virtualenv="--user" + checkPythonVirtualenv echo "*********************2. 开始安装*****************************" PipLinuxInstall + if [ "$check_virtualenv" == 'y' ];then + echo "虚环境创建成功,请cd 进入${virtualenv_name}, 执行 source bin/activate 进入虚环境。退出虚环境执行 deactivate命令。 + 更多虚环境使用方法请参考virtualenv官网:https://virtualenv.pypa.io/en/latest/" + fi +} + +function clearMacPythonEnv(){ + python_version="" + python_brief_version="" + python_root="" } function checkMacPython2(){ while true do - read -p " - => 未能在常规路径下找到Python2,请使用ctrl+c命令退出安装程序,并使用brew或pypi.org下载安装Python2(注意Python版本不能低于2.7.15) - 如希望自定义Python路径,请输入路径:" python_root - echo python_version=`$python_root --version 2>&1 1>&1` - if [ $? == "0" ];then - : + if [[ $? == "0" ]];then + if [ "$python_version" == "" ] || [ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ];then + clearMacPythonEnv + else + check_python=`echo $python_version | grep "Python 2"` + if [[ -n "$check_python" ]];then + while true + do + echo -e " => 在您的环境中找到 \033[32m[ $python_version ]\033[0m, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车: " + read -p "" use_python + echo + use_python=`echo $use_python | tr 'A-Z' 'a-z'` + if [[ "$use_python" == "y" ]]||[[ "$use_python" == "" ]];then + use_python="y" + break + elif [[ "$use_python" == "n" ]];then + clearMacPythonEnv + break + else + red " 输入错误,请重新输入(y/n)" + fi + done + if [[ "$use_python" == "y" ]];then + return 0 + fi + else + red " 您输入Python的不是Python2" + clearMacPythonEnv + fi + fi else - python_version="" + clearMacPythonEnv + red " => 未能在常规路径下找到可用的Python2,请使用ctrl+c命令退出安装程序,并使用brew或pypi.org下载安装Python2(注意Python版本不能低于2.7.15)" + read -p " 如希望自定义Python路径,请输入路径 + 如果希望重新选择Python版本,请回车:" python_root + echo + if [[ "$python_root" == "" ]];then + python_V="" + clearMacPythonEnv + return 1 + fi fi - check_python=`echo $python_version | grep "Python 2"` - if [ "$python_version" == "" ] || [ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ] ;then - python_version="" - elif [ -n "$check_python" ];then - while true - do - read -p " - => 在您的环境中找到 $python_version, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车: " use_python - echo - use_python=`echo $use_python | tr 'A-Z' 'a-z'` - if [ "$use_python" == "y" ]||[ "$use_python" == "" ];then - use_python="y" - break - elif [ "$use_python" == "n" ];then - python_root="" - break - else - echo "输入错误,请重新输入(y/n)" - fi - done - if [ "$use_python" == "y" ];then - break - fi - else - echo "您输入Python的不是Python2" - python_version="" - fi done } function checkMacPython3(){ while true do - read -p " - => 未能在常规路径下找到Python3,请使用ctrl+c命令退出安装程序,并使用brew或pypi.org下载Python3 - 如希望自定义Python路径,请输入路径:" python_root - python_version=`$python_root --version 2>&1 1>&1` - if [ $? == "0" ];then - : + python_version=`$python_root --version 2>&1 1>&1` + if [[ $? == "0" ]];then + if [ "$python_version" == "" ] || [ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ] ;then + clearMacPythonEnv + else + check_python=`echo $python_version | grep "Python 3"` + if [[ -n "$check_python" ]];then + while true + do + echo -e " => 在您的环境中找到 \033[32m[ $python_version ]\033[0m, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车: " + read -p "" use_python + echo + use_python=`echo $use_python | tr 'A-Z' 'a-z'` + if [[ "$use_python" == "y" ]]||[[ "$use_python" == "" ]];then + use_python="y" + break + elif [[ "$use_python" == "n" ]];then + clearMacPythonEnv + break + else + red " 输入错误,请重新输入(y/n)" + fi + done + if [[ "$use_python" == "y" ]];then + return 0 + fi + else + red " 您输入Python的不是Python3" + clearMacPythonEnv + fi + fi else - python_version="" + clearMacPythonEnv + red " => 未能在常规路径下找到可用的Python3,请使用ctrl+c命令退出安装程序,并使用brew或pypi.org下载安装Python3(注意Python版本不能低于3.5.x)" + read -p " 如希望自定义Python路径,请输入路径 + 如果希望重新选择Python版本,请回车:" python_root + echo + if [[ "$python_root" == "" ]];then + python_V="" + clearMacPythonEnv + return 1 + fi fi - check_python=`echo $python_version | grep "Python 3"` - if [ "$python_version" == "" ] || [ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ] ;then - python_version="" - elif [ -n "$check_python" ] ;then - while true - do - read -p " - => 在您的环境中找到 $python_version, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车: " use_python - echo - use_python=`echo $use_python | tr 'A-Z' 'a-z'` - if [ "$use_python" == "y" ]||[ "$use_python" == "" ];then - use_python="y" - break - elif [ "$use_python" == "n" ];then - python_root="" - break - else - echo "输入错误,请重新输入(y/n)" - fi - done - if [ "$use_python" == "y" ];then - break - fi - else - echo "您输入Python的不是Python3" - python_version="" - fi done } @@ -672,145 +852,160 @@ function checkMacPaddleVersion(){ do read -n1 -p "Step 2. 选择PaddlePaddle的版本,请按回车键继续..." echo - read -p " - 1. 开发版:对应Github上develop分支,如您需要开发、或希望使用PaddlePaddle最新功能,请选用此版本 - 2. 稳定版(推荐):如您无特殊开发需求,建议使用此版本,目前最新的版本号为 ${release_version} - - => 请输入数字1或2。如输入其他字符或直接回车,将会默认选择【 2. 稳定版 】 。请在这里输入并回车:" paddle_version - if [ "$paddle_version" == "1" ]||[ "$paddle_version" == "2" ];then + yellow " 1. 开发版:对应Github上develop分支,如您需要开发、或希望使用PaddlePaddle最新功能,请选用此版本" + yellow " 2. 稳定版(推荐):如您无特殊开发需求,建议使用此版本,目前最新的版本号为 ${release_version}" + read -p " => 请输入数字1或2。如输入其他字符或直接回车,将会默认选择【 2. 稳定版 】 。请在这里输入并回车:" paddle_version + if [[ "$paddle_version" == "1" ]]||[[ "$paddle_version" == "2" ]];then echo - echo "您选择了数字【"$paddle_version" 】" + yellow " 您选择了数字【"$paddle_version" 】" echo break else paddle_version="2" echo - echo "您选择了数字【2】" + yellow " 您选择了数字【2】" echo break fi done } +function initCheckMacPython2(){ + echo + yellow " 您选择了Python "$python_V",正在寻找符合要求的Python 2版本" + echo + python_root=`which python2.7` + if [[ "$python_root" == "" ]];then + python_root=`which python` + fi + checkMacPython2 + if [[ "$?" == "1" ]];then + return 1 + else + return 0 + fi +} -function checkMacPythonVersion(){ - while true - do - read -n1 -p "Step 3. 选择Python版本,请按回车键继续..." - read -p " - 2. 使用python 2.x - 3. 使用python 3.x +function initCheckMacPython3(){ + echo + yellow " 您选择了Python "$python_V",正在寻找符合您要求的Python 2版本" + echo + python_root=`which python3` + checkMacPython3 + if [[ "$?" == "1" ]];then + return 1 + else + return 0 + fi +} - => 请输入数字2或3。如输入其他字符或直接回车,将会默认使用【Python 2 】。请在这里输入并回车:" python_V - echo - if [ "$python_V" == "" ];then - python_V="2" +function checkMacPip(){ + if [[ "$python_V" == "2" ]]||[[ "$python_V" == "3" ]];then + + python_brief_version=`$python_root -m pip -V |awk -F "[ |)]" '{print $6}'|sed 's#\.##g'` + if [[ ${python_brief_version} == "" ]];then + red "您输入的python:${python_root} 对应的pip不可用,请检查此pip或重新选择其他python" + echo + return 1 fi - echo "您选择了数字【"$python_V"】,正在寻找符合您要求的Python版本,请按回车键继续..." - echo - if [ "$python_V" == "2" ];then - python_root=`which python2.7` - if [ "$python_root" == "" ];then - python_root=`which python` - fi - python_version=`$python_root --version 2>&1 1>&1` - if [ $? == "0" ];then - : - else - python_version="" - fi - if [ "$python_root" == "" ]||[ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ]||[ "$python_root" == "/usr/bin/python2.7" -a "$python_version" == "Python 2.7.10" ];then - checkMacPython2 - fi - while true - do - read -p " - => 在您的环境中找到 $python_version, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车:" use_python - echo - use_python=`echo $use_python | tr 'A-Z' 'a-z'` - if [ "$use_python" == "y" ]||[ "$use_python" == "" ];then - break - elif [ "$use_python" == "n" ];then - python_root="" - checkMacPython2 - break + pip_version=`$python_root -m pip -V |awk -F '[ .]' '{print $2}'` + if [[ 9 -le ${pip_version} ]];then + : + else + red "您的pip版本过低,请安装pip 9.0.1及以上的版本" + echo + return 1 + fi + if [[ "$python_brief_version" == "" ]];then + clearMacPythonEnv + red "您的 $python_root 对应的pip存在问题,请按ctrl + c退出后重新安装pip,或切换其他python版本" + echo + return 1 + else + if [[ $python_brief_version == "27" ]];then + uncode=`python -c "import pip._internal;print(pip._internal.pep425tags.get_supported())"|grep "cp27"` + if [[ $uncode == "" ]];then + uncode="mu" else - echo "输入错误,请重新输入(y/n)" + uncode="m" fi - done - - elif [ "$python_V" == "3" ];then - python_root=`which python3` - python_version=`$python_root --version 2>&1 1>&1` - if [ $? == "0" ];then - : - else - python_version="" - fi - if [ "$python_root" == "" ]||[ "$python_root" == "/usr/bin/python" -a "$python_version" == "Python 2.7.10" ];then - checkMacPython3 - fi - while true - do - read -p " - => 在您的环境中找到 $python_version, 确认使用此版本请输入y;如您希望自定义Python路径请输入n。请在这里输入(y/n)并回车:" use_python + fi + version_list=`echo "${python_list[@]}" | grep "$python_brief_version" ` + if [[ "$version_list" != "" ]];then + return 0 + else + red "未找到可用的pip或pip3。PaddlePaddle目前支持:Python2.7/3.5/3.6/3.7及其对应的pip, 请重新输入,或使用ctrl + c退出" echo - use_python=`echo $use_python | tr 'A-Z' 'a-z'` - if [ "$use_python" == "y" ]||[ "$use_python" == "" ];then - break - elif [ "$use_python" == "n" ];then - checkMacPython3 - break - else - echo "输入错误,请重新输入(y/n)" - fi - done - else - : - fi + clearMacPythonEnv + return 1 + fi + fi + fi +} - if [ "$python_V" == "2" ]||[ "$python_V" == "3" ];then - python_brief_version=`$python_root -m pip -V |awk -F "[ |)]" '{print $6}'|sed 's#\.##g'` - if [[ $python_brief_version == "27" ]];then - uncode=`python -c "import pip._internal;print(pip._internal.pep425tags.get_supported())"|grep "cp27"` - if [[ $uncode == "" ]];then - uncode="mu" - else - uncode="m" - fi - fi - version_list=`echo "${python_list[@]}" | grep "$python_brief_version" ` - if [ "$version_list" != "" ];then - break +function checkMacPythonVersion(){ + while true + do + read -n1 -p "Step 3. 选择Python版本,请按回车键继续..." + echo + yellow " 2. 使用python 2.x" + yellow " 3. 使用python 3.x" + read -p " => 请输入数字2或3。如输入其他字符或直接回车,将会默认使用【Python 2 】。请在这里输入并回车:" python_V + if [[ "$python_V" == "" ]];then + python_V="2" + fi + if [[ "$python_V" == "2" ]];then + initCheckMacPython2 + if [[ "$?" == "0" ]];then + checkMacPip + if [[ "$?" == "0" ]];then + return 0 + else + : + fi else - echo "未找到可用的pip或pip3。PaddlePaddle目前支持:Python2.7/3.5/3.6/3.7及其对应的pip, 请重新输入,或使用ctrl + c退出" - fi - else - echo "输入错误,请重新输入" - fi + : + fi + elif [[ "$python_V" == "3" ]];then + initCheckMacPython3 + if [[ "$?" == "0" ]];then + checkMacPip + if [[ "$?" == "0" ]];then + return 0 + else + : + fi + else + : + fi + else + red "输入错误,请重新输入" + fi done } function checkMacAVX(){ read -n1 -p "Step 4. 检测您的Mac是否支持AVX指令集,请按回车键继续..." - echo if [[ $AVX != "" ]];then AVX="avx" - echo "检测结果:支持" + echo "" + green " 检测结果:支持" + echo "" + return 0 else - read -n1 -p "检测结果:不支持。非常抱歉,PaddlePaddle在Mac系统暂不提供no_avx类型的安装包,您可以选择在Linux系统中安装no_avx版的PaddlePaddle, 请按回车键退出..." - exit + red " 检测结果:不支持。非常抱歉,PaddlePaddle在Mac系统暂不提供no_avx类型的安装包,您可以选择在Linux系统中安装no_avx版的PaddlePaddle, 请按回车键退出..." + echo + return 1 fi - echo } function checkMacGPU(){ read -n1 -p "Step 5. 选择CPU/GPU版本,请按回车键继续..." echo if [[ $GPU != "" ]];then - echo "MacOS环境下,暂未提供GPU版本的PaddlePaddle安装包,将为您安装CPU版本的PaddlePaddle" + yellow " MacOS环境下,暂未提供GPU版本的PaddlePaddle安装包,将为您安装CPU版本的PaddlePaddle" else - echo "MacOS环境下,暂未提供GPU版本的PaddlePaddle安装包,将为您安装CPU版本的PaddlePaddle" + yellow " MacOS环境下,暂未提供GPU版本的PaddlePaddle安装包,将为您安装CPU版本的PaddlePaddle" GPU=cpu fi echo @@ -822,38 +1017,44 @@ function macos() { while true do + checkMacPaddleVersion + checkMacPythonVersion + checkMacAVX + checkMacGPU - echo "*********************2. 开始安装*****************************" + green "*********************2. 开始安装*****************************" echo - read -n1 -p "即将为您下载并安装PaddlePaddle,请按回车键继续..." + yellow "即将为您下载并安装PaddlePaddle,请按回车键继续..." + read -n1 -p "" echo if [[ $paddle_version == "2" ]];then $python_root -m pip install paddlepaddle - if [ $? == "0" ];then - echo "安装成功,可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" + if [[ $? == "0" ]];then + green "安装成功,可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" break else rm $whl_cpu_release - echo "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" + red "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" echo"" echo "==========================================================================================" echo"" exit 1 fi else - if [ -f $whl_cpu_develop ];then + if [[ -f $whl_cpu_develop ]];then $python_root -m pip install $whl_cpu_develop - if [ $? == "0" ];then + if [[ $? == "0" ]];then rm -rf $whl_cpu_develop - echo "安装成功!小提示:可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" + # TODO add install success check here + green "安装成功!小提示:可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" break else - echo "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" + red "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" echo"" echo "==========================================================================================" echo"" @@ -861,15 +1062,15 @@ function macos() { fi else wget ${path}$whl_cpu_develop -O $whl_cpu_develop - if [ $? == "0" ];then + if [[ $? == "0" ]];then $python_root -m pip install $whl_cpu_develop - if [ $? == "0" ];then + if [[ $? == "0" ]];then rm $wheel_cpu_develop - echo "安装成功,可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" + green "安装成功,可以使用: ${python_root} 来启动安装了PaddlePaddle的Python解释器" break else rm $whl_cpu_release - echo "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" + red "未能正常安装PaddlePaddle,请尝试更换您输入的python路径,或者ctrl + c退出后请检查您使用的python对应的pip或pip源是否可用" echo"" echo "==========================================================================================" echo"" @@ -877,7 +1078,7 @@ function macos() { fi else rm $whl_cpu_develop - echo "未能正常安装PaddlePaddle,请检查您的网络 或者确认您是否安装有 wget,或者ctrl + c退出后反馈至https://github.com/PaddlePaddle/Paddle/issues" + red "未能正常安装PaddlePaddle,请检查您的网络 或者确认您是否安装有 wget,或者ctrl + c退出后反馈至https://github.com/PaddlePaddle/Paddle/issues" echo"" echo "==========================================================================================" echo"" @@ -890,33 +1091,35 @@ function macos() { function main() { echo "*********************************" - echo "欢迎使用PaddlePaddle快速安装脚本" + green "欢迎使用PaddlePaddle快速安装脚本" echo "*********************************" echo - echo "如果您在安装过程中遇到任何问题,请在https://github.com/PaddlePaddle/Paddle/issues反馈,我们的工作人员将会帮您答疑解惑" + yellow "如果您在安装过程中遇到任何问题,请在https://github.com/PaddlePaddle/Paddle/issues反馈,我们的工作人员将会帮您答疑解惑" echo - echo "本安装包将帮助您在Linux或Mac系统下安装PaddlePaddle,包括 1)安装前的准备和 2)开始安装 两部分" + echo "本安装包将帮助您在Linux或Mac系统下安装PaddlePaddle,包括" + yellow "1)安装前的准备" + yellow "2)开始安装" echo read -n1 -p "请按回车键进行下一步..." echo echo - echo "*********************1. 安装前的准备*****************************" + green "*********************1. 安装前的准备*****************************" echo echo "Step 1. 正在检测您的操作系统信息..." echo SYSTEM=`uname -s` - if [ "$SYSTEM" == "Darwin" ];then - echo "您的系统为:MAC OSX" + if [[ "$SYSTEM" == "Darwin" ]];then + yellow " 您的系统为:MAC OSX" echo macos else - echo "您的系统为:Linux" + yellow " 您的系统为:Linux" echo OS=`cat /etc/issue|awk 'NR==1 {print $1}'` - if [ $OS == "\S" ] || [ "$OS" == "CentOS" ] || [ $OS == "Ubuntu" ];then + if [[ $OS == "\S" ]] || [[ "$OS" == "CentOS" ]] || [[ $OS == "Ubuntu" ]];then linux else - echo "您的系统不在本安装包的支持范围,如您需要在windows环境下安装PaddlePaddle,请您参考PaddlePaddle官网的windows安装文档" + red "您的系统不在本安装包的支持范围,如您需要在windows环境下安装PaddlePaddle,请您参考PaddlePaddle官网的windows安装文档" fi fi } diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 1135caf4f8c32901d93270d372fdaac702acf006..9899eee8841147a509b7997fd905a1b68bc098da 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -87,7 +87,7 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib" - WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.5 uninstall -y protobuf pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -100,7 +100,7 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib" - WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.6 uninstall -y protobuf pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -113,7 +113,7 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib" - WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.7 uninstall -y protobuf pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 @@ -128,31 +128,44 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27mu" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp35-cp35m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.5.1/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so" + pip3.5 uninstall -y protobuf + pip3.5 install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp36-cp36m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.6.0/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so" + pip3.6 uninstall -y protobuf + pip3.6 install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp37-cp37m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} export PATH=/opt/_internal/cpython-3.7.0/bin/:${PATH} export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so" + pip3.7 uninstall -y protobuf + pip3.7 install -r ${PADDLE_ROOT}/python/requirements.txt fi + else + pip uninstall -y protobuf + pip install -r ${PADDLE_ROOT}/python/requirements.txt fi fi @@ -186,7 +199,6 @@ function cmake_gen() { -DWITH_TESTING=${WITH_TESTING:-ON} -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} @@ -219,7 +231,6 @@ EOF -DCUDNN_ROOT=/usr/ \ -DWITH_TESTING=${WITH_TESTING:-ON} \ -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \ - -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \ @@ -248,6 +259,7 @@ function check_style() { eval "$(GIMME_GO_VERSION=1.8.3 gimme)" fi + pip install cpplint # set up go environment for running gometalinter mkdir -p $GOPATH/src/github.com/PaddlePaddle/ ln -sf ${PADDLE_ROOT} $GOPATH/src/github.com/PaddlePaddle/Paddle @@ -382,9 +394,7 @@ EOF pip3.7 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl fi - if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then - paddle version - fi + paddle version if [ "$1" == "cp27-cp27m" ]; then pip uninstall -y paddlepaddle @@ -405,10 +415,11 @@ function assert_api_not_changed() { source .env/bin/activate pip install ${PADDLE_ROOT}/build/python/dist/*whl python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec + if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then # Use sed to make python2 and python3 sepc keeps the same sed -i 's/arg0: str/arg0: unicode/g' new.spec - sed -i "s/\(.*Transpiler.*\).__init__ ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec + sed -i "s/\(.*Transpiler.*\).__init__ (ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec fi # ComposeNotAligned has significant difference between py2 and py3 sed -i '/.*ComposeNotAligned.*/d' new.spec @@ -422,8 +433,8 @@ function assert_api_spec_approvals() { BRANCH="develop" fi - API_FILES=("cmake/external" - "paddle/fluid/API.spec" + API_FILES=("paddle/fluid/API.spec" + "python/paddle/fluid/parallel_executor.py" "paddle/fluid/framework/operator.h" "paddle/fluid/framework/tensor.h" "paddle/fluid/framework/lod_tensor.h" @@ -435,18 +446,28 @@ function assert_api_spec_approvals() { "paddle/fluid/framework/ir/node.h" "paddle/fluid/framework/ir/graph.h" "paddle/fluid/framework/framework.proto" + "python/paddle/fluid/compiler.py" "paddle/fluid/operators/distributed/send_recv.proto.in") for API_FILE in ${API_FILES[*]}; do API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "${API_FILE}" || true` echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}" if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable. - APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` + if [ "$API_FILE" == "paddle/fluid/API.spec" ];then + APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ + python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308` + else + APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ + python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` + fi echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" if [ "${APPROVALS}" == "FALSE" ]; then + if [ "$API_FILE" == "paddle/fluid/API.spec" ];then + echo "You must have panyx0718 and shanyi15 approval for the api change! ${API_FILE}" + else echo "You must have panyx0718 approval for the api change! ${API_FILE}" - exit 1 + fi + exit 1 fi fi done @@ -461,19 +482,6 @@ function assert_api_spec_approvals() { exit 1 fi fi - - pip install ${PADDLE_ROOT}/build/opt/paddle/share/wheels/*.whl - CHECK_DOCK_MD5=`python ${PADDLE_ROOT}/tools/check_doc_approval.py` - if [ "True" != ${CHECK_DOCK_MD5} ]; then - APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ - python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308` - echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" - if [ "${APPROVALS}" == "FALSE" ]; then - echo "You must have shanyi15 approval for the api doc change! " - exit 1 - fi - echo ${CHECK_DOCK_MD5} >/root/.cache/doc_md5.txt - fi } @@ -539,7 +547,6 @@ EOF -DCMAKE_BUILD_TYPE=Release \ -DWITH_GPU=OFF \ -DWITH_MKL=OFF \ - -DWITH_FLUID_ONLY=ON local LIB_TYPE=$1 case $LIB_TYPE in @@ -615,13 +622,8 @@ EOF NCCL_DEPS="true" fi - if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then - PADDLE_VERSION="paddle version" - CMD='"paddle", "version"' - else - PADDLE_VERSION="true" - CMD='"true"' - fi + PADDLE_VERSION="paddle version" + CMD='"paddle", "version"' if [ "$1" == "cp35-cp35m" ]; then cat >> ${PADDLE_ROOT}/build/Dockerfile <> ${PADDLE_ROOT}/build/Dockerfile <> ${PADDLE_ROOT}/build/Dockerfile < 1 and \ + self._program._trainers_endpoints: + tps = self._program._trainers_endpoints - if self._build_strategy.num_trainers > 1 and trainers_endpoints: assert self._build_strategy.num_trainers == len( - trainers_endpoints), "num_trainers == len(end_points)" - self._build_strategy.trainers_endpoints = trainers_endpoints - - self._persistable_vars = set([ - cpt.to_text(v.name) - for v in [ - var for var in self._program.list_vars() - if var.persistable and var.type != core.VarDesc.VarType.RAW - ] - ]) + tps), "num_trainers == len(end_points)" + self._build_strategy.trainers_endpoints = tps + + self._persistable_vars = [] + for block_id in range(self._program_desc.num_blocks()): + bdesc = self._program_desc.block(block_id) + self._persistable_vars.extend([ + cpt.to_text(v.name()) for v in bdesc.all_vars() + if v.persistable() and v.type() != core.VarDesc.VarType.RAW + ]) places = list(map(_place_obj, self._places)) - return core.ParallelExecutor( - places, self._persistable_vars, self._program.desc, - cpt.to_text(self._loss_name) - if self._loss_name else six.u(''), self._scope, self._local_scopes, - self._exec_strategy, self._build_strategy) + + return core.ParallelExecutor(places, + set(self._persistable_vars), + cpt.to_text(self._loss_name) + if self._loss_name else six.u(''), scope, + self._local_scopes, self._exec_strategy, + self._build_strategy, self._graph) def _compile_inference(self): - assert self._is_data_parallel is False return core.create_paddle_predictor(self._infer_config) def _compile(self, scope, place): @@ -217,7 +257,7 @@ class CompiledProgram(object): if self._compiled: if scope and self._scope != scope: raise ValueError("Cannot compile with different scope") - if place and self._place != place: + if place and not self._place._equals(place): raise ValueError("Cannot compile with different place") return self self._compiled = True @@ -225,7 +265,9 @@ class CompiledProgram(object): self._scope = scope self._place = place if self._is_data_parallel: - self._executor = self._compile_data_parallel() + self._executor = self._compile_data_parallel( + use_cuda=isinstance(self._place, core.CUDAPlace), + scope=self._scope) elif self._is_inference: self._executor = self._compile_inference() else: diff --git a/python/paddle/fluid/contrib/int8_inference/README.md b/python/paddle/fluid/contrib/int8_inference/README.md index a9691dad4494f5eacf427b2806b2393baa57dc1e..460ae393f158ae320c93601365a68b8cfe2ba50e 100644 --- a/python/paddle/fluid/contrib/int8_inference/README.md +++ b/python/paddle/fluid/contrib/int8_inference/README.md @@ -63,10 +63,10 @@ Notes: ## 4. How to reproduce the results * Small dataset ```bash -python python/paddle/fluid/contrib/tests/test_calibration.py +FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration.py ``` * Full dataset ```bash -DATASET=full python python/paddle/fluid/contrib/tests/test_calibration.py +FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration.py ``` diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 266a106bc507104c0a8db1c882b55ac59e88195e..622add48430c63a0c4293457127a49dd8d851e35 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -13,20 +13,27 @@ # limitations under the License. import collections +import numpy as np +import six +from ..... import compat as cpt from .... import core +from .... import Executor from ....framework import IrGraph +from ....framework import IrNode from ....framework import Program -from ....framework import Variable from ....initializer import Constant from .... import unique_name -__all__ = ['QuantizationTransformPass'] +__all__ = [ + 'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass', + 'TransformForMobilePass' +] class QuantizationTransformPass(object): def __init__(self, scope=None, - program_exe=None, + place=None, weight_bits=8, activation_bits=8, activation_quantize_type='abs_max', @@ -35,7 +42,13 @@ class QuantizationTransformPass(object): """ Convert and rewrite the IrGraph according to weight and activation quantization type. + Args: + scope(fluid.Scope): When activation use 'range_abs_max' as the quantize + type, this pass will create some new parameters. The scope is used to + initialize these new parameters. + place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new + parameters described above. weight_bits (int): quantization bit number for weights, the bias is not quantized. activation_bits (int): quantization bit number for activation. @@ -49,6 +62,7 @@ class QuantizationTransformPass(object): support 'abs_max'. The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained. window_size (int): the window size for 'range_abs_max' quantization. + Examples: .. code-block:: python # The original graph will be rewrite. @@ -59,13 +73,13 @@ class QuantizationTransformPass(object): from paddle.fluid import core graph = IrGraph(core.Graph(program.desc), for_test=False) - exe = fluid.Executor(fluid.CPUPlace()) + place = fluid.CPUPlace() transform_pass = QuantizationTransformPass(fluid.global_scope(), - exe) + place) transform_pass.apply(graph) """ self._scope = scope - self._program_exe = program_exe + self._place = place self._weight_bits = weight_bits self._activation_bits = activation_bits @@ -88,31 +102,35 @@ class QuantizationTransformPass(object): self._quantizable_grad_ops = [ '%s_grad' % (op) for op in self._quantizable_ops ] - self._fake_quant_op_types = [ - 'fake_quantize_abs_max', 'fake_quantize_range_abs_max' - ] - self._fake_dequant_op_types = ['fake_dequantize_max_abs'] self._is_test = None self._global_step = None def apply(self, graph): + """ + Quantize the graph for training process. According to weight and + activation quantization type, the graph will be added some fake + quantize operators and fake dequantize operators. + + Args: + graph(IrGraph): the applied graph. + """ assert isinstance(graph, IrGraph), 'graph must be the instance of IrGraph.' self._need_initialized.clear() self._is_test = graph.is_test() # marked the variable which has been dequantized. dequantized_vars = collections.OrderedDict() - params = [p.name() for p in graph.all_parameters()] + persistable_vars = [p.name() for p in graph.all_persistable_nodes()] def _transform_forward(graph, op): for var_node in op.inputs: if var_node.name() in dequantized_vars: dequant_var_node = dequantized_vars[var_node.name()] else: - quant_bits = self._weight_bits if var_node.name() in params \ + quant_bits = self._weight_bits if var_node.name() in persistable_vars \ else self._activation_bits quant_type = self._weight_quantize_type if var_node.name() \ - in params else self._activation_quantize_type + in persistable_vars else self._activation_quantize_type quant_var_node, scale_var_node = self._insert_quant_op( graph, var_node, quant_bits, quant_type) dequant_var_node = self._insert_dequant_op( @@ -133,7 +151,7 @@ class QuantizationTransformPass(object): if not self._is_test: self._create_global_step(graph) - ops = graph.all_ops() + ops = graph.all_op_nodes() # The process of _transform_forward and _transform_backward is needed in two for loops. # The loop for transforming the forward graph: for op in ops: @@ -147,26 +165,32 @@ class QuantizationTransformPass(object): if len(self._need_initialized) > 0: assert self._scope is not None, \ 'The scope cannot be set None when activation_quantize_type equals to range_abs_max.' - assert self._program_exe is not None, \ - 'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.' + assert self._place is not None, \ + 'The place cannot be set None when activation_quantize_type equals to range_abs_max.' init_program = Program() - for var_desc, initializer in self._need_initialized.iteritems(): - var = Variable(init_program.global_block()) - var._set_desc(var_desc) + for var_desc, initializer in six.iteritems(self._need_initialized): + var = init_program.global_block().create_var( + name=var_desc.name(), + shape=var_desc.shape(), + dtype=var_desc.dtype(), + type=var_desc.type(), + lod_level=var_desc.lod_level(), + persistable=var_desc.persistable()) initializer(var, init_program.global_block()) - self._program_exe.run(program=init_program, scope=self._scope) + exe = Executor(self._place) + exe.run(program=init_program, scope=self._scope) return graph def _create_global_step(self, graph): if self._weight_quantize_type == 'range_abs_max' or \ self._activation_quantize_type == 'range_abs_max': - counter_name = '@STEP_COUNTER@' - for node in graph.all_vars(): + counter_name = cpt.to_text('@STEP_COUNTER@') + for node in graph.all_var_nodes(): if node.name() == counter_name: self._global_step = node if self._global_step is None: - global_step_in = graph.create_param_node( + global_step_in = graph.create_persistable_node( name=counter_name, var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], @@ -175,9 +199,14 @@ class QuantizationTransformPass(object): Constant(value=0, force_cpu=True) global_step_out = graph.create_var_node_from_desc( global_step_in.var()) + # The attribute of `op_role` is needed by ParallelExecutor. increment_op = graph.create_op_node( op_type='increment', - attrs={'step': 1.0}, + attrs={ + 'step': 1.0, + 'op_role': + core.op_proto_and_checker_maker.OpRole.Forward + }, inputs={'X': global_step_in}, outputs={'Out': global_step_out}) graph.link_to(global_step_in, increment_op) @@ -202,17 +231,20 @@ class QuantizationTransformPass(object): quant_var_node = graph.create_var_node( name=self._quantized_var_name(var_node.name()), - var_type=var_node.var().type(), - shape=var_node.var().shape(), - var_dtype=var_node.var().dtype()) + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) scale_var_node = graph.create_var_node( name=self._quantized_scale_name(var_node.name()), - var_type=var_node.var().type(), - shape=var_node.var().shape(), - var_dtype=var_node.var().dtype()) + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) quant_op_node = graph.create_op_node( op_type='fake_quantize_abs_max', - attrs={'bit_length': quant_bits}, + attrs={ + 'bit_length': quant_bits, + 'op_role': core.op_proto_and_checker_maker.OpRole.Forward + }, inputs={'X': var_node}, outputs={'Out': quant_var_node, 'OutScale': scale_var_node}) @@ -229,15 +261,15 @@ class QuantizationTransformPass(object): quant_var_node = graph.create_var_node( name=self._quantized_var_name(var_node.name()), - var_type=var_node.var().type(), - shape=var_node.var().shape(), - var_dtype=var_node.var().dtype()) + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) - scale_in_node = graph.create_param_node( + scale_in_node = graph.create_persistable_node( name=self._quantized_scale_name(var_node.name()), var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], - var_dtype=var_node.var().dtype()) + var_dtype=var_node.dtype()) self._need_initialized[scale_in_node.var()] = Constant(value=0.001) scale_out_node = graph.create_var_node_from_desc(scale_in_node.var()) @@ -246,18 +278,19 @@ class QuantizationTransformPass(object): if not self._is_test: # The name of scales_var_node maybe 'scales_0', 'scales_1', etc. - scales_node = graph.create_param_node( + scales_node = graph.create_persistable_node( name=unique_name.generate('scales'), var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[self._window_size], - var_dtype=var_node.var().dtype()) + var_dtype=var_node.dtype()) self._need_initialized[scales_node.var()] = Constant(value=0) inputs['Iter'] = self._global_step outputs['OutScales'] = scales_node attrs = { 'window_size': self._window_size, 'bit_length': quant_bits, - 'is_test': self._is_test + 'is_test': self._is_test, + 'op_role': core.op_proto_and_checker_maker.OpRole.Forward } quant_op_node = graph.create_op_node( op_type='fake_quantize_range_abs_max', @@ -284,13 +317,16 @@ class QuantizationTransformPass(object): dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(var_node.name()), - var_type=var_node.var().type(), - shape=var_node.var().shape(), - var_dtype=var_node.var().dtype()) + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=var_node.dtype()) max_range = (1 << (quant_bits - 1)) - 1 dequant_op_node = graph.create_op_node( op_type='fake_dequantize_max_abs', - attrs={'max_range': float(max_range)}, + attrs={ + 'max_range': float(max_range), + 'op_role': core.op_proto_and_checker_maker.OpRole.Forward + }, inputs={'X': var_node, 'Scale': scale_var_node}, outputs={'Out': dequant_var_node}) @@ -316,3 +352,340 @@ class QuantizationTransformPass(object): Return the scale name of quantized variable for the input `var_name`. """ return "%s.scale" % (var_name) + + +class QuantizationFreezePass(object): + """ + The freeze pass is used to adjust the quantize operator order, for example: + 1) `activation -> quant -> dequant -> conv2d` will be freezed into + `activation -> quant -> conv2d -> dequant` + 2) `weight -> quant -> dequant -> conv2d` will be freezed into `weight -> conv2d`, + and weight will be sacled offline. + + Args: + scope(fluid.Scope): scope is used to get the weight tensor values. + place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors. + weight_bits (int): quantization bit number for weights. + activation_bits (int): quantization bit number for activation. + weight_quantize_type (str): quantization type for weights, support 'abs_max'. + The 'range_abs_max' usually is not used for weight, since weights are fixed once the + model is well trained. + """ + + def __init__(self, + scope, + place, + weight_bits=8, + activation_bits=8, + weight_quantize_type='abs_max'): + assert scope is not None, \ + 'The scope cannot be set None.' + assert place is not None, \ + 'The place cannot be set None.' + self._scope = scope + self._place = place + self._weight_bits = weight_bits + self._activation_bits = activation_bits + self._weight_quantize_type = weight_quantize_type + self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul'] + self._fake_quant_op_names = [ + 'fake_quantize_abs_max', 'fake_quantize_range_abs_max' + ] + self._fake_dequant_op_names = ['fake_dequantize_max_abs'] + self._op_input_rename_map = collections.OrderedDict() + self._op_output_rename_map = collections.OrderedDict() + self._var_scale_map = collections.OrderedDict() + + def apply(self, graph): + """ + Adjust quantize/dequantize operators order for the inference process. + + Args: + graph(IrGraph): the applied graph. + """ + persistable_vars = [p.name() for p in graph.all_persistable_nodes()] + ops = graph.all_op_nodes() + for op_node in ops: + op_name = op_node.name() + if op_name in self._fake_quant_op_names: + input_arg_name = op_node.input('X')[0] + if input_arg_name in persistable_vars: + if self._weight_quantize_type == 'abs_max': + param = self._load_var(input_arg_name) + scale_v = np.max(np.abs(param)) + else: + scale_v = self._load_var( + op_node.output('OutScale')[0])[0] + self._var_scale_map[input_arg_name] = scale_v + else: + scale_v = graph.var_node(op_node.output('OutScale')[0]) + self._var_scale_map[input_arg_name] = scale_v + if input_arg_name in persistable_vars: + self._remove_fake_quant_and_dequant_op(graph, op_node) + # quantize weight and restore + param_v = self._load_var(input_arg_name) + quantized_param_v = self._quant(param_v, scale_v, + self._weight_bits) + self._restore_var(input_arg_name, quantized_param_v) + + ops = graph.all_op_nodes() + for op_node in ops: + op_name = op_node.name() + if op_name in self._fake_dequant_op_names: + self._remove_fake_quant_and_dequant_op(graph, op_node) + + ops = graph.all_op_nodes() + for op_node in ops: + op_name = op_node.name() + if op_name in self._quantizable_ops: + self._insert_post_dequant_op(graph, op_node) + + for op_node in ops: + # insert dequant_op after fc/conv, need to rename inputs of the followed ops + for var_node in op_node.inputs: + name = var_node.name() + if name in self._op_output_rename_map: + old_in = graph.var_node(name) + new_in = self._op_output_rename_map[name] + graph.update_input_link(old_in, new_in, op_node) + + # remove the unused var node in the graph + self._remove_unused_var_nodes(graph) + return graph + + def _remove_fake_quant_and_dequant_op(self, graph, op_node): + k = op_node.output('Out')[0] + v = op_node.input('X')[0] + if v not in self._op_input_rename_map: + self._op_input_rename_map[k] = v + else: + self._op_input_rename_map[k] = self._op_input_rename_map[v] + graph.safe_remove_nodes(op_node) + + def _insert_post_dequant_op(self, graph, op_node): + max_range = None + scale_var_node = None + persistable_vars = [p.name() for p in graph.all_persistable_nodes()] + for var_node in op_node.inputs: + name = var_node.name() + if name in self._op_input_rename_map: + old_in = graph.var_node(name) + new_in = graph.var_node(self._op_input_rename_map[name]) + new_in.clear_outputs() + graph.update_input_link(old_in, new_in, op_node) + original_var_name = self._original_var_name(name) + scale_v = self._var_scale_map[original_var_name] + if original_var_name in persistable_vars: + param_range = (1 << (self._weight_bits - 1)) - 1 + act_range = (1 << (self._activation_bits - 1)) - 1 + assert self._is_float( + scale_v), 'The scale of parameter %s is not a float.' % ( + original_var_name) + max_range = param_range * act_range / scale_v + else: + assert isinstance(scale_v, IrNode) + scale_var_node = self._var_scale_map[original_var_name] + + if len(op_node.outputs) != 1: + raise ValueError("Only support one output, but op %s has" + " more than one output." % (op_node.name())) + + output_var_node = op_node.outputs[0] + dequant_var_node = graph.create_var_node( + name=self._dequantized_var_name(output_var_node.name()), + var_type=output_var_node.type(), + shape=output_var_node.shape(), + var_dtype=output_var_node.dtype()) + dequant_op_node = graph.create_op_node( + op_type='fake_dequantize_max_abs', + attrs={ + 'max_range': float(max_range), + 'op_role': core.op_proto_and_checker_maker.OpRole.Forward + }, + inputs={'X': output_var_node, + 'Scale': scale_var_node}, + outputs={'Out': dequant_var_node}) + graph.link_to(output_var_node, dequant_op_node) + graph.link_to(scale_var_node, dequant_op_node) + graph.link_to(dequant_op_node, dequant_var_node) + self._op_output_rename_map[output_var_node.name()] = dequant_var_node + return dequant_var_node + + def _load_var(self, name): + return np.array(self._scope.find_var(name).get_tensor()) + + def _restore_var(self, name, array): + tensor = self._scope.find_var(name).get_tensor() + tensor.set(array, self._place) + + def _remove_unused_var_nodes(self, graph): + all_used_vars = set() + ops = graph.all_op_nodes() + for op_node in ops: + for input_node in op_node.inputs: + all_used_vars.add(input_node) + for output_node in op_node.outputs: + all_used_vars.add(output_node) + + all_used_vars = {n.node for n in all_used_vars} + all_unused_vars = { + n + for n in filter(lambda node: node.node not in all_used_vars, + graph.all_var_nodes()) + } + graph.safe_remove_nodes(all_unused_vars) + + def _original_var_name(self, var_name): + """ + Return the original variable name. + """ + if var_name.endswith('.quantized.dequantized'): + return var_name[:-len('.quantized.dequantized')] + if var_name.endswith('.quantized'): + return var_name[:-len('.quantized')] + if var_name.endswith('.dequantized'): + return var_name[:-len('.dequantized')] + if var_name.endswith('.scale'): + return var_name[:-len('.scale')] + else: + return var_name + + def _dequantized_var_name(self, var_name): + """ + Return dequantized variable name for the input `var_name`. + """ + return "%s.dequantized" % (var_name) + + def _is_float(self, v): + return isinstance(v, float) or isinstance(v, np.float32) \ + or isinstance(v, np.float64) + + def _quant(self, x, scale, num_bits): + return np.round(x / scale * ((1 << (num_bits - 1)) - 1)) + + +class ConvertToInt8Pass(object): + """ + Convert the weights into int8_t type. + + Args: + scope(fluid.Scope): scope is used to get the weight tensor values. + place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the + 8bits weight tensors. + """ + + def __init__(self, scope, place): + assert scope is not None, \ + 'The scope cannot be set None.' + assert place is not None, \ + 'The place cannot be set None.' + self._scope = scope + self._place = place + self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul'] + + def apply(self, graph): + """ + Convert weights' tpye of the graph. After that, the data type of the + graph weigths is int8_t. + + Args: + graph(IrGraph): the applied graph. + """ + persistable_vars = [p.name() for p in graph.all_persistable_nodes()] + ops = graph.all_op_nodes() + input_map = {} + for op_node in ops: + op_name = op_node.name() + if op_name in self._quantizable_ops: + for var_node in op_node.inputs: + name = var_node.name() + if name in persistable_vars: + if name not in input_map: + int8_var_node = self._convert_to_int8(graph, + var_node) + input_map[name] = int8_var_node + graph.update_input_link(var_node, input_map[name], + op_node) + + # remove the unused var node in the graph + self._remove_unused_var_nodes(graph) + return graph + + def _convert_to_int8(self, graph, var_node): + int8_var_node_name = var_node.name() + ".int8" + int8_var_node = graph.create_persistable_node( + name=cpt.to_text(int8_var_node_name), + var_type=var_node.type(), + shape=var_node.shape(), + var_dtype=core.VarDesc.VarType.INT8) + array = self._load_var(var_node.name()) + self._scope.var(int8_var_node_name) + self._store_var(int8_var_node_name, array, np.int8) + return int8_var_node + + def _load_var(self, name): + return np.array(self._scope.find_var(name).get_tensor()) + + def _store_var(self, name, array, dtype): + tensor = self._scope.find_var(name).get_tensor() + tensor.set(array.astype(dtype), self._place) + + def _remove_unused_var_nodes(self, graph): + all_used_vars = set() + ops = graph.all_op_nodes() + for op_node in ops: + for input_node in op_node.inputs: + all_used_vars.add(input_node) + for output_node in op_node.outputs: + all_used_vars.add(output_node) + + all_used_vars = {n.node for n in all_used_vars} + all_unused_vars = { + n + for n in filter(lambda node: node.node not in all_used_vars, + graph.all_var_nodes()) + } + graph.safe_remove_nodes(all_unused_vars) + + +class TransformForMobilePass(object): + """ + This pass is used to convert the freezed graph for paddle-mobile execution. + """ + + def __init__(self): + self._fake_quant_op_names = [ + 'fake_quantize_abs_max', 'fake_quantize_range_abs_max' + ] + self._fake_dequant_op_names = ['fake_dequantize_max_abs'] + + def apply(self, graph): + """ + Because paddle-mobile use `quantize` an `dequantize` as the names of + quantize operator and dequantize operator, the `apply` function just + realize this logic. + + Args: + graph(IrGraph): the graph will be transformed. + """ + ops = graph.all_op_nodes() + for op_node in ops: + name = op_node.name() + if name in self._fake_quant_op_names: + op_node.set_type('quantize') + quant_node = graph.create_op_node_from_desc(op_node.op()) + for input_node in op_node.inputs: + graph.link_to(input_node, quant_node) + for output_node in op_node.outputs: + graph.link_to(quant_node, output_node) + graph.safe_remove_nodes(op_node) + if name in self._fake_dequant_op_names: + op_node.set_type('dequantize') + dequant_node = graph.create_op_node_from_desc(op_node.op()) + for input_node in op_node.inputs: + graph.link_to(input_node, dequant_node) + for output_node in op_node.outputs: + graph.link_to(dequant_node, output_node) + graph.safe_remove_nodes(op_node) + + return graph diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..79bec8c4ad34d682895250bc29b1fddb3a569bd4 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/contrib/slim/unitest/__init__.py b/python/paddle/fluid/contrib/slim/tests/__init__.py similarity index 100% rename from python/paddle/fluid/contrib/slim/unitest/__init__.py rename to python/paddle/fluid/contrib/slim/tests/__init__.py diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/config.yaml b/python/paddle/fluid/contrib/slim/tests/configs/config.yaml similarity index 88% rename from python/paddle/fluid/contrib/slim/unitest/configs/config.yaml rename to python/paddle/fluid/contrib/slim/tests/configs/config.yaml index db488b96330210df15b02b19d90abd5c9101f844..d9b49029d3e34d487ad65fe0f7e54e2cee1d5838 100644 --- a/python/paddle/fluid/contrib/slim/unitest/configs/config.yaml +++ b/python/paddle/fluid/contrib/slim/tests/configs/config.yaml @@ -1,5 +1,5 @@ version: 1.0 -include: ["./unitest/configs/pruners.yaml", "./unitest/configs/pruners_0.yaml"] +include: ["./configs/pruners.yaml", "./configs/pruners_0.yaml"] pruners: pruner_1: class: 'RatioPruner' diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml b/python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml similarity index 100% rename from python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml rename to python/paddle/fluid/contrib/slim/tests/configs/pruners.yaml diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml b/python/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml similarity index 100% rename from python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml rename to python/paddle/fluid/contrib/slim/tests/configs/pruners_0.yaml diff --git a/python/paddle/fluid/contrib/slim/unitest/test_factory.py b/python/paddle/fluid/contrib/slim/tests/test_factory.py similarity index 95% rename from python/paddle/fluid/contrib/slim/unitest/test_factory.py rename to python/paddle/fluid/contrib/slim/tests/test_factory.py index 07f28aac905d1a2813dbde6143235c7916fd9278..2fc72b6475e6bdd977dafb57696046a1100d0087 100644 --- a/python/paddle/fluid/contrib/slim/unitest/test_factory.py +++ b/python/paddle/fluid/contrib/slim/tests/test_factory.py @@ -18,7 +18,7 @@ import unittest class TestFactory(unittest.TestCase): def test_parse(self): - factory = ConfigFactory('./unitest/configs/config.yaml') + factory = ConfigFactory('./configs/config.yaml') pruner = factory.instance('pruner_1') self.assertEquals(pruner.ratios['conv1_1.w'], 0.3) diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph.py b/python/paddle/fluid/contrib/slim/tests/test_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..2d2f1384dec65ee19dcade8a46f80bd3f9eb7013 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_graph.py @@ -0,0 +1,80 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function +import unittest +import paddle.fluid as fluid +import six +from paddle.fluid.framework import IrGraph +from paddle.fluid import core + + +def residual_block(num): + def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + bias_attr=False): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) + return fluid.layers.batch_norm(input=tmp, act=act) + + data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = data + for _ in six.moves.xrange(num): + conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) + short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) + hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + fc = fluid.layers.fc(input=hidden, size=10) + loss = fluid.layers.cross_entropy(input=fc, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class TestGraph(unittest.TestCase): + def test_graph_functions(self): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = residual_block(2) + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + graph = IrGraph(core.Graph(main.desc), for_test=False) + marked_nodes = set() + for op in graph.all_op_nodes(): + if op.name().find('conv2d') > -1: + marked_nodes.add(op) + graph.draw('.', 'residual', marked_nodes) + self.assertFalse(graph.has_circle()) + self.assertEqual(graph.graph_num(), 1) + nodes = graph.topology_sort() + self.assertEqual(len(nodes), len(graph.all_op_nodes())) + nodes_map = graph.build_adjacency_list() + self.assertEqual(len(nodes_map), len(graph.all_op_nodes())) + nodes_num = len(graph.all_nodes()) + graph.safe_remove_nodes(marked_nodes) + self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..254b73a124734f3693f4757801f0f544d6aa6f27 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -0,0 +1,367 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +import unittest +import random +import numpy as np +import paddle.fluid as fluid +import six +import paddle +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass +from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass +from paddle.fluid.contrib.slim.quantization import TransformForMobilePass +from paddle.fluid import core + + +def linear_fc(num): + data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = data + for _ in six.moves.xrange(num): + hidden = fluid.layers.fc(hidden, size=128, act='relu') + loss = fluid.layers.cross_entropy(input=hidden, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def residual_block(num): + def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + bias_attr=False): + tmp = fluid.layers.conv2d( + input=input, + filter_size=filter_size, + num_filters=ch_out, + stride=stride, + padding=padding, + act=None, + bias_attr=bias_attr) + return fluid.layers.batch_norm(input=tmp, act=act) + + data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = data + for _ in six.moves.xrange(num): + conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) + short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) + hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') + fc = fluid.layers.fc(input=hidden, size=10) + loss = fluid.layers.cross_entropy(input=fc, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def conv_net(img, label): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + return avg_loss + + +class TestQuantizationTransformPass(unittest.TestCase): + def setUp(self): + self.quantizable_op_and_inputs = { + 'conv2d': ['Input', 'Filter'], + 'depthwise_conv2d': ['Input', 'Filter'], + 'mul': ['X', 'Y'] + } + self.quantizable_grad_op_inputs = { + 'conv2d_grad': ['Input', 'Filter'], + 'depthwise_conv2d_grad': ['Input', 'Filter'], + 'mul_grad': ['X', 'Y'] + } + + def check_program(self, transform_pass, program): + quantized_ops = set() + for block in program.blocks: + for op in block.ops: + # check forward + if op.type in self.quantizable_op_and_inputs: + for arg_name in op.input_arg_names: + self.assertTrue( + arg_name.endswith('.quantized.dequantized')) + quantized_ops.add(arg_name) + + for op in block.ops: + # check backward + if op.type in self.quantizable_grad_op_inputs: + for pname in self.quantizable_grad_op_inputs[op.type]: + arg_name = op.input(pname)[0] + self.assertTrue( + arg_name.endswith('.quantized.dequantized')) + self.assertTrue(arg_name in quantized_ops) + + def linear_fc_quant(self, quant_type): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = linear_fc(3) + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + graph = IrGraph(core.Graph(main.desc), for_test=False) + transform_pass = QuantizationTransformPass( + scope=fluid.global_scope(), + place=place, + activation_quantize_type=quant_type) + transform_pass.apply(graph) + marked_nodes = set() + for op in graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes) + program = graph.to_program() + self.check_program(transform_pass, program) + val_graph = IrGraph(core.Graph(program.desc), for_test=False) + val_marked_nodes = set() + for op in val_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + val_marked_nodes.add(op) + val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes) + + def test_linear_fc_quant_abs_max(self): + self.act_quant_op_type = 'fake_quantize_abs_max' + self.linear_fc_quant('abs_max') + + def test_linear_fc_quant_range_abs_max(self): + self.act_quant_op_type = 'fake_quantize_range_abs_max' + self.linear_fc_quant('range_abs_max') + + def residual_block_quant(self, quant_type): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = residual_block(2) + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + graph = IrGraph(core.Graph(main.desc), for_test=False) + transform_pass = QuantizationTransformPass( + scope=fluid.global_scope(), + place=place, + activation_quantize_type=quant_type) + transform_pass.apply(graph) + marked_nodes = set() + for op in graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes) + program = graph.to_program() + self.check_program(transform_pass, program) + val_graph = IrGraph(core.Graph(program.desc), for_test=False) + val_marked_nodes = set() + for op in val_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + val_marked_nodes.add(op) + val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes) + + def test_residual_block_abs_max(self): + self.act_quant_op_type = 'fake_quantize_abs_max' + self.residual_block_quant('abs_max') + + def test_residual_block_range_abs_max(self): + self.act_quant_op_type = 'fake_quantize_range_abs_max' + self.residual_block_quant('range_abs_max') + + +class TestQuantizationFreezePass(unittest.TestCase): + def freeze_graph(self, use_cuda, seed, quant_type): + def build_program(main, startup, is_test): + main.random_seed = seed + startup.random_seed = seed + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + loss = conv_net(img, label) + if not is_test: + opt = fluid.optimizer.Adam(learning_rate=0.001) + opt.minimize(loss) + return [img, label], loss + + random.seed(0) + np.random.seed(0) + + main = fluid.Program() + startup = fluid.Program() + test_program = fluid.Program() + feeds, loss = build_program(main, startup, False) + build_program(test_program, startup, True) + test_program = test_program.clone(for_test=True) + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + test_graph = IrGraph(core.Graph(test_program.desc), for_test=True) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + transform_pass = QuantizationTransformPass( + scope=scope, place=place, activation_quantize_type=quant_type) + transform_pass.apply(main_graph) + transform_pass.apply(test_graph) + dev_name = '_gpu_' if use_cuda else '_cpu_' + marked_nodes = set() + for op in main_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes) + marked_nodes = set() + for op in test_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes) + + quantized_main_program = main_graph.to_program() + quantized_test_program = test_graph.to_program() + iters = 5 + batch_size = 8 + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), + batch_size=batch_size) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=batch_size) + feeder = fluid.DataFeeder(feed_list=feeds, place=place) + with fluid.scope_guard(scope): + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(program=quantized_main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + print('{}: {}'.format('loss' + dev_name + quant_type, loss_v)) + + test_data = next(test_reader()) + with fluid.program_guard(quantized_test_program): + w_var = fluid.framework._get_var('conv2d_1.w_0.quantized', + quantized_test_program) + # Testing + with fluid.scope_guard(scope): + test_loss1, w_quant = exe.run(program=quantized_test_program, + feed=feeder.feed(test_data), + fetch_list=[loss, w_var]) + + # Freeze graph for inference, but the weight of fc/conv is still float type. + freeze_pass = QuantizationFreezePass(scope=scope, place=place) + freeze_pass.apply(test_graph) + marked_nodes = set() + for op in test_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + test_graph.draw('.', 'test_freeze' + dev_name + quant_type, + marked_nodes) + + server_program = test_graph.to_program() + with fluid.scope_guard(scope): + test_loss2, = exe.run(program=server_program, + feed=feeder.feed(test_data), + fetch_list=[loss]) + self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3) + print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1)) + print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2)) + w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor()) + # Maybe failed, this is due to the calculation precision + # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant)) + print('{}: {}'.format('w_freeze' + dev_name + quant_type, + np.sum(w_freeze))) + print('{}: {}'.format('w_quant' + dev_name + quant_type, + np.sum(w_quant))) + + # Convert parameter to 8-bit. + convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place) + convert_int8_pass.apply(test_graph) + marked_nodes = set() + for op in test_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes) + server_program_int8 = test_graph.to_program() + # Save the 8-bit parameter and model file. + with fluid.scope_guard(scope): + fluid.io.save_inference_model('server_int8' + dev_name + quant_type, + ['image', 'label'], [loss], exe, + server_program_int8) + # Test whether the 8-bit parameter and model file can be loaded successfully. + [infer, feed, fetch] = fluid.io.load_inference_model( + 'server_int8' + dev_name + quant_type, exe) + # Check the loaded 8-bit weight. + w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor()) + self.assertEqual(w_8bit.dtype, np.int8) + self.assertEqual(np.sum(w_8bit), np.sum(w_freeze)) + print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit))) + print('{}: {}'.format('w_freeze' + dev_name + quant_type, + np.sum(w_freeze))) + + mobile_pass = TransformForMobilePass() + mobile_pass.apply(test_graph) + marked_nodes = set() + for op in test_graph.all_op_nodes(): + if op.name().find('quantize') > -1: + marked_nodes.add(op) + test_graph.draw('.', 'test_mobile' + dev_name + quant_type, + marked_nodes) + + mobile_program = test_graph.to_program() + with fluid.scope_guard(scope): + fluid.io.save_inference_model('mobile_int8' + dev_name + quant_type, + ['image', 'label'], [loss], exe, + mobile_program) + + def test_freeze_graph_cuda_dynamic(self): + if fluid.core.is_compiled_with_cuda(): + with fluid.unique_name.guard(): + self.freeze_graph(True, seed=1, quant_type='abs_max') + + def test_freeze_graph_cpu_dynamic(self): + with fluid.unique_name.guard(): + self.freeze_graph(False, seed=2, quant_type='abs_max') + + def test_freeze_graph_cuda_static(self): + if fluid.core.is_compiled_with_cuda(): + with fluid.unique_name.guard(): + self.freeze_graph(True, seed=1, quant_type='range_abs_max') + + def test_freeze_graph_cpu_static(self): + with fluid.unique_name.guard(): + self.freeze_graph(False, seed=2, quant_type='range_abs_max') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py deleted file mode 100644 index 1bd4b95d6b90b7f16d507061190f0b463f6c4cc5..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/contrib/slim/unitest/test_quantization_pass.py +++ /dev/null @@ -1,175 +0,0 @@ -# copyright (c) 2018 paddlepaddle authors. all rights reserved. -# -# licensed under the apache license, version 2.0 (the "license"); -# you may not use this file except in compliance with the license. -# you may obtain a copy of the license at -# -# http://www.apache.org/licenses/license-2.0 -# -# unless required by applicable law or agreed to in writing, software -# distributed under the license is distributed on an "as is" basis, -# without warranties or conditions of any kind, either express or implied. -# see the license for the specific language governing permissions and -# limitations under the license. - -import unittest -import random -import numpy as np -import paddle.fluid as fluid -import six -from paddle.fluid.framework import Program -from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass -from paddle.fluid import core - - -def linear_fc(num): - data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - hidden = data - for _ in six.moves.xrange(num): - hidden = fluid.layers.fc(hidden, size=128, act='relu') - loss = fluid.layers.cross_entropy(input=hidden, label=label) - loss = fluid.layers.mean(loss) - return loss - - -def residual_block(num): - def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu', - bias_attr=False): - tmp = fluid.layers.conv2d( - input=input, - filter_size=filter_size, - num_filters=ch_out, - stride=stride, - padding=padding, - act=None, - bias_attr=bias_attr) - return fluid.layers.batch_norm(input=tmp, act=act) - - data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - hidden = data - for _ in six.moves.xrange(num): - conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True) - short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None) - hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu') - fc = fluid.layers.fc(input=hidden, size=10) - loss = fluid.layers.cross_entropy(input=fc, label=label) - loss = fluid.layers.mean(loss) - return loss - - -class TestQuantizationTransformPass(unittest.TestCase): - def setUp(self): - self.quantizable_op_and_inputs = { - 'conv2d': ['Input', 'Filter'], - 'depthwise_conv2d': ['Input', 'Filter'], - 'mul': ['X', 'Y'] - } - self.quantizable_grad_op_inputs = { - 'conv2d_grad': ['Input', 'Filter'], - 'depthwise_conv2d_grad': ['Input', 'Filter'], - 'mul_grad': ['X', 'Y'] - } - - def check_program(self, transform_pass, program): - quantized_ops = set() - for block in program.blocks: - for op in block.ops: - # check forward - if op.type in self.quantizable_op_and_inputs: - for arg_name in op.input_arg_names: - self.assertTrue( - arg_name.endswith('.quantized.dequantized')) - quantized_ops.add(arg_name) - - for op in block.ops: - # check backward - if op.type in self.quantizable_grad_op_inputs: - for pname in self.quantizable_grad_op_inputs[op.type]: - arg_name = op.input(pname)[0] - self.assertTrue( - arg_name.endswith('.quantized.dequantized')) - self.assertTrue(arg_name in quantized_ops) - - def linear_fc_quant(self, quant_type): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - loss = linear_fc(3) - opt = fluid.optimizer.Adam(learning_rate=0.001) - opt.minimize(loss) - exe = fluid.Executor(fluid.CPUPlace()) - graph = IrGraph(core.Graph(main.desc), for_test=False) - transform_pass = QuantizationTransformPass( - scope=fluid.global_scope(), - program_exe=exe, - activation_quantize_type=quant_type) - transform_pass.apply(graph) - marked_nodes = set() - for op in graph.all_ops(): - if op.name().find('quantize') > -1: - marked_nodes.add(op) - graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes) - program = graph.to_program() - self.check_program(transform_pass, program) - val_graph = IrGraph(core.Graph(program.desc), for_test=False) - val_marked_nodes = set() - for op in val_graph.all_ops(): - if op.name().find('quantize') > -1: - val_marked_nodes.add(op) - val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes) - - def test_linear_fc_quant_abs_max(self): - self.act_quant_op_type = 'fake_quantize_abs_max' - self.linear_fc_quant('abs_max') - - def test_linear_fc_quant_range_abs_max(self): - self.act_quant_op_type = 'fake_quantize_range_abs_max' - self.linear_fc_quant('range_abs_max') - - def residual_block_quant(self, quant_type): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - loss = residual_block(2) - opt = fluid.optimizer.Adam(learning_rate=0.001) - opt.minimize(loss) - exe = fluid.Executor(fluid.CPUPlace()) - graph = IrGraph(core.Graph(main.desc), for_test=False) - transform_pass = QuantizationTransformPass( - scope=fluid.global_scope(), - program_exe=exe, - activation_quantize_type=quant_type) - transform_pass.apply(graph) - marked_nodes = set() - for op in graph.all_ops(): - if op.name().find('quantize') > -1: - marked_nodes.add(op) - graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes) - program = graph.to_program() - self.check_program(transform_pass, program) - val_graph = IrGraph(core.Graph(program.desc), for_test=False) - val_marked_nodes = set() - for op in val_graph.all_ops(): - if op.name().find('quantize') > -1: - val_marked_nodes.add(op) - val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes) - - def test_residual_block_abs_max(self): - self.act_quant_op_type = 'fake_quantize_abs_max' - self.residual_block_quant('abs_max') - - def test_residual_block_range_abs_max(self): - self.act_quant_op_type = 'fake_quantize_range_abs_max' - self.residual_block_quant('range_abs_max') - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/contrib/tests/CMakeLists.txt b/python/paddle/fluid/contrib/tests/CMakeLists.txt index 81aee1233d1db756686d1a934b94672dc5c770fe..a2c59416467e5dbe66f058666633807eb0e45047 100644 --- a/python/paddle/fluid/contrib/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/tests/CMakeLists.txt @@ -6,5 +6,9 @@ if(APPLE OR WIN32 OR NOT WITH_MKL) endif() foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) + if(src MATCHES "test_calibration") + py_test(${src} SRCS ${src}.py ENVS FLAGS_use_mkldnn=true) + else() + py_test(${src} SRCS ${src}.py) + endif() endforeach() diff --git a/python/paddle/fluid/contrib/tests/test_calibration.py b/python/paddle/fluid/contrib/tests/test_calibration.py index 424ea245a0f2dff0d437ace386f2e4e0fa6b517d..b9f938bebed71dc9611df8d743a066858ea38bca 100644 --- a/python/paddle/fluid/contrib/tests/test_calibration.py +++ b/python/paddle/fluid/contrib/tests/test_calibration.py @@ -199,7 +199,6 @@ class TestCalibrationForResnet50(unittest.TestCase): def run_program(self, model_path, generate_int8=False, algo='direct'): image_shape = [3, 224, 224] - os.environ['FLAGS_use_mkldnn'] = 'True' fluid.memory_optimize(fluid.default_main_program()) @@ -241,9 +240,6 @@ class TestCalibrationForResnet50(unittest.TestCase): label = label.reshape([-1, 1]) running_program = calibrator.sampling_program.clone( ) if generate_int8 else infer_program.clone() - for op in running_program.current_block().ops: - if op.has_attr("use_mkldnn"): - op._set_attr("use_mkldnn", True) t1 = time.time() _, acc1, _ = exe.run( diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index 86fa84ad4bd7a55fb27f4e43128f0bfda6dfe6db..77fdf0087b93c3ad44a2492de68f8f57ce243ef3 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase): build_program(test_program, startup, True) test_program = test_program.clone(for_test=True) - quant_transpiler = QuantizeTranspiler() - quant_transpiler.training_transpile(main) - quant_transpiler.training_transpile(test_program) + quant_type = 'range_abs_max' # 'range_abs_max' or 'abs_max' + quant_transpiler = QuantizeTranspiler( + activation_quantize_type=quant_type) + quant_transpiler.training_transpile(main, startup) + quant_transpiler.training_transpile(test_program, startup) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 8815911eaeb36067987c0490d7a4f3e909789499..dfa50e721c979703165649dccfd6e42ef08e97b7 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -261,45 +261,42 @@ def _as_lodtensor(data, place): class Executor(object): """ - An Executor in Python, only support the single-GPU running. For multi-cards, please refer to - ParallelExecutor. - Python executor takes a program, add feed operators and fetch operators to this program according + An Executor in Python, supports single/multiple-GPU running, and single/multiple-CPU running. + Python executor takes a program, adds feed operators and fetch operators to this program according to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides - the variables(or names) that user want to get after program run. Note: the executor will run all + the variables(or names) that user wants to get after program runs. Note: the executor will run all operators in the program but not only the operators dependent by the fetch_list. - It store the global variables into the global scope, and create a local scope for the temporary - variables. The local scope contents will be discarded after every minibatch forward/backward finished. - But the global scope variables will be persistent through different runs. - All of ops in program will be running in sequence. + It stores the global variables into the global scope, and creates a local scope for the temporary + variables. The contents in local scope may be discarded after every minibatch forward/backward + finished. But the global scope variables will be persistent through different runs. Example: - .. code-block:: python - # First create the Executor. - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - - # Run the startup program once and only once. - # Not need to optimize/compile the startup program. - exe.run(fluid.default_startup_program()) - - # Run the main program directly without compile. - loss, = exe.run(fluid.default_main_program(), - feed=feed_dict, - fetch_list=[loss.name]) - # Or, compiled the program and run. See `CompiledProgram` for more detail. - compiled_prog = compiler.CompiledProgram( - fluid.default_main_program()).with_data_parallel( - loss_name=loss.name) - loss, = exe.run(compiled_prog, - feed=feed_dict, - fetch_list=[loss.name]) + + .. code-block:: python + + # First create the Executor. + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + + # Run the startup program once and only once. + # Not need to optimize/compile the startup program. + exe.run(fluid.default_startup_program()) + + # Run the main program directly without compile. + loss, = exe.run(fluid.default_main_program(), + feed=feed_dict, + fetch_list=[loss.name]) + # Or, compiled the program and run. See `CompiledProgram` for more detail. + compiled_prog = compiler.CompiledProgram( + fluid.default_main_program()).with_data_parallel( + loss_name=loss.name) + loss, = exe.run(compiled_prog, + feed=feed_dict, + fetch_list=[loss.name]) Args: place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device - - Note: For debugging complicated network in parallel-GPUs, you can test it on the executor. - They has the exactly same arguments, and expected the same results. """ def __init__(self, place): @@ -382,6 +379,12 @@ class Executor(object): ] return outs + ''' + TODO(typhoonzero): Define "no longer use" meaning? Can user create + a new Executor for the same program and run? + TODO(panyx0718): Why ParallelExecutor doesn't have close? + ''' + def close(self): """ Close this executor. @@ -389,9 +392,6 @@ class Executor(object): You can no longer use this executor after calling this method. For the distributed training, this method would free the resource on PServers related to the current Trainer. - TODO(typhoonzero): Define "no longer use" meaning? Can user create - a new Executor for the same program and run? - TODO(panyx0718): Why ParallelExecutor doesn't have close? Example: >>> cpu = core.CPUPlace() @@ -538,6 +538,8 @@ class Executor(object): else: # TODO(panyx0718): Can compile program to optimize executor # performance. + # TODO(panyx0718): executor should be able to run graph. + assert program._program, "CompiledProgram is compiled from graph, can only run with_data_parallel." return self._run( program._program, self._default_executor, diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 832c97c7deb49b4e118e15989ab7a34da6ce57a0..7dc9178807c76b44c9aeb00054188ad1dbe18f0a 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -16,6 +16,8 @@ from __future__ import print_function import collections from collections import defaultdict +from collections import Iterable +import contextlib from .wrapped_decorator import signature_safe_contextmanager import os import re @@ -376,16 +378,22 @@ class Variable(object): # get_capacity is implemented pass - self.block.vars[name] = self - self.op = None - self.stop_gradient = stop_gradient - self.is_data = is_data if _in_imperative_mode(): + # record vars in tracer rather than blocks self._ivar = kwargs.get("ivar", None) if not self._ivar: - self._ivar = core.VarBase() + self._ivar = core.VarBase(stop_gradient) self._ivar.desc = self.desc - self._ivar.stop_gradient = stop_gradient + self._ivar.block = block.desc + self._ivar.name = name + self._ivar.persistable = persistable + if persistable: + self.block.vars[name] = self + else: + self.block.vars[name] = self + self.op = None + self.stop_gradient = stop_gradient + self.is_data = is_data def _numpy(self): new_ivar = self._ivar._copy_to(core.CPUPlace(), True) @@ -555,7 +563,8 @@ class OpProtoHolder(object): return { core.op_proto_and_checker_maker.kOpRoleAttrName(), core.op_proto_and_checker_maker.kOpRoleVarAttrName(), - core.op_proto_and_checker_maker.kOpNameScopeAttrName() + core.op_proto_and_checker_maker.kOpNameScopeAttrName(), + core.op_proto_and_checker_maker.kOpCreationCallstackAttrName() } @@ -706,7 +715,9 @@ class Operator(object): out_arg_names = [] for arg in out_args: out_arg_names.append(cpt.to_text(arg.name)) - arg.op = self + # TODO(minqiyang): could we remove variable's op in static mode? + if not _in_imperative_mode(): + arg.op = self self.desc.set_output(out_proto.name, out_arg_names) if op_attrs is not None: @@ -720,7 +731,6 @@ class Operator(object): self._update_desc_attr(attr_name, attr_val) self.desc.check_attrs() - if self._has_kernel(type): self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) @@ -728,6 +738,7 @@ class Operator(object): if _in_imperative_mode(): self.iop = core.OpBase() self.iop.desc = self.desc + self.inputs = defaultdict(list) if inputs is not None: for k, v in six.iteritems(inputs): @@ -735,6 +746,7 @@ class Operator(object): self.inputs[k].append(v._ivar) elif isinstance(v, list) or isinstance(v, tuple): self.inputs[k].extend([var._ivar for var in v]) + self.outputs = defaultdict(list) if outputs is not None: for k, v in six.iteritems(outputs): @@ -1314,18 +1326,18 @@ class Block(object): inputs=kwargs.get("inputs", None), outputs=kwargs.get("outputs", None), attrs=kwargs.get("attrs", None)) - self.ops.append(op) - - # TODO(minqiyang): add stop_gradient support in static mode too. - # currently, we only support stop_gradient in imperative mode. - self._trace_op(op, kwargs.get("stop_gradient", False)) - return op - def _trace_op(self, op, stop_gradient=False): if _in_imperative_mode(): - _imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc, - _imperative_current_expected_place_, - stop_gradient) + # record ops in tracer rather than blocks + # + # TODO(minqiyang): add op stop_gradient support in static mode too. + # currently, we only support stop_gradient in imperative mode. + _imperative_tracer().trace_op(op, + kwargs.get("stop_gradient", False)) + else: + self.ops.append(op) + + return op def _insert_op(self, index, *args, **kwargs): """ @@ -1379,8 +1391,11 @@ class Block(object): inputs=kwargs.get("inputs", None), outputs=kwargs.get("outputs", None), attrs=kwargs.get("attrs", None)) - self.ops.insert(0, op) - self._trace_op(op, kwargs.get("stop_gradient", False)) + if _in_imperative_mode(): + _imperative_tracer().trace_op(op, + kwargs.get("stop_gradient", False)) + else: + self.ops.insert(0, op) return op def _sync_with_cpp(self): @@ -1527,14 +1542,405 @@ class Block(object): return ret_var +class IrNode(object): + """ + Python IrNode. Beneath it is a core.Node, which is used for Ir Pass. + """ + + def __init__(self, node): + """ + Construct an IrNode using core.Node. + + Args: + node(core.Node): C++ Node. + """ + assert isinstance(node, + core.Node), 'node must be the instance of core.Node.' + self.node = node + + def name(self): + """ + Return the node name. + + Returns: + str: node name. + """ + return self.node.name() + + def node_type(self): + """ + Return the node type. + + Returns: + core.Node.Type: node type(core.Node.Type.Operation or core.Node.Type.Variable). + """ + return self.node.node_type() + + def var(self): + """ + Return the node variable description. + + Returns: + core.VarDesc: node variable description. + """ + return self.node.var() + + def op(self): + """ + Return the node operator description. + + Returns: + core.OpDesc: node operator description. + """ + return self.node.op() + + def id(self): + """ + Return the node id. + + Returns: + int: node id. + """ + return self.node.id() + + def is_op(self): + """ + If the node is an operator, then return true. + + Returns: + bool: indicate whether the node is an operator. + """ + return self.node.is_op() + + def is_var(self): + """ + If the node is a variable, then return true. + + Returns: + bool: indicate whether the node is a variable. + """ + return self.node.is_var() + + def is_ctrl_var(self): + """ + If the node is a control dependence variable, then return true. + + Returns: + bool: indicate whether the node is a control dependence variable. + """ + return self.node.is_ctrl_var() + + def clear_inputs(self): + """ + Clear the node inputs. After executing the `clear_inputs` function, + the node inputs will be empty. + """ + self.node.clear_inputs() + + def remove_input_by_id(self, node_id): + """ + Remove a node from inputs by the given node id. + + Args: + node_id(int): the given node id. + """ + self.node.remove_input(node_id) + + def remove_input(self, node): + """ + Remove a node from inputs. + + Args: + node(IrNode): the node being removed. + """ + self.node.remove_input(node.node) + + def append_input(self, node): + """ + Append a node in inputs. + + Args: + node(IrNode): the node being appended. + """ + self.node.append_input(node.node) + + def clear_outputs(self): + """ + Clear the node outputs. After executing the `clear_outputs` function, + the node outputs will be empty. + """ + self.node.clear_outputs() + + def remove_output_by_id(self, node_id): + """ + Remove a node from outputs by the given node id. + + Args: + node_id(int): the given node id. + """ + self.node.remove_output(node_id) + + def remove_output(self, node): + """ + Remove a node from outputs. + + Args: + node(IrNode): the node being removed. + """ + self.node.remove_output(node.node) + + def append_output(self, node): + """ + Append a node in outputs. + + Args: + node(IrNode): the node being appended. + """ + self.node.append_output(node.node) + + @property + def inputs(self): + """ + Return the node inputs. + + Returns: + list(IrNode): node inputs wrapped by IrNode. + """ + return [IrNode(n) for n in self.node.inputs] + + @property + def outputs(self): + """ + Return the node outputs. + + Returns: + list(IrNode): node outputs wrapped by IrNode. + """ + return [IrNode(n) for n in self.node.outputs] + + +class IrVarNode(IrNode): + """ + Python IrVarNode. Beneath it is a core.Node, it inherits from IrNode. + """ + + def __init__(self, node): + """ + Construct an IrVarNode using core.Node. + + Args: + node(core.Node): C++ Node. + """ + assert isinstance(node, core.Node) and node.is_var(), \ + 'node must be the instance of core.Node and it must be a variable node.' + super(IrVarNode, self).__init__(node) + self.node = node + + def set_shape(self, shape): + """ + Set the node variable shape. + + Args: + shape(list): shape to be set. + """ + assert self.node.var() is not None, \ + "The node variable description cannot be None." + self.node.var().set_shape(shape) + + def persistable(self): + """ + If the variable node is a persistable variable, then return true. + + Returns: + bool: indicate whether the variable is persistable. + """ + assert self.node.var() is not None, \ + "The node variable description cannot be None." + return self.node.var().persistable() + + def type(self): + """ + Return the variable type. + + Returns: + core.VarDesc.VarType: the variable type. + """ + assert self.node.var() is not None, \ + "The node variable description cannot be None." + return self.node.var().type() + + def dtype(self): + """ + Return the variable data type. + + Returns: + core.VarDesc.VarType: the variable data type. + """ + assert self.node.var() is not None, \ + "The node variable description cannot be None." + return self.node.var().dtype() + + def shape(self): + """ + Return the variable shape. + + Returns: + list: the variable shape. + """ + assert self.node.var() is not None, \ + "The node variable description cannot be None." + return self.node.var().shape() + + @property + def inputs(self): + """ + Return the node inputs. + + Returns: + list(IrOpNode): node inputs wrapped by IrOpNode. + """ + return [IrOpNode(n) for n in self.node.inputs] + + @property + def outputs(self): + """ + Return the node outputs. + + Returns: + list(IrOpNode): node outputs wrapped by IrOpNode. + """ + return [IrOpNode(n) for n in self.node.outputs] + + +class IrOpNode(IrNode): + """ + Python IrOpNode. Beneath it is a core.Node, it inherits from IrNode. + """ + + def __init__(self, node): + """ + Construct an IrOpNode using core.Node. + + Args: + node(core.Node): C++ Node. + """ + assert isinstance(node, core.Node) and node.is_op(), \ + 'node must be the instance of core.Node and it must be a operator node.' + super(IrOpNode, self).__init__(node) + self.node = node + + def rename_input(self, old_input_name, new_input_name): + """ + Rename the input of this node. + + Args: + old_input_name(str): the old input name. + new_input_name(str): the new input name. + """ + assert self.node.op() is not None, \ + "The node operator description cannot be None." + self.node.op()._rename_input(old_input_name, new_input_name) + + def input(self, name): + """ + Get the argument name list by the parameter name for input. + + Args: + name(str): the parameter name. + + Returns: + list(str): the argument name list. + """ + assert self.node.op() is not None, \ + "The node operator description cannot be None." + return self.node.op().input(name) + + def output(self, name): + """ + Get the argument name list by the parameter name for output. + + Args: + name(str): the parameter name. + + Returns: + list(str): the argument name list. + """ + assert self.node.op() is not None, \ + "The node operator description cannot be None." + return self.node.op().output(name) + + def set_type(self, new_type): + """ + Change the operator type into new type. + + Args: + new_type(str): new operator type to be set. + """ + assert self.node.op() is not None, \ + "The node operator description cannot be None." + return self.node.op().set_type(new_type) + + def set_attr(self, name, val): + """ + Set the value of attribute by attribute's name. + + Args: + name(str): the attribute name. + val(bool|int|str|float|list): the value of the attribute. + """ + self._update_desc_attr(name, val) + + def _update_desc_attr(self, name, val): + """ + Update the value of the op desc's attribute by attribute's name. + """ + assert self.node.op() is not None, \ + "The node operator description cannot be None." + desc = self.node.op() + if isinstance(val, Block): + desc.set_block_attr(name, val.desc) + elif isinstance(val, list) and val and \ + all(isinstance(v, Block) for v in val): + desc.set_blocks_attr(name, [v.desc for v in val]) + elif isinstance(val, core.BlockDesc) or \ + isinstance(val, core.ProgramDesc): + desc.set_serialized_attr(name, val.serialize_to_string()) + else: + desc._set_attr(name, val) + + @property + def inputs(self): + """ + Return the node inputs. + + Returns: + list(IrVarNode): node inputs wrapped by IrVarNode. + """ + return [IrVarNode(n) for n in self.node.inputs] + + @property + def outputs(self): + """ + Return the node outputs. + + Returns: + list(IrVarNode): node outputs wrapped by IrVarNode. + """ + return [IrVarNode(n) for n in self.node.outputs] + + class IrGraph(object): """ - IrGraph uses core.Graph as the delegation to accomplish the manipulation. + Python IrGraph. Beneath it is a core.Graph, which is used for + creating a c++ Ir Pass Graph. An IrGraph is just a graph view of + a Program. In an IrGraph, both Variables and Operators are graph + nodes. """ def __init__(self, graph, for_test=False): """ - Construct the IrGraph using core.Graph. + Construct an IrGraph using core.Graph. + Args: graph(core.Graph): C++ Graph. for_test(bool): True for the test graph and false for the train graph. @@ -1545,81 +1951,267 @@ class IrGraph(object): self._for_test = for_test def is_test(self): + """ + If the graph is used for testing, the function returns true. Otherwise, returns false. + """ return self._for_test - def all_parameters(self): - param_nodes = set() + def all_nodes(self): + """ + Return all nodes included in the graph as a set. + """ + return {IrNode(node) for node in self.graph.nodes()} + + def all_var_nodes(self): + """ + Return all variable nodes included in the graph as a set. + """ + return {IrVarNode(node) for node in self.graph.nodes() if node.is_var()} + + def all_persistable_nodes(self): + """ + Return all persistable variable nodes included in the graph as a set. + """ + persistable_nodes = set() for node in self.graph.nodes(): if node.is_var() and node.var() is not None and node.var( ).persistable(): - param_nodes.add(node) - return param_nodes + persistable_nodes.add(node) + return {IrVarNode(p) for p in persistable_nodes} + + def all_op_nodes(self): + """ + Return all operator nodes included in the graph as a set. + """ + return {IrOpNode(node) for node in self.graph.nodes() if node.is_op()} + + def var_node(self, name): + """ + Get a variable node by name from the graph. + + Args: + name(str): the name of the variable node. - def all_vars(self): - return {node for node in self.graph.nodes() if node.is_var()} + Raises: + ValueError: The If input's type is not str, or this graph + doesn't have a variable with the giving name. - def all_ops(self): - return {node for node in self.graph.nodes() if node.is_op()} + Returns: + IrVarNode: the variable node with the giving name. + """ + if not isinstance(name, six.string_types): + raise TypeError( + "var require string as parameter, but get %s instead." % + (type(name))) + target_var_node = None + var_nodes = self.all_var_nodes() + for var_node in var_nodes: + if var_node.name() == name: + target_var_node = var_node + if target_var_node is None: + raise ValueError("var_node %s not in this graph" % name) + return target_var_node + + def create_persistable_node(self, name, var_type, shape, var_dtype): + """ + Create a persistable variable node in the graph. In IrGraph, + it can not distinguish between persistable variables and parameters. + + Args: + name(str): the name of the persistable variable node. + vart_type(core.VarDesc.VarType): the type of the persistable variable node. + shape(list): the shape of the persistable variable node. + var_dtype(core.VarDesc.VarType): the data type of the persistable variable node. - def create_param_node(self, name, var_type, shape, var_dtype): + Returns: + IrVarNode: the created persistable variable node. + """ var_desc = core.VarDesc(name) var_desc.set_type(var_type) var_desc.set_shape(shape) var_desc.set_dtype(var_dtype) var_desc.set_persistable(True) - return self.graph.create_var_node(var_desc) + return IrVarNode(self.graph.create_var_node(var_desc)) def create_var_node(self, name, var_type, shape, var_dtype): + """ + Create a variable node in the graph. The created variable node is + not persistable. + + Args: + name(str): the name of the variable node. + vart_type(core.VarDesc.VarType): the type of the variable node. + shape(list): the shape of the variable node. + var_dtype(core.VarDesc.VarType): the data type of the variable node. + + Returns: + IrVarNode: the created variable node. + """ + var_desc = core.VarDesc(name) var_desc.set_type(var_type) var_desc.set_shape(shape) var_desc.set_dtype(var_dtype) - return self.graph.create_var_node(var_desc) + return IrVarNode(self.graph.create_var_node(var_desc)) def create_var_node_from_desc(self, var_desc): - return self.graph.create_var_node(var_desc) + """ + Create a variable node by using an existing VarDesc in the graph. + Depend on the giving VarDesc, the created variable node may be persistable. + + Args: + var_desc(core.VarDesc): the giving variable description. + + Returns: + IrVarNode: the created variable node. + """ + return IrVarNode(self.graph.create_var_node(var_desc)) def create_op_node(self, op_type, attrs, inputs, outputs): + """ + Create a operator node in the graph. + + Args: + op_type(str): the type of the operator node. + attrs(dict): the attributes of the operator node. + inputs(dict): the inputs of the operator node. + outputs(dict): the outpus of the operator node. + + Returns: + IrOpNode: the created operator node. + """ op_desc = core.OpDesc() op_desc.set_type(op_type) - for attr, value in attrs.iteritems(): + for attr, value in six.iteritems(attrs): self._update_desc_attr(op_desc, attr, value) - for input_name, var_nodes in inputs.iteritems(): + for input_name, var_nodes in six.iteritems(inputs): if not isinstance(var_nodes, list): var_nodes = [var_nodes] op_desc.set_input(input_name, [var_node.name() for var_node in var_nodes]) - for output_name, var_nodes in outputs.iteritems(): + for output_name, var_nodes in six.iteritems(outputs): if not isinstance(var_nodes, list): var_nodes = [var_nodes] op_desc.set_output(output_name, [var_node.name() for var_node in var_nodes]) - return self.graph.create_op_node(op_desc) + return IrOpNode(self.graph.create_op_node(op_desc)) def create_op_node_from_desc(self, op_desc): - return self.graph.create_op_node(op_desc) + """ + Create a operator node by using an existing OpDesc in the graph. + + Args: + op_desc(core.VarDesc): the giving operator description. + + Returns: + IrOpNode: the created operator node. + """ + return IrOpNode(self.graph.create_op_node(op_desc)) def update_input_link(self, old_input_node, new_input_node, op_node): - assert old_input_node in self.graph.nodes() and new_input_node in self.graph.nodes() and \ - op_node in self.graph.nodes(), 'Th three arguments must be in the graph nodes.' - old_input_node.outputs_remove(op_node) - op_node.inputs_remove(old_input_node) - new_input_node.outputs_append(op_node) - op_node.inputs_append(new_input_node) - op_node.op()._rename_input(old_input_node.name(), new_input_node.name()) + """ + Update the input's link of a operator node. + + Args: + old_input_node(IrNode): the old input node of the giving op_node. + new_input_node(IrNode): the new input node of the giving op_node. + op_node(IrOpNode): the operator node that is needed to update input's link. + """ + assert old_input_node.node in self.graph.nodes() and new_input_node.node in \ + self.graph.nodes() and op_node.node in self.graph.nodes(), \ + 'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.' + old_input_node.remove_output(op_node) + op_node.remove_input(old_input_node) + new_input_node.append_output(op_node) + op_node.append_input(new_input_node) + op_node.rename_input(old_input_node.name(), new_input_node.name()) def link_to(self, node_in, node_out): - assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \ - 'Th two arguments must be in the graph nodes.' - node_in.outputs_append(node_out) - node_out.inputs_append(node_in) + """ + Connect two nodes. + + Args: + node_in(IrNode): the input node. + node_out(IrNode): the output node. + """ + assert node_in.node in self.graph.nodes() and node_out.node in self.graph.nodes(), \ + 'The two arguments(node_in&node_out) must be in the graph nodes.' + node_in.append_output(node_out) + node_out.append_input(node_in) def safe_remove_nodes(self, remove_nodes): + """ + Remove nodes safely since links connected to these removed nodes are + also removed. + + Args: + remove_nodes(set): the nodes prepared to be removed. + """ if not isinstance(remove_nodes, set): - remove_nodes = set(remove_nodes) - core.graph_safe_remove_nodes(self.graph, remove_nodes) + if isinstance(remove_nodes, Iterable): + remove_nodes = set(remove_nodes) + else: + remove_nodes = {remove_nodes} + original_nodes = {n.node for n in remove_nodes} + core.graph_safe_remove_nodes(self.graph, original_nodes) + + def has_circle(self): + """ + Check if the graph has a circle. + + Returns: + bool: True if the graph has a circle else False. + """ + return core.has_circle(self.graph) + + def graph_num(self): + """ + Count the number of unconnected graphs in this graph. + + Returns: + int: the number of unconnected graphs. + """ + return core.graph_num(self.graph) + + def topology_sort(self): + """ + Perform the topology sort operation on the graph. + + Notes: the `graph` cannot contain a circle. + + Returns: + set(IrNode): nodes in topology order. + """ + ordered_nodes = core.topology_sort(self.graph) + return {IrNode(n) for n in ordered_nodes} + + def build_adjacency_list(self): + """ + Build an adjacency list of operations for the `graph`. + + Returns: + dict{IrNode: set(IrNode)}: the adjacency list. + """ + adj_list = core.build_adjacency_list(self.graph) + wrapped_adj_list = dict() + for k, v in six.iteritems(adj_list): + wrapped_adj_list[IrNode(k)] = {IrNode(n) for n in v} + return wrapped_adj_list + + def draw(self, save_path, name, marked_nodes=None, remove_ctr_var=True): + """ + Draw the graph. If `dot` command is installed, the drawn graph + will be saved as pdf file type, otherwise dot file type is used. + + Args: + save_path(str): the save path of drawn graph. + name(str): the name of drawn graph. + marked_nodes(set(IrNode)): nodes that are needed to be marked. + Default value is None. + remove_ctr_var(bool): If it is set True, all control variable nodes + in the graph will be removed. Default value is True. + """ - def draw(self, save_path, name, marked_nodes=None): def _convert_to_pdf(dot_file_path): pdf_save_path = os.path.splitext(dot_file_path)[0] + '.pdf' exited_code = subprocess.call('dot -Tpdf ' + dot_file_path \ @@ -1630,17 +2222,21 @@ class IrGraph(object): dot_file_path)) remove_ctr_vars = set() - ops_num = 0 - for node in self.graph.nodes(): - if node.is_ctrl_var(): - remove_ctr_vars.add(node) - elif node.is_op(): - ops_num += 1 - print('Total ops num = {}.'.format(ops_num)) - self.safe_remove_nodes(remove_ctr_vars) + if remove_ctr_var: + for node in self.all_var_nodes(): + if node.is_ctrl_var(): + remove_ctr_vars.add(node) + self.safe_remove_nodes(remove_ctr_vars) + print('Total ops num = {}.'.format(len(self.all_op_nodes()))) + if marked_nodes is not None: if not isinstance(marked_nodes, set): - marked_nodes = set(marked_nodes) + if isinstance(marked_nodes, Iterable): + marked_nodes = set(marked_nodes) + else: + marked_nodes = {marked_nodes} + marked_nodes = {n.node for n in marked_nodes} + remove_ctr_vars = {n.node for n in remove_ctr_vars} marked_nodes = marked_nodes - remove_ctr_vars if self.graph.has('__graphviz__marked_node__'): self.graph.erase('__graphviz__marked_node__') @@ -1652,10 +2248,20 @@ class IrGraph(object): _convert_to_pdf(viz_dot_path) def to_program(self): + """ + Convert the graph into a Program. + + Notes: When the graph includes backward operator nodes, the + conversion process may be failed. Usually, this function is + only used to convert a test graph. + + Returns: + Program: a program converted from the graph. + """ convert_pass = core.get_pass('graph_to_program_pass') - convert_pass.set('program', Program().desc) + desc = core.ProgramDesc() + convert_pass.set_not_owned('program', desc) convert_pass.apply(self.graph) - desc = convert_pass.get_program('program') program = Program._construct_from_desc(desc) return program diff --git a/python/paddle/fluid/imperative/__init__.py b/python/paddle/fluid/imperative/__init__.py index 54dc794ea6392fac6f266477fe045b37001a8666..034a11e0a6049c17800c8fd5aab5bc2291320169 100644 --- a/python/paddle/fluid/imperative/__init__.py +++ b/python/paddle/fluid/imperative/__init__.py @@ -23,7 +23,11 @@ from .layers import * from . import nn from .nn import * +from . import tracer +from .tracer import * + __all__ = [] __all__ += layers.__all__ __all__ += base.__all__ __all__ += nn.__all__ +__all__ += tracer.__all__ diff --git a/python/paddle/fluid/imperative/base.py b/python/paddle/fluid/imperative/base.py index d4525233cc681720404770ef1d0c5d3006607a2e..174f138bfa2d3cfaa433c3235c2b0f9a5650e756 100644 --- a/python/paddle/fluid/imperative/base.py +++ b/python/paddle/fluid/imperative/base.py @@ -16,6 +16,7 @@ import numpy as np from paddle.fluid import core from paddle.fluid import framework +from .tracer import Tracer __all__ = ['enabled', 'guard', 'to_variable'] @@ -28,7 +29,7 @@ def enabled(): def guard(place=None): train = framework.Program() startup = framework.Program() - tracer = core.Tracer(train.current_block().desc) + tracer = Tracer(train.current_block().desc) if place is None: if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/imperative/layer_object_helper.py b/python/paddle/fluid/imperative/layer_object_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..6afffe3636dd79d124a5b0e9d9eccb02630f5b8c --- /dev/null +++ b/python/paddle/fluid/imperative/layer_object_helper.py @@ -0,0 +1,220 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import copy +import six +from ..framework import Parameter, _in_imperative_mode +from ..param_attr import ParamAttr +from .. import core +from six.moves import zip +from ..layer_helper_base import LayerHelperBase + + +class LayerObjectHelper(LayerHelperBase): + def __init__(self, name): + super(LayerObjectHelper, self).__init__(name, layer_type=name) + + def append_op(self, + type=None, + inputs=None, + outputs=None, + attrs=None, + stop_gradient=None): + """append an operator for this layer object. + + Args: + type: operator type + inputs: input variable of the operator + dtype: data type of this parameter + is_bias: if this is a bias parameter + default_initializer: set the default initializer for this parameter + + Returns created parameter Variable. + """ + return self.main_program.current_block().append_op( + type=type, + inputs=inputs, + outputs=outputs, + attrs=attrs, + stop_gradient=stop_gradient) + + def _multiple_input(self, inputs_in): + inputs = inputs_in + ret = [] + if isinstance(inputs, (list, tuple)): + for inp in inputs: + ret.append(self.to_variable(inp)) + else: + ret.append(self.to_variable(inputs)) + return ret + + # TODO: make it public when we need it + def _input(self, inputs_in): + inputs = self._multiple_input(inputs_in) + if len(inputs) != 1: + raise "{0} layer only takes one input".format(self.layer_type) + return inputs[0] + + def _multiple_param_attr(self, length, param_attr_in=None): + param_attr = param_attr_in + if isinstance(param_attr, ParamAttr): + param_attr = [param_attr] + + if len(param_attr) != 1 and len(param_attr) != length: + raise ValueError("parameter number mismatch") + elif len(param_attr) == 1 and length != 1: + tmp = [None] * length + for i in six.moves.range(length): + tmp[i] = copy.deepcopy(param_attr[0]) + param_attr = tmp + return param_attr + + def iter_inputs_and_params(self, inputs_in, param_attr_in=None): + """Access all inputs and params one by one + + Args: + inputs_in: inputs to be iter + param_attr_in: param_attr to be iter + + Returns input, param_attr + """ + inputs = inputs_in if (inputs_in is not None) else [] + inputs = self._multiple_input(inputs) + param_attrs = self._multiple_param_attr(len(inputs), param_attr_in) + for ipt, param_attr in zip(inputs, param_attrs): + yield ipt, param_attr + + def input_dtype(self, inputs_in): + """Get input data type + + Args: + inputs_in: inputs wanted know the data type + + Returns dtype of the input + """ + inputs = self._multiple_input(inputs_in) + dtype = None + for each in inputs: + if dtype is None: + dtype = each.dtype + elif dtype != each.dtype: + raise ValueError("Data Type mismatch: %d to %d" % + (dtype, each.dtype)) + return dtype + + def get_parameter(self, name): + """Get parameter specifically + + Args: + name: parameter's name + + Returns target parameter + """ + param = self.main_program.global_block().var(name) + if not isinstance(param, Parameter): + raise ValueError("no Parameter name %s found" % name) + return param + + def append_bias_op(self, + input_var, + dim_start=1, + dim_end=None, + bias_attr=None): + """Append bias operator and return its output. If the user does not set bias_attr, append_bias_op will return input_var + + Args: + input_var: the input variable. The len(input_var.shape) is + larger or equal than 2. + dim_start: + dim_end: the shape of the bias will be + bias_attr: the bias_attr of it + + Return the Variable of after append bias op + """ + size = list(input_var.shape[dim_start:dim_end]) + bias_attr = bias_attr + if not bias_attr: + return input_var + + b = self.create_parameter( + attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True) + tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) + self.append_op( + type='elementwise_add', + inputs={'X': [input_var], + 'Y': [b]}, + outputs={'Out': [tmp]}, + attrs={'axis': dim_start}) + return tmp + + # TODO: this should not be called anymore after all activation func move to Layers + def append_activation(self, + input_var, + act=None, + use_cudnn=None, + use_mkl_dnn=None): + """Append activation + + Args: + input_var: the input variable. The len(input_var.shape) is + larger or equal than 2. + act: activation type + use_mkl_dnn: if use mkldnn + use_cudnn: if use cudnn + + Return the Variable of after append activation + """ + act = act + if act is None: + return input_var + if isinstance(act, six.string_types): + act = {'type': act} + else: + raise TypeError(str(act) + " should be unicode or str") + + if (use_cudnn is not None) and use_cudnn: + act['use_cudnn'] = use_cudnn + if (use_mkl_dnn is not None) and use_mkl_dnn: + act['use_mkldnn'] = use_mkl_dnn + act_type = act.pop('type') + + tmp = input_var + # NOTE(dzhwinter): some activation support inplace compution. + # NOTE(minqiyang): currently, we don't support inplace in imperative mode + if not _in_imperative_mode() and core.IsInplace(act_type): + tmp = input_var + else: + tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) + self.append_op( + type=act_type, + inputs={"X": [input_var]}, + outputs={"Out": [tmp]}, + attrs=act) + return tmp + + def is_instance(self, param, cls): + """Check if the input parameter is instance of input class + + Args: + param: parameter to be check + cls: class of the parameter + + Return result of the check (True or False) + """ + param = param + if not isinstance(param, cls): + raise TypeError("The input {0} parameter of method {1} must be {2}", + param, self.layer_type, cls.__name__) diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py index 71ff95bdea36967c1fa6b5c94cc7ca305e7a544a..0c96d4dc5910f9500755dcd9837eeaff5ad4f831 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/imperative/layers.py @@ -12,49 +12,130 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import contextlib import sys import numpy as np import collections - +from .. import unique_name from paddle.fluid import core +from .layer_object_helper import LayerObjectHelper from paddle.fluid import framework -from paddle.fluid.imperative import base __all__ = ['Layer', 'PyLayer'] class Layer(core.Layer): - """Layers composed of operators.""" + """Layers composed of operators. - def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None): + Args: + name_scope: prefix name used by the layer to name parameters. + If prefix is "my_model/layer_1", parameter name in MyLayer + can be "my_model/layer_1/MyLayer/w_n", where w is the parameter + base name and n is an unique suffix auto-generated. + dtype: data type for the variables in the layer. + """ + + def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32): + self._full_name = unique_name.generate(name_scope + "/" + + self.__class__.__name__) self._built = False self._dtype = dtype + self._parameters = collections.OrderedDict() + self._sub_layers = collections.OrderedDict() + + self._helper = LayerObjectHelper(self._full_name) + + def full_name(self): + """Full name for this layers. + + Full name is composed by name_scope + "/" + MyLayer.__class__.__name__ + + Returns full name of this name. + """ + return self._full_name + + def create_parameter(self, + attr, + shape, + dtype, + is_bias=False, + default_initializer=None): + """Create parameters for this layers. + + Args: + attr: [ParamAttr] should be the parameter attribute for this parameter + shape: shape of the paramter + dtype: data type of this parameter + is_bias: if this is a bias parameter + default_initializer: set the default initializer for this parameter + + Returns created parameter Variable. + """ + return self._helper.create_parameter(attr, shape, dtype, is_bias, + default_initializer) + + # TODO: Add more parameter list when we need them + def create_variable(self, + name=None, + persistable=None, + dtype=None, + type=core.VarDesc.VarType.LOD_TENSOR): + """Create Variable for this layers. + + Args: + name: name of the variable + persistable: if set this variable persistable + dtype: data type of data in the variable + type: type of the variable + + Returns created Variable. + """ + if name is not None: + var_name = ".".join([self._full_name, name]) + else: + var_name = unique_name.generate(".".join( + [self._full_name, "_generated_var"])) + + return self._helper.main_program.current_block().create_var( + name=var_name, persistable=persistable, dtype=dtype, type=type) + + def parameters(self, include_sublayers=True): + """Returns a list of Parameters from current and sub-layers. - def parameters(self): - params = [] - for key in self.__dict__.keys(): - value = self.__dict__[key] - if isinstance(value, framework.Parameter): - params.append(value) - elif isinstance(value, core.Layer): - params.extend(value.parameters()) - elif isinstance(value, collections.Container): - if len(value) == 0: - continue - if isinstance(value[0], framework.Parameter): - params.extend(value) - elif isinstance(value[0], core.Layer): - for v in value: - params.extend(v.parameters()) - - return params + Args: + include_sublayers: If true, also include the parameters from + sublayers. + + Returns a list of Parameters. + """ + ret = [p for p in self._parameters.values()] + if include_sublayers: + for l in self._sub_layers.values(): + for p in l.parameters(include_sublayers): + ret.append(p) + return ret + + def sublayers(self, include_sublayers=True): + """Returns a list of sub layers. + + Args: + include_sublayers: If true, also include the layers from sublayers. + + Returns a list of sub layers. + """ + ret = [l for l in self._sub_layers.values()] + if include_sublayers: + for l in self._sub_layers.values(): + for sub_l in l.sublayers(include_sublayers): + ret.append(sub_l) + return ret def clear_gradients(self): for p in self.parameters(): p._clear_gradient() - def _build_once(self, inputs): + def _build_once(self, *args): pass def __call__(self, *inputs): @@ -71,6 +152,66 @@ class Layer(core.Layer): def backward(self, *inputs): raise ValueError("Layer shouldn't implement backward") + def add_sublayer(self, name, sublayer): + """Adds a sub Layer instance. + + Added sublayer can be access like self.name. + + Args: + name: name of this sublayer. + sublayer: an instance of Layer. + Returns: + the sublayer passed in. + """ + assert isinstance(sublayer, core.Layer) + self._sub_layers[name] = sublayer + return sublayer + + def add_parameter(self, name, parameter): + """Adds a Parameter instance. + + Added parameter can be access like self.name. + + Args: + name: name of this sublayer. + parameter: an instance of Parameter. + Returns: + the parameter passed in. + """ + assert isinstance(parameter, framework.Parameter) + self._parameters[name] = parameter + return parameter + + def __getattr__(self, name): + if name in self._parameters: + return self._parameters[name] + elif name in self._sub_layers: + return self._sub_layers[name] + + def __setattr__(self, name, value): + if isinstance(value, framework.Parameter): + params = self.__dict__.get('_parameters', None) + if params is None: + raise ValueError( + "super(YourLayer, self).__init__() should be called first") + params[name] = value + elif isinstance(value, core.Layer): + layers = self.__dict__.get('_sub_layers', None) + if layers is None: + raise ValueError( + "super(YourLayer, self).__init__() should be called first") + layers[name] = value + else: + object.__setattr__(self, name, value) + + def __delattr__(self, name): + if name in self._parameters: + del self._parameters[name] + elif name in self._sub_layers: + del self._sub_layers[name] + else: + object.__delattr__(self, name) + class PyLayer(core.PyLayer): """Layers composed of user-defined python codes.""" diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index bad2c325be3524443c21981c59a6a224efcc01b5..ac25b07a821304bd3b66af24199f0e54bf81a0e0 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -27,6 +27,7 @@ __all__ = ['Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit'] class Conv2D(layers.Layer): def __init__(self, + name_scope, num_channels, num_filters, filter_size, @@ -38,25 +39,14 @@ class Conv2D(layers.Layer): act=None, param_attr=None, bias_attr=None, - name=None, dtype=core.VarDesc.VarType.FP32): assert param_attr is not False, "param_attr should not be False here." - super(Conv2D, self).__init__(name=name, dtype=dtype) - - # TODO(minqiyang): Move this to the top. - from ..layer_helper import LayerHelper - self._helper = LayerHelper( - type(self).__name__, - param_attr=param_attr, - bias_attr=bias_attr, - dtype=dtype, - name=name, - act=act) - + super(Conv2D, self).__init__(name_scope) self._groups = groups self._stride = utils.convert_to_list(stride, 2, 'stride') self._padding = utils.convert_to_list(padding, 2, 'padding') self._dilation = utils.convert_to_list(dilation, 2, 'dilation') + self._act = act if not isinstance(use_cudnn, bool): raise ValueError("use_cudnn should be True or False") self._use_cudnn = use_cudnn @@ -81,28 +71,28 @@ class Conv2D(layers.Layer): std = (2.0 / filter_elem_num)**0.5 return Normal(0.0, std, 0) - self._filter_param = self._helper.create_parameter( - attr=self._helper.param_attr, + self._filter_param = self.create_parameter( + attr=param_attr, shape=filter_shape, dtype=self._dtype, default_initializer=_get_default_param_initializer()) if self._use_cudnn: - self._helper.create_variable( + self.create_variable( name="kCUDNNFwdAlgoCache", persistable=True, type=core.VarDesc.VarType.RAW) - self._helper.create_variable( + self.create_variable( name="kCUDNNBwdDataAlgoCache", persistable=True, type=core.VarDesc.VarType.RAW) - self._helper.create_variable( + self.create_variable( name="kCUDNNBwdFilterAlgoCache", persistable=True, type=core.VarDesc.VarType.RAW) - self._bias_param = self._helper.create_parameter( - attr=self._helper.bias_attr, + self._bias_param = self.create_parameter( + attr=bias_attr, shape=[num_filters], dtype=self._dtype, is_bias=True) @@ -138,11 +128,12 @@ class Conv2D(layers.Layer): attrs={'axis': 1}) # Currently, we don't support inplace in imperative mode - return self._helper.append_activation(pre_act) + return self._helper.append_activation(pre_act, act=self._act) class Pool2D(layers.Layer): def __init__(self, + name_scope, pool_size=-1, pool_type="max", pool_stride=1, @@ -151,7 +142,6 @@ class Pool2D(layers.Layer): use_cudnn=True, ceil_mode=False, exclusive=True, - name=None, dtype=core.VarDesc.VarType.FP32): if pool_type not in ["max", "avg"]: raise ValueError( @@ -166,10 +156,7 @@ class Pool2D(layers.Layer): if not isinstance(use_cudnn, bool): raise ValueError("use_cudnn should be True or False") - super(Pool2D, self).__init__(name=name, dtype=dtype) - - from ..layer_helper import LayerHelper - self._helper = LayerHelper(type(self).__name__, dtype=dtype, name=name) + super(Pool2D, self).__init__(name_scope, dtype=dtype) self._pool_type = pool_type self._pool_size = utils.convert_to_list(pool_size, 2, 'pool_size') @@ -205,44 +192,37 @@ class Pool2D(layers.Layer): class FC(layers.Layer): def __init__(self, + name_scope, size, param_attr=None, bias_attr=None, num_flatten_dims=1, dtype=core.VarDesc.VarType.FP32, - act=None, - name=None): - super(FC, self).__init__() + act=None): + super(FC, self).__init__(name_scope) self._size = size self._num_flatten_dims = num_flatten_dims self._dtype = dtype - from ..layer_helper import LayerHelper - self._helper = LayerHelper( - 'FC', - param_attr=param_attr, - bias_attr=bias_attr, - act=act, - name=name) - - def parameters(self): - return [self._w, self._b] + self._param_attr = param_attr + self._bias_attr = param_attr + self._act = act def _build_once(self, input): input_shape = input.shape param_shape = [ reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1) ] + [self._size] - self._w = self._helper.create_parameter( - attr=self._helper.param_attr, + self._w = self.create_parameter( + attr=self._param_attr, shape=param_shape, dtype=self._dtype, is_bias=False) - if self._helper.bias_attr: + if self._param_attr: size = list([self._size]) - self._b = self._helper.create_parameter( - attr=self._helper.bias_attr, + self._b = self.create_parameter( + attr=self._param_attr, shape=size, dtype=self._dtype, is_bias=True) @@ -280,11 +260,12 @@ class FC(layers.Layer): else: pre_activation = pre_bias # Currently, we don't support inplace in imperative mode - return self._helper.append_activation(pre_activation) + return self._helper.append_activation(pre_activation, act=self._act) class BatchNorm(layers.Layer): def __init__(self, + name_scope, num_channels, act=None, is_test=False, @@ -295,24 +276,18 @@ class BatchNorm(layers.Layer): dtype=core.VarDesc.VarType.FP32, data_layout='NCHW', in_place=False, - name=None, moving_mean_name=None, moving_variance_name=None, do_model_average_for_mean_and_var=False, fuse_with_relu=False, use_global_stats=False): - super(BatchNorm, self).__init__() + super(BatchNorm, self).__init__(name_scope) + self._param_attr = param_attr + self._param_attr = bias_attr + self._act = act assert bias_attr is not False, "bias_attr should not be False in batch_norm." - from ..layer_helper import LayerHelper - self._helper = LayerHelper( - 'batch_norm', - param_attr=param_attr, - bias_attr=bias_attr, - name=name, - act=act) - if dtype == core.VarDesc.VarType.FP16: self._dtype = core.VarDesc.VarType.FP32 else: @@ -321,23 +296,23 @@ class BatchNorm(layers.Layer): param_shape = [num_channels] # create parameter - self._scale = self._helper.create_parameter( - attr=self._helper.param_attr, + self._scale = self.create_parameter( + attr=self._param_attr, shape=param_shape, dtype=self._dtype, default_initializer=Constant(1.0)) - if use_global_stats and self._helper.param_attr.learning_rate == 0.: + if use_global_stats and self._param_attr.learning_rate == 0.: self._scale._stop_gradient = True - self._bias = self._helper.create_parameter( - attr=self._helper.bias_attr, + self._bias = self.create_parameter( + attr=self._param_attr, shape=param_shape, dtype=self._dtype, is_bias=True) - if use_global_stats and self._helper.bias_attr.learning_rate == 0.: + if use_global_stats and self._param_attr.learning_rate == 0.: self._bias._stop_gradient = True - self._mean = self._helper.create_parameter( + self._mean = self.create_parameter( attr=ParamAttr( name=moving_mean_name, initializer=Constant(0.0), @@ -347,7 +322,7 @@ class BatchNorm(layers.Layer): dtype=self._dtype) self._mean._stop_gradient = True - self._variance = self._helper.create_parameter( + self._variance = self.create_parameter( attr=ParamAttr( name=moving_variance_name, initializer=Constant(1.0), @@ -407,7 +382,7 @@ class BatchNorm(layers.Layer): }) # Currently, we don't support inplace in imperative mode - return self._helper.append_activation(batch_norm_out) + return self._helper.append_activation(batch_norm_out, self._act) class Embedding(layers.Layer): @@ -422,6 +397,7 @@ class Embedding(layers.Layer): constructor. Args: + name_scope: See base class. size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. @@ -449,6 +425,7 @@ class Embedding(layers.Layer): """ def __init__(self, + name_scope, size, is_sparse=False, is_distributed=False, @@ -456,7 +433,7 @@ class Embedding(layers.Layer): param_attr=None, dtype='float32'): - super(Embedding, self).__init__() + super(Embedding, self).__init__(name_scope) self._size = size self._is_sparse = is_sparse self._is_distributed = is_distributed @@ -470,17 +447,12 @@ class Embedding(layers.Layer): if self._remote_prefetch: assert self._is_sparse is True and self._is_distributed is False - from ..layer_helper import LayerHelper - self._helper = LayerHelper('embedding', param_attr=param_attr) - self._w = self._helper.create_parameter( + self._w = self.create_parameter( attr=self._param_attr, shape=self._size, dtype=self._dtype, is_bias=False) - def parameters(self): - return [self._w] - def forward(self, input): out = self._helper.create_variable_for_type_inference(self._dtype) self._helper.append_op( diff --git a/python/paddle/fluid/imperative/tracer.py b/python/paddle/fluid/imperative/tracer.py new file mode 100644 index 0000000000000000000000000000000000000000..1064ad63e7103acde9bb8106b7791441ce68849b --- /dev/null +++ b/python/paddle/fluid/imperative/tracer.py @@ -0,0 +1,67 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import six + +from collections import defaultdict +from paddle.fluid import core +from paddle.fluid import framework + +__all__ = ['Tracer'] + + +def release_op(op): + del framework._imperative_tracer()._ops[op._trace_id] + + +class Tracer(core.Tracer): + """ + Python wrapper of imperative tracer + """ + + def __init__(self, block): + super(Tracer, self).__init__(block) + + self._ops = defaultdict() + self._trace_id = 0 + + def trace_op(self, op, stop_gradient=False): + # record op's trace id + op.iop._trace_id = self._trace_id + + # trace op and save it + backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.block.desc, + framework._current_expected_place(), + stop_gradient) + + if not stop_gradient: + self._trace_id += 1 + self._ops[op.iop._trace_id] = op + + # register backward hooks and variables if needed + if len(backward_refs) > 0: + op.iop.register_backward_hooks(release_op) + + # TODO(minqiyang): remove all inputs and outputs after seperate + # var and grad + op.backward_refs = defaultdict(list) + for k, v in six.iteritems(op.inputs): + if k in backward_refs: + op.backward_refs[k] = op.inputs[k] + + for k, v in six.iteritems(op.outputs): + if k in backward_refs: + op.backward_refs[k] = op.outputs[k] diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index e8341be28683a25971a53a37c70533a16add1593..482dfa6fac05bd914efa384bd0f5ec54cfab1dca 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -165,7 +165,8 @@ class ConstantInitializer(Initializer): 'force_cpu': self._force_cpu or force_init_on_cpu() }, stop_gradient=True) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -244,7 +245,8 @@ class UniformInitializer(Initializer): attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -322,7 +324,8 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -400,7 +403,8 @@ class TruncatedNormalInitializer(Initializer): outputs={"Out": var}, attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -505,7 +509,8 @@ class XavierInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -605,7 +610,8 @@ class MSRAInitializer(Initializer): "seed": self._seed }, stop_gradient=True) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -703,7 +709,8 @@ class BilinearInitializer(Initializer): 'shape': list(shape), value_name: values }) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op @@ -761,7 +768,8 @@ class NumpyArrayInitializer(Initializer): value_name: values }, stop_gradient=True) - var.op = op + if not framework._in_imperative_mode(): + var.op = op return op diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index a2abbf36c0267d85c9c97af00c9faabf1187822c..1775159798414a98bede4a3db5b577fb5e47e611 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -468,9 +468,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None): exe = fluid.Executor(fluid.CPUPlace()) param_path = "./my_paddle_model" + # `prog` can be a program defined by the user prog = fluid.default_main_program() fluid.io.save_persistables(executor=exe, dirname=param_path, - main_program=None) + main_program=prog) """ if main_program and main_program._is_distributed: @@ -766,7 +767,10 @@ def _load_distributed_persistables(executor, dirname, main_program=None): dtype=slice_var.dtype, persistable=True) - dim1_flatten = reduce(lambda x, y: x * y, slice.shape[1:]) + dim1_flatten = 1 + if len(slice.shape) >= 2: + dim1_flatten = reduce(lambda x, y: x * y, slice.shape[1:]) + start = int(offset / dim1_flatten) end = int(offset / dim1_flatten + slice.shape[0]) diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 7d1636774c6e27ec8090ac01710e23beed5fd0e8..6f60fad94dca5b02bca14cda33df14c459d1a075 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -15,42 +15,29 @@ from __future__ import print_function import copy -import itertools import six -import sys -import numpy as np -from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating, _in_imperative_mode +from .framework import Parameter, dtype_is_floating, _in_imperative_mode from . import unique_name -from paddle.fluid.imperative import base as imperative_base from paddle.fluid.initializer import Constant, Xavier -from .param_attr import ParamAttr, WeightNormParamAttr +from .param_attr import ParamAttr from . import core from six.moves import zip +from .layer_helper_base import LayerHelperBase -class LayerHelper(object): +class LayerHelper(LayerHelperBase): def __init__(self, layer_type, **kwargs): self.kwargs = kwargs - self.layer_type = layer_type name = self.kwargs.get('name', None) + # TODO(panyx0718, minqiyang): imperative mode + # can not use both `layer_type` and `name`. Deprecate LayerHelper + # and write a Helper for imperative mode. if name is None: - self.kwargs['name'] = unique_name.generate(self.layer_type) + self.kwargs['name'] = unique_name.generate(layer_type) - @property - def name(self): - return self.kwargs['name'] - - @property - def main_program(self): - return default_main_program() - - @property - def startup_program(self): - return default_startup_program() - - def to_variable(self, x): - return imperative_base.to_variable(x, self.main_program.current_block()) + super(LayerHelper, self).__init__( + self.kwargs['name'], layer_type=layer_type) def append_op(self, *args, **kwargs): return self.main_program.current_block().append_op(*args, **kwargs) @@ -79,6 +66,7 @@ class LayerHelper(object): def bias_attr(self): return ParamAttr._to_attr(self.kwargs.get('bias_attr', None)) + #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of param_attr def multiple_param_attr(self, length): param_attr = self.param_attr if isinstance(param_attr, ParamAttr): @@ -110,297 +98,13 @@ class LayerHelper(object): (dtype, each.dtype)) return dtype - def _create_weight_normalize(self, attr, shape, dtype): - from .layers import elementwise_mul, elementwise_div, reshape - - # Remove these ops when LayerHelper and layers support indicating - # program and block. - def __norm_op(x, - out=None, - p=2, - dim=None, - keep_dim=False, - block=self.startup_program.global_block()): - if out is None: - out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_norm'])), - dtype=dtype, - persistable=False) - abs_out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_abs'])), - dtype=dtype, - persistable=False) - block.append_op( - type='abs', inputs={'X': x}, outputs={'Out': abs_out}) - pow_out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_pow'])), - dtype=dtype, - persistable=False) - block.append_op( - type='pow', - inputs={'X': abs_out}, - outputs={'Out': pow_out}, - attrs={'factor': float(p)}) - sum_out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_sum'])), - dtype=dtype, - persistable=False) - block.append_op( - type='reduce_sum', - inputs={'X': pow_out}, - outputs={'Out': sum_out}, - attrs={ - 'dim': dim, - 'keep_dim': keep_dim, - 'reduce_all': True if dim is None else False - }) - block.append_op( - type='pow', - inputs={'X': sum_out}, - outputs={'Out': out}, - attrs={'factor': 1. / p}) - return out - - def __reshape_op(x, - shape, - out=None, - block=self.startup_program.global_block()): - if out is None: - out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_reshape'])), - dtype=dtype, - persistable=False) - block.append_op( - type='reshape', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'shape': shape}) - return out - - def __transpose_op(x, - axis, - out=None, - block=self.startup_program.global_block()): - if out is None: - out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_transpose'])), - dtype=dtype, - persistable=False) - block.append_op( - type='transpose', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'axis': axis}) - return out - - def __norm_except_dim(x, - out=None, - dim=None, - block=self.startup_program.global_block()): - """Computes the norm over all dimensions except dim""" - if out is None: - out = block.create_var( - name=unique_name.generate(".".join( - [self.name, 'weight_norm_norm'])), - dtype=dtype, - persistable=False) - if dim is None: - __norm_op(x, out, dim=dim, block=block) - elif dim == 0: - out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1) - reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block) - norm = __norm_op(reshape, dim=1, block=block) - __reshape_op(norm, out=out, shape=out_shape, block=block) - elif dim == len(x.shape) - 1: - out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]] - reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block) - norm = __norm_op(reshape, dim=0, block=block) - __reshape_op(norm, out=out, shape=out_shape, block=block) - else: - perm = list(range(len(x.shape))) - perm[0], perm[dim] = dim, 0 - transpose = __transpose_op(x, perm, block=block) - norm = __norm_op(transpose, dim=0, block=block) - __transpose_op(norm, perm, out=out, block=block) - return out - - def __weight_normalize(g, v, dim): - """Calculations for weight normalization""" - norm = __norm_except_dim( - v, dim=dim, block=self.main_program.current_block()) - scale = elementwise_div( - x=g, y=norm) # The shapes of g and norm are the same. - # Currently, elementwise_mul only support broadcast when the shape - # of y is a subset of the shape of x. Thus, we reshape y to squeeze - # to achive the subset. - w = elementwise_mul( - x=v, - y=scale if dim is None else reshape( - x=scale, shape=[v.shape[dim]]), - axis=-1 if dim is None else dim) - # To serialize the original parameter for inference, maybe a - # parameter rather than a variable should be returned. - return w - - g_param_attr = copy.deepcopy(attr) - g_param_attr.name = attr.name + '_g' - g_param_shape = [1] * len(shape) - if attr.dim is not None: - g_param_shape[attr.dim] = shape[attr.dim] - v_param_attr = copy.deepcopy(attr) - v_param_attr.name = attr.name + '_v' - v_param_shape = shape - - # Add to startup_program to initialize g and v. - # Try to reconstruct the initializer of w by initializing g and v. - # Set the initializers of g and v as below, then the distribution - # of w is the same as initializing w with the given initializer. - # For Data-Dependent Initialization, please compute the init-values - # of g and v in external and then feed the values to g and v by - # executing an extra program. - g_param = self.startup_program.global_block().create_parameter( - dtype=dtype, - shape=g_param_shape, - **g_param_attr._to_kwargs(with_initializer=False)) - v_param = self.startup_program.global_block().create_parameter( - dtype=dtype, - shape=v_param_shape, - **v_param_attr._to_kwargs(with_initializer=True)) - __norm_except_dim( - x=v_param, - out=g_param, - dim=attr.dim, - block=self.startup_program.global_block()) - - # Add weight normalization to main_program - g_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs()) - v_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs()) - w_param = __weight_normalize(g_param, v_param, dim=attr.dim) - return w_param - - def create_parameter(self, - attr, - shape, - dtype, - is_bias=False, - default_initializer=None): - # Deepcopy the attr so that parameters can be shared in program - attr = copy.deepcopy(attr) - assert isinstance(attr, ParamAttr) - suffix = 'b' if is_bias else 'w' - if attr.name is None: - attr.name = unique_name.generate(".".join([self.name, suffix])) - - if default_initializer is None and attr.initializer is None: - if isinstance(dtype, core.VarDesc.VarType): - if dtype != core.VarDesc.VarType.FP32 and \ - dtype != core.VarDesc.VarType.FP64 and \ - dtype != core.VarDesc.VarType.FP16: - raise TypeError( - "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" - ) - else: - if not (dtype.startswith("float") or dtype == "double"): - raise TypeError( - "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" - ) - if is_bias: - attr._set_default_bias_initializer() - else: - attr._set_default_param_initializer() - else: - attr._set_default_initializer(default_initializer) - - # If weight normalization is set, insert extra parameters and ops. - # Refer to https://arxiv.org/pdf/1602.07868.pdf - if isinstance(attr, WeightNormParamAttr): - param = self._create_weight_normalize(attr, shape, dtype) - WeightNormParamAttr.params_with_weight_norm.append(param) - return param - if _in_imperative_mode(): - # In imperative mode, we want the returned parameter to be - # initialized so that it can be used imperatively. - return self.main_program.global_block().create_parameter( - dtype=dtype, - shape=shape, - **attr._to_kwargs(with_initializer=True)) - else: - self.startup_program.global_block().create_parameter( - dtype=dtype, - shape=shape, - **attr._to_kwargs(with_initializer=True)) - return self.main_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr._to_kwargs()) - def get_parameter(self, name): param = self.main_program.global_block().var(name) if not isinstance(param, Parameter): raise ValueError("no Parameter name %s found" % name) return param - def create_variable_for_type_inference(self, dtype, stop_gradient=False): - """Create a temporary variable that should be type inferred layer. - - Note: - The default type will be set to LOD_TENSOR. However, when - the var is used as operator output, its type will be updated - based on operator's `VarTypeInference` implementation in - infer_var_type. - """ - return self.main_program.current_block().create_var( - name=unique_name.generate(".".join([self.name, 'tmp'])), - dtype=dtype, - type=core.VarDesc.VarType.LOD_TENSOR, - persistable=False, - stop_gradient=stop_gradient) - - def create_variable(self, *args, **kwargs): - return self.main_program.current_block().create_var(*args, **kwargs) - - def create_global_variable(self, persistable=False, *args, **kwargs): - """ - create global variable, note that there is no initializer for this global variable. - Args: - persistable(bool): True if it is a checkpoint value. - *args: See create_var's documentation - **kwargs: See create_var's documentation - - Returns(Variable): the created variable. - """ - return self.main_program.global_block().create_var( - *args, persistable=persistable, **kwargs) - - def create_or_get_global_variable(self, name, *args, **kwargs): - """ - Creates a global variable if not exists and returns the variable and - a boolean flag which is true when it is a new variable. - """ - if self.main_program.global_block().has_var(name): - return self.main_program.global_block().var(name), False - else: - return self.create_global_variable(name=name, *args, **kwargs), True - - def set_variable_initializer(self, var, initializer): - assert isinstance(var, Variable) - if imperative_base.enabled(): - initializer(var, var.block) - else: - self.startup_program.global_block().create_var( - name=var.name, - type=var.type, - dtype=var.dtype, - shape=var.shape, - persistable=True, - initializer=initializer) - + #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of bias_attr def append_bias_op(self, input_var, dim_start=1, dim_end=None): """ Append bias operator and return its output. If the user does not set @@ -431,6 +135,7 @@ class LayerHelper(object): attrs={'axis': dim_start}) return tmp + #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of act def append_activation(self, input_var): act = self.kwargs.get('act', None) if act is None: @@ -445,10 +150,11 @@ class LayerHelper(object): if 'use_mkldnn' in self.kwargs: act['use_mkldnn'] = self.kwargs.get('use_mkldnn') act_type = act.pop('type') + tmp = input_var # NOTE(dzhwinter): some activation support inplace compution. # NOTE(minqiyang): currently, we don't support inplace in imperative mode - if not imperative_base.enabled() and core.IsInplace(act_type): + if not _in_imperative_mode() and core.IsInplace(act_type): tmp = input_var else: tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) @@ -459,6 +165,7 @@ class LayerHelper(object): attrs=act) return tmp + #TODO (jiabin): should we remove this since it has never be used def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: return Xavier() @@ -466,6 +173,7 @@ class LayerHelper(object): # For integer and boolean types, initialize with all zeros return Constant() + #TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs def is_instance(self, param_name, cls): param = self.kwargs.get(param_name, None) if not isinstance(param, cls): diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py new file mode 100644 index 0000000000000000000000000000000000000000..d4b38137e4e014d0244fe206bd964a304a291345 --- /dev/null +++ b/python/paddle/fluid/layer_helper_base.py @@ -0,0 +1,381 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import copy +import numpy as np + +from .framework import Variable, default_main_program, default_startup_program, _in_imperative_mode, _current_expected_place +from . import unique_name +from .param_attr import ParamAttr, WeightNormParamAttr +from . import core + + +class LayerHelperBase(object): + def __init__(self, name, layer_type): + self._layer_type = layer_type + self._name = name + + @property + def name(self): + return self._name + + @property + def layer_type(self): + return self._layer_type + + @property + def main_program(self): + return default_main_program() + + @property + def startup_program(self): + return default_startup_program() + + def to_variable(self, value, block=None): + """convert value to variable + + Args: + value: value to be convert + block: the block of the variable + + Return Variable construct from value + """ + if isinstance(value, np.ndarray): + assert _in_imperative_mode( + ), "to_variable could only be called in imperative mode" + + if not block: + block = default_main_program().current_block() + py_var = Variable( + block, + type=core.VarDesc.VarType.LOD_TENSOR, + name=None, + shape=value.shape, + dtype=value.dtype) + var = py_var._ivar.value() + tensor = var.get_tensor() + tensor.set(value, _current_expected_place()) + return py_var + elif isinstance(value, Variable): + return value + + def _create_weight_normalize(self, attr, shape, dtype): + from .layers import elementwise_mul, elementwise_div, reshape + + # Remove these ops when LayerHelper and layers support indicating + # program and block. + def __norm_op(x, + out=None, + p=2, + dim=None, + keep_dim=False, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_norm'])), + dtype=dtype, + persistable=False) + abs_out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_abs'])), + dtype=dtype, + persistable=False) + block.append_op( + type='abs', inputs={'X': x}, outputs={'Out': abs_out}) + pow_out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_pow'])), + dtype=dtype, + persistable=False) + block.append_op( + type='pow', + inputs={'X': abs_out}, + outputs={'Out': pow_out}, + attrs={'factor': float(p)}) + sum_out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_sum'])), + dtype=dtype, + persistable=False) + block.append_op( + type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': sum_out}, + attrs={ + 'dim': dim, + 'keep_dim': keep_dim, + 'reduce_all': True if dim is None else False + }) + block.append_op( + type='pow', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={'factor': 1. / p}) + return out + + def __reshape_op(x, + shape, + out=None, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_reshape'])), + dtype=dtype, + persistable=False) + block.append_op( + type='reshape', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'shape': shape}) + return out + + def __transpose_op(x, + axis, + out=None, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_transpose'])), + dtype=dtype, + persistable=False) + block.append_op( + type='transpose', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'axis': axis}) + return out + + def __norm_except_dim(x, + out=None, + dim=None, + block=self.startup_program.global_block()): + """Computes the norm over all dimensions except dim""" + if out is None: + out = block.create_var( + name=unique_name.generate(".".join( + [self.name, 'weight_norm_norm'])), + dtype=dtype, + persistable=False) + if dim is None: + __norm_op(x, out, dim=dim, block=block) + elif dim == 0: + out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1) + reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block) + norm = __norm_op(reshape, dim=1, block=block) + __reshape_op(norm, out=out, shape=out_shape, block=block) + elif dim == len(x.shape) - 1: + out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]] + reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block) + norm = __norm_op(reshape, dim=0, block=block) + __reshape_op(norm, out=out, shape=out_shape, block=block) + else: + perm = list(range(len(x.shape))) + perm[0], perm[dim] = dim, 0 + transpose = __transpose_op(x, perm, block=block) + norm = __norm_op(transpose, dim=0, block=block) + __transpose_op(norm, perm, out=out, block=block) + return out + + def __weight_normalize(g, v, dim): + """Calculations for weight normalization""" + norm = __norm_except_dim( + v, dim=dim, block=self.main_program.current_block()) + scale = elementwise_div( + x=g, y=norm) # The shapes of g and norm are the same. + # Currently, elementwise_mul only support broadcast when the shape + # of y is a subset of the shape of x. Thus, we reshape y to squeeze + # to achive the subset. + w = elementwise_mul( + x=v, + y=scale if dim is None else reshape( + x=scale, shape=[v.shape[dim]]), + axis=-1 if dim is None else dim) + # To serialize the original parameter for inference, maybe a + # parameter rather than a variable should be returned. + return w + + g_param_attr = copy.deepcopy(attr) + g_param_attr.name = attr.name + '_g' + g_param_shape = [1] * len(shape) + if attr.dim is not None: + g_param_shape[attr.dim] = shape[attr.dim] + v_param_attr = copy.deepcopy(attr) + v_param_attr.name = attr.name + '_v' + v_param_shape = shape + + # Add to startup_program to initialize g and v. + # Try to reconstruct the initializer of w by initializing g and v. + # Set the initializers of g and v as below, then the distribution + # of w is the same as initializing w with the given initializer. + # For Data-Dependent Initialization, please compute the init-values + # of g and v in external and then feed the values to g and v by + # executing an extra program. + g_param = self.startup_program.global_block().create_parameter( + dtype=dtype, + shape=g_param_shape, + **g_param_attr._to_kwargs(with_initializer=False)) + v_param = self.startup_program.global_block().create_parameter( + dtype=dtype, + shape=v_param_shape, + **v_param_attr._to_kwargs(with_initializer=True)) + __norm_except_dim( + x=v_param, + out=g_param, + dim=attr.dim, + block=self.startup_program.global_block()) + + # Add weight normalization to main_program + g_param = self.main_program.global_block().create_parameter( + dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs()) + v_param = self.main_program.global_block().create_parameter( + dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs()) + w_param = __weight_normalize(g_param, v_param, dim=attr.dim) + return w_param + + # TODO: hide the func after we move the layers to Layers + def create_parameter(self, + attr, + shape, + dtype, + is_bias=False, + default_initializer=None): + """Create parameters for this layers. + + Args: + attr: [ParamAttr] should be the parameter attribute for this parameter + shape: shape of the paramter + dtype: data type of this parameter + is_bias: if this is a bias parameter + default_initializer: set the default initializer for this parameter + + Returns created parameter Variable. + """ + # Deepcopy the attr so that parameters can be shared in program + attr = copy.deepcopy(attr) + if attr is None: + attr = ParamAttr._to_attr(attr) + assert isinstance(attr, ParamAttr) + suffix = 'b' if is_bias else 'w' + if attr.name is None: + attr.name = unique_name.generate(".".join([self.name, suffix])) + + if default_initializer is None and attr.initializer is None: + if isinstance(dtype, core.VarDesc.VarType): + if dtype != core.VarDesc.VarType.FP32 and \ + dtype != core.VarDesc.VarType.FP64 and \ + dtype != core.VarDesc.VarType.FP16: + raise TypeError( + "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" + ) + else: + if not (dtype.startswith("float") or dtype == "double"): + raise TypeError( + "Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!" + ) + if is_bias: + attr._set_default_bias_initializer() + else: + attr._set_default_param_initializer() + else: + attr._set_default_initializer(default_initializer) + + # If weight normalization is set, insert extra parameters and ops. + # Refer to https://arxiv.org/pdf/1602.07868.pdf + if isinstance(attr, WeightNormParamAttr): + param = self._create_weight_normalize(attr, shape, dtype) + WeightNormParamAttr.params_with_weight_norm.append(param) + return param + if _in_imperative_mode(): + # In imperative mode, we want the returned parameter to be + # initialized so that it can be used imperatively. + return self.main_program.global_block().create_parameter( + dtype=dtype, + shape=shape, + **attr._to_kwargs(with_initializer=True)) + else: + self.startup_program.global_block().create_parameter( + dtype=dtype, + shape=shape, + **attr._to_kwargs(with_initializer=True)) + return self.main_program.global_block().create_parameter( + dtype=dtype, shape=shape, **attr._to_kwargs()) + + def create_variable_for_type_inference(self, dtype, stop_gradient=False): + """Create a temporary variable that should be type inferred layer. + + Note: + The default type will be set to LOD_TENSOR. However, when + the var is used as operator output, its type will be updated + based on operator's `VarTypeInference` implementation in + infer_var_type. + """ + return self.main_program.current_block().create_var( + name=unique_name.generate(".".join([self.name, 'tmp'])), + dtype=dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=stop_gradient) + + def create_variable(self, *args, **kwargs): + """Create Variable for this layers. + Returns created Variable. + """ + return self.main_program.current_block().create_var(*args, **kwargs) + + def create_global_variable(self, persistable=False, *args, **kwargs): + """ + create global variable, note that there is no initializer for this global variable. + Args: + persistable(bool): True if it is a checkpoint value. + *args: See create_var's documentation + **kwargs: See create_var's documentation + + Returns(Variable): the created variable. + """ + return self.main_program.global_block().create_var( + *args, persistable=persistable, **kwargs) + + def create_or_get_global_variable(self, name, *args, **kwargs): + """ + Creates a global variable if not exists and returns the variable and + a boolean flag which is true when it is a new variable. + """ + if self.main_program.global_block().has_var(name): + return self.main_program.global_block().var(name), False + else: + return self.create_global_variable(name=name, *args, **kwargs), True + + def set_variable_initializer(self, var, initializer): + """Set target Variable's initializer + + Args: + var: target Variable + initializer: initializer to use + """ + assert isinstance(var, Variable) + if _in_imperative_mode(): + initializer(var, var.block) + else: + self.startup_program.global_block().create_var( + name=var.name, + type=var.type, + dtype=var.dtype, + shape=var.shape, + persistable=True, + initializer=initializer) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 3a6753b01f152f61b78a9f04f4fe32136c051a19..e7f704515df947f107df6d83a644530a0e468430 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -506,9 +506,9 @@ class While(object): while loop control flow. Args: - cond (Variable): condition used to compare. + cond(Variable): condition used to compare. is_test(bool): A flag indicating whether execution is in test phase. - name (str): The name of this layer. + name(str): The name of this layer. Examples: .. code-block:: python @@ -589,7 +589,8 @@ class While(object): def lod_rank_table(x, level=0): - """LoD Rank Table Operator. Given an input variable **x** and a level number + """ + LoD Rank Table Operator. Given an input variable **x** and a level number of LoD, this layer creates a LodRankTable object. A LoDRankTable object contains a list of bi-element tuples. Each tuple consists of an index and a length, both of which are int type. Refering to specified level of LoD, @@ -847,7 +848,7 @@ def create_array(dtype): @templatedoc() -def less_than(x, y, force_cpu=None, cond=None, **ignored): +def less_than(x, y, force_cpu=None, cond=None): """ ${comment} @@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored): return cond -def equal(x, y, cond=None, **ignored): +def equal(x, y, cond=None): """ - **equal** - This layer returns the truth value of :math:`x == y` elementwise. Args: @@ -1458,7 +1457,6 @@ class DynamicRNN(object): Returns: The current timestep in the input sequence. - """ self._assert_in_rnn_block_("step_input") if not isinstance(x, Variable): @@ -1535,8 +1533,7 @@ class DynamicRNN(object): @signature_safe_contextmanager def block(self): """ - The block for user to define operators in RNN. See the class docstring - for more details. + The block for user to define operators in RNN. """ if self.status != DynamicRNN.BEFORE_RNN: raise ValueError("rnn.block() can only be invoke once") @@ -1640,8 +1637,7 @@ class DynamicRNN(object): dtype(str|numpy.dtype): The data type of the initialized memory. Returns: - the memory variable. - + The memory variable. """ self._assert_in_rnn_block_('memory') self._init_zero_idx_() @@ -1740,7 +1736,7 @@ class DynamicRNN(object): def output(self, *outputs): """ - mark the RNN output variables. + Mark the RNN output variables. Args: outputs: The output variables. @@ -1804,7 +1800,7 @@ def reorder_lod_tensor_by_rank(x, rank_table): return out -def is_empty(x, cond=None, **ignored): +def is_empty(x, cond=None): """ Test whether a Variable is empty. diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 3b43ae0b9cb63a9f4708a680cb1021d74c197550..acdf619afa57309547198d3404865f24903e4e18 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -51,6 +51,7 @@ __all__ = [ 'yolov3_loss', 'box_clip', 'multiclass_nms', + 'box_decoder_and_assign', ] @@ -545,15 +546,16 @@ def yolov3_loss(x, TypeError: Attr ignore_thresh of yolov3_loss must be a float number Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') - gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') - gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') - anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] - anchors = [0, 1, 2] - loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80, anchors=anchors, - ignore_thresh=0.5, downsample_ratio=32) + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') + gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') + gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') + anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] + anchor_mask = [0, 1, 2] + loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, anchors=anchors, + anchor_mask=anchor_mask, class_num=80, + ignore_thresh=0.7, downsample_ratio=32) """ helper = LayerHelper('yolov3_loss', **locals()) @@ -2220,3 +2222,65 @@ def multiclass_nms(bboxes, output.stop_gradient = True return output + + +@templatedoc() +def box_decoder_and_assign(prior_box, + prior_box_var, + target_box, + box_score, + box_clip, + name=None): + """ + ${comment} + Args: + prior_box(${prior_box_type}): ${prior_box_comment} + prior_box_var(${prior_box_var_type}): ${prior_box_var_comment} + target_box(${target_box_type}): ${target_box_comment} + box_score(${box_score_type}): ${box_score_comment} + box_clip(${box_clip_type}): ${box_clip_comment} + name(str|None): The name of this operator + Returns: + decode_box(Variable), output_assign_box(Variable): + + two variables: + + - decode_box(${decode_box_type}): ${decode_box_comment} + - output_assign_box(${output_assign_box_type}): ${output_assign_box_comment} + + Examples: + .. code-block:: python + + pb = fluid.layers.data( + name='prior_box', shape=[20, 4], dtype='float32') + pbv = fluid.layers.data( + name='prior_box_var', shape=[1, 4], dtype='float32') + loc = fluid.layers.data( + name='target_box', shape=[20, 4*81], dtype='float32') + scores = fluid.layers.data( + name='scores', shape=[20, 81], dtype='float32') + decoded_box, output_assign_box = fluid.layers.box_decoder_and_assign( + pb, pbv, loc, scores, 4.135) + + """ + helper = LayerHelper("box_decoder_and_assign", **locals()) + + decoded_box = helper.create_variable_for_type_inference( + dtype=prior_box.dtype) + output_assign_box = helper.create_variable_for_type_inference( + dtype=prior_box.dtype) + + helper.append_op( + type="box_decoder_and_assign", + inputs={ + "PriorBox": prior_box, + "PriorBoxVar": prior_box_var, + "TargetBox": target_box, + "BoxScore": box_score + }, + attrs={"box_clip": box_clip}, + outputs={ + "DecodeBox": decoded_box, + "OutputAssignBox": output_assign_box + }) + return decoded_box, output_assign_box diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index b88be66906e806aeee55c1af6235a6fef9da7030..a9b391fd53a98dc05ee2d909a38dcf82cd5880ea 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -56,7 +56,10 @@ def data(name, Args: name(str): The name/alias of the function - shape(list): Tuple declaring the shape. + shape(list): Tuple declaring the shape. If :code:`append_batch_size` is + True and there is no -1 inside :code:`shape`, it should be + considered as the shape of the each sample. Otherwise, it + should be considered as the shape of the batched data. append_batch_size(bool): 1. If true, it prepends -1 to the shape. For example if shape=[1], the resulting shape is [-1, 1]. diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 09b1b30216b03e71253ca8da1d462db897e1a607..da6c24100452ba26896c8e7c06a76d874b3f51a2 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype from ..layer_helper import LayerHelper __all__ = [ - 'deprecated', 'generate_layer_fn', 'generate_layer_fn_noattr', 'autodoc', + 'deprecated', 'generate_layer_fn', 'generate_activation_fn', 'autodoc', 'templatedoc' ] @@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None): buf.write('\n') skip_attrs = OpProtoHolder.generated_op_attr_names() + # attr use_mkldnn and is_test also should not be visible to users. + skip_attrs.add("use_mkldnn") + skip_attrs.add("is_test") for each_attr in op_proto.attrs: if each_attr.name in skip_attrs: @@ -226,7 +229,7 @@ def generate_layer_fn(op_type): return func -def generate_layer_fn_noattr(op_type): +def generate_activation_fn(op_type): """Register the Python layer for an Operator without Attribute. Args: @@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type): func.__name__ = op_type func.__doc__ = _generate_doc_string_(op_proto) + return func diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 617704a53138bd081a2ebe318de0c89e8db4aa96..378aeb37605f1971da3fe4a926e4b36b8eae2ca4 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -28,10 +28,12 @@ from . import ops from . import tensor from ..initializer import init_on_cpu from ..framework import default_main_program, Parameter, unique_name, name_scope +import math __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', - 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS' + 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS', + 'cosine_decay' ] @@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values): return lr +def cosine_decay(learning_rate, step_each_epoch, epochs): + """ + Applies cosine decay to the learning rate. + + when training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, the learning rate will be decayed by + following cosine decay strategy. + + decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1) + + Args: + learning_rate(Variable|float): The initial learning rate. + step_each_epoch(int): the number of steps in an epoch. + epochs(int): the number of epochs. + + Returns: + Variable: The decayed learning rate. + + Examples: + + ..code-block:: python + + base_lr = 0.1 + lr = fluid.layers.cosine_decay( + learning_rate = base_lr, step_each_epoch=10000, epochs=120) + """ + with default_main_program()._lr_schedule_guard(): + global_step = _decay_step_counter() + + cur_epoch = ops.floor(global_step / step_each_epoch) + decayed_lr = learning_rate * 0.5 * ( + ops.cos(cur_epoch * math.pi / epochs) + 1) + return decayed_lr + + def append_LARS(params_grads, learning_rate, weight_decay): """ Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0e4b5aadc0b0d7e87ea1cfb8e18339fe211e1eef..5b4f1efe479b12cb8ec390b8753d097764d70860 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -87,12 +87,14 @@ __all__ = [ 'transpose', 'im2sequence', 'nce', + 'sampled_softmax_with_cross_entropy', 'hsigmoid', 'beam_search', 'row_conv', 'multiplex', 'layer_norm', 'group_norm', + 'spectral_norm', 'softmax_with_cross_entropy', 'smooth_l1', 'one_hot', @@ -668,7 +670,11 @@ def dynamic_lstmp(input, candidate_activation='tanh', proj_activation='tanh', dtype='float32', - name=None): + name=None, + h_0=None, + c_0=None, + cell_clip=None, + proj_clip=None): """ **Dynamic LSTMP Layer** @@ -785,6 +791,17 @@ def dynamic_lstmp(input, dtype(str): Data type. Choices = ["float32", "float64"], default "float32". name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. + h_0(Variable): The initial hidden state is an optional input, default is zero. + This is a tensor with shape (N x D), where N is the + batch size and D is the projection size. + c_0(Variable): The initial cell state is an optional input, default is zero. + This is a tensor with shape (N x D), where N is the + batch size. `h_0` and `c_0` can be NULL but only at the same time. + cell_clip(float): If provided the cell state is clipped + by this value prior to the cell output activation. + proj_clip(float): If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. Returns: tuple: A tuple of two output variable: the projection of hidden state, \ @@ -831,25 +848,41 @@ def dynamic_lstmp(input, batch_hidden = helper.create_variable_for_type_inference(dtype) batch_gate = helper.create_variable_for_type_inference(dtype) batch_cell_pre_act = helper.create_variable_for_type_inference(dtype) + inputs = { + 'Input': input, + 'Weight': weight, + 'ProjWeight': proj_weight, + 'Bias': bias + } + batch_size = input.shape[0] + if h_0: + assert h_0.shape == (batch_size, proj_size), \ + 'The shape of h0 should be (batch_size, %d)' % proj_size + inputs['H0'] = h_0 + if c_0: + assert c_0.shape == (batch_size, size), \ + 'The shape of c0 should be (batch_size, %d)' % size + inputs['C0'] = c_0 + + if cell_clip: + assert cell_clip >= 0, "cell_clip should not be negtive." + if proj_clip: + assert proj_clip >= 0, "proj_clip should not be negtive." helper.append_op( type='lstmp', - inputs={ - 'Input': input, - 'Weight': weight, - 'ProjWeight': proj_weight, - 'Bias': bias - }, + inputs=inputs, outputs={ 'Projection': projection, 'Cell': cell, - 'OrderedP0': ordered_proj0, 'BatchHidden': batch_hidden, 'BatchGate': batch_gate, 'BatchCellPreAct': batch_cell_pre_act }, attrs={ 'use_peepholes': use_peepholes, + 'cell_clip': cell_clip, + 'proj_clip': proj_clip, 'is_reverse': is_reverse, 'gate_activation': gate_activation, 'cell_activation': cell_activation, @@ -1735,7 +1768,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): return softmax_out -def softmax(input, use_cudnn=True, name=None): +def softmax(input, use_cudnn=False, name=None): """ The input of the softmax operator is a tensor of any rank. The output tensor has the same shape as the input. @@ -1763,7 +1796,8 @@ def softmax(input, use_cudnn=True, name=None): Args: input (Variable): The input variable. use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ - library is installed. + library is installed. To improve numerical stablity, set use_cudnn to \ + False by default. Default: False name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Default: None. @@ -2441,7 +2475,7 @@ def pool2d(input, data = fluid.layers.data( name='data', shape=[3, 32, 32], dtype='float32') - conv2d = fluid.layers.pool2d( + pool2d = fluid.layers.pool2d( input=data, pool_size=2, pool_type='max', @@ -2490,6 +2524,7 @@ def pool2d(input, return pool_out +@templatedoc() def pool3d(input, pool_size=-1, pool_type="max", @@ -2501,13 +2536,19 @@ def pool3d(input, name=None, exclusive=True): """ - This function adds the operator for pooling in 3-dimensions, using the - pooling configurations mentioned in input parameters. + ${comment} Args: - input (Variable): ${input_comment} - pool_size (int): ${ksize_comment} - pool_type (str): ${pooling_type_comment} + input (Variable): The input tensor of pooling operator. The format of + input tensor is NCDHW, where N is batch size, C is + the number of channels, D is the depth of the feature, + H is the height of the feature, and W is the width + of the feature. + pool_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (pool_size_Depth, pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + pool_type (string): ${pooling_type_comment} pool_stride (int): stride of the pooling layer. pool_padding (int): padding size. global_pooling (bool): ${global_pooling_comment} @@ -2520,6 +2561,19 @@ def pool3d(input, Returns: Variable: output of pool3d layer. + + Examples: + + .. code-block:: python + + data = fluid.layers.data( + name='data', shape=[3, 32, 32, 32], dtype='float32') + pool3d = fluid.layers.pool3d( + input=data, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -2569,7 +2623,27 @@ def adaptive_pool2d(input, require_index=False, name=None): """ - ${comment} + **Adaptive Pool2d Operator** + The adaptive_pool2d operation calculates the output based on the input, pool_size, + pool_type parameters. Input(X) and output(Out) are in NCHW format, where N is batch + size, C is the number of channels, H is the height of the feature, and W is + the width of the feature. Parameters(pool_size) should contain two elements which + represent height and width, respectively. Also the H and W dimensions of output(Out) + is same as Parameter(pool_size). + + For average adaptive pool2d: + + .. math:: + + hstart &= floor(i * H_{in} / H_{out}) + + hend &= ceil((i + 1) * H_{in} / H_{out}) + + wstart &= floor(j * W_{in} / W_{out}) + + wend &= ceil((j + 1) * W_{in} / W_{out}) + + Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} Args: input (Variable): The input tensor of pooling operator. The format of @@ -2579,8 +2653,8 @@ def adaptive_pool2d(input, pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two integers, (pool_size_Height, pool_size_Width). pool_type: ${pooling_type_comment} - require_index (bool): If true, the index of max pooling point along with outputs. - it cannot be set in average pooling type. + require_index (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -2661,18 +2735,42 @@ def adaptive_pool3d(input, require_index=False, name=None): """ - ${comment} + **Adaptive Pool3d Operator** + The adaptive_pool3d operation calculates the output based on the input, pool_size, + pool_type parameters. Input(X) and output(Out) are in NCDHW format, where N is batch + size, C is the number of channels, D is the depth of the feature, H is the height of + the feature, and W is the width of the feature. Parameters(pool_size) should contain + three elements which represent height and width, respectively. Also the D, H and W + dimensions of output(Out) is same as Parameter(pool_size). + + For average adaptive pool3d: + + .. math:: + + dstart &= floor(i * D_{in} / D_{out}) + + dend &= ceil((i + 1) * D_{in} / D_{out}) + + hstart &= floor(j * H_{in} / H_{out}) + + hend &= ceil((j + 1) * H_{in} / H_{out}) + + wstart &= floor(k * W_{in} / W_{out}) + + wend &= ceil((k + 1) * W_{in} / W_{out}) + + Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} Args: input (Variable): The input tensor of pooling operator. The format of - input tensor is NCHW, where N is batch size, C is - the number of channels, H is the height of the - feature, and W is the width of the feature. + input tensor is NCDHW, where N is batch size, C is + the number of channels, D is the depth of the feature, + H is the height of the feature, and W is the width of the feature. pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (Depth, Height, Width). + it must contain three integers, (Depth, Height, Width). pool_type: ${pooling_type_comment} - require_index (bool): If true, the index of max pooling point along with outputs. - it cannot be set in average pooling type. + require_index (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -2709,7 +2807,7 @@ def adaptive_pool3d(input, name='data', shape=[3, 32, 32], dtype='float32') pool_out, mask = fluid.layers.adaptive_pool3d( input=data, - pool_size=[3, 3], + pool_size=[3, 3, 3], pool_type='avg') """ if pool_type not in ["max", "avg"]: @@ -2930,6 +3028,7 @@ def batch_norm(input, "momentum": momentum, "epsilon": epsilon, "is_test": is_test, + "data_layout": data_layout, "use_mkldnn": False, "fuse_with_relu": fuse_with_relu, "use_global_stats": use_global_stats @@ -2944,7 +3043,6 @@ def data_norm(input, param_attr=None, data_layout='NCHW', in_place=False, - use_mkldnn=False, name=None, moving_mean_name=None, moving_variance_name=None, @@ -2978,7 +3076,6 @@ def data_norm(input, param_attr(ParamAttr): The parameter attribute for Parameter `scale`. data_layout(string, default NCHW): NCHW|NHWC in_place(bool, Default False): Make the input and output of batch norm reuse memory. - use_mkldnn(bool, Default false): ${use_mkldnn_comment} name(string, Default None): A name for this layer(optional). If set None, the layer will be named automatically. moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. @@ -3059,8 +3156,7 @@ def data_norm(input, outputs={"Y": data_norm_out, "Means": means, "Scales": scales}, - attrs={"epsilon": epsilon, - "use_mkldnn": use_mkldnn}) + attrs={"epsilon": epsilon}) return helper.append_activation(data_norm_out) @@ -3235,7 +3331,7 @@ def group_norm(input, # create output mean_out = helper.create_variable(dtype=dtype, stop_gradient=True) variance_out = helper.create_variable(dtype=dtype, stop_gradient=True) - group_norm_out = helper.create_variable(dtype) + group_norm_out = helper.create_variable(dtype=dtype) helper.append_op( type="group_norm", @@ -3251,6 +3347,98 @@ def group_norm(input, return helper.append_activation(group_norm_out) +@templatedoc() +def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None): + """ + **Spectral Normalization Layer** + + This layer calculates the spectral normalization value of weight parameters of + fc, conv1d, conv2d, conv3d layers which should be 2-D, 3-D, 4-D, 5-D + Parameters. Calculations are showed as follows. + + Step 1: + Generate vector U in shape of [H], and V in shape of [W]. + While H is the :attr:`dim` th dimension of the input weights, + and W is the product result of remaining dimensions. + + Step 2: + :attr:`power_iters` shoule be a positive interger, do following + calculations with U and V for :attr:`power_iters` rounds. + + .. math:: + + \mathbf{v} := \\frac{\mathbf{W}^{T} \mathbf{u}}{\|\mathbf{W}^{T} \mathbf{u}\|_2} + + \mathbf{u} := \\frac{\mathbf{W}^{T} \mathbf{v}}{\|\mathbf{W}^{T} \mathbf{v}\|_2} + + Step 3: + Calculate :math:`\sigma(\mathbf{W})` and normalize weight values. + + .. math:: + + \sigma(\mathbf{W}) = \mathbf{u}^{T} \mathbf{W} \mathbf{v} + + \mathbf{W} = \\frac{\mathbf{W}}{\sigma(\mathbf{W})} + + + Refer to `Spectral Normalization `_ . + + Args: + weight(${weight_type}): ${weight_comment} + dim(int): ${dim_comment} + power_iters(int): ${power_iters_comment} + eps(float): ${eps_comment} + name (str): The name of this layer. It is optional. + + Returns: + Variable: A tensor variable of weight parameters after spectral normalization. + + Examples: + + >>> weight = fluid.layers.data(name='weight', shape=[8, 32, 32], + >>> dtype='float32') + >>> x = fluid.layers.spectral_norm(weight=data, dim=1, power_iters=2) + """ + helper = LayerHelper('spectral_norm', **locals()) + dtype = weight.dtype + + # create intput and parameters + inputs = {'Weight': weight} + input_shape = weight.shape + h = input_shape[dim] + w = np.prod(input_shape) // h + + u = helper.create_parameter( + attr=ParamAttr(), + shape=[h], + dtype=dtype, + default_initializer=Normal(0., 1.)) + u.stop_gradient = True + inputs['U'] = u + v = helper.create_parameter( + attr=ParamAttr(), + shape=[w], + dtype=dtype, + default_initializer=Normal(0., 1.)) + inputs['V'] = v + v.stop_gradient = True + + # create output + out = helper.create_variable(dtype=dtype) + + helper.append_op( + type="spectral_norm", + inputs=inputs, + outputs={"Out": out, }, + attrs={ + "dim": dim, + "power_iters": power_iters, + "eps": eps, + }) + + return out + + def conv2d_transpose(input, num_filters, output_size=None, @@ -4645,11 +4833,6 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): """ def __check_input(x, y): - if len(y.shape) > len(x.shape): - raise ValueError( - "Invalid inputs for matmul. " - "x's rank should be always greater than or equal to y'rank.") - x_shape = list(x.shape) y_shape = list(y.shape) if len(x_shape) == 1: @@ -4665,10 +4848,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): if x_shape[-1] != y_shape[-2]: raise ValueError("Invalid inputs for matmul.") - if len(y_shape) > 2: + if len(y_shape) > 2 and len(x_shape) > 2: for i, dim_x in enumerate(x_shape[:-2]): if dim_x != y_shape[i]: - raise ValueError("Invalid inputs for matmul.") + raise ValueError("Invalid inputs for matmul. x(%s), y(%s)" % + (x.shape, y.shape)) __check_input(x, y) @@ -5659,7 +5843,7 @@ def softmax_with_cross_entropy(logits, label, soft_label=False, ignore_index=kIgnoreIndex, - numeric_stable_mode=False, + numeric_stable_mode=True, return_softmax=False): """ **Softmax With Cross Entropy Operator.** @@ -5723,7 +5907,7 @@ def softmax_with_cross_entropy(logits, When soft_label is True or CPU is used, the algorithm is always numerically stable. Note that the speed may be slower when use - stable algorithm. Default: False + stable algorithm. Default: True return_softmax (bool): A flag indicating whether to return the softmax along with the cross entropy loss. Default: False @@ -5764,6 +5948,132 @@ def softmax_with_cross_entropy(logits, return loss +def sampled_softmax_with_cross_entropy(logits, + label, + num_samples, + num_true=1, + remove_accidental_hits=True, + use_customized_samples=False, + customized_samples=None, + customized_probabilities=None, + seed=0): + """ + **Sampled Softmax With Cross Entropy Operator.** + + Cross entropy loss with sampled softmax is used as the output layer for + larger output classes extensively. This operator samples a number of samples + for all examples, and computes the softmax normalized values for each + row of the sampled tensor, after which cross-entropy loss is computed. + + Because this operator performs a softmax on logits internally, it expects + unscaled logits. This operator should not be used with the output of + softmax operator since that would produce incorrect results. + + For examples with T true labels (T >= 1), we assume that each true label has + a probability of 1/T. For each sample, S samples are generated using a + log uniform distribution. True labels are concatenated with these samples to + form T + S samples for each example. So, assume the shape of logits is + [N x K], the shape for samples is [N x (T+S)]. For each sampled label, a + probability is calculated, which corresponds to the Q(y|x) in + [Jean et al., 2014](http://arxiv.org/abs/1412.2007). + + Logits are sampled according to the sampled labels. Then if + remove_accidental_hits is True, if a sample[i, j] accidentally hits true + labels, then the corresponding sampled_logits[i, j] is minus by 1e20 to + make its softmax result close to zero. Then sampled logits are subtracted by + logQ(y|x), these sampled logits and re-indexed labels are used to compute + a softmax with cross entropy. + + Args: + logits (Variable): The unscaled log probabilities, which is a 2-D tensor + with shape [N x K]. N is the batch_size, and K is the class number. + label (Variable): The ground truth which is a 2-D tensor. Label is a + Tensor with shape [N x T], where T is the number of true + labels per example. + num_samples (int): The number for each example, num_samples should be + less than the number of class. + num_true(int): The number of target classes per training example. + remove_accidental_hits (bool): A flag indicating whether to remove + accidental hits when sampling. If True and if a sample[i, j] + accidentally hits true labels, then the corresponding + sampled_logits[i, j] is minus by 1e20 to make its softmax result + close to zero. Default is True. + use_customized_samples (bool): Whether to use custom samples and probabities to sample + logits. + customized_samples (Variable): User defined samples, which is a 2-D tensor + with shape [N, T + S]. S is the num_samples, and T is the number of true + labels per example. + customized_probabilities (Variable): User defined probabilities of samples, + a 2-D tensor which has the same shape with customized_samples. + seed (int): The random seed for generating random number, which is used + in the process of sampling. Default is 0. + + Returns: + Variable: Return the cross entropy loss which is a 2-D tensor with shape + [N x 1]. + + Examples: + .. code-block:: python + + logits = fluid.layers.data(name='data', shape=[256], dtype='float32') + label = fluid.layers.data(name='label', shape=[5], dtype='int64') + fc = fluid.layers.fc(input=data, size=100) + out = fluid.layers.sampled_softmax_with_cross_entropy( + logits=fc, label=label, num_samples=25) + """ + helper = LayerHelper('sample_logits', **locals()) + samples = helper.create_variable_for_type_inference(dtype='int64') + probabilities = helper.create_variable_for_type_inference( + dtype=logits.dtype) + sampled_logits \ + = helper.create_variable_for_type_inference(dtype=logits.dtype) + sampled_label = helper.create_variable_for_type_inference(dtype='int64') + sampled_softlabel = helper.create_variable_for_type_inference( + dtype=logits.dtype) + + helper.append_op( + type='sample_logits', + inputs={ + 'Logits': logits, + 'Labels': label, + 'CustomizedSamples': customized_samples, + 'CustomizedProbabilities': customized_probabilities + }, + outputs={ + 'Samples': samples, + 'Probabilities': probabilities, + 'SampledLabels': sampled_label, + 'SampledLogits': sampled_logits + }, + attrs={ + 'use_customized_samples': use_customized_samples, + 'uniq': True, + 'remove_accidental_hits': remove_accidental_hits, + 'num_samples': num_samples, + 'seed': seed + }) + loss = helper.create_variable_for_type_inference(dtype=logits.dtype) + softmax = helper.create_variable_for_type_inference(dtype=logits.dtype) + helper.append_op( + type='one_hot', + inputs={'X': sampled_label}, + attrs={'depth': num_samples + 1}, + outputs={'Out': sampled_softlabel}) + + helper.append_op( + type='softmax_with_cross_entropy', + inputs={'Logits': sampled_logits, + 'Label': sampled_softlabel}, + outputs={'Softmax': softmax, + 'Loss': loss}, + attrs={ + 'soft_label': True, + 'ignore_index': False, + 'numeric_stable_mode': False + }) + return loss / num_true + + def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): """ This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. @@ -5935,13 +6245,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): than :attr:`shape`. act (str): The non-linear activation to be applied to the reshaped tensor variable. - inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple - operators. If this flag is set :attr:`True`, reuse input - :attr:`x` to reshape, which will change the shape of - tensor variable :attr:`x` and might cause errors when - :attr:`x` is used in multiple operators. If :attr:`False`, - preserve the shape :attr:`x` and create a new output tensor - variable whose data is copied from input x but reshaped. + inplace(bool): If ``inplace`` is `True`, the input and output of ``layers.reshape`` + are the same variable, otherwise, the input and output of + ``layers.reshape`` are different variables. Note that if :attr:`x` + is more than one layer's input, ``inplace`` must be :attr:`False`. name (str): The name of this layer. It is optional. Returns: @@ -6626,56 +6933,58 @@ def image_resize(input, Example: - For scale: - - if align_corners = True && out_size > 1 : + .. code-block:: text - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: + For scale: - scale_factor = float(in_size/out_size) - - - Nearest neighbor interpolation: - - if: - align_corners = False + if align_corners = True && out_size > 1 : - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + + Nearest neighbor interpolation: + + if: + align_corners = False - H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor - W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - else: - align_corners = True + H_out = floor (H_{in} * scale_{factor}) + W_out = floor (W_{in} * scale_{factor}) - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + else: + align_corners = True - H_out = round(H_{in} * scale_{factor}) - W_out = round(W_{in} * scale_{factor}) + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - Bilinear interpolation: + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) - if: - align_corners = False , align_mode = 0 - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + Bilinear interpolation: + + if: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - else: - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + else: + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. @@ -6830,41 +7139,39 @@ def resize_bilinear(input, Align_corners and align_mode are optinal parameters,the calculation method of interpolation can be selected by them. - - Align_corners and align_mode are optinal parameters,the calculation method - of interpolation can be selected by them. - Example: - For scale: - - if align_corners = True && out_size > 1 : + .. code-block:: text - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: + For scale: - scale_factor = float(in_size/out_size) + if align_corners = True && out_size > 1 : - Bilinear interpolation: + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) - if: - align_corners = False , align_mode = 0 - - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: - - H_out = (H_{in}+0.5) * scale_{factor} - 0.5 - W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + Bilinear interpolation: + + if: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 - else: + else: - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - H_out = H_{in} * scale_{factor} - W_out = W_{in} * scale_{factor} + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} @@ -6916,42 +7223,44 @@ def resize_nearest(input, align_corners=True): """ Resize input by performing nearest neighbor interpolation in both the - 3rd dimention(in height direction) and the 4th dimention(in width - direction) based on given output shape which specified by actual_shape, + 3rd dimension(in height direction) and the 4th dimension(in width + direction) based on given output shape which is specified by actual_shape, out_shape and scale in priority order. Example: - For scale: - - if align_corners = True && out_size > 1 : + .. code-block:: text + + For scale: + + if align_corners = True && out_size > 1 : - scale_factor = (in_size-1.0)/(out_size-1.0) - - else: + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + - scale_factor = float(in_size/out_size) - - - Nearest neighbor interpolation: - - if: - align_corners = False + Nearest neighbor interpolation: + + if: + align_corners = False - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor - W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + H_out = floor(H_{in} * scale_{factor}) + W_out = floor(W_{in} * scale_{factor}) - else: - align_corners = True + else: + align_corners = True - input : (N,C,H_in,W_in) - output: (N,C,H_out,W_out) where: + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: - H_out = round(H_{in} * scale_{factor}) - W_out = round(W_{in} * scale_{factor}) + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) For details of nearest neighbor interpolation, please refer to Wikipedia: @@ -8334,6 +8643,46 @@ def stack(x, axis=0): If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`. If :code:`axis` is None, it would be replaced with 0. + For Example: + + .. code-block:: text + + Case 1: + Input: + x[0].data = [ [1.0 , 2.0 ] ] + x[0].dims = [1, 2] + x[1].data = [ [3.0 , 4.0 ] ] + x[1].dims = [1, 2] + x[2].data = [ [5.0 , 6.0 ] ] + x[2].dims = [1, 2] + + Attrs: + axis = 0 + + Output: + Out.data =[ [ [1.0, 2.0] ], + [ [3.0, 4.0] ], + [ [5.0, 6.0] ] ] + Out.dims = [3, 1, 2] + + Case 2: + Given + x[0].data = [ [1.0 , 2.0 ] ] + x[0].dims = [1, 2] + x[1].data = [ [3.0 , 4.0 ] ] + x[1].dims = [1, 2] + x[2].data = [ [5.0 , 6.0 ] ] + x[2].dims = [1, 2] + + Attrs: + axis = 1 or axis = -2 + + Output: + Out.data =[ [ [1.0, 2.0] + [3.0, 4.0] + [5.0, 6.0] ] ] + Out.dims = [1, 3, 2] + Args: x (Variable|list(Variable)|tuple(Variable)): Input variables. axis (int|None): The axis along which all inputs are stacked. @@ -8706,16 +9055,17 @@ def slice(input, axes, starts, ends): return out -@templatedoc() def shape(input): """ - ${comment} + **Shape Layer** + + Get the shape of the input. Args: - input (Variable): ${input_comment} + input (Variable): The input variable. Returns: - out (Variable): ${out_comment} + Variable: The shape of the input variable. Examples: .. code-block:: python @@ -9684,6 +10034,7 @@ def teacher_student_sigmoid_loss(input, Examples: .. code-block:: python + cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label) """ helper = LayerHelper('teacher_student_sigmoid_loss', **locals()) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 3dcf9dc06998be9c38a48f18075cbf99f3dccb1a..4381727a090bdb1d13fb692e64e8d6fb69bba0d7 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -14,7 +14,7 @@ from __future__ import print_function import os -from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr +from .layer_function_generator import generate_layer_fn, generate_activation_fn from .. import core from ..framework import convert_np_dtype_to_dtype_ @@ -53,14 +53,35 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div') __all__ += __activations_noattr__ for _OP in set(__activations_noattr__): - globals()[_OP] = generate_layer_fn_noattr(_OP) + globals()[_OP] = generate_activation_fn(_OP) __all__ += ["uniform_random"] _uniform_random_ = generate_layer_fn('uniform_random') -def uniform_random(shape, dtype=None, min=None, max=None, seed=None): +def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0): + """ + This operator initializes a variable with random values sampled from a + uniform distribution. The random result is in set [min, max]. + + Args: + shape (list): The shape of output variable. + dtype(np.dtype|core.VarDesc.VarType|str): The type of data, such as + float32, float64 etc. Default: float32. + min (float): Minimum value of uniform random. Default -1.0. + max (float): Maximun value of uniform random. Default 1.0. + seed (int): Random seed used for generating samples. 0 means use a + seed generated by the system. Note that if seed is not 0, this + operator will always generate the same random numbers every time. + Default 0. + + Examples: + .. code-block:: python + + result = fluid.layers.uniform_random(shape=[32, 784]) + """ + locals_var = locals().keys() if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) @@ -72,12 +93,6 @@ def uniform_random(shape, dtype=None, min=None, max=None, seed=None): return _uniform_random_(**kwargs) -uniform_random.__doc__ = _uniform_random_.__doc__ + """ -Examples: - - >>> result = fluid.layers.uniform_random(shape=[32, 784]) -""" - __all__ += ['hard_shrink'] _hard_shrink_ = generate_layer_fn('hard_shrink') diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 2153ca254f0e286a77160a2d53473e1bc76109d5..af747c3cecac66492bb2e2642a88f66a5cfae3db 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -567,7 +567,7 @@ def ones(shape, dtype, force_cpu=False): It also sets *stop_gradient* to True. Args: - shape(tuple|list|None): Shape of output tensor + shape(tuple|list): Shape of output tensor dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor Returns: @@ -578,6 +578,10 @@ def ones(shape, dtype, force_cpu=False): data = fluid.layers.ones(shape=[1], dtype='int64') """ + assert isinstance(shape, list) or isinstance( + shape, tuple), "The shape's type should be list or tuple." + assert reduce(lambda x, y: x * y, + shape) > 0, "The shape is invalid: %s." % (str(shape)) return fill_constant(value=1.0, **locals()) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index fbd04f1eb461268ae98ca74c0bc46cc2717733cb..86b7716664c54fb389c671d0c0d2d69d2a0e4a2d 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -379,7 +379,7 @@ class Optimizer(object): self._dtype = loss.dtype program = loss.block.program optimize_ops = [] - if imperative_base.enabled(): + if framework._in_imperative_mode(): if parameter_list is not None: parameters = parameter_list else: @@ -649,6 +649,7 @@ class AdagradOptimizer(Optimizer): regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer. name: A optional name prefix. + initial_accumulator_value (float): Initial value for moment accumulator. Examples: .. code-block:: python @@ -662,7 +663,8 @@ class AdagradOptimizer(Optimizer): learning_rate, epsilon=1.0e-6, regularization=None, - name=None): + name=None, + initial_accumulator_value=0.0): assert learning_rate is not None assert epsilon is not None super(AdagradOptimizer, self).__init__( @@ -671,6 +673,7 @@ class AdagradOptimizer(Optimizer): name=name) self.type = "adagrad" self._epsilon = epsilon + self.initial_accumulator_value = initial_accumulator_value def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -683,6 +686,16 @@ class AdagradOptimizer(Optimizer): moment_acc = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) + startup_block = framework.default_startup_program().global_block() + startup_block.append_op( + type='fill_constant', + inputs={}, + outputs={'Out': [moment_acc]}, + attrs={ + 'dtype': moment_acc.dtype, + 'value': self.initial_accumulator_value, + 'shape': moment_acc.shape, + }) # Create the adagrad optimizer op adagrad_op = block.append_op( @@ -1368,9 +1381,9 @@ class FtrlOptimizer(Optimizer): Args: learning_rate (float|Variable): global learning rate. - l1 (float): - l2 (float): - lr_power (float): + l1 (float): L1 regularization strength. + l2 (float): L2 regularization strength. + lr_power (float): Learning Rate Power. regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer. name: A optional name prefix. diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 22212ae9a216acaab3f295f1f8d091829a0aa471..517418da1cf2f745ee5578e3c2b118394db7fae7 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -13,15 +13,11 @@ # limitations under the License. from __future__ import print_function -import multiprocessing from . import core from . import framework from . import executor -from .. import compat as cpt -import warnings +from . import compiler import sys -import six -import os __all__ = ['ParallelExecutor'] @@ -29,15 +25,6 @@ ExecutionStrategy = core.ParallelExecutor.ExecutionStrategy BuildStrategy = core.ParallelExecutor.BuildStrategy -def _is_pserver_mode(main_program): - main = main_program if main_program \ - else framework.default_main_program() - for op in main.global_block().ops: - if op.type in ["send", "recv"]: - return True - return False - - class ParallelExecutor(object): """ ParallelExecutor is designed for data parallelism, which focuses on distributing @@ -101,94 +88,37 @@ class ParallelExecutor(object): num_trainers=1, trainer_id=0, scope=None): - # step1: get places, the places are used in run too. - self._places = [] - if use_cuda: - gpus_env = os.getenv("FLAGS_selected_gpus") - if gpus_env: - gpus = [int(s) for s in gpus_env.split(",")] - else: - gpus = [ - i for i in six.moves.range(core.get_cuda_device_count()) - ] - self._places = [core.CUDAPlace(i) for i in gpus] - else: - cpu_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)] - assert self._places, "no place for execution" - - # step2: init exec_strategy - if exec_strategy is None: - exec_strategy = ExecutionStrategy() - exec_strategy.use_cuda = use_cuda - if exec_strategy.num_threads == 0: - if use_cuda: - # Experiments on se-resnext shows that too many threads hurt - # performance. Worth tunning for other models in the future. - exec_strategy.num_threads = len(self._places) * 4 - else: - cpu_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - exec_strategy.num_threads = cpu_num * 2 + sys.stderr.write( + 'ParallelExecutor is deprecated. ' + 'Please use CompiledProgram and Executor. CompiledProgram ' + 'is a central place for optimization and Executor is the ' + 'unified executor. Example can be found in compiler.py.\n') - # step3: init build_strategy if build_strategy is None: build_strategy = BuildStrategy() build_strategy.num_trainers = num_trainers build_strategy.trainer_id = trainer_id - # FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode, - # num_trainers is 1, so the current fields of build_strategy doesn't tell if - # it's distributed model. - build_strategy.is_distribution = _is_pserver_mode( - main_program) or num_trainers > 1 - - # step4: get main_program, scope, local_scopes - main = main_program if main_program \ - else framework.default_main_program() - # FIXME(dzhwinter): enable_inplace should be after memory_optimize - # if turn on python memory optimize, turn off the inplace_pass. - if build_strategy.enable_inplace is None: - build_strategy.enable_inplace = False if main._is_mem_optimized else True - scope = scope if scope is not None else executor.global_scope() - - if share_vars_from and not isinstance(share_vars_from, - ParallelExecutor): - raise TypeError("share_vars_from must be ParallelExecutor.") - - local_scopes = share_vars_from.executor.local_scopes()\ - if share_vars_from else [] - - # step5: check trainers_endpoints, it is used for distribution. - trainers_endpoints = main._trainers_endpoints - if num_trainers > 1 and trainers_endpoints: - assert num_trainers == len( - trainers_endpoints), "num_trainers == len(endpoints)" - build_strategy.trainers_endpoints = trainers_endpoints - - # step6: get persistable_vars, places. persistable_vars - # need be broadcast to other local_scope. - persistable_vars = set([ - cpt.to_text(v.name) for v in [ - var for var in main.list_vars() - if var.persistable and var.type != core.VarDesc.VarType.RAW - ] - ]) - def place_obj(place): - p = core.Place() - p.set_place(place) - return p + self._places = compiler.get_available_places(use_cuda) + self._scope = scope if scope is not None else executor.global_scope() - places = list(map(place_obj, self._places)) - - # step7: init ParallelExecutor - self.executor = core.ParallelExecutor( - places, persistable_vars, main.desc, - cpt.to_text(loss_name) if loss_name else six.u(''), scope, - local_scopes, exec_strategy, build_strategy) + main_program = main_program if main_program is not None \ + else framework.default_main_program() - self.scope = scope + self._compiled_program = compiler.CompiledProgram(main_program) + if share_vars_from: + assert isinstance( + share_vars_from, ParallelExecutor + ), "The share_vars_from should be ParallelExecutor." + self._compiled_program.with_data_parallel( + loss_name=loss_name, + build_strategy=build_strategy, + exec_strategy=exec_strategy, + share_vars_from=share_vars_from._compiled_program + if share_vars_from else None) + self._place = core.CUDAPlace(0) if use_cuda else core.CPUPlace() + self._exe = executor.Executor(self._place) + self._compiled_program._compile(place=self._place, scope=self._scope) def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True): """ @@ -255,56 +185,11 @@ class ParallelExecutor(object): loss = pe.run(feed=feeder.feed(cur_batch), fetch_list=[avg_cost.name])) """ - if feed is None and feed_dict is not None: - feed = feed_dict - print( - "`feed_dict` is deprecated. Please use `feed=`", - file=sys.stderr) - - if isinstance(feed, dict): - feed_tensor_dict = dict() - for feed_name in feed: - feed_tensor = feed[feed_name] - if not isinstance(feed_tensor, core.LoDTensor): - feed_tensor = core.LoDTensor() - # always set to CPU place, since the tensor need to be splitted - # it is fast in CPU - feed_tensor.set(feed[feed_name], core.CPUPlace()) - feed_tensor_dict[feed_name] = feed_tensor - - self.executor.feed_and_split_tensor_into_local_scopes( - feed_tensor_dict) - elif isinstance(feed, list) or isinstance(feed, tuple): - if len(feed) != len(self._places): - raise ValueError( - "Feed a list of tensor, the list should be the same size as places" - ) - - res = list() - - for i, each in enumerate(feed): - if not isinstance(each, dict): - raise TypeError( - "Each element of feed list should be a dict") - res_dict = dict() - for feed_name in each: - tensor = each[feed_name] - if not isinstance(tensor, core.LoDTensor): - tmp = core.LoDTensor() - tmp.set(tensor, self._places[i]) - tensor = tmp - res_dict[feed_name] = tensor - res.append(res_dict) - self.executor.feed_tensors_into_local_scopes(res) - - fetch_var_name = 'fetch' - self.executor.run(fetch_list, fetch_var_name) - arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array() - - if return_numpy: - return executor.as_numpy(arr) - - return [arr[i] for i in range(len(arr))] + return self._exe.run(program=self._compiled_program, + scope=self._scope, + feed=feed, + fetch_list=fetch_list, + return_numpy=return_numpy) @property def device_count(self): diff --git a/python/paddle/fluid/tests/demo/file_reader/convert_data_to_recordio.py b/python/paddle/fluid/tests/demo/file_reader/convert_data_to_recordio.py index 45a104ec9625eacfcb87ea6eae619e3d71410da9..b00af91a9dce637e312c9dc5d7d3824106b5a051 100644 --- a/python/paddle/fluid/tests/demo/file_reader/convert_data_to_recordio.py +++ b/python/paddle/fluid/tests/demo/file_reader/convert_data_to_recordio.py @@ -16,7 +16,6 @@ from __future__ import print_function import sys import paddle.fluid as fluid -import paddle.v2 as paddle def load_vocab(filename): diff --git a/python/paddle/fluid/tests/demo/pyreader.py b/python/paddle/fluid/tests/demo/pyreader.py index ec61e0ebae4feb1a2177da916b77b2ba2d3981b9..bbcef4c3ff23d955662be10b5f4b96a66da4c7d8 100644 --- a/python/paddle/fluid/tests/demo/pyreader.py +++ b/python/paddle/fluid/tests/demo/pyreader.py @@ -20,7 +20,6 @@ import six import paddle import paddle.dataset.mnist as mnist import paddle.fluid as fluid -import paddle.v2 def network(is_train): @@ -72,7 +71,7 @@ def main(): use_cuda=use_cuda, share_vars_from=trainer, main_program=test_prog) train_reader.decorate_paddle_reader( - paddle.v2.reader.shuffle( + paddle.reader.shuffle( paddle.batch(mnist.train(), 512), buf_size=8192)) test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 4b26bacce968a6da72e9aa043adb38918b293a35..a1cf5fad138f068c9eac5fe8d681c9f08b192270 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) +list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -107,12 +108,15 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL) +if(NOT WIN32) +py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL) +endif() if(NOT APPLE) py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL) endif() if(CMAKE_BUILD_TYPE STREQUAL "Debug") - # change the timeout from 600 to 900, because in debug mode, this test need more time. - set_tests_properties(test_image_classification_resnet PROPERTIES TIMEOUT 900) + # change the timeout from 600 to 1200, because in debug mode, this test need more time. + set_tests_properties(test_parallel_executor_seresnext PROPERTIES TIMEOUT 1200) endif() if (WITH_NGRAPH) diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py new file mode 100644 index 0000000000000000000000000000000000000000..079f0d22056c7a0ebe366a177f62fafad75eff61 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py @@ -0,0 +1,150 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import six +import unittest +import time +import math +import multiprocessing +import numpy as np + +import paddle +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import compiler + +# open eager delete mode +os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0' +os.environ['FLAGS_fast_eager_deletion_mode'] = 'true' +os.environ['CPU_NUM'] = '2' + + +class BuildIrMemOptBase(unittest.TestCase): + def check_network_convergence(self, + network, + use_cuda=True, + memory_opt=True, + use_ir_memory_optimize=True, + enable_inplace=True, + iter=5): + if use_cuda and not core.is_compiled_with_cuda(): + print('Skip use_cuda=True because Paddle is not compiled with cuda') + return + + if os.name == 'nt': + print( + 'Skip use_parallel_executor=True because Paddle comes without parallel support on windows' + ) + return + fluid.default_startup_program().random_seed = 100 + fluid.default_main_program().random_seed = 100 + batch_size = 32 + batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + + # build network + word_dict = paddle.dataset.imdb.word_dict() + train_reader = paddle.batch( + paddle.dataset.imdb.train(word_dict), batch_size=batch_size) + + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + + cost = network(data, label, len(word_dict)) + optimizer = fluid.optimizer.Adam(learning_rate=0.001) + optimizer.minimize(cost) + if memory_opt: + fluid.memory_optimize(fluid.default_main_program()) + + # execution + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + feeder = fluid.DataFeeder(feed_list=[data, label], place=place) + reader = feeder.decorate_reader(train_reader, multi_devices=True) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + train_cp = compiler.CompiledProgram(fluid.default_main_program()) + train_cp = train_cp.with_data_parallel(loss_name=cost.name) + fetch_list = [cost.name] + + begin = time.time() + first_loss, last_loss = None, None + step_id = 0 + custom_iter = getattr(self, "iter", None) + if not custom_iter == None: + iter = custom_iter + for data in reader(): + ret = exe.run(train_cp, feed=data, fetch_list=fetch_list) + print(ret) + step_id += 1 + if step_id == 1: + first_loss = ret[0] + if step_id == iter: + last_loss = ret[0] + break + end = time.time() + + print("%.4f Instance per second" % ( + (batch_size * iter) / (end - begin))) + + print(first_loss, last_loss) + avg_last_loss_val = np.array(last_loss).mean() + avg_first_loss_val = np.array(first_loss).mean() + if math.isnan(float(avg_last_loss_val)) or math.isnan( + float(avg_first_loss_val)): + sys.exit("got NaN loss, training failed.") + + return first_loss, last_loss + + +class TestIrMemOptBase(BuildIrMemOptBase): + def setUp(self): + self.network = None + + def test_network(self): + if self.network is None or not core.is_compiled_with_cuda(): + return + + baseline_first_loss, baseline_last_loss = None, None + for use_cuda in [True]: + for use_python_mem_opt in [True, False]: + print( + 'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'. + format(self.network.__name__, use_cuda, use_python_mem_opt, + not use_python_mem_opt)) + with fluid.program_guard(fluid.Program(), fluid.Program()): + with fluid.scope_guard(core.Scope()): + if use_cuda is True and use_python_mem_opt is True: + baseline_first_loss, baseline_last_loss = self.check_network_convergence( + self.network, + use_cuda=use_cuda, + memory_opt=use_python_mem_opt) + else: + cur_first_loss, cur_last_loss = self.check_network_convergence( + self.network, + use_cuda=use_cuda, + memory_opt=use_python_mem_opt) + + self.assertAlmostEquals( + np.mean(baseline_last_loss), + np.mean(cur_last_loss), + delta=1e-2) + self.assertAlmostEquals( + np.mean(baseline_first_loss), + np.mean(cur_first_loss), + delta=1e-2) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..57a5714fc7853905703e9db31bc143fb5cabfacb --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py @@ -0,0 +1,86 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import paddle.fluid.core as core +import paddle.fluid as fluid + + +def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out, + out_grad, x_grad): + def __assert_close(tensor, np_array, msg, atol=1e-4): + test_case.assertTrue( + np.allclose( + np.array(tensor), np_array, atol=atol), msg) + + place = core.CPUPlace() + + var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad} + var_names = list(var_dict.keys()) + ground_truth = {name: var_dict[name] for name in var_names} + + program = fluid.Program() + with fluid.program_guard(program): + block = program.global_block() + for name in ground_truth: + block.create_var( + name=name, dtype=np.float32, shape=ground_truth[name].shape) + + op = block.append_op( + type=op_type, + inputs={'X': block.var('x'), }, + outputs={'Out': block.var('out')}, + attrs={'use_mkldnn': True}) + + # Generate backward op_desc + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc, + set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode('ascii')) + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode('ascii')) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + exe = fluid.Executor(place) + + # Do at least 2 iterations + for i in range(2): + out = exe.run( + program, + feed={name: var_dict[name] + for name in ['x', 'out@GRAD']}, + fetch_list=['x@GRAD', 'out']) + + __assert_close(x_grad, out[0], 'x@GRAD') + + +def format_reorder(out, size): + in_n = size[0] + out_h = size[2] + out_w = size[3] + out_c = size[1] + out_tmp = np.zeros((in_n, out_h, out_w, out_c)) + for n in range(in_n): + for i in range(out_h): + for j in range(out_w): + for m in range(out_c): + out_tmp[n, i, j, m] = out[n, m, i, j] + return out_tmp.reshape(in_n, out_c, out_h, out_w) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py index ad94a4b21c347c9a2782437948c20d3b3071c679..7099387b887003a205c0dfb4c8e9c83f89e29494 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py @@ -18,8 +18,8 @@ import unittest import numpy as np import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import OpTest -from scipy.special import expit from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs +from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd class TestMKLDNNReluDim2(TestRelu): @@ -97,5 +97,26 @@ class TestMKLDNNAbsDim4(TestAbs): self.attrs = {"use_mkldnn": True} +# Check if primitives already exist in backward +class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase): + def setUp(self): + super(TestMKLDNNAbsPrimitivesAlreadyExist, self).setUp() + + np.random.seed(123) + self.op_type = 'abs' + self.x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32) + self.out = np.abs(self.x) + self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32) + self.x_grad = self.__abs_bwd(self.x, self.out_grad) + + # Abs grad calculation + def __abs_bwd(self, x, out_grad): + return out_grad * np.sign(x) + + def test_check(self): + check_if_mkldnn_primitives_exist_in_bwd( + self, self.op_type, self.x, self.out, self.out_grad, self.x_grad) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py index 100a03cea0f740a615c4a08810d4ad9e8c974d7a..c7b8a096bf1a7e2f5b63b136c7036edad863c888 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py @@ -20,6 +20,7 @@ import numpy as np import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import OpTest from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp +from mkldnn_op_test import format_reorder def conv2d_forward_refer(input, filter, group, conv_param): @@ -29,20 +30,6 @@ def conv2d_forward_refer(input, filter, group, conv_param): return format_reorder(out, size) -def format_reorder(out, size): - in_n = size[0] - out_h = size[2] - out_w = size[3] - out_c = size[1] - out_tmp = np.zeros((in_n, out_h, out_w, out_c)) - for n in range(in_n): - for i in range(out_h): - for j in range(out_w): - for m in range(out_c): - out_tmp[n, i, j, m] = out[n, m, i, j] - return out_tmp.reshape(in_n, out_c, out_h, out_w) - - class TestConv2dInt8Op(TestConv2dOp): def setUp(self): self.op_type = "conv2d" diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py index 0542eef80070cbf281ee013c28b7092a2dd17eaa..28b670d7ab3267a03157b7e617504eb9a35656aa 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py @@ -15,44 +15,139 @@ from __future__ import print_function import unittest +import numpy as np -from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest +from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp -class TestMKLDNN(TestConv2dOp): - def init_kernel_type(self): - self.use_mkldnn = True - self.data_format = "NCHW" +def conv2d_bias_naive(out, bias): + _, out_c, _, _ = out.shape + for l in range(out_c): + out[:, l, :, :] = out[:, l, :, :] + bias[l] + return out -class TestMKLDNNWithPad(TestWithPad): - def init_kernel_type(self): - self.use_mkldnn = True - self.data_format = "NCHW" +def conv2d_residual_naive(out, residual): + assert out.shape == residual.shape + out = np.add(out, residual) + return out -class TestMKLDNNWithStride(TestWithStride): - def init_kernel_type(self): - self.use_mkldnn = True - self.data_format = "NCHW" +class TestConv2dMKLDNNOp(TestConv2dOp): + def init_group(self): + self.groups = 1 -class TestMKLDNNWithGroup(TestWithGroup): def init_kernel_type(self): - self.use_mkldnn = True self.data_format = "NCHW" + self.use_mkldnn = True + self._cpu_only = True + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] -class TestMKLDNNWith1x1(TestWith1x1): - def init_kernel_type(self): - self.use_mkldnn = True - self.data_format = "NCHW" + def setUp(self): + self.fuse_bias = False + self.bias_size = None + self.fuse_relu = False + self.fuse_residual_connection = False + self.input_residual_size = None + TestConv2dOp.setUp(self) + output = self.outputs['Output'] -class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): - def init_kernel_type(self): - self.use_mkldnn = True - self.data_format = "NCHW" + #mkldnn only support either conv-sum-relu, or conv-relu. + if self.fuse_bias and self.bias_size is not None: + bias = np.random.random(self.bias_size).astype(self.dtype) + output = conv2d_bias_naive(output, bias) + output = output.astype(self.dtype) + self.attrs['fuse_bias'] = self.fuse_bias + self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias) + + if self.fuse_residual_connection and self.input_residual_size is not None: + input_residual = np.random.random(self.input_residual_size).astype( + self.dtype) + output = conv2d_residual_naive(output, input_residual) + + self.attrs[ + 'fuse_residual_connection'] = self.fuse_residual_connection + self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype( + input_residual) + + if self.fuse_relu: + output = np.maximum(output, 0).astype(self.dsttype) + + output = output.astype(self.dtype) + + self.attrs['fuse_bias'] = self.fuse_bias + self.attrs['fuse_relu'] = self.fuse_relu + self.attrs['fuse_residual_connection'] = self.fuse_residual_connection + + self.outputs['Output'] = output + + +class TestWithFuse(TestConv2dMKLDNNOp): + def init_test_case(self): + TestConv2dMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.fuse_bias = True + self.bias_size = [6] + self.fuse_residual_connection = True + self.input_residual_size = [2, 6, 5, 5] + + def test_check_grad(self): + pass + + def test_check_grad_no_filter(self): + pass + + def test_check_grad_no_input(self): + pass + + +class TestWithPadWithBias(TestConv2dMKLDNNOp): + def init_test_case(self): + TestConv2dMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.input_size = [2, 3, 6, 6] + + +class TestWithStride(TestConv2dMKLDNNOp): + def init_test_case(self): + TestConv2dMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] + + +class TestWithGroup(TestConv2dMKLDNNOp): + def init_group(self): + self.groups = 3 + + +class TestWith1x1(TestConv2dMKLDNNOp): + def init_test_case(self): + TestConv2dMKLDNNOp.init_test_case(self) + self.filter_size = [6, 3, 1, 1] + + +class TestWithInput1x1Filter1x1(TestConv2dMKLDNNOp): + def init_test_case(self): + TestConv2dMKLDNNOp.init_test_case(self) + self.input_size = [2, 3, 1, 1] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py index 9bcdb7b2a975b648471714ab628caf91b6b6f3a9..cc72df51f1e5c0968921c206a59cce5239fe5a83 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py @@ -15,36 +15,22 @@ from __future__ import print_function import unittest +import numpy as np +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest -from paddle.fluid.tests.unittests.test_conv2d_transpose_op import TestConv2dTransposeOp, TestWithPad, TestWithStride +from paddle.fluid.tests.unittests.test_conv2d_transpose_op import conv2dtranspose_forward_naive, TestConv2dTransposeOp -class TestMKLDNN(TestConv2dTransposeOp): - def init_op_type(self): - self.is_test = True - self.use_mkldnn = True - self.data_format = "NCHW" - self.op_type = "conv2d_transpose" - self._cpu_only = True - - def test_check_grad(self): - return +def conv2d_bias_naive(out, bias): + _, out_c, _, _ = out.shape - def test_check_grad_no_input(self): - return - - def test_check_grad_no_filter(self): - return + for l in range(out_c): + out[:, l, :, :] = out[:, l, :, :] + bias[l] + return out -class TestMKLDNNWithPad(TestWithPad): - def init_op_type(self): - self.is_test = True - self.use_mkldnn = True - self.data_format = "NCHW" - self.op_type = "conv2d_transpose" - self._cpu_only = True - +class TestConv2dTransposeMKLDNNOp(TestConv2dTransposeOp): def test_check_grad(self): return @@ -54,24 +40,64 @@ class TestMKLDNNWithPad(TestWithPad): def test_check_grad_no_filter(self): return - -class TestMKLDNNWithStride(TestWithStride): def init_op_type(self): - self.is_test = True - self.use_mkldnn = True self.data_format = "NCHW" self.op_type = "conv2d_transpose" self._cpu_only = True - def test_check_grad(self): - return - - def test_check_grad_no_input(self): - return - - def test_check_grad_no_filter(self): - return - - -if __name__ == '__main__': - unittest.main() + def init_test_case(self): + self.use_mkldnn = True + self.is_test = True + self.pad = [0, 0] + self.fuse_bias = False + self.bias_size = None + self.fuse_relu = False + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + self.groups = 1 + + def setUp(self): + TestConv2dTransposeOp.setUp(self) + + output = self.outputs['Output'] + + if self.fuse_bias and self.bias_size is not None: + bias = np.random.random(self.bias_size).astype(self.dtype) + output = conv2d_bias_naive(output, bias) + output = output.astype(self.dtype) + self.attrs['fuse_bias'] = self.fuse_bias + self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias) + + if self.fuse_relu: + output = np.maximum(output, 0).astype(self.dtype) + + self.attrs['fuse_bias'] = self.fuse_bias + self.attrs['fuse_relu'] = self.fuse_relu + + self.outputs['Output'] = output + + +class TestMKLDNNFuseBias(TestConv2dTransposeMKLDNNOp): + def init_test_case(self): + TestConv2dTransposeMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.fuse_bias = True + self.bias_size = [6] + + +class TestMKLDNNWithPad(TestConv2dTransposeMKLDNNOp): + def init_test_case(self): + TestConv2dTransposeMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.input_size = [2, 3, 10, 10] + + +class TestMKLDNNWithStride(TestConv2dTransposeMKLDNNOp): + def init_test_case(self): + TestConv2dTransposeMKLDNNOp.init_test_case(self) + self.pad = [1, 1] + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] # NCHW diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index 6de43dd46e5d184ec934f2d85e0c87137e9702e0..feb2a563eeaed7a83a82ec56ec08a0ed8664d126 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -18,6 +18,24 @@ import unittest from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 +def create_test_mkldnn_use_ceil_class(parent): + class TestMKLDNNPool2DUseCeilCase(parent): + def init_kernel_type(self): + self.use_mkldnn = True + + def init_ceil_mode(self): + self.ceil_mode = True + + cls_name = "{0}_{1}".format(parent.__name__, "MKLDNNCeilModeCast") + TestMKLDNNPool2DUseCeilCase.__name__ = cls_name + globals()[cls_name] = TestMKLDNNPool2DUseCeilCase + + +create_test_mkldnn_use_ceil_class(TestPool2D_Op) +create_test_mkldnn_use_ceil_class(TestCase1) +create_test_mkldnn_use_ceil_class(TestCase2) + + def create_test_mkldnn_class(parent): class TestMKLDNNCase(parent): def init_kernel_type(self): diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b7a4683558539d3f9daa6a1146355acc3ff2bab7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py @@ -0,0 +1,93 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from paddle.fluid.tests.unittests.op_test import OpTest +from mkldnn_op_test import format_reorder + + +class TestReQuantizeOp(OpTest): + def setUp(self): + self.op_type = 'requantize' + self.scale_in = 2.0 + self.scale_out = 1.5 + self.input_size = [1, 1, 5, 5] + self.data_type = 'int8' + self.set_scale() + self.set_data_type() + + scale_shift = self.scale_out / self.scale_in + + if self.data_type == 'int8': + input = (np.random.randint(0, 100, self.input_size) - 50 + ).astype(self.data_type) + output_tmp = np.round(input.astype('float32') * + scale_shift).astype('int8') + else: + input = (np.random.randint(0, 100, + self.input_size)).astype(self.data_type) + output_tmp = np.round(input.astype('float32') * + scale_shift).astype('uint8') + + output = format_reorder(output_tmp, self.input_size) + + self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)} + + self.outputs = {'Output': output} + + self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out} + + def test_check_output(self): + self.check_output() + + def set_scale(self): + pass + + def set_data_type(OpTest): + pass + + +#--------------------test requantize with s8 input-------------------- + + +class TestReQuantizeOp1(TestReQuantizeOp): + def set_scale(self): + self.scale_in = 1.5 + self.scale_out = 1.5 + + +class TestReQuantizeOp2(TestReQuantizeOp): + def set_scale(self): + self.scale_in = 0.1 + self.scale_out = 0.2 + + +#--------------------test requantize with u8 input-------------------- + + +class TestReQuantizeOp3(TestReQuantizeOp1): + def set_data_type(self): + self.data_type = 'uint8' + + +class TestReQuantizeOp4(TestReQuantizeOp2): + def set_data_type(self): + self.data_type = 'uint8' + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py new file mode 100644 index 0000000000000000000000000000000000000000..748b77f2bf48f450426d3ea918138a7db8df78f0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py @@ -0,0 +1,57 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from paddle.fluid.tests.unittests.op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp, stable_softmax +from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd + + +class TestSoftmaxMKLDNNOp(TestSoftmaxOp): + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp): + def get_x_shape(self): + return [2, 3, 4, 5] + + +# Check if primitives already exist in backward +class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase): + def setUp(self): + super(TestSoftmaxMKLDNNPrimitivesAlreadyExist, self).setUp() + + np.random.seed(123) + self.op_type = 'softmax' + self.x = np.random.uniform(-1, 1, 2).astype(np.float32) + self.out = stable_softmax(self.x) + self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32) + self.x_grad = self.__softmax_bwd(self.out, self.out_grad) + + # Softmax grad calculation + def __softmax_bwd(self, out, out_grad): + return out * (out_grad - np.dot(out, out_grad)) + + def test_check(self): + check_if_mkldnn_primitives_exist_in_bwd( + self, self.op_type, self.x, self.out, self.out_grad, self.x_grad) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py index 84b9198dbf6569b7dbd7bd3c953d5254ece178e8..5298c3c2f6f0113977342ab3e09830027585ada1 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py @@ -15,39 +15,7 @@ from __future__ import print_function import unittest -import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest - - -class TestNGRAPHAccuracyOp(OpTest): - def setUp(self): - self.op_type = "accuracy" - self.dtype = np.float32 - self.init_dtype() - n = 128 - infer = np.random.random((n, 1)).astype(self.dtype) - indices = np.random.randint(0, 2, (n, 1)) - label = np.random.randint(0, 2, (n, 1)) - self.inputs = {'Out': infer, 'Indices': indices, "Label": label} - num_correct = 0 - for rowid in range(n): - for ele in indices[rowid]: - if ele == label[rowid]: - num_correct += 1 - break - self.outputs = { - 'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype), - 'Correct': np.array([num_correct]).astype("int64"), - 'Total': np.array([n]).astype("int64") - } - self._cpu_only = True - - def init_dtype(self): - pass - - def test_check_output(self): - self.check_output() - +from paddle.fluid.tests.unittests.test_accuracy_op import TestAccuracyOp if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py index 2bd9bf843039573862a22c85557d416bf82b41f6..034d7792c13efb432e6bef6c95ee554584f29519 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py @@ -18,17 +18,7 @@ import unittest import numpy as np import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import OpTest -from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh - - -class TestNGRAPHReluDim2(TestRelu): - def setUp(self): - super(TestNGRAPHReluDim2, self).setUp() - - -class TestNGRAPHTanhDim2(TestTanh): - def setUp(self): - super(TestNGRAPHTanhDim2, self).setUp() +from paddle.fluid.tests.unittests.test_activation_op import TestSigmoid, TestRelu, TestTanh class TestNGRAPHReluDim4(TestRelu): diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ef2aedf65f4c0cc182738c7a7a538095f8f628d5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_adam_op import TestAdamOp1, TestAdamOp2, TestAdamOpMultipleSteps, TestSparseAdamOp + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..34fb73f3cf7e8b3d906ed4e04d151923aa219ab1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_batch_norm_op import TestBatchNormOpTraining, TestBatchNormOpInference + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..a223d73a7416c3564d5d4ef5ca4f3e1b42595a0d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3 + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py index dbc8557b4e1c96c13c5a189b44bed4b5f1aabf4f..ff2e865b66a5f1166281c267392b0964ca5b3082 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py @@ -17,60 +17,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 - -class TestNGRAPH(TestConv2dOp): - def setUp(self): - super(TestNGRAPH, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPH, self).init_kernel_type() - - -class TestNGRAPHWithPad(TestWithPad): - def setUp(self): - super(TestNGRAPHWithPad, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithPad, self).init_kernel_type() - - -class TestNGRAPHWithStride(TestWithStride): - def setUp(self): - super(TestNGRAPHWithStride, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithStride, self).init_kernel_type() - - -class TestNGRAPHWithGroup(TestWithGroup): - def setUp(self): - super(TestNGRAPHWithGroup, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithGroup, self).init_kernel_type() - - -class TestNGRAPHWith1x1(TestWith1x1): - def setUp(self): - super(TestNGRAPHWith1x1, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWith1x1, self).init_kernel_type() - - -class TestNGRAPHWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): - def setUp(self): - super(TestNGRAPHWithInput1x1Filter1x1, self).setUp() - self._cpu_only = True - - def init_kernel_type(self): - super(TestNGRAPHWithInput1x1Filter1x1, self).init_kernel_type() - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..3057218a1d80deffe7eb3164c2350143fc38007d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_cross_entropy_op import TestCrossEntropyOp, TestCrossEntropyOp2, TestCrossEntropyOp3, TestCrossEntropyOp4, TestCrossEntropyOp5, TestCrossEntropyOp6, TestCrossEntropyOp7 + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py index 67f749bfeeb1bb47a8c5bb3486ac3292c8af8164..3fb9af3a542d5e6b0de7d8d839408759abdaedcb 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py @@ -13,18 +13,9 @@ # limitations under the License. from __future__ import print_function -import unittest -from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp - - -class TestNGRAPHElementwiseAddOp(TestElementwiseAddOp): - def setUp(self): - super(TestNGRAPHElementwiseAddOp, self).setUp() - self._cpu_only = True - - def init_input_output(self): - super(TestNGRAPHElementwiseAddOp, self).init_input_output() +import unittest +from paddle.fluid.tests.unittests.test_elementwise_add_op import TestElementwiseAddOp, TestElementwiseAddOp_broadcast_0 if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py index 835376ffe78f9119a9be6c379998e3a3b50aab43..2b10b8f7a3ac0f978c13bd86824b939e69c5336a 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py @@ -13,24 +13,34 @@ # limitations under the License. from __future__ import print_function + import unittest +import numpy as np from paddle.fluid.tests.unittests.test_fill_constant_op import TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows -class TestNGRAPHFillConstantOp1(TestFillConstantOp1): +class TestNGRAPHFillConstantFP64(TestFillConstantOp1): def setUp(self): - super(TestNGRAPHFillConstantOp1, self).setUp() + super(TestNGRAPHFillConstantFP64, self).setUp() + + self.attrs = {'shape': [123, 92], 'value': 3.8, 'dtype': 6} + self.outputs = {'Out': np.full((123, 92), 3.8)} -class TestNGRAPHFillConstantOp2(TestFillConstantOp2): +class TestNGRAPHFillConstantINT32(TestFillConstantOp2): def setUp(self): - super(TestNGRAPHFillConstantOp2, self).setUp() + super(TestNGRAPHFillConstantINT32, self).setUp() + self.attrs = {'shape': [123, 92], 'dtype': 2} + self.outputs = {'Out': np.full((123, 92), 0)} -class TestNGRAPHFillConstantOpWithSelectedRows( - TestFillConstantOpWithSelectedRows): + +class TestNGRAPHFillConstantINT64(TestFillConstantOp2): def setUp(self): - super(TestFillConstantOpWithSelectedRows, self).setUp() + super(TestNGRAPHFillConstantINT64, self).setUp() + + self.attrs = {'shape': [123, 92], 'dtype': 3} + self.outputs = {'Out': np.full((123, 92), 0)} if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py index 11881ac6e5292ce2beea1b353c6ca857ada28839..b4894734cbcc11cf5eec7401297dc35545aa7268 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py @@ -16,12 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_mean_op import TestMeanOp - -class TestNGRAPHMeanOp(TestMeanOp): - def setUp(self): - super(TestNGRAPHMeanOp, self).setUp() - self._cpu_only = True - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3549d907f5f67abc0cbd448a492d95b8ae6c32 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_momentum_op import TestMomentumOp1, TestMomentumOp2, TestLarsMomentumOp, TestSparseMomentumOp, TestSparseMomentumOp2 + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py index a916c8d450f4a218c85f39c31737b2efa0ef926d..549d03f6e92dc7e88ec8618e5f97287bb68ed0d9 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py @@ -15,39 +15,7 @@ from __future__ import print_function import unittest -import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest - - -class TestNGRAPHMulOp(OpTest): - def setUp(self): - self.op_type = "mul" - self.dtype = np.float32 - self.init_dtype_type() - self.inputs = { - 'X': np.random.random((2, 4)).astype(self.dtype), - 'Y': np.random.random((4, 4)).astype(self.dtype) - } - self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} - self._cpu_only = True - - def init_dtype_type(self): - pass - - def test_check_output(self): - self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) - - def test_check_grad_ingore_x(self): - self.check_grad( - ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - self.check_grad( - ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) - +from paddle.fluid.tests.unittests.test_mul_op import TestMulOp, TestMulOp2 if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py index 96a2b72d8add9cc765a8536932b72426c8489025..ff82e9fa1d3d343aa7faf56a0bd27d2c9edc1ea4 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py @@ -14,61 +14,25 @@ from __future__ import print_function -from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 - - -class TestNGRAPHPool2D_Op(TestPool2D_Op): - def setUp(self): - super(TestNGRAPHPool2D_Op, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHPool2D_Op, self).init_test_case() - - -class TestNGRAPHCase1(TestCase1): - def setUp(self): - super(TestNGRAPHCase1, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHCase1, self).init_test_case() +import unittest - -class TestNGRAPHCase2(TestCase2): - def setUp(self): - super(TestNGRAPHCase2, self).setUp() - self._cpu_only = True - - def init_test_case(self): - super(TestNGRAPHCase2, self).init_test_case() - - -class TestNGRAPHCase3(TestCase3): - def setUp(self): - super(TestNGRAPHCase3, self).setUp() - self._cpu_only = True - - def init_pool_type(self): - super(TestNGRAPHCase3, self).init_pool_type() +from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 -class TestNGRAPHCase4(TestCase4): +class TestNGRAPHCeilMode(TestCase1): def setUp(self): - super(TestNGRAPHCase4, self).setUp() - self._cpu_only = True + super(TestNGRAPHCeilMode, self).setUp() - def init_pool_type(self): - super(TestNGRAPHCase4, self).init_pool_type() + def init_ceil_mode(self): + self.ceil_mode = True -class TestNGRAPHCase5(TestCase5): +class TestNGRAPHAdaptive(TestCase1): def setUp(self): - super(TestNGRAPHCase5, self).setUp() - self._cpu_only = True + super(TestNGRAPHAdaptive, self).setUp() - def init_pool_type(self): - super(TestNGRAPHCase5, self).init_pool_type() + def init_adaptive(self): + self.adaptive = True if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py index 4da5ca4583c65d69ead8d3e9886605a7ad104cc0..8beb44f55e487eef5f1957e9284d4a711c9770aa 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py @@ -15,24 +15,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_scale_op import TestScaleOp, TestScaleOpSelectedRows - -class TestNGRAPHScaleOp(TestScaleOp): - def setUp(self): - super(TestNGRAPHScaleOp, self).setUp() - self._cpu_only = True - - def init_dtype_type(self): - pass - - -class TestNGRAPHScaleOpSelectedRows(TestScaleOpSelectedRows): - def setUp(self): - super(TestNGRAPHScaleOpSelectedRows, self).setUp() - self._cpu_only = True - - def init_dtype_type(self): - pass - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py index 81894c6e3872e4617085c6bb4b0219a49c9986fd..0cb08842df0797952c47a63ba2bbb8614c0e8a22 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py @@ -16,11 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp - -class TestSoftmaxNGRAPHOp(TestSoftmaxOp): - def setUp(self): - super(TestSoftmaxNGRAPHOp, self).setUp() - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_sum_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_sum_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ed9fb618024301818a12fd0d02b09c6f3a5f2c53 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_sum_ngraph_op.py @@ -0,0 +1,19 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function +import unittest +from paddle.fluid.tests.unittests.test_sum_op import TestSumOp, TestSelectedRowsSumOp, TestLoDTensorAndSelectedRowsOp + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py index fa68df1adf2cfb31c63b70098a6fedc2ab3913aa..d2319c4d921fccb950b1a3059fdecd3b3b044182 100644 --- a/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py +++ b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py @@ -16,30 +16,5 @@ from __future__ import print_function import unittest from paddle.fluid.tests.unittests.test_top_k_op import TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4 - -class TestNGRAPHTopkOp(TestTopkOp): - def setUp(self): - super(TestNGRAPHTopkOp, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp2(TestTopkOp2): - def setUp(self): - super(TestNGRAPHTopkOp2, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp3(TestTopkOp3): - def setUp(self): - super(TestNGRAPHTopkOp3, self).setUp() - self._cpu_only = True - - -class TestNGRAPHTopkOp4(TestTopkOp4): - def setUp(self): - super(TestNGRAPHTopkOp4, self).setUp() - self._cpu_only = True - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 0fe836683b029698b670bbb9f9bb258c2f3b68a0..823445724302dbde47bc36122c62ef44a7e2394f 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import unittest import numpy as np import random @@ -374,6 +375,9 @@ class OpTest(unittest.TestCase): return [] places = [fluid.CPUPlace()] cpu_only = self._cpu_only if hasattr(self, '_cpu_only') else False + use_ngraph = bool(os.getenv("FLAGS_use_ngraph", False)) + if use_ngraph: + cpu_only = True if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type)\ and not cpu_only: places.append(core.CUDAPlace(0)) diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index c429c8af7d37cb4e209edc41f704868afe054829..a94487e67dc90d4df935867f841bc567c37c8aa2 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase): if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv - build_strategy.memory_optimize = use_ir_memory_optimize + build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize # python memory optimization is conflict with inplace pass. # Use ir graph memory optimization after inplace pass is the correct way. build_strategy.enable_inplace = False if memory_opt else enable_inplace diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 55c43ef115a316cc0fe5bb336b7a766a956c1496..d5a838540994abcd1407fd258e723218670bfb58 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -26,6 +26,7 @@ class TestActivation(OpTest): self.op_type = "exp" self.dtype = np.float32 self.init_dtype() + self.init_kernel_type() x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) out = np.exp(x) @@ -44,6 +45,9 @@ class TestActivation(OpTest): def init_dtype(self): self.dtype = np.float32 + def init_kernel_type(self): + pass + class TestSigmoid(TestActivation): def setUp(self): @@ -601,6 +605,25 @@ class TestSwish(TestActivation): self.check_grad(['X'], 'Out', max_relative_error=0.008) +#------------------ Test Cudnn Activation---------------------- +def create_test_act_cudnn_class(parent, atol=1e-3, grad_atol=1e-3): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestActCudnn(parent): + def init_kernel_type(self): + self.attrs = {"use_cudnn": True} + + cls_name = "{0}_{1}".format(parent.__name__, "cudnn") + TestActCudnn.__name__ = cls_name + globals()[cls_name] = TestActCudnn + + +create_test_act_cudnn_class(TestRelu) +create_test_act_cudnn_class(TestRelu6) +create_test_act_cudnn_class(TestSigmoid) +create_test_act_cudnn_class(TestTanh) + + #------------------ Test Fp16 ---------------------- def create_test_act_fp16_class(parent, atol=1e-3, diff --git a/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py b/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py new file mode 100644 index 0000000000000000000000000000000000000000..9d5fe114bad2b2bae73cf18e17ebd7af288a91da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py @@ -0,0 +1,74 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np + +from op_test import OpTest + + +class TestAllocContinuousSpace(OpTest): + def setUp(self): + self.op_type = "alloc_continuous_space" + self.dtype = np.float32 + attrs = self.init_attr() + self.copy_data = attrs["copy_data"] + self.constant = attrs["constant"] + self.set_constant = attrs["set_constant"] + self.Inputs = self.init_input() + self.FusedOutput = self.init_output(self.Inputs, self.set_constant, + self.constant) + self.inputs = {'Input': self.Inputs} + self.attrs = attrs + self.outputs = {'Output': self.Inputs, 'FusedOutput': self.FusedOutput} + + def init_dtype(self): + self.dtype = np.float32 + + def init_input(self): + inputs = [] + inputs.append(("x1", np.random.random([20, 3]).astype(self.dtype))) + inputs.append(("x2", np.random.random([20]).astype(self.dtype))) + inputs.append(("x3", np.random.random([1]).astype(self.dtype))) + inputs.append(("x4", np.random.random([200, 30]).astype(self.dtype))) + inputs.append(("x5", np.random.random([30]).astype(self.dtype))) + inputs.append(("x6", np.random.random([1]).astype(self.dtype))) + return inputs + + def init_attr(self): + return {"copy_data": True, "set_constant": False, "constant": 0.0} + + def init_output(self, input_list, set_constant, constant): + inputs = [input[1].flatten() for input in input_list] + output = np.concatenate(inputs) + if set_constant: + output = np.ones((len(output))) * constant + return output + + def test_check_output(self): + self.check_output() + + +class TestAllocContinuousSpace2(TestAllocContinuousSpace): + def init_attr(self): + return {"copy_data": False, "set_constant": True, "constant": 0.5} + + def test_check_output(self): + self.check_output(no_check_set=["Output"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..b12aaea3219cb81e8fa0e7584120db510fb7b62c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -0,0 +1,79 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np + +import paddle.fluid as fluid + + +class L1(fluid.imperative.Layer): + def __init__(self, prefix): + super(L1, self).__init__(prefix) + self._param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)) + self.w1 = self.create_parameter( + attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False) + self.w2 = self.create_parameter( + attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False) + + def forward(self): + return self.w1 + self.w2 + + +class L2(fluid.imperative.Layer): + def __init__(self, prefix): + super(L2, self).__init__(prefix) + self.layer1 = L1(self.full_name()) + self.layer2 = L1(self.full_name()) + + def forward(self): + return self.layer1() + self.layer2() + + +class L3(fluid.imperative.Layer): + def __init__(self, prefix): + super(L3, self).__init__(prefix) + self.layer1 = L2(self.full_name()) + self.layer2 = L2(self.full_name()) + + def forward(self): + return self.layer1() + self.layer2() + + +class TestBaseLayer(unittest.TestCase): + def test_one_level(self): + with fluid.imperative.guard(): + l = L1('test_one_level') + ret = l() + self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0") + self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1") + self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2]))) + + def test_three_level(self): + with fluid.imperative.guard(): + l = L3('test_three_level') + names = [p.name for p in l.parameters()] + ret = l() + self.assertEqual(names[0], "test_three_level/L3_0/L2_0/L1_0.w_0") + self.assertEqual(names[1], "test_three_level/L3_0/L2_0/L1_0.w_1") + self.assertEqual(names[2], "test_three_level/L3_0/L2_0/L1_1.w_0") + self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1") + self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0") + self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1") + self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2]))) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b0afc2a2e4ad7b72b341536babfc595c2b6c3455 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_box_decoder_and_assign_op.py @@ -0,0 +1,96 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import sys +import math +from op_test import OpTest + + +def box_decoder_and_assign(deltas, weights, boxes, box_score, box_clip): + boxes = boxes.astype(deltas.dtype, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + wx, wy, ww, wh = weights + dx = deltas[:, 0::4] * wx + dy = deltas[:, 1::4] * wy + dw = deltas[:, 2::4] * ww + dh = deltas[:, 3::4] * wh + # Prevent sending too large values into np.exp() + dw = np.minimum(dw, box_clip) + dh = np.minimum(dh, box_clip) + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + output_assign_box = [] + for ino in range(len(pred_boxes)): + rank = np.argsort(-box_score[ino]) + maxidx = rank[0] + if maxidx == 0: + maxidx = rank[1] + beg_pos = maxidx * 4 + end_pos = maxidx * 4 + 4 + output_assign_box.append(pred_boxes[ino, beg_pos:end_pos]) + output_assign_box = np.array(output_assign_box) + + return pred_boxes, output_assign_box + + +class TestBoxDecoderAndAssignOpWithLoD(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_decoder_and_assign" + lod = [[4, 8, 8]] + num_classes = 10 + prior_box = np.random.random((20, 4)).astype('float32') + prior_box_var = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32) + target_box = np.random.random((20, 4 * num_classes)).astype('float32') + box_score = np.random.random((20, num_classes)).astype('float32') + box_clip = 4.135 + output_box, output_assign_box = box_decoder_and_assign( + target_box, prior_box_var, prior_box, box_score, box_clip) + + self.inputs = { + 'PriorBox': (prior_box, lod), + 'PriorBoxVar': prior_box_var, + 'TargetBox': (target_box, lod), + 'BoxScore': (box_score, lod), + } + self.attrs = {'box_clip': box_clip} + self.outputs = { + 'DecodeBox': output_box, + 'OutputAssignBox': output_assign_box + } + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 0968ace62b6a4e258f7763dbf6fbeda07feb4cd5..f4d14d4024923a75ef86cd18179b8bd9eed44913 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -115,6 +115,9 @@ class TestDistRunnerBase(object): strategy.allow_op_delay = False build_stra = fluid.BuildStrategy() + # FIXME force disable enable_inplace and memory_optimize + build_stra.enable_inplace = False + build_stra.memory_optimize = False if args.use_reduce: build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 3566fed215229223f4d2ecd1bbb66cb297dd7716..12132477d28c74c7da718321140a3ddef784fc30 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -22,6 +22,9 @@ import six import unittest import numpy as np +import gc +gc.set_debug(gc.DEBUG_COLLECTABLE) + import paddle.fluid as fluid @@ -99,6 +102,12 @@ class TranspilerTest(unittest.TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, startup): self.transpiler_test_impl() + # NOTE: run gc.collect to eliminate pybind side objects to + # prevent random double-deallocate when inherited in python. + del self.transpiler + del main + del startup + gc.collect() class TestBasicModel(TranspilerTest): @@ -797,6 +806,7 @@ class TestNCCL2Transpile(TranspilerTest): print([op.type for op in startup.global_block().ops]) self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) + gc.collect() else: pass diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index bc3c422f2f68b79b2d938e25625093b2ce8977bb..910f53a91a7b5ca1413adf9505ed2c3ad3d56dad 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2): train_reader, multi_devices=use_parallel_executor) exe = fluid.Executor(place) + fluid.default_startup_program().random_seed = 1 + fluid.default_main_program().random_seed = 1 exe.run(fluid.default_startup_program()) train_cp = compiler.CompiledProgram(fluid.default_main_program()) diff --git a/python/paddle/fluid/tests/unittests/test_expand_op.py b/python/paddle/fluid/tests/unittests/test_expand_op.py index 67a8d8f0721c2c75b432d68d64be8fc1035ffc74..690875662e666aab63ac5eb62df0fb52823b8dff 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_op.py @@ -109,5 +109,32 @@ class TestExpandOpRank4(OpTest): self.check_grad(['X'], 'Out') +class TestExpandOpInteger(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = { + 'X': np.random.randint( + 10, size=(2, 4, 5)).astype("int32") + } + self.attrs = {'expand_times': [2, 1, 4]} + output = np.tile(self.inputs['X'], (2, 1, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestExpandOpBoolean(OpTest): + def setUp(self): + self.op_type = "expand" + self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} + self.attrs = {'expand_times': [2, 1, 4]} + output = np.tile(self.inputs['X'], (2, 1, 4)) + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 820ad4af88e9dc49cbe57ac182e1ba0402725f3d..4582b2a0eed401235835374d4cd58782d8d3a68f 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -35,7 +35,7 @@ class TestFakeQuantizeOp(OpTest): self.check_output() -class TestFakeQuantizeOp(OpTest): +class TestFakeQuantizeRangeAbsMaxOp(OpTest): def setUp(self): self.op_type = "fake_quantize_range_abs_max" self.attrs = { @@ -43,8 +43,10 @@ class TestFakeQuantizeOp(OpTest): 'window_size': int(1), 'is_test': False } + x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10 + x = x.astype("float32") self.inputs = { - 'X': np.random.random((8, 16, 7, 7)).astype("float32"), + 'X': x, 'Iter': np.zeros(1).astype("int64"), 'InScale': np.zeros(1).astype("float32") } @@ -62,5 +64,36 @@ class TestFakeQuantizeOp(OpTest): self.check_output() +class TestFakeQuantizeRangeAbsMaxOp2(OpTest): + def setUp(self): + self.op_type = "fake_quantize_range_abs_max" + self.attrs = { + 'bit_length': int(8), + 'window_size': int(1), + 'is_test': True + } + x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10 + x = x.astype("float32") + scale = np.max(np.abs(x)).astype("float32") - 1.0 + out_scales = np.zeros(self.attrs['window_size']).astype("float32") + out_scales[0] = scale + + self.inputs = { + 'X': x, + 'Iter': np.zeros(1).astype("int64"), + 'InScale': scale.astype("float32") + } + xs = np.clip(x, -scale, scale) + qs = np.round(xs / scale * ((1 << (self.attrs['bit_length'] - 1)) - 1)) + self.outputs = { + 'Out': qs, + 'OutScale': scale.astype("float32"), + 'OutScales': out_scales, + } + + def test_check_output(self): + self.check_output(no_check_set=set(['OutScale', 'OutScales'])) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py index 03471a4432f2b6bf6220e79e99aa506628b1535b..763dfa2160d22c2d89cce834a839b5e2b5eaff55 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py @@ -121,6 +121,11 @@ class TestMNIST(TestParallelExecutorBase): regularization=fluid.regularizer.L2Decay(1e-6)) return optimizer + # NOTE(dzh): + # need to make it compatible with elewise fuse act + # FIXME (liuwei12) + # the new memory optimize strategy will crash this unittest + # add enable_inplace=False here to force pass the unittest not_fuse_op_first_loss, not_fuse_op_last_loss = self.check_network_convergence( model, feed_dict={"image": img, @@ -128,6 +133,8 @@ class TestMNIST(TestParallelExecutorBase): use_cuda=use_cuda, fuse_elewise_add_act_ops=False, memory_opt=False, + use_ir_memory_optimize=False, + enable_inplace=False, optimizer=_optimizer) fuse_op_first_loss, fuse_op_last_loss = self.check_network_convergence( model, @@ -136,6 +143,8 @@ class TestMNIST(TestParallelExecutorBase): use_cuda=use_cuda, fuse_elewise_add_act_ops=True, memory_opt=False, + use_ir_memory_optimize=False, + enable_inplace=False, optimizer=_optimizer) for loss in zip(not_fuse_op_first_loss, fuse_op_first_loss): diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py similarity index 82% rename from python/paddle/fluid/tests/unittests/test_imperative.py rename to python/paddle/fluid/tests/unittests/test_imperative_basic.py index baaddf9f2e5b123300f1d083b33ea644665348fd..97fc1eab3d372b07834e8b4e6b504eb7d677b0c7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -15,7 +15,6 @@ import contextlib import unittest import numpy as np -import sys import paddle.fluid as fluid from paddle.fluid import core @@ -24,8 +23,8 @@ from test_imperative_base import new_program_scope class MyLayer(fluid.imperative.Layer): - def __init__(self): - super(MyLayer, self).__init__() + def __init__(self, name_scope): + super(MyLayer, self).__init__(name_scope) def forward(self, inputs): x = fluid.layers.relu(inputs) @@ -50,13 +49,19 @@ class MyPyLayer(fluid.imperative.PyLayer): class MLP(fluid.imperative.Layer): - def __init__(self): - super(MLP, self).__init__() - self._fc1 = FC(3, - fluid.ParamAttr( + def __init__(self, name_scope): + super(MLP, self).__init__(name_scope) + self._fc1 = FC(self.full_name(), + 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = FC(4, - fluid.ParamAttr( + self._fc2 = FC(self.full_name(), + 4, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): @@ -67,46 +72,43 @@ class MLP(fluid.imperative.Layer): class SimpleRNNCell(fluid.imperative.Layer): - def __init__(self, step_input_size, hidden_size, output_size, param_attr): - super(SimpleRNNCell, self).__init__() + def __init__(self, name_scope, step_input_size, hidden_size, output_size, + param_attr): + super(SimpleRNNCell, self).__init__(name_scope) self.step_input_size = step_input_size self.hidden_size = hidden_size self.output_size = output_size - self._dype = core.VarDesc.VarType.FP32 - from paddle.fluid.layer_helper import LayerHelper - self._helper = LayerHelper( - 'SimpleRNNCell', act="tanh", param_attr=param_attr) + self._dtype = core.VarDesc.VarType.FP32 + self.param_attr = param_attr def _build_once(self, inputs, pre_hidden): i2h_param_shape = [self.step_input_size, self.hidden_size] h2h_param_shape = [self.hidden_size, self.hidden_size] h2o_param_shape = [self.output_size, self.hidden_size] - self._i2h_w = self._helper.create_parameter( - attr=self._helper.param_attr, + self._i2h_w = self.create_parameter( + attr=self.param_attr, shape=i2h_param_shape, dtype=self._dtype, is_bias=False) - self._h2h_w = self._helper.create_parameter( - attr=self._helper.param_attr, + self._h2h_w = self.create_parameter( + attr=self.param_attr, shape=h2h_param_shape, dtype=self._dtype, is_bias=False) - self._h2o_w = self._helper.create_parameter( - attr=self._helper.param_attr, + self._h2o_w = self.create_parameter( + attr=self.param_attr, shape=h2o_param_shape, dtype=self._dtype, is_bias=False) def forward(self, input, pre_hidden): - tmp_i2h = self._helper.create_variable_for_type_inference(self._dtype) - tmp_h2h = self._helper.create_variable_for_type_inference(self._dtype) - hidden = self._helper.create_variable_for_type_inference(self._dype) - out = self._helper.create_variable_for_type_inference(self._dype) - softmax_out = self._helper.create_variable_for_type_inference( - self._dtype) - reduce_out = self._helper.create_variable_for_type_inference( - self._dtype) + tmp_i2h = self.create_variable(dtype=self._dtype) + tmp_h2h = self.create_variable(dtype=self._dtype) + hidden = self.create_variable(dtype=self._dtype) + out = self.create_variable(dtype=self._dtype) + softmax_out = self.create_variable(dtype=self._dtype) + reduce_out = self.create_variable(dtype=self._dtype) self._helper.append_op( type="mul", inputs={"X": input, @@ -130,7 +132,7 @@ class SimpleRNNCell(fluid.imperative.Layer): outputs={'Out': hidden}, attrs={'axis': -1, 'use_mkldnn': False}) - hidden = self._helper.append_activation(hidden) + hidden = self._helper.append_activation(hidden, act='tanh') self._helper.append_op( type="mul", @@ -158,10 +160,11 @@ class SimpleRNNCell(fluid.imperative.Layer): class SimpleRNN(fluid.imperative.Layer): - def __init__(self): - super(SimpleRNN, self).__init__() + def __init__(self, name_scope): + super(SimpleRNN, self).__init__(name_scope) self.seq_len = 4 self._cell = SimpleRNNCell( + self.full_name(), 3, 3, 3, @@ -171,7 +174,7 @@ class SimpleRNN(fluid.imperative.Layer): outs = list() pre_hiddens = list() - init_hidden = fluid.layers.tensor.create_parameter( + init_hidden = self.create_parameter( attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), shape=[1, 3], @@ -205,7 +208,7 @@ class TestImperative(unittest.TestCase): with fluid.imperative.guard(): cl = core.Layer() cl.forward([]) - l = fluid.imperative.Layer() + l = fluid.imperative.Layer("l") self.assertRaises(NotImplementedError, l.forward, []) def test_pylayer_func_id(self): @@ -281,7 +284,7 @@ class TestImperative(unittest.TestCase): np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) with fluid.imperative.guard(): var_inp = fluid.imperative.base.to_variable(np_inp) - l = MyLayer() + l = MyLayer("my_layer") x = l(var_inp)[0] self.assertIsNotNone(x) dy_out = x._numpy() @@ -291,7 +294,7 @@ class TestImperative(unittest.TestCase): with new_program_scope(): inp = fluid.layers.data( name="inp", shape=[3], append_batch_size=False) - l = MyLayer() + l = MyLayer("my_layer") x = l(inp)[0] param_grads = fluid.backward.append_backward( x, parameter_list=[l._x_for_debug.name])[0] @@ -309,7 +312,7 @@ class TestImperative(unittest.TestCase): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) with fluid.imperative.guard(): var_inp = fluid.imperative.base.to_variable(np_inp) - mlp = MLP() + mlp = MLP("mlp") out = mlp(var_inp) dy_out = out._numpy() out._backward() @@ -318,7 +321,7 @@ class TestImperative(unittest.TestCase): with new_program_scope(): inp = fluid.layers.data( name="inp", shape=[2, 2], append_batch_size=False) - mlp = MLP() + mlp = MLP("mlp") out = mlp(inp) param_grads = fluid.backward.append_backward( out, parameter_list=[mlp._fc1._w.name])[0] @@ -333,6 +336,18 @@ class TestImperative(unittest.TestCase): self.assertTrue(np.allclose(dy_out, static_out)) self.assertTrue(np.allclose(dy_grad, static_grad)) + params = mlp.parameters(True) + self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name) + self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name) + self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name) + self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name) + self.assertEqual(len(params), 4) + + sublayers = mlp.sublayers(True) + self.assertEqual(mlp._fc1, sublayers[0]) + self.assertEqual(mlp._fc2, sublayers[1]) + self.assertEqual(len(sublayers), 2) + def test_rnn(self): np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]) @@ -341,7 +356,7 @@ class TestImperative(unittest.TestCase): with fluid.imperative.guard(): var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3]) - simple_rnn = SimpleRNN() + simple_rnn = SimpleRNN("simple_rnn") outs, pre_hiddens = simple_rnn.forward(var_inp) dy_out = outs[3]._numpy() outs[3]._backward() @@ -352,7 +367,7 @@ class TestImperative(unittest.TestCase): with new_program_scope(): inp = fluid.layers.data( name="inp", shape=[1, 4, 3], append_batch_size=False) - simple_rnn = SimpleRNN() + simple_rnn = SimpleRNN("simple_rnn") outs, pre_hiddens = simple_rnn(inp) param_grads = fluid.backward.append_backward(outs[3]) exe = fluid.Executor(fluid.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py index 681661bfc63db95653be371688a047efe96f3866..a80202d6dddacaa4cb6fa3efd3c3dfd5b0ab4400 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py @@ -28,13 +28,10 @@ from paddle.fluid.imperative.base import to_variable class Discriminator(fluid.imperative.Layer): - def __init__(self): - super(Discriminator, self).__init__() - self._fc1 = FC(size=32, act='elu', name="d_fc1") - self._fc2 = FC(size=1, name="d_fc2") - - def parameters(self): - return self._fc1.parameters() + self._fc2.parameters() + def __init__(self, name_scope): + super(Discriminator, self).__init__(name_scope) + self._fc1 = FC(self.full_name(), size=32, act='elu') + self._fc2 = FC(self.full_name(), size=1) def forward(self, inputs): x = self._fc1(inputs) @@ -42,15 +39,11 @@ class Discriminator(fluid.imperative.Layer): class Generator(fluid.imperative.Layer): - def __init__(self): - super(Generator, self).__init__() - self._fc1 = FC(size=64, act='elu', name="g_fc1") - self._fc2 = FC(size=64, act='elu', name="g_fc2") - self._fc3 = FC(size=1, name="g_fc3") - - def parameters(self): - return self._fc1.parameters() + self._fc2.parameters( - ) + self._fc3.parameters() + def __init__(self, name_scope): + super(Generator, self).__init__(name_scope) + self._fc1 = FC(self.full_name(), size=64, act='elu') + self._fc2 = FC(self.full_name(), size=64, act='elu') + self._fc3 = FC(self.full_name(), size=1) def forward(self, inputs): x = self._fc1(inputs) @@ -72,8 +65,8 @@ class TestImperativeMnist(unittest.TestCase): scope = fluid.core.Scope() with new_program_scope( main=discriminate_p, startup=startup, scope=scope): - discriminator = Discriminator() - generator = Generator() + discriminator = Discriminator("d") + generator = Generator("g") img = fluid.layers.data( name="img", shape=[2, 1], append_batch_size=False) @@ -100,8 +93,8 @@ class TestImperativeMnist(unittest.TestCase): sgd.minimize(d_loss) with new_program_scope(main=generate_p, startup=startup, scope=scope): - discriminator = Discriminator() - generator = Generator() + discriminator = Discriminator("d") + generator = Generator("g") noise = fluid.layers.data( name="noise", shape=[2, 2], append_batch_size=False) @@ -141,8 +134,8 @@ class TestImperativeMnist(unittest.TestCase): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - discriminator = Discriminator() - generator = Generator() + discriminator = Discriminator("d") + generator = Generator("g") sgd = SGDOptimizer(learning_rate=1e-3) d_real = discriminator(to_variable(np.ones([2, 1], np.float32))) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 08b155acc657c3a4a73f5b1d72ac356fc7e83a58..5b3c250501386a7854313218f5ea338281824252 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import print_function + import contextlib import unittest import numpy as np @@ -28,6 +30,7 @@ from test_imperative_base import new_program_scope class SimpleImgConvPool(fluid.imperative.Layer): def __init__(self, + name_scope, num_channels, num_filters, filter_size, @@ -44,9 +47,10 @@ class SimpleImgConvPool(fluid.imperative.Layer): use_cudnn=False, param_attr=None, bias_attr=None): - super(SimpleImgConvPool, self).__init__() + super(SimpleImgConvPool, self).__init__(name_scope) self._conv2d = Conv2D( + self.full_name(), num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, @@ -59,6 +63,7 @@ class SimpleImgConvPool(fluid.imperative.Layer): use_cudnn=use_cudnn) self._pool2d = Pool2D( + self.full_name(), pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, @@ -73,19 +78,20 @@ class SimpleImgConvPool(fluid.imperative.Layer): class MNIST(fluid.imperative.Layer): - def __init__(self, param_attr=None, bias_attr=None): - super(MNIST, self).__init__() + def __init__(self, name_scope): + super(MNIST, self).__init__(name_scope) self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") + self.full_name(), 1, 20, 5, 2, 2, act="relu") self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") + self.full_name(), 20, 50, 5, 2, 2, act="relu") pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = FC(10, + self._fc = FC(self.full_name(), + 10, param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.NormalInitializer( loc=0.0, scale=scale)), @@ -101,47 +107,46 @@ class MNIST(fluid.imperative.Layer): class TestImperativeMnist(unittest.TestCase): def test_mnist_float32(self): seed = 90 - batch_num = 2 + epoch_num = 1 with fluid.imperative.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - mnist = MNIST() + mnist = MNIST("mnist") sgd = SGDOptimizer(learning_rate=1e-3) train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128) + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) dy_param_init_value = {} - for batch_id, data in enumerate(train_reader()): - if batch_id >= batch_num: - break - - dy_x_data = np.array( - [x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - 128, 1) - - img = to_variable(dy_x_data) - label = to_variable(y_data) - label._stop_gradient = True - - cost = mnist(img) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - dy_out = avg_loss._numpy() - - if batch_id == 0: - for param in fluid.default_main_program().global_block( - ).all_parameters(): - dy_param_init_value[param.name] = param._numpy() - - avg_loss._backward() - sgd.minimize(avg_loss) - mnist.clear_gradients() - dy_param_value = {} - for param in fluid.default_main_program().global_block( - ).all_parameters(): - dy_param_value[param.name] = param._numpy() + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + dy_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(128, 1) + + img = to_variable(dy_x_data) + label = to_variable(y_data) + label._stop_gradient = True + + cost = mnist(img) + loss = fluid.layers.cross_entropy(cost, label) + avg_loss = fluid.layers.mean(loss) + + dy_out = avg_loss._numpy() + + if epoch == 0 and batch_id == 0: + for param in mnist.parameters(): + dy_param_init_value[param.name] = param._numpy() + + avg_loss._backward() + sgd.minimize(avg_loss) + mnist.clear_gradients() + + dy_param_value = {} + for param in mnist.parameters(): + dy_param_value[param.name] = param._numpy() with new_program_scope(): fluid.default_startup_program().random_seed = seed @@ -150,10 +155,10 @@ class TestImperativeMnist(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - mnist = MNIST() + mnist = MNIST("mnist") sgd = SGDOptimizer(learning_rate=1e-3) train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=128) + paddle.dataset.mnist.train(), batch_size=128, drop_last=True) img = fluid.layers.data( name='pixel', shape=[1, 28, 28], dtype='float32') @@ -166,8 +171,7 @@ class TestImperativeMnist(unittest.TestCase): # initialize params and fetch them static_param_init_value = {} static_param_name_list = [] - for param in fluid.default_startup_program().global_block( - ).all_parameters(): + for param in mnist.parameters(): static_param_name_list.append(param.name) out = exe.run(fluid.default_startup_program(), @@ -176,26 +180,29 @@ class TestImperativeMnist(unittest.TestCase): for i in range(len(static_param_name_list)): static_param_init_value[static_param_name_list[i]] = out[i] - for batch_id, data in enumerate(train_reader()): - if batch_id >= batch_num: - break - - static_x_data = np.array( - [x[0].reshape(1, 28, 28) for x in data]).astype('float32') - y_data = np.array([x[1] for x in data]).astype('int64').reshape( - [128, 1]) - - fetch_list = [avg_loss.name] - fetch_list.extend(static_param_name_list) - out = exe.run(fluid.default_main_program(), - feed={"pixel": static_x_data, - "label": y_data}, - fetch_list=fetch_list) - - static_param_value = {} - static_out = out[0] - for i in range(1, len(out)): - static_param_value[static_param_name_list[i - 1]] = out[i] + for epoch in range(epoch_num): + for batch_id, data in enumerate(train_reader()): + static_x_data = np.array( + [x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape([128, 1]) + + fetch_list = [avg_loss.name] + fetch_list.extend(static_param_name_list) + out = exe.run( + fluid.default_main_program(), + feed={"pixel": static_x_data, + "label": y_data}, + fetch_list=fetch_list) + + static_param_value = {} + static_out = out[0] + for i in range(1, len(out)): + static_param_value[static_param_name_list[i - 1]] = out[ + i] + + self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all())) for key, value in six.iteritems(static_param_init_value): self.assertTrue(np.allclose(value, dy_param_init_value[key])) @@ -203,7 +210,7 @@ class TestImperativeMnist(unittest.TestCase): self.assertTrue(np.allclose(static_out, dy_out)) for key, value in six.iteritems(static_param_value): - self.assertTrue(np.allclose(value, dy_param_value[key])) + self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index afe990e74ff96dfbca4f335b561f9bbe7d295246..3b602303ae9a183c7b66f5613321f58898fdfcc2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -28,42 +28,52 @@ from paddle.fluid.backward import append_backward class SimpleLSTMRNN(fluid.imperative.Layer): def __init__(self, + name_scope, hidden_size, num_steps, num_layers=2, init_scale=0.1, dropout=None): - super(SimpleLSTMRNN, self).__init__() + super(SimpleLSTMRNN, self).__init__(name_scope) self._hidden_size = hidden_size self._num_layers = num_layers self._init_scale = init_scale self._dropout = dropout self._input = None self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] def _build_once(self, input_embedding, init_hidden=None, init_cell=None): self.weight_1_arr = [] self.weight_2_arr = [] self.bias_arr = [] - self.hidden_array = [] - self.cell_array = [] self.mask_array = [] for i in range(self._num_layers): - weight_1 = fluid.layers.create_parameter( + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - name="fc_weight1_" + str(i), default_initializer=fluid.initializer.UniformInitializer( low=-self._init_scale, high=self._init_scale)) self.weight_1_arr.append(weight_1) - bias_1 = fluid.layers.create_parameter( - [self._hidden_size * 4], + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], dtype="float32", - name="fc_bias1_" + str(i), default_initializer=fluid.initializer.Constant(0.0)) self.bias_arr.append(bias_1) + def forward(self, input_embedding, init_hidden=None, init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): pre_hidden = fluid.layers.slice( init_hidden, axes=[0], starts=[i], ends=[i + 1]) pre_cell = fluid.layers.slice( @@ -75,17 +85,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer): self.hidden_array.append(pre_hidden) self.cell_array.append(pre_cell) - def parameters(self): - parameters = list() - for param in self.weight_1_arr: - parameters.append(param) - for param in self.weight_2_arr: - parameters.append(param) - for bias in self.bias_arr: - parameters.append(bias) - return parameters - - def forward(self, input_embedding, init_hidden=None, init_cell=None): res = [] for index in range(self._num_steps): self._input = fluid.layers.slice( @@ -134,13 +133,14 @@ class SimpleLSTMRNN(fluid.imperative.Layer): class PtbModel(fluid.imperative.Layer): def __init__(self, + name_scope, hidden_size, vocab_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None): - super(PtbModel, self).__init__() + super(PtbModel, self).__init__(name_scope) self.hidden_size = hidden_size self.vocab_size = vocab_size self.init_scale = init_scale @@ -148,12 +148,14 @@ class PtbModel(fluid.imperative.Layer): self.num_steps = num_steps self.dropout = dropout self.simple_lstm_rnn = SimpleLSTMRNN( + self.full_name(), hidden_size, num_steps, num_layers=num_layers, init_scale=init_scale, dropout=dropout) self.embedding = Embedding( + self.full_name(), size=[vocab_size, hidden_size], dtype='float32', is_sparse=False, @@ -161,30 +163,23 @@ class PtbModel(fluid.imperative.Layer): name='embedding_para', initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) - self.softmax_weight = fluid.layers.create_parameter( - [self.hidden_size, self.vocab_size], + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], dtype="float32", - name="softmax_weight", default_initializer=fluid.initializer.UniformInitializer( low=-self.init_scale, high=self.init_scale)) - self.softmax_bias = fluid.layers.create_parameter( - [self.vocab_size], + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], dtype="float32", - name='softmax_bias', default_initializer=fluid.initializer.UniformInitializer( low=-self.init_scale, high=self.init_scale)) def _build_once(self, input, label, init_hidden, init_cell): pass - def parameters(self): - parameters = self.simple_lstm_rnn.parameters() + [ - self.softmax_weight, self.softmax_bias - ] + self.embedding.parameters() - return parameters - def forward(self, input, label, init_hidden, init_cell): - init_h = fluid.layers.reshape( init_hidden, shape=[self.num_layers, -1, self.hidden_size]) @@ -234,6 +229,7 @@ class TestImperativePtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( + "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -246,7 +242,9 @@ class TestImperativePtbRnn(unittest.TestCase): dy_loss = None last_hidden = None last_cell = None - for i in range(2): + batch_num = 50 + + for i in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') x_data = x_data.reshape((-1, num_steps, 1)) @@ -272,8 +270,8 @@ class TestImperativePtbRnn(unittest.TestCase): with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - # TODO: marsyang1993 Change seed to ptb_model = PtbModel( + "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -305,7 +303,7 @@ class TestImperativePtbRnn(unittest.TestCase): static_loss_value = None static_last_cell_value = None static_last_hidden_value = None - for i in range(2): + for i in range(batch_num): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') x_data = x_data.reshape((-1, num_steps, 1)) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index c27fd0b8024a8fa3310a62de34299fb621e2902f..94ac3933151ac612ea9d308f0e28c73f0c067abf 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -21,7 +21,6 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid.imperative.base import to_variable from test_imperative_base import new_program_scope @@ -71,15 +70,17 @@ def optimizer_setting(params): class ConvBNLayer(fluid.imperative.Layer): def __init__(self, + name_scope, num_channels, num_filters, filter_size, stride=1, groups=1, act=None): - super(ConvBNLayer, self).__init__() + super(ConvBNLayer, self).__init__(name_scope) self._conv = Conv2D( + self.full_name(), num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, @@ -89,7 +90,7 @@ class ConvBNLayer(fluid.imperative.Layer): act=None, bias_attr=None) - self._batch_norm = BatchNorm(num_filters, act=act) + self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act) def forward(self, inputs): y = self._conv(inputs) @@ -99,21 +100,29 @@ class ConvBNLayer(fluid.imperative.Layer): class BottleneckBlock(fluid.imperative.Layer): - def __init__(self, num_channels, num_filters, stride, shortcut=True): - super(BottleneckBlock, self).__init__() + def __init__(self, + name_scope, + num_channels, + num_filters, + stride, + shortcut=True): + super(BottleneckBlock, self).__init__(name_scope) self.conv0 = ConvBNLayer( + self.full_name(), num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu') self.conv1 = ConvBNLayer( + self.full_name(), num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu') self.conv2 = ConvBNLayer( + self.full_name(), num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, @@ -121,6 +130,7 @@ class BottleneckBlock(fluid.imperative.Layer): if not shortcut: self.short = ConvBNLayer( + self.full_name(), num_channels=num_channels, num_filters=num_filters * 4, filter_size=1, @@ -142,13 +152,13 @@ class BottleneckBlock(fluid.imperative.Layer): y = fluid.layers.elementwise_add(x=short, y=conv2) - layer_helper = LayerHelper('elementwise_add_activation', act='relu') + layer_helper = LayerHelper(self.full_name(), act='relu') return layer_helper.append_activation(y) class ResNet(fluid.imperative.Layer): - def __init__(self, layers=50, class_dim=102): - super(ResNet, self).__init__() + def __init__(self, name_scope, layers=50, class_dim=102): + super(ResNet, self).__init__(name_scope) self.layers = layers supported_layers = [50, 101, 152] @@ -164,31 +174,44 @@ class ResNet(fluid.imperative.Layer): num_filters = [64, 128, 256, 512] self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') + self.full_name(), + num_channels=3, + num_filters=64, + filter_size=7, + stride=2, + act='relu') self.pool2d_max = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + self.full_name(), + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') self.bottleneck_block_list = [] num_channels = 64 for block in range(len(depth)): shortcut = False for i in range(depth[block]): - bottleneck_block = BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + self.full_name(), + num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut)) num_channels = bottleneck_block._num_channels_out self.bottleneck_block_list.append(bottleneck_block) shortcut = True self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.full_name(), pool_size=7, pool_type='avg', global_pooling=True) import math stdv = 1.0 / math.sqrt(2048 * 1.0) - self.out = FC(size=class_dim, + self.out = FC(self.full_name(), + size=class_dim, act='softmax', param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.Uniform(-stdv, stdv))) @@ -208,12 +231,12 @@ class TestImperativeResnet(unittest.TestCase): seed = 90 batch_size = train_parameters["batch_size"] - batch_num = 1 + batch_num = 20 with fluid.imperative.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - resnet = ResNet() + resnet = ResNet("resnet") optimizer = optimizer_setting(train_parameters) np.random.seed(seed) import random @@ -223,8 +246,7 @@ class TestImperativeResnet(unittest.TestCase): batch_size=batch_size) dy_param_init_value = {} - for param in fluid.default_main_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): dy_param_init_value[param.name] = param._numpy() for batch_id, data in enumerate(train_reader()): @@ -247,16 +269,14 @@ class TestImperativeResnet(unittest.TestCase): dy_out = avg_loss._numpy() if batch_id == 0: - for param in fluid.default_main_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): if param.name not in dy_param_init_value: dy_param_init_value[param.name] = param._numpy() avg_loss._backward() dy_grad_value = {} - for param in fluid.default_main_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): if not param.stop_gradient: np_array = np.array(param._ivar._grad_ivar().value() .get_tensor()) @@ -267,8 +287,7 @@ class TestImperativeResnet(unittest.TestCase): resnet.clear_gradients() dy_param_value = {} - for param in fluid.default_main_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): dy_param_value[param.name] = param._numpy() with new_program_scope(): @@ -278,7 +297,7 @@ class TestImperativeResnet(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - resnet = ResNet() + resnet = ResNet("resnet") optimizer = optimizer_setting(train_parameters) np.random.seed(seed) @@ -300,11 +319,9 @@ class TestImperativeResnet(unittest.TestCase): static_param_init_value = {} static_param_name_list = [] static_grad_name_list = [] - for param in fluid.default_startup_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): static_param_name_list.append(param.name) - for param in fluid.default_main_program().global_block( - ).all_parameters(): + for param in resnet.parameters(): if not param.stop_gradient: static_grad_name_list.append(param.name + core.grad_var_suffix()) @@ -349,6 +366,7 @@ class TestImperativeResnet(unittest.TestCase): self.assertTrue(np.allclose(static_out, dy_out)) self.assertEqual(len(dy_param_init_value), len(static_param_init_value)) + for key, value in six.iteritems(static_param_init_value): self.assertTrue(np.allclose(value, dy_param_init_value[key])) self.assertTrue(np.isfinite(value.all())) diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b1fe2b40b924dd46c4e518153e0edec4fb5f0a06 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_ifelse_op.py @@ -0,0 +1,123 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# nlp model stack of op operate on lod. It's a classical test case in optimize pass. + +from __future__ import print_function + +import numpy as np + +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +import unittest +import paddle.fluid.core as core + +from paddle.fluid import compiler, Program, program_guard +from paddle.fluid.executor import Executor +from paddle.fluid.backward import append_backward +from paddle.fluid.optimizer import MomentumOptimizer +from ir_memory_optimize_net_base import TestIrMemOptBase + + +class TestIrMemoryOptimizeIfElseOp(unittest.TestCase): + def check_network_convergence(self, use_cuda=True, py_opt=False, + iter_num=5): + prog = Program() + startup_prog = Program() + prog.random_seed = 100 + startup_prog.random_seed = 100 + with program_guard(prog, startup_prog): + image = layers.data(name='x', shape=[784], dtype='float32') + + label = layers.data(name='y', shape=[1], dtype='int64') + + limit = layers.fill_constant(shape=[1], dtype='int64', value=5) + cond = layers.less_than(x=label, y=limit) + ie = layers.IfElse(cond) + + with ie.true_block(): + true_image = ie.input(image) + hidden = layers.fc(input=true_image, size=100, act='tanh') + prob = layers.fc(input=hidden, size=10, act='softmax') + ie.output(prob) + + with ie.false_block(): + false_image = ie.input(image) + hidden = layers.fc(input=false_image, size=200, act='tanh') + prob = layers.fc(input=hidden, size=10, act='softmax') + ie.output(prob) + + prob = ie() + loss = layers.cross_entropy(input=prob[0], label=label) + avg_loss = layers.mean(loss) + + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + optimizer.minimize(avg_loss, startup_prog) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=200) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = Executor(place) + + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.use_cuda = use_cuda + + if py_opt: + fluid.memory_optimize(fluid.default_main_program()) + train_cp = compiler.CompiledProgram(fluid.default_main_program()) + train_cp = train_cp.with_data_parallel( + loss_name=avg_loss.name, exec_strategy=exec_strategy) + fetch_list = [avg_loss.name] + + exe.run(startup_prog) + PASS_NUM = 100 + loop = 0 + ret = [] + for pass_id in range(PASS_NUM): + for data in train_reader(): + x_data = np.array([x[0] for x in data]).astype("float32") + y_data = np.array([x[1] for x in data]).astype("int64") + y_data = y_data.reshape((y_data.shape[0], 1)) + + outs = exe.run(train_cp, + feed={'x': x_data, + 'y': y_data}, + fetch_list=[avg_loss]) + + loop += 1 + ret.append(outs[0]) + if iter_num == loop: + return ret + return ret + + def test_ifelse(self): + ret1 = self.check_network_convergence(False, True) + print(ret1) + ret2 = self.check_network_convergence(False, False) + print(ret2) + self.assertTrue(np.allclose(ret1, ret2)) + + if fluid.core.is_compiled_with_cuda(): + ret1 = self.check_network_convergence(True, True) + print(ret1) + ret2 = self.check_network_convergence(True, False) + print(ret2) + self.assertTrue(np.allclose(ret1, ret2)) + #self.assertEqual(ret1, ret2) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py new file mode 100644 index 0000000000000000000000000000000000000000..30b6d6106cdc46cfed201e5bb44a0c80d7e8ca3d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py @@ -0,0 +1,55 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# nlp model stack of op operate on lod. It's a classical test case in optimize pass. + +from __future__ import print_function + +import paddle.fluid as fluid +import unittest +from ir_memory_optimize_net_base import TestIrMemOptBase + + +def lstm_net(data, + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + emb_lr=30.0): + emb = fluid.layers.embedding( + input=data, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr(learning_rate=emb_lr)) + fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4) + + lstm_h, c = fluid.layers.dynamic_lstm( + input=fc0, size=hid_dim * 4, is_reverse=False) + lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') + lstm_max_tanh = fluid.layers.tanh(lstm_max) + fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + return avg_cost + + +class TestIrMemOptRNN(TestIrMemOptBase): + def setUp(self): + self.network = lstm_net + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..50d998990f9bbba0d35241f5e53d05675ca08c28 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py @@ -0,0 +1,73 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import unittest +from timeit import default_timer as timer +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.dataset.wmt16 as wmt16 + +os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" +os.environ[ + 'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio' + +from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, transformer, prepare_batch_input +from parallel_executor_test_base import TestParallelExecutorBase + + +# NOTE(dzhwinter): test diferent strategy colisions. +# open the eager delete tensor strategy by default. +class TestTransformerWithIR(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): + os.environ['CPU_NUM'] = str(4) + reader = paddle.batch( + wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=transformer_model.batch_size) + + with fluid.recordio_writer.create_recordio_writer( + os.environ.get("RECORDIO_FILENAME")) as writer: + for batch in reader(): + for tensor in prepare_batch_input( + batch, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head): + t = fluid.LoDTensor() + t.set(tensor, fluid.CPUPlace()) + writer.append_tensor(t) + writer.complete_append_tensor() + + def test_main(self): + if core.is_compiled_with_cuda(): + # check python transpiler + self.check_network_convergence( + transformer, + use_cuda=True, + memory_opt=True, + use_ir_memory_optimize=False, + iter=2) + # check IR memory optimize + self.check_network_convergence( + transformer, + use_cuda=True, + memory_opt=False, + use_ir_memory_optimize=True, + iter=2) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index e7bc1601a54c8615e0e787d74145aa4987b6cb88..ff49c1be979a2076952963ec54302fb68361eedf 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -374,6 +374,17 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) + def test_sampled_softmax_with_cross_entropy(self): + program = Program() + with program_guard(program): + logits = layers.data(name='Logits', shape=[256], dtype='float64') + label = layers.data(name='Label', shape=[1], dtype='int64') + num_samples = 25 + output = layers.sampled_softmax_with_cross_entropy(logits, label, + num_samples) + self.assertIsNotNone(output) + print(str(program)) + @decorators.prog_scope() def test_nce(self): window_size = 5 @@ -1024,6 +1035,19 @@ class TestBook(unittest.TestCase): print(str(program)) + def test_spectral_norm(self): + program = Program() + with program_guard(program): + weight = layers.data( + name='weight', + shape=[2, 3, 32, 32], + dtype="float32", + append_batch_size=False) + out = layers.spectral_norm(weight, dim=1, power_iters=1) + self.assertIsNotNone(out) + + print(str(program)) + def test_shuffle_channel(self): program = Program() with program_guard(program): diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index 0d3e6d73e0149fe633b8f1de9041068c2e3bb293..5212d97dfbc16e463e5f68456a3d735ac6679ae1 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values): return values[len(values) - 1] +def cosine_decay(global_step, learning_rate, step_each_epoch, epochs): + cur_epoch = math.floor(global_step / step_each_epoch) + decayed_lr = learning_rate * 0.5 * ( + math.cos(cur_epoch * math.pi / epochs) + 1) + return decayed_lr + + class TestLearningRateDecay(unittest.TestCase): def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs): places = [fluid.CPUPlace()] @@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase): "boundaries": [3, 6, 9], "values": [0.1, 0.2, 0.3, 0.4] }), + (cosine_decay, layers.cosine_decay, { + "learning_rate": 0.1, + "step_each_epoch": 100, + "epochs": 120 + }), ] for py_decay_fn, fluid_decay_fn, kwargs in decay_fns: diff --git a/python/paddle/fluid/tests/unittests/test_lstmp_op.py b/python/paddle/fluid/tests/unittests/test_lstmp_op.py index 9c3ec45515ffe0a07541fd9cfb7e92b079264071..0645cfedb8089f5618c54672cac91343e5dee285 100644 --- a/python/paddle/fluid/tests/unittests/test_lstmp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstmp_op.py @@ -36,12 +36,14 @@ def lstmp( w_b=None, # 1 x 4D w_c=None, # 1 x 3D is_reverse=False, + proj_clip=0.0, + cell_clip=0.0, act_gate=None, act_cell=None, act_cand=None, act_proj=None): - def _step(x, w_r, w_rh, w_c, r_pre, c_pre, act_gate, act_cell, act_cand, - act_proj): + def _step(x, w_r, w_rh, w_c, r_pre, c_pre, proj_clip, cell_clip, act_gate, + act_cell, act_cand, act_proj): g = np.dot(r_pre, w_r) # 1 x 4D g = g + x g = np.reshape(g, (1, g.size)) @@ -55,6 +57,17 @@ def lstmp( g_f = act_gate(g_f + w_fc * c_pre) # 1 x D c = g_f * c_pre + g_i * act_cand(c) # 1 x D + def array_clip(a, clip): + size = np.prod(a.shape) + new_a = np.reshape(a, (size)) + for i in range(size): + new_a[i] = max(new_a[i], -1.0 * clip) + new_a[i] = min(new_a[i], clip) + new_a = np.reshape(new_a, a.shape) + return new_a + + if cell_clip > 0.0: + c = array_clip(c, cell_clip) if w_c is None: g_o = act_gate(g_o) # 1 x D else: @@ -64,6 +77,8 @@ def lstmp( # projection r = np.dot(h, w_rh) r = act_proj(r) + if proj_clip > 0.0: + r = array_clip(r, proj_clip) return r, c def _reverse(x, offset): @@ -87,13 +102,13 @@ def lstmp( # compute one sequence seq_len = lod[0][i] x = input[offset[i]:offset[i + 1], :] - r_pre = np.dot(h0[i], w_rh) # 1 x P - r_pre = act_proj(r_pre) + r_pre = h0[i] c_pre = c0[i] # 1 x D for j in range(seq_len): # compute one step - r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, act_gate, - act_cell, act_cand, act_proj) + r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, proj_clip, + cell_clip, act_gate, act_cell, act_cand, + act_proj) projection.append(r_pre.flatten()) cell.append(c_pre.flatten()) @@ -123,13 +138,12 @@ class TestLstmpOp(LstmTest.TestLstmOp): T = sum(self.lod[0]) N = len(self.lod[0]) - x = np.random.normal(size=(T, 4 * self.D)).astype('float64') if self.has_initial_state: - h0 = np.random.normal(size=(N, self.D)).astype('float64') + h0 = np.random.normal(size=(N, self.P)).astype('float64') c0 = np.random.normal(size=(N, self.D)).astype('float64') else: - h0 = np.zeros((N, self.D)).astype('float64') + h0 = np.zeros((N, self.P)).astype('float64') c0 = np.zeros((N, self.D)).astype('float64') w = np.random.normal(size=(self.P, 4 * self.D)).astype('float64') if self.use_peepholes: @@ -140,9 +154,12 @@ class TestLstmpOp(LstmTest.TestLstmOp): w_b = b[:, 0:4 * self.D] w_c = b[:, 4 * self.D:] if self.use_peepholes else None w_rh = np.random.normal(size=(self.D, self.P)).astype('float64') + proj_clip = 0.1 + cell_clip = 0.1 r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse, - ACTIVATION[self.act_gate], ACTIVATION[self.act_cell], - ACTIVATION[self.act_cand], ACTIVATION[self.act_proj]) + proj_clip, cell_clip, ACTIVATION[self.act_gate], + ACTIVATION[self.act_cell], ACTIVATION[self.act_cand], + ACTIVATION[self.act_proj]) self.inputs = {'Input': (x, self.lod), 'Weight': w, 'ProjWeight': w_rh} @@ -159,6 +176,8 @@ class TestLstmpOp(LstmTest.TestLstmOp): self.attrs = { 'use_peepholes': self.use_peepholes, 'is_reverse': self.is_reverse, + 'proj_clip': proj_clip, + 'cell_clip': cell_clip, 'gate_activation': self.act_gate, 'cell_activation': self.act_cell, 'candidate_activation': self.act_cand, @@ -171,14 +190,14 @@ class TestLstmpOp(LstmTest.TestLstmOp): def test_check_grad(self): # TODO(qingqing) remove folowing lines after the check_grad is refined. N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') self.check_grad( ['Input', 'Weight', 'ProjWeight', 'Bias'], ['Projection'], - max_relative_error=1e-2) + max_relative_error=1e-2, + numeric_grad_delta=0.0000005) class TestLstmpOpHasInitial(TestLstmpOp): @@ -188,7 +207,6 @@ class TestLstmpOpHasInitial(TestLstmpOp): def test_check_grad(self): # TODO(qingqing) remove folowing lines after the check_grad is refined. N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -196,11 +214,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'], ['Projection'], + numeric_grad_delta=0.0000005, max_relative_error=1e-2) def test_check_grad_ingore_bias(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -208,11 +226,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'ProjWeight', 'Weight'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('Bias')) def test_check_grad_ingore_weight(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -220,11 +238,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'ProjWeight', 'Bias'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('Weight')) def test_check_grad_ingore_proj_weight(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -232,11 +250,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'Weight', 'Bias'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('ProjWeight')) def test_check_grad_ingore_input(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -244,11 +262,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Weight', 'ProjWeight', 'Bias'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('Input')) def test_check_grad_ingore_h0(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -256,11 +274,11 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('H0')) def test_check_grad_ingore_c0(self): N = len(self.lod[0]) - self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( @@ -268,6 +286,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): self.check_grad( ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], ['Projection'], max_relative_error=1e-2, + numeric_grad_delta=0.0000005, no_grad_set=set('C0')) diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 8fc391a1ff2529460b038979c0c7d0a9d905a7e0..69e060341ed9dbb711f13f860e047e19f741b336 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -173,13 +173,16 @@ def lod_multiclass_nms(boxes, scores, background, score_threshold, normalized, shared=False) if nmsed_num == 0: - #lod.append(1) continue lod.append(nmsed_num) + tmp_det_out = [] for c, indices in nmsed_outs.items(): for idx in indices: xmin, ymin, xmax, ymax = box[idx, c, :] - det_outs.append([c, score[idx][c], xmin, ymin, xmax, ymax]) + tmp_det_out.append([c, score[idx][c], xmin, ymin, xmax, ymax]) + sorted_det_out = sorted( + tmp_det_out, key=lambda tup: tup[0], reverse=False) + det_outs.extend(sorted_det_out) if len(lod) == 0: lod.append(1) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 34c9b7e006950f1c10fb265ce903b1e836281de7..95ddc135b3da5bc144f64f20dab5dfd2b5bd3215 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -274,7 +274,7 @@ class TestAdagradOptimizer(unittest.TestCase): # Check init_program init_ops = init_program.global_block().ops - self.assertEqual(len(init_ops), 2) + self.assertEqual(len(init_ops), 3) self.assertEqual(init_ops[0].type, "fill_constant") self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) self.assertEqual(init_ops[1].type, "fill_constant") diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index e0eba2147c6288e5b2f30373f610db78493d5e03..bda8b666dcde22b0e4bacdb5db252267f4c7e34b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -59,8 +59,12 @@ class TestFetchAndFeed(unittest.TestCase): exe = fluid.Executor(place) exe.run(startup) + #FIXME force disable enable_inplace and memory_optimize to pass the unittest + build_strategy = fluid.BuildStrategy() + build_strategy.enable_inplace = False + build_strategy.memory_optimize = False train_cp = compiler.CompiledProgram(main_program).with_data_parallel( - loss_name=loss.name) + loss_name=loss.name, build_strategy=build_strategy) run_parallel_exe(train_cp, exe, use_cuda, data, label, loss) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py new file mode 100644 index 0000000000000000000000000000000000000000..041c56fce11e6f6abb0a941a9e9c9ad1cb60ab42 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py @@ -0,0 +1,107 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest + +import numpy as np +import os +os.environ['FLAGS_enable_parallel_graph'] = str(1) +import paddle.fluid.core as core +import os +import paddle.fluid as fluid +from parallel_executor_test_base import TestParallelExecutorBase + + +def simple_fc_net(use_feed): + img = fluid.layers.data(name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = img + for _ in range(4): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class TestMNIST(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): + os.environ['CPU_NUM'] = str(4) + + def _init_data(self): + np.random.seed(5) + img = np.random.random(size=[32, 784]).astype(np.float32) + label = np.ones(shape=[32, 1], dtype='int64') + return img, label + + # simple_fc + def check_simple_fc_convergence(self, use_cuda, use_reduce=False): + if use_cuda and not core.is_compiled_with_cuda(): + return + + img, label = self._init_data() + + self.check_network_convergence( + simple_fc_net, + feed_dict={"image": img, + "label": label}, + use_cuda=use_cuda, + use_reduce=use_reduce) + + def test_simple_fc(self): + # use_cuda + self.check_simple_fc_convergence(True) + + def check_simple_fc_parallel_accuracy(self, use_cuda): + if use_cuda and not core.is_compiled_with_cuda(): + return + + img, label = self._init_data() + + single_first_loss, single_last_loss = self.check_network_convergence( + method=simple_fc_net, + seed=1, + feed_dict={"image": img, + "label": label}, + use_cuda=use_cuda, + use_parallel_executor=False) + parallel_first_loss, parallel_last_loss = self.check_network_convergence( + method=simple_fc_net, + seed=1, + feed_dict={"image": img, + "label": label}, + use_cuda=use_cuda, + use_parallel_executor=True) + + self.assertAlmostEquals( + np.mean(parallel_first_loss), + single_first_loss, + delta=1e-6, ) + self.assertAlmostEquals( + np.mean(parallel_last_loss), single_last_loss, delta=1e-6) + + def test_simple_fc_parallel_accuracy(self): + self.check_simple_fc_parallel_accuracy(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pass_builder.py b/python/paddle/fluid/tests/unittests/test_pass_builder.py index 7e1c2572f08598b8b600517e4a82b48ca71cc20d..a96cb624f52303f05e40f572ccda858d1e329941 100644 --- a/python/paddle/fluid/tests/unittests/test_pass_builder.py +++ b/python/paddle/fluid/tests/unittests/test_pass_builder.py @@ -96,6 +96,9 @@ class TestPassBuilder(unittest.TestCase): build_strategy = fluid.BuildStrategy() self.assertFalse(build_strategy.fuse_elewise_add_act_ops) build_strategy.fuse_elewise_add_act_ops = True + #FIXME: currently fuse_elewise_add_act_ops not compatible with below options + build_strategy.enable_inplace = False + build_strategy.memory_optimize = False pass_builder = build_strategy._finalize_strategy_and_create_passes() self.assertTrue("fuse_elewise_add_act_pass" in [p.type() for p in pass_builder.all_passes()]) diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py index 7934164b84931f886967982ce0cb65c406bbf800..39d778b82a04f403bea030381ff220a68b1ff0ef 100644 --- a/python/paddle/fluid/tests/unittests/test_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_profiler.py @@ -16,15 +16,19 @@ from __future__ import print_function import unittest import os +import tempfile import numpy as np import paddle.fluid as fluid import paddle.fluid.profiler as profiler import paddle.fluid.layers as layers import paddle.fluid.core as core +import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2 class TestProfiler(unittest.TestCase): - def net_profiler(self, state, profile_path='/tmp/profile'): + def net_profiler(self, state, use_parallel_executor=False): + profile_path = os.path.join(tempfile.gettempdir(), "profile") + open(profile_path, "w").write("") startup_program = fluid.Program() main_program = fluid.Program() @@ -60,6 +64,11 @@ class TestProfiler(unittest.TestCase): place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0) exe = fluid.Executor(place) exe.run(startup_program) + if use_parallel_executor: + pe = fluid.ParallelExecutor( + state != 'CPU', + loss_name=avg_cost.name, + main_program=main_program) pass_acc_calculator = fluid.average.WeightedAverage() with profiler.profiler(state, 'total', profile_path) as prof: @@ -69,6 +78,9 @@ class TestProfiler(unittest.TestCase): x = np.random.random((32, 784)).astype("float32") y = np.random.randint(0, 10, (32, 1)).astype("int64") + if use_parallel_executor: + pe.run(feed={'x': x, 'y': y}, fetch_list=[avg_cost.name]) + continue outs = exe.run(main_program, feed={'x': x, 'y': y}, @@ -77,21 +89,37 @@ class TestProfiler(unittest.TestCase): b_size = np.array(outs[2]) pass_acc_calculator.add(value=acc, weight=b_size) pass_acc = pass_acc_calculator.eval() + data = open(profile_path, 'rb').read() + self.assertGreater(len(data), 0) + profile_pb = profiler_pb2.Profile() + profile_pb.ParseFromString(data) + self.assertGreater(len(profile_pb.events), 0) + for event in profile_pb.events: + if event.type == profiler_pb2.Event.GPUKernel: + if not event.detail_info and not event.name.startswith("MEM"): + raise Exception( + "Kernel %s missing event. Has this kernel been recorded by RecordEvent?" + % event.name) + elif event.type == profiler_pb2.Event.CPU and ( + event.name.startswith("Driver API") or + event.name.startswith("Runtime API")): + print("Warning: unregister", event.name) def test_cpu_profiler(self): self.net_profiler('CPU') + self.net_profiler('CPU', use_parallel_executor=True) @unittest.skipIf(not core.is_compiled_with_cuda(), "profiler is enabled only with GPU") def test_cuda_profiler(self): self.net_profiler('GPU') + self.net_profiler('GPU', use_parallel_executor=True) @unittest.skipIf(not core.is_compiled_with_cuda(), "profiler is enabled only with GPU") def test_all_profiler(self): - self.net_profiler('All', '/tmp/profile_out') - with open('/tmp/profile_out', 'rb') as f: - self.assertGreater(len(f.read()), 0) + self.net_profiler('All') + self.net_profiler('All', use_parallel_executor=True) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 18207373acae45678a68d84bdf05776f5cffca43..05bef1a4762bf405ca810c61265404c57b77c184 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -142,6 +142,10 @@ def test_main(use_cuda, use_py_func_op, use_parallel_executor): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) + #FIXME force use old memory optimzie strategy here to pass the unittest + #since open the new strategy will crash the unittest + fluid.memory_optimize(fluid.default_main_program()) + train_cp = compiler.CompiledProgram(fluid.default_main_program()) if use_parallel_executor: train_cp = train_cp.with_data_parallel(loss_name=loss.name) diff --git a/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py b/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py index 92cd5b0cbcd1ab56300158d26850969870e86f2b..b49249538bbf07f67136e04a11a42febfedecf81 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py @@ -49,6 +49,21 @@ class TestSequenceEraseOpInt32(OpTest): self.check_output() +class TestSequenceEraseOpInt32LoD2(OpTest): + def setUp(self): + self.op_type = "sequence_erase" + in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") + lod = [[1, 3], [9, 4, 11, 6]] + tokens = [2, 3, 5] + out_seq, new_lod0 = sequence_erase(in_seq, lod[-1], tokens) + self.attrs = {'tokens': tokens} + self.inputs = {'X': (in_seq, lod)} + self.outputs = {'Out': (out_seq, lod[:-1] + [new_lod0])} + + def test_check_output(self): + self.check_output() + + class TestSequenceEraseOpInt64(OpTest): def setUp(self): self.op_type = "sequence_erase" diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py index b46e4bfb86bd5dc9c74375693328f2506281be3e..162e6d1938c8174d342d8e4af1e4b6c424afc521 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py @@ -24,17 +24,28 @@ from op_test import OpTest class TestSGDOp(OpTest): def setUp(self): self.op_type = "sgd" - w = np.random.random((102, 105)).astype("float32") - g = np.random.random((102, 105)).astype("float32") + self.conf() + w = np.random.random((self.h, self.w)).astype("float32") + g = np.random.random((self.h, self.w)).astype("float32") lr = np.array([0.1]).astype("float32") self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr} self.outputs = {'ParamOut': w - lr * g} + def conf(self): + self.h = 102 + self.w = 105 + def test_check_output(self): self.check_output() +class TestSGDOpCase8X(TestSGDOp): + def conf(self): + self.h = 10 + self.w = 64 + + class TestSparseSGDOp(unittest.TestCase): def check_with_place(self, place): scope = core.Scope() @@ -42,12 +53,12 @@ class TestSparseSGDOp(unittest.TestCase): # create and initialize Grad Variable height = 10 rows = [0, 4, 7] - row_numel = 12 + self.conf() grad_selected_rows = scope.var('Grad').get_selected_rows() grad_selected_rows.set_height(height) grad_selected_rows.set_rows(rows) - np_array = np.ones((len(rows), row_numel)).astype("float32") + np_array = np.ones((len(rows), self.row_numel)).astype("float32") np_array[0, 0] = 2.0 np_array[2, 8] = 4.0 @@ -56,7 +67,7 @@ class TestSparseSGDOp(unittest.TestCase): # create and initialize Param Variable param = scope.var('Param').get_tensor() - param_array = np.full((height, row_numel), 5.0).astype("float32") + param_array = np.full((height, self.row_numel), 5.0).astype("float32") param.set(param_array, place) # create and initialize LeraningRate Variable @@ -98,6 +109,14 @@ class TestSparseSGDOp(unittest.TestCase): for place in places: self.check_with_place(place) + def conf(self): + self.row_numel = 12 + + +class TestSparseSGDOpCase8X(TestSparseSGDOp): + def conf(self): + self.row_numel = 16 + class TestSGDOpOptimizeSelectedRows(unittest.TestCase): def check_with_place(self, place): diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 40c3135183a128cd9b7324ce27da798fa2d93afd..5c56de6779d238064f03a65b54f3c73a77119f60 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -144,15 +144,5 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): return [2, 3, 4, 5] -class TestSoftmaxMKLDNNOp(TestSoftmaxOp): - def init_kernel_type(self): - self.use_mkldnn = True - - -class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp): - def get_x_shape(self): - return [2, 3, 4, 5] - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py new file mode 100644 index 0000000000000000000000000000000000000000..e4e431bcce571798893ccc96c74fd9972b657f3e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py @@ -0,0 +1,122 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division + +import unittest +import numpy as np +from op_test import OpTest + +from paddle.fluid import core + + +def spectral_norm(weight, u, v, dim, power_iters, eps): + shape = weight.shape + weight_mat = weight.copy() + h = shape[dim] + w = np.prod(shape) // h + if dim != 0: + perm = [dim] + [d for d in range(len(shape)) if d != dim] + weight_mat = weight_mat.transpose(perm) + weight_mat = weight_mat.reshape((h, w)) + + u = u.reshape((h, 1)) + v = v.reshape((w, 1)) + for i in range(power_iters): + v = np.matmul(weight_mat.T, u) + v_norm = np.sqrt((v * v).sum()) + v = v / (v_norm + eps) + u = np.matmul(weight_mat, v) + u_norm = np.sqrt((u * u).sum()) + u = u / (u_norm + eps) + + sigma = (u * np.matmul(weight_mat, v)).sum() + return weight / sigma + + +class TestSpectralNormOpNoGrad(OpTest): + def setUp(self): + self.initTestCase() + self.op_type = 'spectral_norm' + weight = np.random.random(self.weight_shape).astype('float32') + u = np.random.normal(0., 1., self.u_shape).astype('float32') + v = np.random.normal(0., 1., self.v_shape).astype('float32') + + self.attrs = { + "dim": self.dim, + "power_iters": self.power_iters, + "eps": self.eps, + } + + self.inputs = { + "Weight": weight, + "U": u, + "V": v, + } + + output = spectral_norm(weight, u, v, self.dim, self.power_iters, + self.eps) + self.outputs = {"Out": output} + + def test_check_output(self): + self.check_output() + + def initTestCase(self): + self.weight_shape = (2, 3) + self.u_shape = (2, ) + self.v_shape = (3, ) + self.dim = 0 + self.power_iters = 5 + self.eps = 1e-12 + + +class TestSpectralNormOpNoGrad2(TestSpectralNormOpNoGrad): + def initTestCase(self): + self.weight_shape = (2, 3, 3, 3) + self.u_shape = (3, ) + self.v_shape = (18, ) + self.dim = 1 + self.power_iters = 10 + self.eps = 1e-12 + + +class TestSpectralNormOp(TestSpectralNormOpNoGrad): + def test_check_grad_ignore_uv(self): + self.check_grad( + ['Weight'], + 'Out', + no_grad_set=set(["U", "V"]), + max_relative_error=0.1) + + def initTestCase(self): + self.weight_shape = (2, 3) + self.u_shape = (2, ) + self.v_shape = (3, ) + self.dim = 0 + self.power_iters = 0 + self.eps = 1e-12 + + +class TestSpectralNormOp2(TestSpectralNormOp): + def initTestCase(self): + self.weight_shape = (2, 3, 3, 3) + self.u_shape = (3, ) + self.v_shape = (18, ) + self.dim = 1 + self.power_iters = 0 + self.eps = 1e-12 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index a3293afbbd7cef8470c808e98ae88a05f2e492f4..eb54068650e8b3f4e64317778e2ad7c7aa7fe1b2 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -1020,7 +1020,11 @@ class DistributeTranspiler(object): skip_dim0 = 0 slice_vars = self.param_var_mapping[orig_var_name] - orig_dim1_flatten = reduce(lambda x, y: x * y, slice_vars[0].shape[1:]) + orig_dim1_flatten = 1 + + if len(slice_vars[0].shape) >= 2: + orig_dim1_flatten = reduce(lambda x, y: x * y, + slice_vars[0].shape[1:]) for slice_var in slice_vars[:block_idx]: skip_dim0 += slice_var.shape[0] diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index cc7f5ec90c26c87b7c5514c900e853be9e16d6eb..8a527e72fb9ac806254d2c055fc283c938cc55b4 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -15,6 +15,7 @@ from __future__ import print_function import os +import sys import numpy as np from .. import core from ..framework import Program @@ -50,6 +51,9 @@ class InferenceTranspiler(object): place (Place): inference place scope (Scope|None): inference Scope ''' + sys.stderr.write("InferenceTranspiler is deprecated since it's not " + "safe. Users should be " + "responsible for constructing the inference program\n") if not isinstance(program, Program): raise TypeError("program should be as Program type") if not isinstance(place, core.CPUPlace) and not isinstance( diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index 52c1aea288fa2bb7478ad14186367900c05f64e7..c434423bae76c2ebdd7bdeb164350d6ec66621c8 100755 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -15,6 +15,7 @@ from __future__ import print_function import six +import sys from collections import defaultdict, MutableSet from .. import core from ... import compat as cpt @@ -355,6 +356,10 @@ class ControlFlowGraph(object): is_forward).dtype() cache_dtype = self._find_var(block_desc, cache_var, is_forward).dtype() + if x_dtype != cache_dtype: + if PRINT_LOG: + print("x_dtype and cache_dtype are different") + continue if not compare_shape(x_shape, cache_shape, level): continue @@ -505,6 +510,8 @@ def memory_optimize(input_program, Returns: None """ + sys.stderr.write('memory_optimize is deprecated. ' + 'Use CompiledProgram and Executor\n') def to_name_str(var): if isinstance(var, Variable): diff --git a/python/paddle/utils/dump_config.py b/python/paddle/utils/dump_config.py deleted file mode 100644 index 6a96a0a78fc77c50904ee7822c725c41e646c5e6..0000000000000000000000000000000000000000 --- a/python/paddle/utils/dump_config.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.config_parser import parse_config -from paddle.proto import TrainerConfig_pb2 -import sys - -__all__ = [] - -if __name__ == '__main__': - whole_conf = False - binary = False - if len(sys.argv) == 2: - conf = parse_config(sys.argv[1], '') - elif len(sys.argv) == 3: - conf = parse_config(sys.argv[1], sys.argv[2]) - elif len(sys.argv) == 4: - conf = parse_config(sys.argv[1], sys.argv[2]) - if sys.argv[3] == '--whole': - whole_conf = True - elif sys.argv[3] == '--binary': - binary = True - else: - raise RuntimeError() - - assert isinstance(conf, TrainerConfig_pb2.TrainerConfig) - - if whole_conf: - print(conf) - else: - if binary: - sys.stdout.write(conf.model_config.SerializeToString()) - else: - print(conf.model_config) diff --git a/python/paddle/utils/dump_v2_config.py b/python/paddle/utils/dump_v2_config.py deleted file mode 100644 index 5dc2111e379fd39b40e1e9bcf2e577b57b101a68..0000000000000000000000000000000000000000 --- a/python/paddle/utils/dump_v2_config.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections - -from paddle.trainer_config_helpers.layers import LayerOutput -from paddle.v2.layer import parse_network -from paddle.proto import TrainerConfig_pb2 - -__all__ = ["dump_v2_config"] - - -def dump_v2_config(topology, save_path, binary=False): - """ Dump the network topology to a specified file. - - This function is only used to dump network defined by using PaddlePaddle V2 - APIs. This function will NOT dump configurations related to PaddlePaddle - optimizer. - - :param topology: The output layers (can be more than one layers given in a - Python List or Tuple) of the entire network. Using the - specified layers (if more than one layer is given) as root, - traversing back to the data layer(s), all the layers - connected to the specified output layers will be dumped. - Layers not connceted to the specified will not be dumped. - :type topology: LayerOutput|List|Tuple - :param save_path: The path to save the dumped network topology. - :type save_path: str - :param binary: Whether to dump the serialized network topology or not. - The default value is false. NOTE that, if you call this - function to generate network topology for PaddlePaddle C-API, - a serialized version of network topology is required. When - using PaddlePaddle C-API, this flag MUST be set to True. - :type binary: bool - """ - - if isinstance(topology, LayerOutput): - topology = [topology] - elif isinstance(topology, collections.Sequence): - for out_layer in topology: - assert isinstance(out_layer, LayerOutput), ( - "The type of each element in the parameter topology " - "should be LayerOutput.") - else: - raise RuntimeError("Error input type for parameter topology.") - - model_str = parse_network(topology) - with open(save_path, "w") as fout: - if binary: - fout.write(model_str.SerializeToString()) - else: - fout.write(str(model_str)) diff --git a/python/paddle/utils/image_multiproc.py b/python/paddle/utils/image_multiproc.py deleted file mode 100644 index d1bbda3fd3562efe486377d41a9fb7359bafa4e7..0000000000000000000000000000000000000000 --- a/python/paddle/utils/image_multiproc.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -from PIL import Image -import six -from six.moves import cStringIO as StringIO -import multiprocessing -import functools -import itertools - -from paddle.utils.image_util import * -from paddle.trainer.config_parser import logger - -try: - import cv2 -except ImportError: - logger.warning("OpenCV2 is not installed, using PIL to process") - cv2 = None - -__all__ = ["CvTransformer", "PILTransformer", "MultiProcessImageTransformer"] - - -class CvTransformer(ImageTransformer): - """ - CvTransformer used python-opencv to process image. - """ - - def __init__( - self, - min_size=None, - crop_size=None, - transpose=(2, 0, 1), # transpose to C * H * W - channel_swap=None, - mean=None, - is_train=True, - is_color=True): - ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color) - self.min_size = min_size - self.crop_size = crop_size - self.is_train = is_train - - def resize(self, im, min_size): - row, col = im.shape[:2] - new_row, new_col = min_size, min_size - if row > col: - new_row = min_size * row / col - else: - new_col = min_size * col / row - im = cv2.resize(im, (new_row, new_col), interpolation=cv2.INTER_CUBIC) - return im - - def crop_and_flip(self, im): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - im: (H x W x K) ndarrays - """ - row, col = im.shape[:2] - start_h, start_w = 0, 0 - if self.is_train: - start_h = np.random.randint(0, row - self.crop_size + 1) - start_w = np.random.randint(0, col - self.crop_size + 1) - else: - start_h = (row - self.crop_size) / 2 - start_w = (col - self.crop_size) / 2 - end_h, end_w = start_h + self.crop_size, start_w + self.crop_size - if self.is_color: - im = im[start_h:end_h, start_w:end_w, :] - else: - im = im[start_h:end_h, start_w:end_w] - if (self.is_train) and (np.random.randint(2) == 0): - if self.is_color: - im = im[:, ::-1, :] - else: - im = im[:, ::-1] - return im - - def transform(self, im): - im = self.resize(im, self.min_size) - im = self.crop_and_flip(im) - # transpose, swap channel, sub mean - im = im.astype('float32') - ImageTransformer.transformer(self, im) - return im - - def load_image_from_string(self, data): - flag = cv2.CV_LOAD_IMAGE_COLOR if self.is_color else cv2.CV_LOAD_IMAGE_GRAYSCALE - im = cv2.imdecode(np.fromstring(data, np.uint8), flag) - return im - - def transform_from_string(self, data): - im = self.load_image_from_string(data) - return self.transform(im) - - def load_image_from_file(self, file): - flag = cv2.CV_LOAD_IMAGE_COLOR if self.is_color else cv2.CV_LOAD_IMAGE_GRAYSCALE - im = cv2.imread(file, flag) - return im - - def transform_from_file(self, file): - im = self.load_image_from_file(file) - return self.transform(im) - - -class PILTransformer(ImageTransformer): - """ - PILTransformer used PIL to process image. - """ - - def __init__( - self, - min_size=None, - crop_size=None, - transpose=(2, 0, 1), # transpose to C * H * W - channel_swap=None, - mean=None, - is_train=True, - is_color=True): - ImageTransformer.__init__(self, transpose, channel_swap, mean, is_color) - self.min_size = min_size - self.crop_size = crop_size - self.is_train = is_train - - def resize(self, im, min_size): - row, col = im.size[:2] - new_row, new_col = min_size, min_size - if row > col: - new_row = min_size * row / col - else: - new_col = min_size * col / row - im = im.resize((new_row, new_col), Image.ANTIALIAS) - return im - - def crop_and_flip(self, im): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - """ - row, col = im.size[:2] - start_h, start_w = 0, 0 - if self.is_train: - start_h = np.random.randint(0, row - self.crop_size + 1) - start_w = np.random.randint(0, col - self.crop_size + 1) - else: - start_h = (row - self.crop_size) / 2 - start_w = (col - self.crop_size) / 2 - end_h, end_w = start_h + self.crop_size, start_w + self.crop_size - im = im.crop((start_h, start_w, end_h, end_w)) - if (self.is_train) and (np.random.randint(2) == 0): - im = im.transpose(Image.FLIP_LEFT_RIGHT) - return im - - def transform(self, im): - im = self.resize(im, self.min_size) - im = self.crop_and_flip(im) - im = np.array(im, dtype=np.float32) # convert to numpy.array - # transpose, swap channel, sub mean - ImageTransformer.transformer(self, im) - return im - - def load_image_from_string(self, data): - im = Image.open(StringIO(data)) - return im - - def transform_from_string(self, data): - im = self.load_image_from_string(data) - return self.transform(im) - - def load_image_from_file(self, file): - im = Image.open(file) - return im - - def transform_from_file(self, file): - im = self.load_image_from_file(file) - return self.transform(im) - - -def job(is_img_string, transformer, data_label_pack): - (data, label) = data_label_pack - if is_img_string: - return transformer.transform_from_string(data), label - else: - return transformer.transform_from_file(data), label - - -class MultiProcessImageTransformer(object): - def __init__(self, - procnum=10, - resize_size=None, - crop_size=None, - transpose=(2, 0, 1), - channel_swap=None, - mean=None, - is_train=True, - is_color=True, - is_img_string=True): - """ - Processing image with multi-process. If it is used in PyDataProvider, - the simple usage for CNN is as follows: - - .. code-block:: python - - def hool(settings, is_train, **kwargs): - settings.is_train = is_train - settings.mean_value = np.array([103.939,116.779,123.68], dtype=np.float32) - settings.input_types = [ - dense_vector(3 * 224 * 224), - integer_value(1)] - settings.transformer = MultiProcessImageTransformer( - procnum=10, - resize_size=256, - crop_size=224, - transpose=(2, 0, 1), - mean=settings.mean_values, - is_train=settings.is_train) - - - @provider(init_hook=hook, pool_size=20480) - def process(settings, file_list): - with open(file_list, 'r') as fdata: - for line in fdata: - data_dic = np.load(line.strip()) # load the data batch pickled by Pickle. - data = data_dic['data'] - labels = data_dic['label'] - labels = np.array(labels, dtype=np.float32) - for im, lab in settings.dp.run(data, labels): - yield [im.astype('float32'), int(lab)] - - :param procnum: processor number. - :type procnum: int - :param resize_size: the shorter edge size of image after resizing. - :type resize_size: int - :param crop_size: the croping size. - :type crop_size: int - :param transpose: the transpose order, Paddle only allow C * H * W order. - :type transpose: tuple or list - :param channel_swap: the channel swap order, RGB or BRG. - :type channel_swap: tuple or list - :param mean: the mean values of image, per-channel mean or element-wise mean. - :type mean: array, The dimension is 1 for per-channel mean. - The dimension is 3 for element-wise mean. - :param is_train: training peroid or testing peroid. - :type is_train: bool. - :param is_color: the image is color or gray. - :type is_color: bool. - :param is_img_string: The input can be the file name of image or image string. - :type is_img_string: bool. - """ - - self.procnum = procnum - self.pool = multiprocessing.Pool(procnum) - self.is_img_string = is_img_string - if cv2 is not None: - self.transformer = CvTransformer(resize_size, crop_size, transpose, - channel_swap, mean, is_train, - is_color) - else: - self.transformer = PILTransformer(resize_size, crop_size, transpose, - channel_swap, mean, is_train, - is_color) - - def run(self, data, label): - fun = functools.partial(job, self.is_img_string, self.transformer) - return self.pool.imap_unordered( - fun, six.moves.zip(data, label), chunksize=100 * self.procnum) diff --git a/python/paddle/utils/make_model_diagram.py b/python/paddle/utils/make_model_diagram.py deleted file mode 100644 index 52759d3ad230c3a5a5488a8bc46a2e8f8fae1025..0000000000000000000000000000000000000000 --- a/python/paddle/utils/make_model_diagram.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generate dot diagram file for the given paddle model config -# The generated file can be viewed using Graphviz (http://graphviz.org) - -from __future__ import print_function - -import six -import sys -import traceback - -from paddle.trainer.config_parser import parse_config - - -def make_layer_label(layer_config): - label = '%s type=%s' % (layer_config.name, layer_config.type) - if layer_config.reversed: - label += ' <==' - - label2 = '' - if layer_config.active_type: - label2 += 'act=%s ' % layer_config.active_type - if layer_config.bias_parameter_name: - label2 += 'bias=%s ' % layer_config.bias_parameter_name - - if label2: - label += '\l' + label2 - return label - - -def make_diagram(config_file, dot_file, config_arg_str): - config = parse_config(config_file, config_arg_str) - make_diagram_from_proto(config.model_config, dot_file) - - -def make_diagram_from_proto(model_config, dot_file): - # print >> sys.stderr, config - name2id = {} - f = open(dot_file, 'w') - submodel_layers = set() - - def make_link(link): - return 'l%s -> l%s;' % (name2id[link.layer_name], - name2id[link.link_name]) - - def make_mem(mem): - s = '' - if mem.boot_layer_name: - s += 'l%s -> l%s;\n' % (name2id[mem.boot_layer_name], - name2id[mem.layer_name]) - s += 'l%s -> l%s [style=dashed];' % (name2id[mem.layer_name], - name2id[mem.link_name]) - return s - - print('digraph graphname {', file=f) - print('node [width=0.375,height=0.25];', file=f) - for i in six.moves.xrange(len(model_config.layers)): - l = model_config.layers[i] - name2id[l.name] = i - - i = 0 - for sub_model in model_config.sub_models: - if sub_model.name == 'root': - continue - print('subgraph cluster_%s {' % i, file=f) - print('style=dashed;', file=f) - label = '%s ' % sub_model.name - if sub_model.reversed: - label += '<==' - print('label = "%s";' % label, file=f) - i += 1 - submodel_layers.add(sub_model.name) - for layer_name in sub_model.layer_names: - submodel_layers.add(layer_name) - lid = name2id[layer_name] - layer_config = model_config.layers[lid] - label = make_layer_label(layer_config) - print('l%s [label="%s", shape=box];' % (lid, label), file=f) - print('}', file=f) - - for i in six.moves.xrange(len(model_config.layers)): - l = model_config.layers[i] - if l.name not in submodel_layers: - label = make_layer_label(l) - print('l%s [label="%s", shape=box];' % (i, label), file=f) - - for sub_model in model_config.sub_models: - if sub_model.name == 'root': - continue - for link in sub_model.in_links: - print(make_link(link), file=f) - for link in sub_model.out_links: - print(make_link(link), file=f) - for mem in sub_model.memories: - print(make_mem(mem), file=f) - - for i in six.moves.xrange(len(model_config.layers)): - for l in model_config.layers[i].inputs: - print( - 'l%s -> l%s [label="%s"];' % (name2id[l.input_layer_name], i, - l.input_parameter_name), - file=f) - - print('}', file=f) - f.close() - - -def usage(): - print( - ("Usage: python show_model_diagram.py" + - " CONFIG_FILE DOT_FILE [config_str]"), - file=sys.stderr) - exit(1) - - -if __name__ == '__main__': - if len(sys.argv) < 3 or len(sys.argv) > 4: - usage() - - config_file = sys.argv[1] - dot_file = sys.argv[2] - config_arg_str = sys.argv[3] if len(sys.argv) == 4 else '' - - try: - make_diagram(config_file, dot_file, config_arg_str) - except: - traceback.print_exc() - raise diff --git a/python/paddle/utils/merge_model.py b/python/paddle/utils/merge_model.py deleted file mode 100644 index b74649e93640c3600636034d58792b8d12dffeda..0000000000000000000000000000000000000000 --- a/python/paddle/utils/merge_model.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import gzip -import struct -import os - -from paddle.trainer_config_helpers.layers import LayerOutput -from paddle.v2.parameters import Parameters -from paddle.proto import ModelConfig_pb2 -from paddle.v2.topology import Topology - - -def merge_v2_model(net, param_file, output_file): - '''Merge the model config and parameters into one file. - - The model configuration file describes the model structure which - ends with .py. The parameters file stores the parameters of the model - which ends with .tar.gz. - - @param net The output layer of the network for inference. - @param param_file Path of the parameters (.tar.gz) which is stored by - v2 api. - @param output_file Path of the merged file which will be generated. - - Usage: - - from paddle.utils.merge_model import merge_v2_model - # import your network configuration - from example_net import net_conf - - net = net_conf(is_predict=True) - param_file = './param_pass_00000.tar.gz' - output_file = './output.paddle' - - merge_v2_model(net, param_file, output_file) - - ''' - - assert isinstance(net, LayerOutput), \ - "The net should be the output of the network for inference" - assert os.path.exists(param_file), \ - "The model parameters file %s does not exists " % (param_file) - - model_proto = Topology(net).proto() - assert isinstance(model_proto, ModelConfig_pb2.ModelConfig) - - with gzip.open(param_file) as f: - params = Parameters.from_tar(f) - - if os.path.exists(output_file): - os.remove(output_file) - - with open(output_file, 'w') as f: - param_names = [param.name for param in model_proto.parameters] - conf_str = model_proto.SerializeToString() - f.write(struct.pack('q', len(conf_str))) - f.write(conf_str) - for pname in param_names: - params.serialize(pname, f) - - print('Generate %s success!' % (output_file)) diff --git a/python/paddle/utils/plot.py b/python/paddle/utils/plot.py index 08889c0313fc24151cde6ca7b662d81eb53c9d7b..ee651f2f0cd6f2e594a4e74c896baa924f70bbf5 100644 --- a/python/paddle/utils/plot.py +++ b/python/paddle/utils/plot.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import six class PlotData(object): @@ -60,9 +61,9 @@ class Ploter(object): def append(self, title, step, value): """ - Feed data - - Args: + Feed data + + Args: title: assign the group data to this subtitle. step: the x_axis of data. value: the y_axis of data. @@ -71,9 +72,9 @@ class Ploter(object): .. code-block:: python plot_curve = Ploter("Curve 1","Curve 2") plot_curve.append(title="Curve 1",step=1,value=1) - """ - assert isinstance(title, basestring) - assert self.__plot_data__.has_key(title) + """ + assert isinstance(title, six.string_types) + assert title in self.__plot_data__ data = self.__plot_data__[title] assert isinstance(data, PlotData) data.append(step, value) @@ -89,7 +90,7 @@ class Ploter(object): .. code-block:: python plot_curve = Ploter() plot_cure.plot() - """ + """ if self.__plot_is_disabled__(): return diff --git a/python/paddle/utils/predefined_net.py b/python/paddle/utils/predefined_net.py deleted file mode 100644 index 2801f4877c079615239b92be146b3e33df16b37f..0000000000000000000000000000000000000000 --- a/python/paddle/utils/predefined_net.py +++ /dev/null @@ -1,381 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import six -import os -from paddle.trainer.config_parser import * -from paddle.utils.preprocess_img import \ - ImageClassificationDatasetCreater -from paddle.trainer_config_helpers import * - - -def image_data(data_dir, - processed_image_size, - overwrite=False, - color=True, - train_list="batches/train.list", - test_list="batches/test.list", - meta_file="batches/batches.meta", - use_jpeg=1): - """ - Predefined image data provider for image classification. - train_list: a text file containing a list of training batches. - test_list: a text file containing a list of test batches. - processed_image_size: all the input images will be resized into this size. - If the image is not square. Then the shorter edge will be resized into - this size, and the aspect ratio is kept the same. - color: whether the images are color or gray. - meta_path: the path of the meta file that stores the mean image file and - other dataset information, such as the size of images, - the size of the mean image, the number of classes. - async_load_data: whether to load image data asynchronuously. - """ - data_creator = ImageClassificationDatasetCreater( - data_dir, processed_image_size, color) - batch_data_dir = data_dir - train_list = os.path.join(batch_data_dir, train_list) - test_list = os.path.join(batch_data_dir, test_list) - meta_path = os.path.join(batch_data_dir, meta_file) - image_size = processed_image_size - conf = np.load(meta_path) - mean_image_size = conf["mean_image_size"] - is_color = conf["color"] - num_classes = conf["num_classes"] - color_string = "color" if is_color else "gray" - - args = { - 'meta': meta_path, - 'mean_img_size': mean_image_size, - 'img_size': image_size, - 'num_classes': num_classes, - 'use_jpeg': use_jpeg != 0, - 'color': color_string - } - - define_py_data_sources2( - train_list, - test_list, - module='image_provider', - obj='processData', - args=args) - return { - "image_size": image_size, - "num_classes": num_classes, - "is_color": is_color - } - - -def get_extra_layer_attr(drop_rate): - if drop_rate == 0: - return None - else: - return ExtraLayerAttribute(drop_rate=drop_rate) - - -def image_data_layers(image_size, num_classes, is_color=False, - is_predict=False): - """ - Data layers for image classification. - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - is_predict: whether the network is used for prediction. - """ - num_image_channels = 3 if is_color else 1 - data_input = data_layer("input", - image_size * image_size * num_image_channels) - if is_predict: - return data_input, None, num_image_channels - else: - label_input = data_layer("label", 1) - return data_input, label_input, num_image_channels - - -def simple_conv_net(data_conf, is_color=False): - """ - A Wrapper for a simple network for MNIST digit recognition. - It contains two convolutional layers, one fully conencted layer, and - one softmax layer. - data_conf is a dictionary with the following keys: - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - for k, v in six.iteritems(data_conf): - globals()[k] = v - data_input, label_input, num_image_channels = \ - image_data_layers(image_size, num_classes, is_color, is_predict) - filter_sizes = [5, 5] - num_channels = [32, 64] - strides = [1, 1] - fc_dims = [500] - conv_bn_pool1 = img_conv_bn_pool( - name="g1", - input=data_input, - filter_size=filter_sizes[0], - num_channel=num_image_channels, - num_filters=num_channels[0], - conv_stride=1, - conv_padding=0, - pool_size=3, - pool_stride=2, - act=ReluActivation()) - conv_bn_pool2 = img_conv_bn_pool( - name="g2", - input=conv_bn_pool1, - filter_size=filter_sizes[1], - num_channel=num_channels[0], - num_filters=num_channels[1], - conv_stride=1, - conv_padding=0, - pool_size=3, - pool_stride=2, - act=ReluActivation()) - fc3 = fc_layer( - name="fc3", input=conv_bn_pool2, dim=fc_dims[0], act=ReluActivation()) - fc3_dropped = dropout_layer(name="fc3_dropped", input=fc3, dropout_rate=0.5) - output = fc_layer( - name="output", - input=fc3_dropped, - dim=fc_dims[0], - act=SoftmaxActivation()) - if is_predict: - end_of_network(output) - else: - cost = classify(name="cost", input=output, label=label_input) - end_of_network(cost) - - -def conv_layer_group(prefix_num, - num_layers, - input, - input_channels, - output_channels, - drop_rates=[], - strides=[], - with_bn=[]): - """ - A set of convolution layers, and batch normalization layers, - followed by one pooling layer. - It is utilized in VGG network for image classifcation. - prefix_num: the prefix number of the layer names. - For example, if prefix_num = 1, the first convolutioal layer's - name will be conv_1_1. - num_layers: number of the convolutional layers. - input: the name of the input layer. - input_channels: the number of channels of the input feature map. - output_channels: the number of channels of the output feature map. - drop_rates: the drop rates of the BN layers. It will be all zero by default. - strides: the stride of the convolution for the layers. - It will be all 1 by default. - with_bn: whether to use Batch Normalization for Conv layers. - By default, it is all false. - """ - if len(drop_rates) == 0: drop_rates = [0] * num_layers - if len(strides) == 0: strides = [1] * num_layers - if len(with_bn) == 0: with_bn = [False] * num_layers - assert (len(drop_rates) == num_layers) - assert (len(strides) == num_layers) - - for i in range(1, num_layers + 1): - if i == 1: - i_conv_in = input - else: - i_conv_in = group_output - i_channels_conv = input_channels if i == 1 else output_channels - conv_act = LinearActivation() if with_bn[i - 1] else ReluActivation() - conv_output = img_conv_layer( - name="conv%d_%d" % (prefix_num, i), - input=i_conv_in, - filter_size=3, - num_channels=i_channels_conv, - num_filters=output_channels, - stride=strides[i - 1], - padding=1, - act=conv_act) - if with_bn[i - 1]: - bn = batch_norm_layer( - name="conv%d_%d_bn" % (prefix_num, i), - input=conv_output, - num_channels=output_channels, - act=ReluActivation(), - layer_attr=get_extra_layer_attr(drop_rate=drop_rates[i - 1])) - group_output = bn - else: - group_output = conv_output - pool = img_pool_layer( - name="pool%d" % prefix_num, - input=group_output, - pool_size=2, - num_channels=output_channels, - stride=2) - return pool - - -def vgg_conv_net(image_size, - num_classes, - num_layers, - channels, - strides, - with_bn, - fc_dims, - drop_rates, - drop_rates_fc=[], - is_color=True, - is_predict=False): - """ - A Wrapper for a VGG network for image classification. - It is a set of convolutional groups followed by several fully - connected layers, and a cross-entropy classifiation loss. - The detailed architecture of the paper can be found here: - Very Deep Convolutional Networks for Large-Scale Visual Recognition - http://www.robots.ox.ac.uk/~vgg/research/very_deep/ - image_size: image size. - num_classes: num of classes. - num_layers: the number of layers for all the convolution groups. - channels: the number of output filters for all the convolution groups. - with_bn: whether each layer of a convolution group is followed by a - batch normalization. - drop_rates: the dropout rates for all the convolutional layers. - fc_dims: the dimension for all the fully connected layers. - is_color: whether the input images are color. - """ - data_input, label_input, num_image_channels = \ - image_data_layers(image_size, num_classes, is_color, is_predict) - assert (len(num_layers) == len(channels)) - assert (len(num_layers) == len(strides)) - assert (len(num_layers) == len(with_bn)) - num_fc_layers = len(fc_dims) - assert (num_fc_layers + 1 == len(drop_rates_fc)) - - for i in range(len(num_layers)): - input_layer = data_input if i == 0 else group_output - input_channels = 3 if i == 0 else channels[i - 1] - group_output = conv_layer_group( - prefix_num=i + 1, - num_layers=num_layers[i], - input=input_layer, - input_channels=input_channels, - output_channels=channels[i], - drop_rates=drop_rates[i], - strides=strides[i], - with_bn=with_bn[i]) - conv_output_name = group_output - if drop_rates_fc[0] != 0.0: - dropped_pool_name = "pool_dropped" - conv_output_name = dropout_layer( - name=dropped_pool_name, - input=conv_output_name, - dropout_rate=drop_rates_fc[0]) - for i in range(len(fc_dims)): - input_layer_name = conv_output_name if i == 0 else fc_output - active_type = LinearActivation() if i == len( - fc_dims) - 1 else ReluActivation() - drop_rate = 0.0 if i == len(fc_dims) - 1 else drop_rates_fc[i + 1] - fc_output = fc_layer( - name="fc%d" % (i + 1), - input=input_layer_name, - size=fc_dims[i], - act=active_type, - layer_attr=get_extra_layer_attr(drop_rate)) - bn = batch_norm_layer( - name="fc_bn", - input=fc_output, - num_channels=fc_dims[len(fc_dims) - 1], - act=ReluActivation(), - layer_attr=get_extra_layer_attr(drop_rate=drop_rates_fc[-1])) - output = fc_layer( - name="output", input=bn, size=num_classes, act=SoftmaxActivation()) - if is_predict: - outputs(output) - else: - cost = classification_cost(name="cost", input=output, label=label_input) - outputs(cost) - - -def vgg16_conv_net(image_size, num_classes, is_color=True, is_predict=False): - """ - A Wrapper for a 16 layers VGG network for image classification. - The detailed architecture of the paper can be found here: - Very Deep Convolutional Networks for Large-Scale Visual Recognition - http://www.robots.ox.ac.uk/~vgg/research/very_deep/ - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - vgg_conv_net(image_size, num_classes, - num_layers=[2, 2, 3, 3, 3], - channels=[64, 128, 256, 512, 512], - strides=[[], [], [], [], []], - with_bn=[[False, True], [False, True], [False, False, True], \ - [False, False, True], [False, False, True]], - drop_rates=[[]] * 5, - drop_rates_fc=[0.0, 0.5, 0.5], - fc_dims=[4096, 4096], - is_predict=is_predict) - - -def small_vgg(data_conf, is_predict=False): - """ - A Wrapper for a small VGG network for CIFAR-10 image classification. - The detailed architecture of the paper can be found here: - 92.45% on CIFAR-10 in Torch - http://torch.ch/blog/2015/07/30/cifar.html - Due to the constraints of CuDNN, it only has four convolutional groups - rather than five. - Thus, it only achieves 91.2% test accuracy and 98.1% training accuracy. - data_conf is a dictionary with the following keys: - image_size: image size. - num_classes: num of classes. - is_color: whether the input images are color. - """ - for k, v in six.iteritems(data_conf): - globals()[k] = v - vgg_conv_net(image_size, num_classes, - num_layers=[2, 2, 3, 3], - channels=[64, 128, 256, 512], - strides=[[], [], [], []], - with_bn=[[True, True], [True, True], [True, True, True], \ - [True, True, True]], - drop_rates=[[0.3, 0.0], [0.4, 0.0], - [0.4, 0.4, 0.0], [0.4, 0.4, 0.0]], - drop_rates_fc=[0.5, 0.5], - fc_dims=[512], - is_predict=is_predict) - - -def training_settings(learning_rate=0.1, - batch_size=128, - algorithm="sgd", - momentum=0.9, - decay_rate=0.001): - """ - Training settings. - learning_rate: learning rate of the training. - batch_size: the size of each training batch. - algorithm: training algorithm, can be - - sgd - - adagrad - - adadelta - - rmsprop - momentum: momentum of the training algorithm. - decay_rate: weight decay rate. - """ - Settings( - algorithm=algorithm, - batch_size=batch_size, - learning_rate=learning_rate / float(batch_size)) - default_momentum(momentum) - default_decay_rate(decay_rate * batch_size) diff --git a/python/paddle/utils/preprocess_img.py b/python/paddle/utils/preprocess_img.py index a322f7b769a2a32df516a4b8ea04289a7f882ff2..fc67949dfe0ef21487de29678781aa2bfd93f354 100644 --- a/python/paddle/utils/preprocess_img.py +++ b/python/paddle/utils/preprocess_img.py @@ -122,7 +122,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): def create_dataset_from_list(self, path): data = [] label_set = [] - for line in open(file_list): + for line in open(path): items = line.rstrip.split() image_path = items[0] label_name = items[1] @@ -141,7 +141,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater): path: the path of the image dataset. """ if self.from_list: - return create_dataset_from_list(path) + return self.create_dataset_from_list(path) label_set = preprocess_util.get_label_set_from_dir(path) data = [] for l_name in list(label_set.keys()): diff --git a/python/requirements.txt b/python/requirements.txt index 5a70f1aa3ffc0ab6d4d148eb5bc26981784f2d32..36bd5d4261cc7aa78d26b8c8ddfd87abd4f4e2e2 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ requests==2.9.2 numpy>=1.12 -protobuf==3.1 +protobuf>=3.1.0 recordio>=0.1.0 matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib rarfile diff --git a/tools/check_doc_approval.py b/tools/check_doc_approval.py deleted file mode 100644 index 44fdf58b49a1715696e8c28746282c38fb3c7763..0000000000000000000000000000000000000000 --- a/tools/check_doc_approval.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import ast -import hashlib -import importlib -import paddle.fluid - -files = [ - "paddle.fluid", "paddle.fluid.average", "paddle.fluid.backward", - "paddle.fluid.clip", "paddle.fluid.data_feeder", "paddle.fluid.executor", - "paddle.fluid.initializer", "paddle.fluid.io", "paddle.fluid.layers", - "paddle.fluid.metrics", "paddle.fluid.nets", "paddle.fluid.optimizer", - "paddle.fluid.profiler", "paddle.fluid.recordio_writer", - "paddle.fluid.regularizer", "paddle.fluid.transpiler" -] - - -def md5(doc): - hash = hashlib.md5() - hash.update(str(doc)) - return hash.hexdigest() - - -def get_module(): - for fi in files: - fi_lib = importlib.import_module(fi) - doc_function = getattr(fi_lib, "__all__") - for api in doc_function: - api_name = fi + "." + api - try: - doc_module = getattr(eval(api_name), "__doc__") - except: - pass - doc_md5_code = md5(doc_module) - doc_dict[api_name] = doc_md5_code - - -def doc_md5_dict(doc_md5_path): - with open(doc_md5_path, "rb") as f: - doc_md5 = f.read() - doc_md5_dict = ast.literal_eval(doc_md5) - return doc_md5_dict - - -def check_doc_md5(): - for k, v in doc_dict.items(): - try: - if doc_ci_dict[k] != v: - return doc_dict - except: - return doc_dict - return True - - -if __name__ == "__main__": - doc_dict = {} - doc_ci_dict = {} - doc_md5_file = "/root/.cache/doc_md5.txt" - if not os.path.exists(doc_md5_file): - os.mknod(doc_md5_file) - else: - doc_ci_dict = doc_md5_dict(doc_md5_file) - get_module() - if not os.path.getsize(doc_md5_file): - with open(doc_md5_file, 'w') as f: - f.write(str(doc_dict)) - check_dic = True - print(check_dic) - else: - check_dic = check_doc_md5() - print(check_dic) diff --git a/tools/codestyle/cpplint_pre_commit.hook b/tools/codestyle/cpplint_pre_commit.hook index aa14d3a2a12208eda11e82d88bc582eb3d2f5893..658008d852123b6eab06d1f13d61ba896e7e9c98 100755 --- a/tools/codestyle/cpplint_pre_commit.hook +++ b/tools/codestyle/cpplint_pre_commit.hook @@ -1,10 +1,22 @@ #!/bin/bash TOTAL_ERRORS=0 - +if [[ ! $TRAVIS_BRANCH ]]; then + # install cpplint on local machine. + if [[ ! $(which cpplint) ]]; then + pip install cpplint + fi + # diff files on local machine. + files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}') +else + # diff files between PR and latest commit on Travis CI. + branch_ref=$(git rev-parse "$TRAVIS_BRANCH") + head_ref=$(git rev-parse HEAD) + files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}') +fi # The trick to remove deleted files: https://stackoverflow.com/a/2413151 -for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do - if [[ $file =~ ^(paddle/legacy/api/.*|paddle/legacy/capi/.*|paddle/contrib/.*|paddle/legacy/cuda/.*|paddle/legacy/function/.*|paddle/legacy/gserver/.*|paddle/legacy/math/.*|paddle/legacy/optimizer/.*|paddle/legacy/parameter/.*|paddle/legacy/pserver/.*|paddle/legacy/trainer/.*|paddle/legacy/utils/.*|paddle/testing/TestUtil.*|patches/grpc/.*) ]]; then +for file in $files; do + if [[ $file =~ ^(patches/grpc/.*) ]]; then continue; else cpplint --filter=-readability/fn_size $file; @@ -13,4 +25,3 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do done exit $TOTAL_ERRORS - diff --git a/tools/diff_api.py b/tools/diff_api.py index 97c739ed2a5627ad9fd326f206976a4579dc26a3..ec51711d68a155dabdf3125d43fc35bab0b0c944 100644 --- a/tools/diff_api.py +++ b/tools/diff_api.py @@ -26,4 +26,10 @@ for each_diff in result: print(each_diff) if error: + print( + '''If you modify/add/delete the API files, including code and comment, please follow these steps in order to pass the CI: + 1. cd ${paddle_path}, compile paddle; + 2. pip install build/python/dist/(build whl package); + 3. run "python tools/print_signatures.py paddle.fluid, paddle.reader > paddle/fluid/API.spec"''' + ) sys.exit(1) diff --git a/tools/manylinux1/Dockerfile.x64 b/tools/manylinux1/Dockerfile.x64 index 48fd145e5fe6735fca3096752f801b1ec1cb39f0..c2fd743f62f536ab7443ca215d100478021d8f7c 100644 --- a/tools/manylinux1/Dockerfile.x64 +++ b/tools/manylinux1/Dockerfile.x64 @@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8 ENV GOROOT=/usr/local/go GOPATH=/root/gopath ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH} -# protobuf 3.1.0 -RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.tar.gz && \ - tar xzf protobuf-cpp-3.1.0.tar.gz && \ - cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz +# protobuf 3.6.1 +RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz && \ + tar xzf protobuf-cpp-3.6.1.tar.gz && \ + cd protobuf-3.6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1.tar.gz RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt diff --git a/tools/manylinux1/build_all.sh b/tools/manylinux1/build_all.sh index 097bedb5265d00f8aa362bb0272af633c97192ba..caf21722158b749ffe8d026a98a8b7d015e555d8 100755 --- a/tools/manylinux1/build_all.sh +++ b/tools/manylinux1/build_all.sh @@ -24,3 +24,8 @@ sed 's//9.0-cudnn7-devel-centos6/g' Dockerfile.x64 | \ sed 's//NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_62,code=sm_62 -gencode=arch=compute_70,code=sm_70"/g'> Dockerfile.tmp docker build -t ${REPO}/paddle_manylinux_devel:cuda9.0_cudnn7 -f Dockerfile.tmp . docker push ${REPO}/paddle_manylinux_devel:cuda9.0_cudnn7 + +sed 's//10.0-devel-centos6/g' Dockerfile.x64 | \ +sed 's//NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_62,code=sm_62 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75"/g'> Dockerfile.tmp +docker build -t ${REPO}/paddle_manylinux_devel:cuda10.0_cudnn7 -f Dockerfile.tmp . +docker push ${REPO}/paddle_manylinux_devel:cuda10.0_cudnn7 diff --git a/tools/manylinux1/build_scripts/build.sh b/tools/manylinux1/build_scripts/build.sh index 6c551eceb4543bf33229b9e5b5124522f3ee134c..1b0059a8c69fca93ecbf1db570a6092ca5c908b1 100644 --- a/tools/manylinux1/build_scripts/build.sh +++ b/tools/manylinux1/build_scripts/build.sh @@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99 EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc -PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb +PATCHELF_HASH=f2aa40a6148cb3b0ca807a1bf836b081793e55ec9e5540a5356d800132be7e0a CURL_ROOT=curl-7.49.1 CURL_HASH=eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1 AUTOCONF_ROOT=autoconf-2.69 @@ -107,11 +107,13 @@ curl-config --features rm -rf /usr/local/ssl # Install patchelf (latest with unreleased bug fixes) -curl -sLO http://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2.tar.gz -check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH -tar -xzf patchelf-0.9njs2.tar.gz -(cd patchelf-0.9njs2 && ./configure && make && make install) -rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2 +# FIXME(typhoonzero): restore this when the link is fixed. +# curl -sLO http://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2.tar.gz +# check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH +# tar -xzf patchelf-0.9njs2.tar.gz +# (cd patchelf-0.9njs2 && ./configure && make && make install) +# rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2 +yum install -y patchelf # Install latest pypi release of auditwheel LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel diff --git a/tools/manylinux1/build_scripts/build_utils.sh b/tools/manylinux1/build_scripts/build_utils.sh index 48cce15a145138376177731009c61157d1d4d0c8..083101249cd8560f63c95b3fe2aef610b01dd6ac 100755 --- a/tools/manylinux1/build_scripts/build_utils.sh +++ b/tools/manylinux1/build_scripts/build_utils.sh @@ -87,6 +87,8 @@ function do_cpython_build { # NOTE Make libpython shared library visible to python calls below LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python get-pip.py LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/pip install wheel + cd / + ls ${MY_DIR} local abi_tag=$(LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python ${MY_DIR}/python-tag-abi-tag.py) ln -s ${prefix} /opt/python/${abi_tag} } diff --git a/tools/print_signatures.py b/tools/print_signatures.py index 7e61dde0a446cf5bfe656105ffd2472f03576f05..c56f30f724ca9f183d6c5cac427411b7711739a4 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -24,12 +24,19 @@ import inspect import collections import sys import pydoc +import hashlib member_dict = collections.OrderedDict() experimental_namespace = {"paddle.fluid.imperative"} +def md5(doc): + hash = hashlib.md5() + hash.update(str(doc).encode('utf-8')) + return hash.hexdigest() + + def visit_member(parent_name, member): cur_name = ".".join([parent_name, member.__name__]) if inspect.isclass(member): @@ -39,7 +46,10 @@ def visit_member(parent_name, member): visit_member(cur_name, value) elif callable(member): try: - member_dict[cur_name] = inspect.getargspec(member) + doc = ('document', md5(member.__doc__)) + args = inspect.getargspec(member) + all = (args, doc) + member_dict[cur_name] = all except TypeError: # special for PyBind method member_dict[cur_name] = " ".join([ line.strip() for line in pydoc.render_doc(member).split('\n') diff --git a/tools/timeline.py b/tools/timeline.py index f850476831d84787bf5cc7c7f7c91ff9dd6a2d5b..ebadb29bdbe00caeb3fb16a95b7dde6f418db155 100644 --- a/tools/timeline.py +++ b/tools/timeline.py @@ -131,8 +131,12 @@ class Timeline(object): if (k, event.device_id, "CPU") not in self._devices: pid = self._allocate_pid() self._devices[(k, event.device_id, "CPU")] = pid - self._chrome_trace.emit_pid("%s:cpu:block:%d" % - (k, event.device_id), pid) + # -1 device id represents CUDA api call + if event.device_id == -1: + self._chrome_trace.emit_pid("%s:cuda_api" % k, pid) + else: + self._chrome_trace.emit_pid( + "%s:cpu:block:%d" % (k, event.device_id), pid) elif event.type == profiler_pb2.Event.GPUKernel: if (k, event.device_id, "GPUKernel") not in self._devices: pid = self._allocate_pid() @@ -150,7 +154,9 @@ class Timeline(object): pid = self._devices[(k, event.device_id, type)] args = {'name': event.name} if event.memcopy.bytes > 0: - args = {'mem_bytes': event.memcopy.bytes} + args['mem_bytes'] = event.memcopy.bytes + if event.detail_info: + args['detail_info'] = event.detail_info # TODO(panyx0718): Chrome tracing only handles ms. However, some # ops takes micro-seconds. Hence, we keep the ns here. self._chrome_trace.emit_region( @@ -173,7 +179,7 @@ if args.timeline_path: profile_paths = profile_path.split(',') profile_dict = dict() if len(profile_paths) == 1: - with open(profile_path, 'r') as f: + with open(profile_path, 'rb') as f: profile_s = f.read() profile_pb = profiler_pb2.Profile() profile_pb.ParseFromString(profile_s) @@ -181,7 +187,7 @@ if len(profile_paths) == 1: else: for profile_path in profile_paths: k, v = profile_path.split('=') - with open(v, 'r') as f: + with open(v, 'rb') as f: profile_s = f.read() profile_pb = profiler_pb2.Profile() profile_pb.ParseFromString(profile_s)