diff --git a/benchmark/fluid/mnist.py b/benchmark/fluid/mnist.py
index 43866da9cb113e9d49fc1c51f67da94cbc6bfd8e..dc10ac2ec195acc9a5693718141ddb32417dfb71 100644
--- a/benchmark/fluid/mnist.py
+++ b/benchmark/fluid/mnist.py
@@ -139,9 +139,6 @@ def run_benchmark(model, args):
# inference program
inference_program = fluid.default_main_program().clone()
- with fluid.program_guard(inference_program):
- inference_program = fluid.io.get_inference_program(
- target_vars=[batch_acc, batch_size_tensor])
# Optimization
opt = fluid.optimizer.AdamOptimizer(
@@ -161,7 +158,7 @@ def run_benchmark(model, args):
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=args.batch_size)
- accuracy = fluid.average.WeightedAverage()
+ accuracy = fluid.metrics.Accuracy()
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num):
accuracy.reset()
@@ -184,7 +181,7 @@ def run_benchmark(model, args):
"label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]
) # The accuracy is the accumulation of batches, but not the current batch.
- accuracy.add(value=outs[1], weight=outs[2])
+ accuracy.update(value=outs[1], weight=outs[2])
iters += 1
num_samples += len(y_data)
loss = np.array(outs[0])
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 6320b17520a687f88993b6f464d9115838b0f96b..52a22c1fbf4779fa3c0ca687cab664bd3ca0410a 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -62,29 +62,33 @@ endif()
## Then find the reference-cblas. www.netlib.org/blas/
-
-
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas")
-set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
- ${REFERENCE_CBLAS_ROOT}/include
- /usr/include
- /usr/include/cblas
-)
-
-set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
- ${REFERENCE_CBLAS_ROOT}/lib
- /usr/lib
- /usr/lib/blas/reference/
- /usr/lib/reference/
-)
+if(NOT CMAKE_CROSSCOMPILING)
+ set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
+ ${REFERENCE_CBLAS_ROOT}/include
+ /usr/include
+ /usr/include/cblas
+ )
+
+ set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
+ ${REFERENCE_CBLAS_ROOT}/lib
+ /usr/lib
+ /usr/lib/blas/reference/
+ /usr/lib/reference/
+ )
+else()
+  # Disable the finding of reference cblas under the host's system path
+ set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
+ set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
+endif()
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
-if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
+if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index 0853b981813c5d60a12603471df7e0b216b0822f..aa249159470773241e0f6da2e8e086264634dd4a 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
IF(APPLE)
- SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+ SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE()
- SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+ SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
ENDIF()
ExternalProject_Add(
extern_grpc
DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git"
- GIT_TAG "v1.8.x"
+ GIT_TAG "v1.11.x"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
diff --git a/cmake/external/nccl.cmake b/cmake/external/nccl.cmake
deleted file mode 100644
index af5c689c3524741a88518eeb3f85996872257677..0000000000000000000000000000000000000000
--- a/cmake/external/nccl.cmake
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if(NOT WITH_GPU)
- return()
-endif()
-
-include(ExternalProject)
-
-set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
-
-include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
-
-if(WITH_DSO)
- # If we use DSO, we do not build nccl, just download the dependencies
- set(NCCL_BUILD_COMMAND "")
- set(NCCL_INSTALL_COMMAND "")
- set(NCCL_INSTALL_DIR "")
-else()
- # otherwise, we build nccl and link it.
- set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
- # Note: cuda 8.0 is needed to make nccl
- # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
- set(NCCL_BUILD_COMMAND "make -j 8")
- set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}")
-endif()
-
-ExternalProject_Add(
- extern_nccl
- ${EXTERNAL_PROJECT_LOG_ARGS}
- GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
- GIT_TAG "v1.3.4-1"
- PREFIX "${NCCL_SOURCE_DIR}"
- UPDATE_COMMAND ""
- CONFIGURE_COMMAND ""
- BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
- INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
- INSTALL_DIR "${NCCL_INSTALL_DIR}"
- TEST_COMMAND ""
-)
-
-if(WITH_DSO)
- if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
- set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
- file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
- add_library(nccl STATIC ${dummyfile})
- else()
- add_library(nccl INTERFACE)
- endif()
-else()
- add_library(nccl STATIC IMPORTED GLOBAL)
- set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
- ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
-endif()
-
-add_dependencies(nccl extern_nccl)
diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake
index 71f54c425d4c38e271a8f1b78887d95a27252443..80282329c6ac65fbd1493a6838efca4bd9cadaad 100644
--- a/cmake/external/snappy.cmake
+++ b/cmake/external/snappy.cmake
@@ -11,19 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+if(MOBILE_INFERENCE OR RPI)
return()
-ENDIF()
+endif()
include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
-SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
-SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
-SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)
+set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
+set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
+set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
+
+set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
ExternalProject_Add(
extern_snappy
@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappy STATIC IMPORTED GLOBAL)
-set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
- "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
+set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
include_directories(${SNAPPY_INCLUDE_DIR})
add_dependencies(snappy extern_snappy)
diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake
index 8f7a3bf8eeaef75c8840f4ea318b484d33249bb7..20a96430823d07a07d4bb4602e7fc0cfe55c3bf2 100644
--- a/cmake/external/snappystream.cmake
+++ b/cmake/external/snappystream.cmake
@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR RPI)
return()
ENDIF()
@@ -21,9 +20,11 @@ include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
-SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
-SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
-SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE)
+set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
+set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
+set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
+
+set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
ExternalProject_Add(
extern_snappystream
@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappystream STATIC IMPORTED GLOBAL)
-set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
- "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
+set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})
include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappystream to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers.
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index c4c9f77df8d57fe162616d2250bd4dfe5b7754e7..1d3e2ade6d393c6e4c37eea0dc1064cdb18808a5 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc)
endif()
- if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
- # Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
- # WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
- target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
- list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
- else()
- target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
- endif()
+ target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif()
@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
- # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
- target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
- if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
- list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
- endif()
+ target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS}
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 0323cd9698cba916d2aa04403be97c0a6a463830..cc758019827b9a5416a801e4da43d754d4492a73 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -1,7 +1,22 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find_fluid_modules is used to collect all fluid modules for the paddle fluid static library
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+ string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
)
endif()
+if(NOT MOBILE_INFERENCE AND NOT RPI)
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
+ copy(snappy_lib
+ SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
+ copy(snappystream_lib
+ SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
+ copy(zlib_lib
+ SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+endif()
+
# paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
diff --git a/doc/fluid/design/motivation/fluid.md b/doc/fluid/design/motivation/fluid.md
index 5e147f8263e685a4665b5793f7127178cbc3cfdd..4b7696cc1bbf57ace72c4d31ffc2bfe6c1071939 100644
--- a/doc/fluid/design/motivation/fluid.md
+++ b/doc/fluid/design/motivation/fluid.md
@@ -119,7 +119,7 @@ An actual Fluid example is described [here](https://github.com/PaddlePaddle/Pad
From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop.
-We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/if_else_op.md) structure of Fluid.
+We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/if_else_op.md) structure of Fluid.
## Turing Completeness
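
As an aside on the `with`-wrapped loop structure mentioned above, here is a rough sketch of what such a Fluid loop looked like in the API of this period (the exact layer names, e.g. `fluid.layers.While`, `increment` and `less_than`, are recalled from that era's API and should be treated as assumptions rather than as part of this patch):

    import paddle.fluid as fluid

    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
    limit = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
    cond = fluid.layers.less_than(x=i, y=limit)

    while_op = fluid.layers.While(cond=cond)
    with while_op.block():
        # The loop body is appended to the program as operators, not run eagerly,
        # so the whole loop executes inside the C++ executor rather than in Python.
        i = fluid.layers.increment(x=i, in_place=True)
        fluid.layers.less_than(x=i, y=limit, cond=cond)

    exe = fluid.Executor(fluid.CPUPlace())
    print(exe.run(fluid.default_main_program(), fetch_list=[i]))
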
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 23615f8830e99633676c83ec5d28139a732c623c..4231f2bb5cd800c0cd86835b5d07e491fcde4989 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -65,39 +65,55 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
不使用PaddlePaddle.org工具
--------------------------
-使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
+使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。该方法与 `从源码编译PaddlePaddle `_ 相似,通过从源码中构建可用于编译PaddlePaddle文档的Docker镜像并运行,在进入Docker容器后使用源码中的脚本构建PaddlePaddle文档,具体步骤如下:
-[TBD]
+.. code-block:: bash
+
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # 从源码中构建可用于编译PaddlePaddle文档的Docker镜像
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
+
+ # 进入Docker容器后使用build.sh脚本构建PaddlePaddle文档
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+注:上述命令把当前目录(源码根目录)映射为 container 里的 :code:`/paddle` 目录。
+
+编译完成后,会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 、 ``api/en/html`` 共三个子目录,分别进入这些目录下,执行以下命令:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。
如果不想使用Docker,也可以使用以下命令直接构建PaddlePaddle文档,即
.. code-block:: bash
- mkdir paddle
- cd paddle
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# 如果只需要构建使用文档,则执行以下命令
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# 如果只需要构建API,则执行以下命令
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
-编译完成后,进入 ``doc/v2`` 目录,如果选择构建文档则会在该目录下生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会生成 ``api/en/html`` 目录,分别进入这些目录下,执行以下命令:
+编译完成后,同样会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,如果选择构建文档则会在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会在这两个目录下分别生成 ``api/en/html`` 目录,分别进入这些子目录下,执行以下命令:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-在浏览器中输入 http://localhost:8088 就可以看到编译生成的中/英文的文档页面和英文的API页面,下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。下图为生成的 ``v2`` 英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
.. image:: src/doc_en.png
:align: center
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index 15ff0d34ad622f100fe98d8738b830e47c35b41b..6105455e202e4704aa25f0fd9916b9b61a569702 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -68,39 +68,56 @@ Please `click here `_ on how to install Docker. After Docker is installed, you could use the scripts in the source directory to build the documentation.
+To build PaddlePaddle's documentation with Docker, you need to install Docker first. Please refer to `Docker's official website `_ on how to install Docker. This method is quite similar to `Build From Sources `_: it builds, from the source code, a Docker image that can be used to compile the PaddlePaddle documentation. Enter the Docker container and use the script ``build.sh`` in the source directory to build the PaddlePaddle documentation. The specific steps are as follows:
-[TBD]
+.. code-block:: bash
+
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # Construct a docker image from source code
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
+
+ # Use build.sh to build PaddlePaddle documentation
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+Note: The above commands map the current directory (the source root directory) to the :code:`/paddle` directory in the container.
+
+After compiling, two directories are generated: ``doc/v2`` and ``doc/fluid``. In each of them, three subdirectories ``cn/html/``, ``en/html`` and ``api/en/html`` are generated. Please enter these subdirectories respectively and execute the following command:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+Use a web browser and navigate to http://localhost:8088; you can see the compiled Chinese/English documentation pages and the English API pages for both the ``v2`` and ``fluid`` versions.
If you do not wish to use Docker, you can also use the following commands to directly build the PaddlePaddle documentation.
.. code-block:: bash
- mkdir paddle
- cd paddle
+
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# If you only need to build documents, use the following commands
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# If you only need to build APIs, use the following commands
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
$processors indicates that as many processes as the CPU cores are started to compile in parallel. It should be set according to the number of CPU cores of your machine.
-After the compilation is complete, enter the ``doc/v2`` directory. If you chose to build documents, it will generate ``cn/html/`` and ``en/html`` subdirectories under this directory. If you chose to build APIs,it will generate``api/en/html`` subdirectory. Please enter these directories respectively and execute the following commands:
+After compiling, the same two directories are generated: ``doc/v2`` and ``doc/fluid``. If you chose to build the documents, the subdirectories ``cn/html/`` and ``en/html`` will be generated in both directories. If you chose to build the APIs, a subdirectory ``api/en/html`` will be generated. Please enter these subdirectories respectively and execute the following command:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-Use a web browser and navigate to http://localhost:8000, you could see the compiled Chinese/English documents page and the English APIs page. The following figure is an example of the built English documents home page. Note that due to the sphinx's original theme used in the example, the style of the page is not consistent with the official website, but this does not affect the developer's debugging.
+Use a web browser and navigate to http://localhost:8088; you can see the compiled Chinese/English documentation pages and the English API pages for both the ``v2`` and ``fluid`` versions. The following figure shows an example of the built ``v2`` English documentation home page. Note that because the example uses sphinx's original theme, the page style differs from the official website, but this does not affect the developer's debugging.
.. image:: src/doc_en.png
:align: center
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index c44f8a8a8ecc1ba1f886fc41aec863b4ca3458a6..8b1ca5e16548334ed0c9a6d31b88e0805304579e 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
endif()
add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
+if(NOT MOBILE_INFERENCE AND NOT RPI)
add_subdirectory(fluid)
endif()
diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt
index d725763b01d5953985f8e090605f68a8419b5498..d274d96c29bdbf5973d568d783369c3975bdc436 100644
--- a/paddle/fluid/CMakeLists.txt
+++ b/paddle/fluid/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
-add_subdirectory(inference)
add_subdirectory(string)
add_subdirectory(recordio)
+# NOTE: please add the inference subdirectory last.
+add_subdirectory(inference)
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 3840bbe83b68dc2a49aa73feb57a80e9992cad5f..1f3ca24df16cf080d325fbdc0d613a828e384b2a 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-cc_library(backward SRCS backward.cc DEPS net_op)
-cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
-framework_proto backward glog lod_rank_table feed_fetch_method)
+framework_proto glog lod_rank_table feed_fetch_method)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor)
diff --git a/paddle/fluid/framework/backward.cc b/paddle/fluid/framework/backward.cc
deleted file mode 100644
index 1314af2b3dab281bd201e6a77bfbe87e0bd58ffb..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/backward.cc
+++ /dev/null
@@ -1,585 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/backward.h"
-#include "paddle/fluid/operators/net_op.h"
-
-#include
-#include
-#include
-#include
-
-#include "paddle/fluid/framework/block_desc.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/net_op.h"
-
-namespace paddle {
-namespace framework {
-
-static std::unordered_set* g_ctrl_flow_ops_ = nullptr;
-// Control Flow operators's backward is significantly different from
-// computational operators. Hack Code here.
-// We should design a better way to backward CtrlFlowOps.
-static std::unordered_set& CtrlFlowOps() {
- if (g_ctrl_flow_ops_ == nullptr) {
- g_ctrl_flow_ops_ = new std::unordered_set{
- "increment", "lod_rank_table", "less_than"};
- }
- return *g_ctrl_flow_ops_;
-}
-
-static inline std::unique_ptr CreateGradOp(
- const OperatorBase& op, const std::unordered_set& no_grad_set,
- std::unordered_map* grad_to_var) {
- OpDesc op_desc;
- op_desc.SetInputMap(op.Inputs());
- op_desc.SetOutputMap(op.Outputs());
- op_desc.SetType(op.Type());
- op_desc.SetAttrMap(op.Attrs());
- auto& info = OpInfoMap::Instance().Get(op.Type());
- auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, {});
- std::vector> grad_ops;
- grad_ops.reserve(grad_descs.size());
- std::transform(grad_descs.begin(), grad_descs.end(),
- std::back_inserter(grad_ops),
- [](const std::unique_ptr& grad_desc) {
- return OpRegistry::CreateOp(*grad_desc);
- });
- PADDLE_ENFORCE(!grad_ops.empty());
- if (grad_ops.size() == 1) {
- return std::move(grad_ops[0]);
- } else {
- auto net_op = new operators::NetOp();
- for (auto& grad_op : grad_ops) {
- net_op->AppendOp(std::move(grad_op));
- }
- net_op->CompleteAddOp();
- return std::unique_ptr(net_op);
- }
-}
-
-template
-static void ForEachVarName(const Map& names, T callback) {
- for (auto& name : names) {
- for (auto& n : name.second) {
- if (callback(n)) return;
- }
- }
-}
-
-// return whether all the names + suffixes in the set
-static bool AllInSet(
- const std::map>& names,
- const std::string& suffix, const std::unordered_set& set) {
- bool all_in_set = true;
- ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) {
- all_in_set = set.find(n + suffix) != set.end();
- return !all_in_set;
- });
- return all_in_set;
-}
-
-static std::unique_ptr NOP() {
- auto net_op = new operators::NetOp();
- net_op->SetType("@NOP@");
- net_op->CompleteAddOp();
- return std::unique_ptr(net_op);
-}
-
-// Get backward operator from a forward operator, a recursive implementation.
-//
-// no_grad_names the gradient variable names without gradient calculating.
-//
-// uniq_id is a unique index used inside recursively calling
-// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and
-// pass `uniq_id` through recursive calling.
-//
-// returns The backward operator. In a simple situation, it may be a simple
-// operator, in a complex situation, it maybe a NetOp.
-//
-// See Backward.h for details
-static std::unique_ptr BackwardRecursive(
- const OperatorBase& forwardOp,
- std::unordered_set& no_grad_names,
- std::unordered_map* grad_to_var,
- size_t& uniq_id) {
- // If all input gradients of forwarding operator do not need to calculate,
- // just return an NOP. Not return null ptr because NOP does not take
- // too much time for calculation, but it is useful for simplifying logic.
- if (AllInSet(forwardOp.Inputs() /*names*/, kGradVarSuffix /*suffix*/,
- no_grad_names /*set*/)) {
- return NOP();
- }
-
- // All output gradients of forwarding operator do not need to calculate.
- // Then all input gradients cannot be computed at all, and we put them into
- // `no_grad_names` set. Return an NOP.
- if (AllInSet(forwardOp.Outputs() /*names*/, kGradVarSuffix /*suffix*/,
- no_grad_names /*set*/)) {
- ForEachVarName(forwardOp.Inputs(),
- [&no_grad_names](const std::string& name) -> bool {
- no_grad_names.insert(GradVarName(name));
- return false;
- });
- return NOP();
- }
-
- // Returned gradient network
- auto net = std::unique_ptr(new operators::NetOp());
-
- if (forwardOp.IsNetOp()) {
- // Because forwardOp is a net op, it can static_cast.
- auto& forwardNet = static_cast(forwardOp);
-
- // Map from output gradient variable name to operator's indices in
- // backward net's ops_. That operator generates that variable.
- std::unordered_map> dup_output_ops;
-
- size_t local_op_id = 0;
- // reversely travel forwardNet and collect all duplicate outputs.
- for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
- ++it, ++local_op_id) {
- auto& fwd = *it;
- auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
- ForEachVarName(bwd->Outputs(),
- [&dup_output_ops, local_op_id](const std::string& out) {
- dup_output_ops[out].emplace_back(local_op_id);
- return false;
- });
- net->AppendOp(std::move(bwd));
- }
- // Get unique ID for this method.
- auto uid = uniq_id++;
- // TODO(dzh): more comment
- // multiple operators which have the same output (y for example) may
- // overwrite the same y variable when backward, special operations are token
- // to handle this case. For each duplicate output, rename it to an alias
- // (original name with a offset), append an `add` op for its operator,
- // and finally sum all the alias variable to the final output variable y.
- using Pos = std::pair>;
- std::list insert_position;
- for (auto& dup_output_op : dup_output_ops) {
- const std::string& name = dup_output_op.first;
- // duplicate @Empty@ don't need to be added
- if (name == kEmptyVarName) continue;
-
- auto& dup_op = dup_output_op.second;
- // no duplicate output
- if (dup_op.size() == 1) continue;
-
- // process the duplicate outputs
- std::vector dup_outputs;
- for (size_t i = 0; i < dup_op.size(); ++i) {
- // rename each duplicate output to an alias
- auto op_offset = dup_op[i];
- dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" +
- std::to_string(i));
- net->ops_[op_offset]->Rename(name, dup_outputs.back());
- }
- // collect all the offset for each alias,
- // insert a sum operator to add all aliases to output
- insert_position.push_back(
- {dup_op.back(),
- OpRegistry::CreateOp("sum", {{"X", dup_outputs}}, {{"Out", {name}}},
- AttributeMap{})});
- }
-
- // make sure the inserted `sum` ops follow the BFS order.
- insert_position.sort(
- [](const Pos& l, const Pos& r) { return l.first > r.first; });
-
- for (auto& pos : insert_position) {
- net->InsertOp(pos.first + 1, std::move(pos.second));
- }
- } else {
- std::unique_ptr grad_op(
- CreateGradOp(forwardOp, no_grad_names, grad_to_var));
-
- ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
- const std::string& grad_input) {
- if (no_grad_names.count(grad_input)) {
- // +1 for \0
- std::string prefix = grad_input.substr(
- 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
- grad_op->Rename(grad_input, prefix + kZeroVarSuffix);
-
- // If part of input gradient of that operator is not calculated, fill
- // zero variables to that input gradient.
- net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
- {{"Out", {grad_input}}},
- AttributeMap{}));
- }
- return false;
- });
-
- ForEachVarName(grad_op->Outputs(),
- [&no_grad_names, &grad_op](const std::string& grad_output) {
- if (no_grad_names.count(grad_output)) {
- grad_op->Rename(grad_output, kEmptyVarName);
- }
- return false;
- });
-
- if (net->ops_.empty()) { // Current no aux op is added to network
- return grad_op;
- }
- net->AppendOp(std::move(grad_op));
- }
- net->SetType("@GENERATED_BACKWARD@");
- net->CompleteAddOp();
- return std::unique_ptr(
- static_cast(net.release()));
-}
-
-// See header for comments
-std::unique_ptr Backward(
- const OperatorBase& forwardOp,
- const std::unordered_set& no_grad_vars) {
- std::unordered_set no_grad_names;
- no_grad_names.reserve(no_grad_vars.size() + 1);
-
- no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
-
- for (auto& name : no_grad_vars) {
- no_grad_names.insert(name + kGradVarSuffix);
- }
- size_t uid = 0;
- std::unordered_map grad_to_var;
- return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
-}
-
-// ==================================== //
-
-static bool AllGradInSet(const std::vector& names,
- const std::unordered_set& set) {
- for (const std::string& name : names) {
- if (!set.count(GradVarName(name))) {
- return false;
- }
- }
- if (VLOG_IS_ON(10)) {
- std::ostringstream sout;
- sout << "All input {";
- for (auto& name : names) {
- sout << name << ",";
- }
- sout << "} is in {";
- for (auto& name : set) {
- sout << name << ",";
- }
- sout << "}";
- VLOG(10) << sout.str();
- }
- return true;
-}
-
-static std::string FwdName(const std::string& grad_name) {
- auto pos = grad_name.find("@GRAD");
- if (pos == std::string::npos) {
- return "";
- } else {
- return grad_name.substr(0, pos);
- }
-}
-
-static void CreateGradVarInBlock(
- size_t grad_op_start_index,
- const std::unordered_map& param_name_map,
- BlockDesc* block_desc,
- std::unordered_map* grad_var_record) {
- auto ops = block_desc->AllOps();
- for (size_t op_index = grad_op_start_index; op_index < ops.size();
- ++op_index) {
- std::unordered_set new_vars;
- auto& ctrl_flow_ops = CtrlFlowOps();
- ForEachVarName(ops[op_index]->Outputs(),
- [&](const std::string& grad_var_name) {
- if (ctrl_flow_ops.find(ops[op_index]->Type()) !=
- ctrl_flow_ops.end()) {
- if (block_desc->HasVarRecursive(grad_var_name)) {
- return false;
- }
- } else {
- if (block_desc->HasVar(grad_var_name)) {
- return false;
- }
- }
- if (grad_var_name == framework::kEmptyVarName) {
- return false;
- }
- auto var = block_desc->Var(grad_var_name);
- VLOG(10) << "Creating Variable " << grad_var_name;
- new_vars.insert(var->Name());
- auto it = param_name_map.find(grad_var_name);
- if (it == param_name_map.end()) {
- return false;
- }
- auto param_var_name = it->second;
- auto& grad_record = (*grad_var_record)[param_var_name];
- grad_record.name_ = grad_var_name;
- grad_record.block_idx_ = block_desc->ID();
- grad_record.op_idx_ = static_cast(op_index);
- return false; /* not break */
- });
- ops[op_index]->InferVarType(block_desc);
- for (auto& arg : ops[op_index]->OutputArgumentNames()) {
- if (new_vars.find(arg) == new_vars.end()) {
- continue;
- }
- auto pname = FwdName(arg);
- auto* param = block_desc->FindVarRecursive(pname);
- auto* grad = block_desc->FindVar(arg);
- if (param == nullptr) {
- grad->SetDataType(proto::VarType::FP32);
- } else {
- grad->SetDataType(param->GetDataType());
- }
- }
- ops[op_index]->InferShape(*block_desc);
- }
-}
-
-std::vector> MakeOpGrad(
- const OpDesc* op_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- const std::vector& grad_block = std::vector()) {
- std::vector> grad_op_descs;
- // All input gradients of forwarding operator do not need to calculate.
- const std::vector& inputs = op_desc->InputArgumentNames();
- if (AllGradInSet(inputs, *no_grad_vars)) {
- VLOG(10) << "Drop operator " << op_desc->Type();
- return grad_op_descs; // empty vector
- }
-
- // All output gradients of forwarding operator do not need to calculate.
- const std::vector& outputs = op_desc->OutputArgumentNames();
-
- if (AllGradInSet(outputs, *no_grad_vars)) {
- VLOG(10) << "Drop operator " << op_desc->Type();
- // FIXME: Hack code here
- auto& ctrl_flow_ops = CtrlFlowOps();
- if (ctrl_flow_ops.find(op_desc->Type()) == ctrl_flow_ops.end()) {
- // Only computational op need drop input's gradient.
- for (const std::string& name : inputs) {
- no_grad_vars->insert(GradVarName(name));
- VLOG(10) << " Also drop " << GradVarName(name);
- }
- }
-
- return grad_op_descs; // empty vector
- }
-
- grad_op_descs =
- OpInfoMap::Instance()
- .Get(op_desc->Type())
- .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block);
-
- std::list> pending_fill_zeros_ops;
- for (auto& desc : grad_op_descs) {
- for (const std::string& in_name : desc->InputArgumentNames()) {
- if (no_grad_vars->count(in_name)) {
- std::string prefix = in_name.substr(
- 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
- std::string new_name = prefix + kZeroVarSuffix;
- desc->Rename(in_name, new_name);
- std::unique_ptr fill_zeros_op(
- new OpDesc("fill_zeros_like", {{"X", {prefix}}},
- {{"Out", {new_name}}}, AttributeMap{}));
- pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
- }
- }
- }
-
- for (auto& p : pending_fill_zeros_ops) {
- grad_op_descs.insert(grad_op_descs.begin(), std::move(p));
- }
- return grad_op_descs;
-}
-
-static BlockDesc* CreateStepBlock(
- ProgramDesc& program_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- int step_block_idx);
-
-std::vector> MakeBlockBackward(
- ProgramDesc& program_desc, int block_idx,
- std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var) {
- VLOG(5) << "MakeBlockBackward";
- BlockDesc* cur_block = program_desc.MutableBlock(block_idx);
- std::vector op_descs = cur_block->AllOps();
- std::unordered_map> dup_out_ops;
- size_t grad_desc_idx = 0;
- std::vector> backward_descs;
-
- for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
- VLOG(5) << "Making backward " << (*it)->Type() << " op";
- std::vector> op_grads;
-
- if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" ||
- (*it)->Type() == "parallel_do") {
- int step_block_idx = (*it)->GetBlockAttr("sub_block");
- BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
- grad_to_var, step_block_idx);
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
- } else if ((*it)->Type() == "conditional_block") {
- BlockDesc* backward_block =
- CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
- (*it)->GetBlockAttr("sub_block"));
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
- } else {
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
- }
-
- if (VLOG_IS_ON(10)) {
- std::ostringstream sout;
- sout << "Made ";
- for (auto& op_grad : op_grads) {
- sout << op_grad->Type() << " ";
- }
- VLOG(10) << sout.str();
- }
-
- for (const auto& desc : op_grads) {
- for (const std::string& out_name : desc->OutputArgumentNames()) {
- if (out_name.find("@GRAD") == std::string::npos) {
- // Not all outputs of a backward operator is a gradient. Only gradient
- // need to be sum. Skip variables are not gradient.
- continue;
- }
- dup_out_ops[out_name].emplace_back(grad_desc_idx);
- }
- ++grad_desc_idx;
- }
- std::transform(op_grads.begin(), op_grads.end(),
- std::back_inserter(backward_descs),
- [](std::unique_ptr& ptr) { return std::move(ptr); });
- }
-
- VLOG(5) << "Appending Sums";
- // Check whether some variables are written more than once
- std::list>> pending_sum_ops;
- for (const auto& dup : dup_out_ops) {
- const std::string& out_name = dup.first;
- const std::vector dup_op = dup.second;
- if (out_name != kEmptyVarName && dup_op.size() > 1) {
- std::vector sum_op_inputs;
- std::string next_g_name = out_name;
- for (size_t i = 0; i < dup_op.size(); ++i) {
- VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name
- << " duplicated";
- std::string new_name = out_name + "@RENAME@" + std::to_string(i);
- backward_descs[dup_op[i]]->RenameOutput(out_name, new_name);
- backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name);
- sum_op_inputs.emplace_back(new_name);
- next_g_name = sum_op_inputs.back();
- }
- std::unique_ptr sum_op(new OpDesc("sum", {{"X", sum_op_inputs}},
- {{"Out", {out_name}}},
- AttributeMap{}));
- pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
- }
- }
-
- pending_sum_ops.sort([](const std::pair>& a,
- const std::pair>& b) {
- return a.first > b.first;
- });
- for (auto& p : pending_sum_ops) {
- backward_descs.insert(backward_descs.begin() + p.first + 1,
- std::move(p.second));
- }
-
- VLOG(5) << "MakeBlockBackward Finished";
-
- return backward_descs;
-}
-
-static BlockDesc* CreateStepBlock(
- ProgramDesc& program_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- int step_block_idx) {
- auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
- no_grad_vars, grad_to_var);
- BlockDesc* backward_block =
- program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
- for (auto& ptr : backward_block_op_descs) {
- backward_block->AppendAllocatedOp(move(ptr));
- }
- return backward_block;
-}
-
-ParamGradInfoMap AppendBackward(
- ProgramDesc& program_desc, const VarDesc& target,
- const std::unordered_set& no_grad_vars) {
- std::unordered_set no_grad_var_names;
- no_grad_var_names.reserve(no_grad_vars.size() + 1);
- no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
- for (auto& name : no_grad_vars) {
- no_grad_var_names.insert(GradVarName(name));
- }
-
- const int root_block_idx = 0;
- auto root_block = program_desc.MutableBlock(root_block_idx);
-
- std::string fill_one_op_out = GradVarName(target.Name());
- bool is_scalar = target.GetShape() == std::vector{1};
- PADDLE_ENFORCE(is_scalar, "target should be scalar");
- VLOG(3) << "backward from loss=" << target.Name()
- << " data_type=" << target.GetDataType();
- std::unique_ptr fill_one_op(
- new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}},
- {{"shape", std::vector{1}},
- {"value", static_cast(1.0)},
- {"dtype", target.GetDataType()}}));
- // infer var type of fill_one_op
- fill_one_op->InferVarType(root_block);
-
- root_block->AppendAllocatedOp(std::move(fill_one_op));
- size_t forward_op_num = root_block->OpSize();
- size_t forward_block_num = program_desc.Size();
-
- // Insert backward operators
- std::unordered_map grad_to_var;
- auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
- &no_grad_var_names, &grad_to_var);
-
- for (auto& ptr : backward_op_descs) {
- root_block->AppendAllocatedOp(std::move(ptr));
- }
- // Create Variable
-
- // Create target gradient variable
- std::unordered_map retv;
-
- auto var = root_block->Var(fill_one_op_out);
- var->SetDataType(target.GetDataType());
- var->SetShape(target.GetShape());
- auto& target_grad = retv[target.Name()];
- target_grad.name_ = fill_one_op_out;
- target_grad.block_idx_ = root_block_idx;
- target_grad.op_idx_ = static_cast(forward_op_num);
-
- // create grad_var for all blocks in this program
- CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
- for (size_t block_index = forward_block_num;
- block_index < program_desc.Size(); ++block_index) {
- CreateGradVarInBlock(0, grad_to_var, program_desc.MutableBlock(block_index),
- &retv);
- }
- return retv;
-}
-
-} // namespace framework
-} // namespace paddle
diff --git a/paddle/fluid/framework/backward.h b/paddle/fluid/framework/backward.h
deleted file mode 100644
index 3a971090c25c85efbf976532c364371baba9a870..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/backward.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include
-#include
-#include
-
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/program_desc.h"
-
-namespace paddle {
-namespace framework {
-
-// Create the backward operator from a forward operator.
-// TODO(yuyang18): Add more API reference comment.
-extern std::unique_ptr Backward(
- const OperatorBase& forwardOp,
- const std::unordered_set& no_grad_vars);
-
-struct GradVarInfo {
- GradVarInfo() {}
- GradVarInfo(const std::string& name, int block_idx, int op_idx)
- : name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
-
- bool operator==(const GradVarInfo& b) const {
- return name_ == b.name_ && block_idx_ == b.block_idx_ &&
- op_idx_ == b.op_idx_;
- }
-
- std::string name_;
- int block_idx_;
- int op_idx_;
-};
-
-using ParamGradInfoMap = std::unordered_map;
-
-ParamGradInfoMap AppendBackward(
- ProgramDesc& program_desc, const VarDesc& target,
- const std::unordered_set& no_grad_vars);
-
-} // namespace framework
-} // namespace paddle
diff --git a/paddle/fluid/framework/backward_test.cc b/paddle/fluid/framework/backward_test.cc
deleted file mode 100644
index cc1f871360ed3f7071364dbb0f932bfd997cadb0..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/backward_test.cc
+++ /dev/null
@@ -1,918 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/backward.h"
-
-#include
-#include "paddle/fluid/framework/block_desc.h"
-#include "paddle/fluid/framework/op_desc.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/var_desc.h"
-#include "paddle/fluid/operators/net_op.h"
-
-USE_NO_KERNEL_OP(fill_constant);
-
-namespace paddle {
-namespace framework {
-
-using DeviceContext = platform::DeviceContext;
-
-class NoneOp : public framework::OperatorWithKernel {
- public:
- using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
- void InferShape(framework::InferShapeContext *ctx) const override {}
-};
-
-template
-class NoneKernel : public framework::OpKernel {
- public:
- void Compute(const framework::ExecutionContext &context) const override {}
-};
-
-class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
- public:
- RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "Input X of Add");
- AddInput("b", "Bias of Add");
- AddOutput("Out", "Out of Add");
- AddComment("Add Op");
- }
-};
-
-class RowWiseAddGradMaker : public SingleGradOpDescMaker {
- public:
- using SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
- std::unique_ptr Apply() const override {
- auto grad_op = new OpDesc();
- grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
- grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
- grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
- grad_op->SetType("rowwise_add_grad");
- return std::unique_ptr(grad_op);
- }
-};
-
-class MulOpMaker : public OpProtoAndCheckerMaker {
- public:
- MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "A");
- AddInput("Y", "B");
- AddOutput("Out", "Out");
- AddAttr("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
- AddAttr("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
- AddComment("Mul");
- }
-};
-
-class SigmoidOpMaker : public OpProtoAndCheckerMaker {
- public:
- SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "X");
- AddOutput("Out", "Y");
- AddComment("Sigmoid");
- }
-};
-
-class NoGradOpMaker : public OpProtoAndCheckerMaker {
- public:
- NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "X input");
- AddOutput("Out", "Y output");
- AddComment("NoGradOp, same input output. no Grad");
- }
-};
-
-class FcOp : public operators::NetOp {
- public:
- FcOp(const std::string &type, const VariableNameMap &inputs,
- const VariableNameMap &outputs, const AttributeMap &attrs)
- : NetOp(type, inputs, outputs, attrs) {
- AppendOp(OpRegistry::CreateOp(
- "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}},
- {{"Out", {Output("mul_result")}}}, AttributeMap{}));
- auto input_b = Inputs("b");
- std::string before_act = "mul_result";
- if (input_b.size() != 0) {
- AppendOp(OpRegistry::CreateOp(
- "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}},
- {{"Out", {Output("add_result")}}}, AttributeMap{}));
- before_act = "add_result";
- } else {
- auto out_varname = Output("add_result");
- if (out_varname != kEmptyVarName) {
- this->Rename(out_varname, kEmptyVarName);
- }
- }
-
- AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}},
- {{"Out", {Output("Out")}}}, AttributeMap{}));
- CompleteAddOp(false);
- }
-};
-
-class FcOpMaker : public OpProtoAndCheckerMaker {
- public:
- FcOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddInput("W", "w");
- AddInput("b", "b");
- AddOutput("mul_result", "").AsIntermediate();
- AddOutput("add_result", "").AsIntermediate();
- AddOutput("Out", "");
- AddComment("");
- }
-};
-
-class ManyOutputOpMaker : public OpProtoAndCheckerMaker {
- public:
- ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("x", "x");
- AddOutput("y", "y");
- AddOutput("z", "z");
- AddComment("");
- }
-};
-
-class FillZeroOpMaker : public OpProtoAndCheckerMaker {
- public:
- FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddOutput("Out", "out");
- AddComment("");
- }
-};
-
-class SumOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
- SumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "the input tensors of sum operator.").AsDuplicable();
- AddOutput("Out", "the output tensor of sum operator.");
- AddComment("");
- }
-};
-
-class MultInOutOpMaker : public OpProtoAndCheckerMaker {
- public:
- MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddInput("H", "h");
- AddOutput("Y", "y");
- AddOutput("Z", "z");
- AddComment("");
- }
-};
-
-class MinusGradOpDescMaker : public GradOpDescMakerBase {
- public:
- using GradOpDescMakerBase::GradOpDescMakerBase;
-
- std::vector> operator()() const override {
- std::vector> retv;
- auto x_g = InputGrad("X");
- if (!x_g.empty()) {
- auto *op_desc = new OpDesc();
- op_desc->SetType("scale");
- op_desc->SetInput("X", OutputGrad("Out"));
- op_desc->SetOutput("Out", x_g);
- op_desc->SetAttr("scale", 1.0f);
- retv.emplace_back(op_desc);
- }
-
- auto y_g = InputGrad("Y");
- if (!y_g.empty()) {
- auto *op_desc = new OpDesc();
- op_desc->SetType("scale");
- op_desc->SetInput("X", OutputGrad("Out"));
- op_desc->SetOutput("Out", y_g);
- op_desc->SetAttr("scale", -1.0f);
- retv.emplace_back(op_desc);
- }
- return retv;
- }
-};
-
-class MinusOpMaker : public OpProtoAndCheckerMaker {
- public:
- MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "");
- AddInput("Y", "");
- AddOutput("Out", "");
- AddComment("minus for unittest");
- }
-};
-} // namespace framework
-} // namespace paddle
-
-namespace f = paddle::framework;
-namespace ops = paddle::operators;
-using EnforceNotMet = paddle::platform::EnforceNotMet;
-// rowwise_add
-REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
- f::RowWiseAddGradMaker);
-REGISTER_OP_CPU_KERNEL(rowwise_add,
- f::NoneKernel);
-REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
- f::NoneKernel);
-// mul
-REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel);
-REGISTER_OP_CPU_KERNEL(mul_grad,
- f::NoneKernel);
-// sigmoid
-REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(sigmoid,
- f::NoneKernel);
-REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
-// fill_zeros_like
-REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
-REGISTER_OP_CPU_KERNEL(fill_zeros_like,
- f::NoneKernel);
-// sum
-REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel);
-REGISTER_OP_CPU_KERNEL(sum_grad,
- f::NoneKernel);
-// fc
-REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
-// many_output_op
-REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
- many_output_op_grad, f::NoneOp);
-// mult_in_out
-REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
- f::NoneOp);
-REGISTER_OP_CPU_KERNEL(mult_in_out,
- f::NoneKernel);
-REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
- f::NoneKernel);
-// minus
-REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
-REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel);
-// scale
-REGISTER_OPERATOR(scale, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel);
-
-TEST(Backward, simple_op_not_need_grad) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
- auto gop = f::Backward(*fwd, {"x"});
- ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName);
-
- auto no_input_gop = f::Backward(*fwd, {"x", "b"});
- ASSERT_NE(no_input_gop, nullptr);
- ASSERT_TRUE(no_input_gop->IsNetOp());
- ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size());
-}
-
-TEST(Backward, net_fc_backward_normal) {
- std::shared_ptr fwd =
- f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
- {{"mul_result", {"mul_res"}},
- {"add_result", {"add_re"}},
- {"Out", {"out"}}},
- f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
- std::shared_ptr gop =
- f::Backward(*fwd, std::unordered_set{});
- ASSERT_TRUE(gop->IsNetOp());
- auto net = static_cast(gop.get());
-
- ASSERT_NO_THROW(net->DebugString());
-
- ASSERT_EQ(3UL, net->ops_.size());
-
- f::OperatorBase &d_sigmoid = *net->ops_[0];
- ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
-
- f::OperatorBase &d_add = *net->ops_[1];
- ASSERT_EQ("rowwise_add_grad", d_add.Type());
-
- f::OperatorBase &d_mul = *net->ops_[2];
- ASSERT_EQ("mul_grad", d_mul.Type());
-}
-
-TEST(Backward, net_fc_backward_not_have_b) {
- std::shared_ptr fwd =
- f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}},
- {{"mul_result", {"mul_res"}},
- {"add_result", {"add_res"}},
- {"Out", {"tmp"}}},
- f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
- std::shared_ptr gop =
- f::Backward(*fwd, std::unordered_set{});
- ASSERT_TRUE(gop->IsNetOp());
- auto net = static_cast(gop.get());
-
- ASSERT_NO_THROW(net->DebugString());
-
- ASSERT_EQ(2UL, net->ops_.size());
-
- f::OperatorBase &d_sigmoid = *net->ops_[0];
- ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
-
- f::OperatorBase &d_mul = *net->ops_[1];
- ASSERT_EQ("mul_grad", d_mul.Type());
-}
-
-TEST(Backward, net_input_of_network_not_need_grad) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}},
- {{"mul_result", {"mul_tmp_0"}},
- {"add_result", {"add_tmp_0"}},
- {"Out", {"hidden0"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}},
- {{"mul_result", {"mul_tmp_1"}},
- {"add_result", {"add_tmp_1"}},
- {"Out", {"hidden1"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
- auto bwd = Backward(net, {"x"}); // x@GRAD is not need.
- ASSERT_TRUE(bwd->IsNetOp());
- auto bwd_net = static_cast(bwd.get());
-
- auto output_vars = bwd_net->OutputVars(true);
- std::unordered_set all_outputs =
- std::unordered_set(output_vars.begin(), output_vars.end());
- all_outputs.erase(f::kEmptyVarName);
-
- for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
- ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end());
- }
-
- // Not Generated X
- ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end());
-
- ASSERT_EQ(2UL, bwd_net->ops_.size());
- ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
- auto first_fc_grad = static_cast(bwd_net->ops_[1].get());
- ASSERT_EQ(3UL, first_fc_grad->ops_.size());
- ASSERT_EQ(f::kEmptyVarName,
- first_fc_grad->ops_[2]->Output(f::GradVarName("X")));
-}
-
-TEST(Backward, net_shared_weight) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}},
- {{"Out", {"out"}}}, f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}},
- {{"Out", {"FinalOut"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
-
-  auto bwd = f::Backward(net, std::unordered_set<std::string>{});
- ASSERT_TRUE(bwd->IsNetOp());
-  auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
- ASSERT_EQ(3UL, bwd_net->ops_.size());
- ASSERT_EQ("sum", bwd_net->ops_[2]->Type());
-}
-
-TEST(Backward, op_all_input_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"x", "b"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_TRUE(net->ops_.empty());
-}
-
-TEST(Backward, op_all_output_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"out"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_TRUE(net->ops_.empty());
-}
-
-TEST(Backward, op_part_of_output_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}},
- {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"Z"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_EQ(net->ops_.size(), 2UL);
-
- auto &fill_zero = *net->ops_[0];
- ASSERT_EQ("fill_zeros_like", fill_zero.Type());
- ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
- ASSERT_EQ("Z", fill_zero.Input("X"));
- ASSERT_EQ(1UL, fill_zero.Outputs("Out").size());
- ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out"));
-
- auto &d_many_out = *net->ops_[1];
- ASSERT_EQ("many_output_op_grad", d_many_out.Type());
- ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG
- ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix,
- d_many_out.Input(f::GradVarName("z")));
- ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y")));
- ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x")));
-}
-
-TEST(Backward, op_part_of_input_are_not_need) {
- auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"a"});
- auto &grad_mul = *backward;
- ASSERT_EQ(grad_mul.Type(), "mul_grad");
- ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL);
- ASSERT_EQ(grad_mul.Outputs().size(), 2UL);
- ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName);
- ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b"));
- ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
- ASSERT_EQ(grad_mul.Input("X"), "a");
- ASSERT_EQ(grad_mul.Input("Y"), "b");
- ASSERT_EQ(grad_mul.Input("Out"), "out");
-}
-
-TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}},
- {{"mul_result", {"mul_out1"}},
- {"add_result", {"add_out1"}},
- {"Out", {"out1"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}},
- {{"mul_result", {"mul_out2"}},
- {"add_result", {"tmp_out2"}},
- {"Out", {"out2"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}},
- {{"mul_result", {"mul_out3"}},
- {"add_result", {"tmp_out3"}},
- {"Out", {"out3"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
-
- auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto bwd_net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_EQ(bwd_net->ops_.size(), 3UL);
- auto &grad_fc = *bwd_net->ops_[0];
-
- const char *all = paddle::operators::NetOp::kAll;
- EXPECT_EQ(grad_fc.Inputs(all).size(),
- 2UL /* external input number */
- + 1UL /* external output number*/
- + 1UL /* number of gradient of external output*/
- + 2UL /* internal variable number*/
- );
- EXPECT_EQ(grad_fc.Outputs(all).size(),
- 2UL /* input number of mul*/
- + 2UL /* input number of rowwise_add*/
-                + 1UL /* input number of sigmoid */
- - 1UL /* out2 is not needed*/);
- EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL);
-}
-
-TEST(Backward, simple_single_op) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
-
- f::OpDesc *op = block->AppendOp();
- op->SetType("rowwise_add");
- op->SetInput("X", {"x"});
- op->SetInput("b", {"b"});
- op->SetOutput("Out", {"out"});
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 3UL);
- f::OpDesc *fill_op = block->AllOps()[1];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op = block->AllOps()[2];
- EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out")}));
-  EXPECT_EQ(grad_op->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x")}));
-  EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b")}));
-
- EXPECT_EQ(var_to_grad.size(), 3UL);
- EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
- EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("b")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x")));
-}
-
-TEST(Backward, default_attribute) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op = block->AppendOp();
- op->SetType("mul");
- op->SetInput("X", {"x"});
- op->SetInput("Y", {"y"});
- op->SetOutput("Out", {"out"});
- op->CheckAttrs();
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
-  AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 3UL);
-  EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
-  EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
-
- f::OpDesc *fill_op = block->AllOps()[1];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op = block->AllOps()[2];
- ASSERT_EQ(grad_op->Type(), "mul_grad");
-  EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
-  EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
-}
-
-TEST(Backward, simple_mult_op) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"out1"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out2"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- auto target = f::VarDesc("out3");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 6UL + 1);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op1 = block->AllOps()[6];
- EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- f::OpDesc *grad_op2 = block->AllOps()[5];
- EXPECT_EQ(grad_op2->Type(), "mul_grad");
- ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
- EXPECT_EQ(grad_op2->Input("X"), std::vector({"out1"}));
- EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"}));
- EXPECT_EQ(grad_op2->Input("Out"), std::vector({"out2"}));
- EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")),
- std::vector({f::GradVarName("out2")}));
- EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
- std::vector({f::GradVarName("out1")}));
- EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
- std::vector({f::GradVarName("y2")}));
-
- f::OpDesc *grad_op3 = block->AllOps()[4];
- EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out3")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out2")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b3")}));
-
- EXPECT_EQ(var_to_grad.size(), 7UL);
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 5));
- EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
- EXPECT_EQ(var_to_grad.at("out2"),
- f::GradVarInfo(f::GradVarName("out2"), 0, 4));
- EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
-}
-
-TEST(Backward, intermedia_var_no_grad) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"x2"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out2"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- f::OpDesc *op4 = block->AppendOp();
- op4->SetType("mul");
- op4->SetInput("X", {"out1"});
- op4->SetInput("Y", {"out3"});
- op4->SetOutput("Out", {"out4"});
-
- auto target = f::VarDesc("out4");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"out3"});
-
- ASSERT_EQ(block->AllOps().size(), 7UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op1 = block->AllOps()[6];
- EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- f::OpDesc *grad_op4 = block->AllOps()[5];
- EXPECT_EQ(grad_op4->Type(), "mul_grad");
- ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
- EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"}));
- EXPECT_EQ(grad_op4->Input("Y"), std::vector({"out3"}));
- EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out4"}));
- EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
- std::vector({f::GradVarName("out4")}));
- EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
- std::vector({f::GradVarName("out1")}));
- EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector());
-
- EXPECT_EQ(var_to_grad.size(), 4UL);
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 5));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
-}
-
-TEST(Backward, var_no_grad) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("mult_in_out");
- op1->SetInput("X", {"x1"});
- op1->SetInput("H", {"h1"});
- op1->SetOutput("Y", {"y1"});
- op1->SetOutput("Z", {"z1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mult_in_out");
- op2->SetInput("X", {"y1"});
- op2->SetInput("H", {"z1"});
- op2->SetOutput("Y", {"y2"});
- op2->SetOutput("Z", {"z2"});
-
- auto target = f::VarDesc("z2");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"z1"});
-
- ASSERT_EQ(block->AllOps().size(), 6UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op2 = block->AllOps()[3];
- ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
- ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
- ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
- EXPECT_EQ(grad_op2->Input("X"), std::vector({"y1"}));
- EXPECT_EQ(grad_op2->Input("H"), std::vector({"z1"}));
- EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"}));
- EXPECT_EQ(grad_op2->Input("Z"), std::vector({"z2"}));
- EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")),
- std::vector({f::GradVarName("y2")}));
- EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")),
- std::vector({f::GradVarName("z2")}));
- EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
- std::vector({f::GradVarName("y1")}));
- EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector());
-
- f::OpDesc *fill_zero_op = block->AllOps()[4];
- ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
- ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
- ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
-  EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
-  EXPECT_EQ(fill_zero_op->Output("Out"),
-            std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
-
- f::OpDesc *grad_op1 = block->AllOps()[5];
- ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
- EXPECT_EQ(grad_op1->Input("X"), std::vector({"x1"}));
- EXPECT_EQ(grad_op1->Input("H"), std::vector({"h1"}));
- EXPECT_EQ(grad_op1->Input("Y"), std::vector({"y1"}));
- EXPECT_EQ(grad_op1->Input("Z"), std::vector({"z1"}));
- EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")),
- std::vector({f::GradVarName("y1")}));
- EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")),
- std::vector({std::string("z1") + f::kZeroVarSuffix}));
- EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
- std::vector({f::GradVarName("x1")}));
- EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
- std::vector({f::GradVarName("h1")}));
-
- EXPECT_EQ(var_to_grad.size(), 4UL);
- EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
- EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("y1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("h1")));
-}
-
-TEST(Backward, shared_var) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"out1"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out1"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- auto target = f::VarDesc("out3");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 8UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op3 = block->AllOps()[4];
- ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out3")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0"}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b3")}));
-
- f::OpDesc *grad_op4 = block->AllOps()[5];
- ASSERT_EQ(grad_op4->Type(), "mul_grad");
- ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
- EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"}));
- EXPECT_EQ(grad_op4->Input("Y"), std::vector({"y2"}));
- EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out2"}));
- EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
- std::vector({f::GradVarName("out2")}));
- EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
- std::vector({f::GradVarName("out1") + "@RENAME@1"}));
- EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
- std::vector({f::GradVarName("y2")}));
-
- f::OpDesc *sum_op = block->AllOps()[6];
- ASSERT_EQ(sum_op->Type(), "sum");
- ASSERT_EQ(sum_op->InputNames().size(), 1UL);
- ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
-  EXPECT_EQ(sum_op->Input("X"),
-            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0",
-                                      f::GradVarName("out1") + "@RENAME@1"}));
-  EXPECT_EQ(sum_op->Output("Out"),
-            std::vector<std::string>({f::GradVarName("out1")}));
-
- f::OpDesc *grad_op1 = block->AllOps()[7];
- ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- EXPECT_EQ(var_to_grad.size(), 6UL);
- EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
- EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
-}
-
-TEST(Backward, half_backward) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- auto *op1 = block->AppendOp();
- op1->SetType("minus");
- op1->SetInput("X", {"a"});
- op1->SetInput("Y", {"b"});
- op1->SetOutput("Out", {"out"});
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"b"});
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
- auto ops = block->AllOps();
- ASSERT_EQ(3UL, ops.size());
-
- EXPECT_EQ(var_to_grad.size(), 2UL);
- EXPECT_EQ(var_to_grad.at("a"),
- f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
-}
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index 873969b2a884f6d9e133fe87bf72725c36ce8b98..eef19c4f09c60b9df18f154c85c421f5bff9413f 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -92,7 +92,7 @@ class BlockDesc {
/*
* Remove Op and its input/output variables.
- * Note that for either input or ouput variable, if it is also an input or
+ * Note that for either input or output variable, if it is also an input or
* output variable of other ops, we should remain it.
*/
void RemoveOp(size_t s, size_t e);
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
index 89b5c6847f15b3f2a270fe1e7db9e590549e8982..85b649b2937f6a281b9ee1fe7bae8101169f6102 100644
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@@ -5,6 +5,7 @@ cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod
nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda)
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
+cc_library(send_op_handle SRCS send_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base)
cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
@@ -15,7 +16,7 @@ else()
set(multi_devices_graph_builder_deps)
endif()
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
- scale_loss_grad_op_handle ${multi_devices_graph_builder_deps})
+ scale_loss_grad_op_handle send_op_handle ${multi_devices_graph_builder_deps})
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto)
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context)
diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc
index 7a1b40c0b60a788b1f0a70e688f8fcbe427ad076..e3f8bbb72f2a1b75b6041d41496cef0efc81874f 100644
--- a/paddle/fluid/framework/details/computation_op_handle.cc
+++ b/paddle/fluid/framework/details/computation_op_handle.cc
@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/details/computation_op_handle.h"
+#include
+
namespace paddle {
namespace framework {
namespace details {
@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
}
}
-  op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_);
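+  // Run the wrapped op inside this device's local execution scope.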
+  op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
}
std::string ComputationOpHandle::Name() const { return op_->Type(); }
diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index 9180903b864d03e59f55f41410b2240fa4199496..e3e7c55d153aec8ce9c25c962821b266eaa84fe4 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -14,6 +14,9 @@
#include "paddle/fluid/framework/details/fetch_op_handle.h"
+#include
+#include
+
namespace paddle {
namespace framework {
namespace details {
@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {
for (size_t i = 0; i < scopes.size(); ++i) {
auto &scope = scopes[i];
-    auto &t = scope->FindVar(var_name)->Get<LoDTensor>();
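+    // Read the fetched tensor from the device's local execution scope rather
+    // than from the top-level scope.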
+    auto &t = scope->FindVar(kLocalExecScopeName)
+                  ->Get<Scope *>()
+                  ->FindVar(var_name)
+                  ->Get<LoDTensor>();
if (platform::is_gpu_place(var->place_)) {
#ifdef PADDLE_WITH_CUDA
TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index e7a0cb678ebfd8a3fe5f873e995b63b0857e5ba4..e0dd9e6068174a4b0348d503f4082bee6ff68dac 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
+#include "paddle/fluid/framework/details/send_op_handle.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
@@ -54,6 +55,27 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
}
}
+void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
+ const platform::Place &p,
+ const size_t &i) const {
+ auto *op_handle = result->ops_.back().get();
+  op_handle->dev_ctxes_[p] = const_cast<platform::DeviceContext *>(
+ platform::DeviceContextPool::Instance().Get(p));
+
+ auto var_names = op->InputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, i);
+ op_handle->AddInput(var);
+ }
+
+ var_names = op->OutputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ CreateOpOutput(result, op_handle, each_var_name, p, i);
+ }
+}
+
 std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
const ProgramDesc &program) const {
auto graph = new SSAGraph();
@@ -76,27 +98,28 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
}
}
+    // Append a send op if the program is a distributed trainer main program,
+    // and always place it on the first device.
+ if (!is_forwarding && op->Type() == "send") {
+ auto &p = places_[0];
+ auto *s = local_scopes_[0];
+      // FIXME(wuyi): the send op always copies from GPU 0.
+ result.ops_.emplace_back(new SendOpHandle(*op, s, p));
+      // Create the inputs on the original place; no SSA output is created
+      // for the send op.
+ CreateOpHandleIOs(&result, op, p, 0);
+ continue;
+ }
+
for (size_t i = 0; i < places_.size(); ++i) {
auto &p = places_[i];
auto *s = local_scopes_[i];
result.ops_.emplace_back(new ComputationOpHandle(*op, s, p));
auto *op_handle = result.ops_.back().get();
-      op_handle->dev_ctxes_[p] = const_cast<platform::DeviceContext *>(
- platform::DeviceContextPool::Instance().Get(p));
+ CreateOpHandleIOs(&result, op, p, i);
- auto var_names = op->InputArgumentNames();
-
- for (auto &each_var_name : var_names) {
- VarHandle *var =
- CreateOrGetLatestVarHandle(&result, each_var_name, p, i);
- op_handle->AddInput(var);
- }
- var_names = op->OutputArgumentNames();
-
- for (auto &each_var_name : var_names) {
- CreateOpOutput(&result, op_handle, each_var_name, p, i);
- }
+ auto var_names = op->OutputArgumentNames();
if (is_forwarding) {
if (var_names.size() == 1 && var_names[0] == loss_var_name_) {
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index d3c8e582cf2cdf26198822e4bd2602883622df21..de34caab1be85eecb741a5003f026eb982e178ea 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -14,6 +14,9 @@
#pragma once
+#include
+#include
+
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
namespace paddle {
@@ -41,6 +44,10 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
+ private:
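+  // Creates the input/output variable handles for the op handle most recently
+  // added to `result`, binding it to place `p` (device index `i`).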
+ void CreateOpHandleIOs(SSAGraph *result, OpDesc *op, const platform::Place &p,
+ const size_t &i) const;
+
private:
std::string loss_var_name_;
   const std::vector<platform::Place> &places_;
diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h
index d7a541ac4bb83625060db337446d03a1afda3ed0..fbdb54ba8d940c8dedd44a42a85825af5d2ec664 100644
--- a/paddle/fluid/framework/details/op_handle_base.h
+++ b/paddle/fluid/framework/details/op_handle_base.h
@@ -24,6 +24,8 @@ namespace paddle {
namespace framework {
namespace details {
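+// Name of the scope variable that holds each device's local execution scope
+// (a Scope *); op handles look it up when they run.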
+constexpr char kLocalExecScopeName[] = "@LOCAL_SCOPE@";
+
class OpHandleBase {
private:
DISABLE_COPY_AND_ASSIGN(OpHandleBase);
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d181607e86372f4872c38bc35db786ac142ccc65
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -0,0 +1,43 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/details/send_op_handle.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+SendOpHandle::SendOpHandle(const framework::OpDesc &op_desc,
+ const Scope *local_scope,
+ const platform::Place &place)
+ : op_(framework::OpRegistry::CreateOp(op_desc)),
+ local_scope_(local_scope),
+ place_(place) {}
+
+void SendOpHandle::RunImpl() {
+  // Wait until all inputs have been generated.
+ for (auto *in : inputs_) {
+    auto &p = static_cast<VarHandle *>(in)->place_;
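+    // Dummy variables only encode dependencies and have no meaningful place,
+    // so there is nothing to wait on for them.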
+ if (in->DebugString() == "dummy") { // HACK
+ continue;
+ }
+ in->generated_op_->Wait(dev_ctxes_[p]);
+ }
+ op_->Run(*local_scope_, place_);
+}
+
+std::string SendOpHandle::Name() const { return "send"; }
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/details/send_op_handle.h b/paddle/fluid/framework/details/send_op_handle.h
new file mode 100644
index 0000000000000000000000000000000000000000..173f9d726145aeb9e85cc0fb9056eb57bf484098
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "paddle/fluid/framework/details/op_handle_base.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
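+// Handles the distributed send op: it runs on a single fixed place and is not
+// replicated across devices.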
+struct SendOpHandle : public OpHandleBase {
+  std::unique_ptr<OperatorBase> op_;
+ const Scope* local_scope_;
+ const platform::Place& place_;
+
+ SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope,
+ const platform::Place& place);
+
+ std::string Name() const override;
+
+  // Delaying and buffering nccl_all_reduce ops together can significantly
+  // increase performance. Disable this feature by returning false.
+ bool IsMultiDeviceTransfer() override { return false; };
+
+ protected:
+ void RunImpl() override;
+};
+
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/details/ssa_graph_executor.h b/paddle/fluid/framework/details/ssa_graph_executor.h
index 3b818b1a45b56351e34f9e52ec22b6d02a0c1591..a8833b7388ab907020a260d356f1484ffd227658 100644
--- a/paddle/fluid/framework/details/ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/ssa_graph_executor.h
@@ -15,13 +15,15 @@
#pragma once
#include
+#include
+#include
+
#include "paddle/fluid/framework/details/ssa_graph.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
namespace paddle {
namespace framework {
namespace details {
-
class SSAGraphExecutor {
DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index 62af4c1d79ded5eaa30e4e6d43cc0d7327ae9689..1ce69ab02b09fe7ec17f479bcef97c931e853dc4 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
ready_ops.clear();
};
- // Create local scopes.
- for (auto &scope : local_scopes_) {
- auto &local_scope = scope->NewScope();
- *scope->Var("@TMP_SCOPE@")->GetMutable() = &local_scope;
- }
-
// Step 3. Execution
while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
// 1. Run All Ready ops
@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
PADDLE_ENFORCE(ready_ops.empty());
PADDLE_ENFORCE(delayed_ops.empty());
PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
- ++computation_count_;
-
- auto sync_computation = [&] {
- computation_count_ = 0;
- // Wait All computational streams
- for (auto p : this->places_) {
- platform::DeviceContextPool::Instance().Get(p)->Wait();
- }
- for (auto &scope : local_scopes_) {
- scope->DropKids();
- }
- };
// Wait FetchOps.
if (!fetch_ops.empty()) {
fetch_ops.clear();
- sync_computation();
- }
-
- if (computation_count_ == max_async_computation) {
- sync_computation();
- }
-
- // NOTE: the temp scope can be dropped lazily if needed.
- // Drop tmp scopes;
- for (auto &scope : local_scopes_) {
-    auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
- kid = nullptr;
}
return fetch_data;
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index 79cfc26b461a39811a9a125e5aeac3492d967386..bb5e837b135c35b5aea403496b45aab1ccc288ff 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
std::unique_ptr exception_;
std::atomic running_ops_;
bool allow_op_delay_;
-
- size_t computation_count_{0};
- size_t max_async_computation{100};
};
} // namespace details
diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc
index 8155cb55a468a09320b1196b49fc3e34cea261b1..a56674cbe216e312c4394ef537140122352dc785 100644
--- a/paddle/fluid/framework/lod_tensor.cc
+++ b/paddle/fluid/framework/lod_tensor.cc
@@ -12,9 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/fluid/framework/lod_tensor.h"
+#include
+#include
+#include
+#include
+
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
@@ -22,11 +27,6 @@ limitations under the License. */
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
-#include
-#include
-#include
-#include
-
namespace paddle {
namespace framework {
@@ -294,7 +294,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
   TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
}
-void WriteToRecordIO(recordio::Writer &writer,
+void WriteToRecordIO(recordio::Writer *writer,
                      const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {
std::stringstream buffer;
@@ -303,18 +303,20 @@ void WriteToRecordIO(recordio::Writer &writer,
for (auto &each : tensor) {
SerializeToStream(buffer, each, dev_ctx);
}
- writer.Write(buffer.str());
+ writer->Write(buffer.str());
}
 std::vector<LoDTensor> ReadFromRecordIO(
- recordio::Scanner &scanner, const platform::DeviceContext &dev_ctx) {
- std::istringstream sin(scanner.Next());
- uint32_t sz;
-  sin.read(reinterpret_cast<char *>(&sz), sizeof(uint32_t));
+ recordio::Scanner *scanner, const platform::DeviceContext &dev_ctx) {
   std::vector<LoDTensor> result;
- result.resize(sz);
- for (uint32_t i = 0; i < sz; ++i) {
- DeserializeFromStream(sin, &result[i], dev_ctx);
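+  // Only read when the scanner still has a record; otherwise return an empty
+  // vector instead of reading past the end of the stream.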
+ if (scanner->HasNext()) {
+ std::istringstream sin(scanner->Next());
+ uint32_t sz;
+    sin.read(reinterpret_cast<char *>(&sz), sizeof(uint32_t));
+ result.resize(sz);
+ for (uint32_t i = 0; i < sz; ++i) {
+ DeserializeFromStream(sin, &result[i], dev_ctx);
+ }
}
return result;
}
diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h
index 4f130d265900483ec7a7c541f2610d17a352913f..1159fee39b0737402c60448dcbe69e7535c9d6e1 100644
--- a/paddle/fluid/framework/lod_tensor.h
+++ b/paddle/fluid/framework/lod_tensor.h
@@ -15,6 +15,9 @@ limitations under the License. */
#pragma once
#include
+#include
+#include
+#include
#ifdef PADDLE_WITH_CUDA
#include
#include
@@ -216,12 +219,12 @@ void SerializeToStream(std::ostream& os, const LoDTensor& tensor,
void DeserializeFromStream(std::istream& is, LoDTensor* tensor,
const platform::DeviceContext& dev_ctx);
-extern void WriteToRecordIO(recordio::Writer& writer,
+extern void WriteToRecordIO(recordio::Writer* writer,
                            const std::vector<LoDTensor>& tensor,
const platform::DeviceContext& dev_ctx);
 extern std::vector<LoDTensor> ReadFromRecordIO(
- recordio::Scanner& scanner, const platform::DeviceContext& dev_ctx);
+ recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc
index e691e29383d4842b80769021e0e494967d38e9bb..97ab98f09b1a902a942d9667bc7716a28b98d54c 100644
--- a/paddle/fluid/framework/lod_tensor_test.cc
+++ b/paddle/fluid/framework/lod_tensor_test.cc
@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle/fluid/framework/lod_tensor.h"
-
-#include "paddle/fluid/recordio/scanner.h"
-#include "paddle/fluid/recordio/writer.h"
-
#include
#include
#include
#include
#include
+#include "paddle/fluid/framework/lod_tensor.h"
+
+#include "paddle/fluid/recordio/scanner.h"
+#include "paddle/fluid/recordio/writer.h"
+
namespace paddle {
namespace framework {
@@ -240,8 +240,8 @@ TEST(LoDTensor, RecordIO) {
*platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
{
recordio::Writer writer(stream, recordio::Compressor::kSnappy);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
writer.Flush();
}
@@ -254,11 +254,11 @@ TEST(LoDTensor, RecordIO) {
{
     std::unique_ptr<std::istream> stream_ptr(stream);
recordio::Scanner scanner(std::move(stream_ptr));
- auto tensors = ReadFromRecordIO(scanner, ctx);
+ auto tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
- tensors = ReadFromRecordIO(scanner, ctx);
+ tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index a3b4a8c0829ae3324e933309b2eaea35fe571997..f97bd0827428feeb590fcad16c48f3461517a646 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
}
}
-static DDim GetDims(const Scope& scope, const std::string& name) {
+static DDim GetDims(const Scope& scope, const std::string& name,
+ bool get_actual_dim = false) {
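+  // For SelectedRows, get_actual_dim selects the dims of the underlying value
+  // tensor rather than the logical complete dims.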
Variable* var = scope.FindVar(name);
if (var == nullptr) {
return DDim({-1});
@@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
   if (var->IsType<LoDTensor>()) {
     return var->Get<LoDTensor>().dims();
   } else if (var->IsType<SelectedRows>()) {
-    return var->Get<SelectedRows>().GetCompleteDims();
+    if (get_actual_dim) {
+      return var->Get<SelectedRows>().value().dims();
+    } else {
+      return var->Get<SelectedRows>().GetCompleteDims();
+ }
} else {
return DDim({-1});
}
@@ -129,7 +134,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < input.second.size(); ++i) {
ss << input.second[i];
if (scope) {
- ss << "[" << GetDims(*scope, input.second[i]) << "]";
+ ss << "[" << GetDims(*scope, input.second[i], true) << "]";
ss << "(" << GetLoD(*scope, input.second[i]) << ")";
}
if (i != input.second.size() - 1) {
@@ -149,7 +154,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < output.second.size(); ++i) {
ss << output.second[i];
if (scope) {
- ss << "[" << GetDims(*scope, output.second[i]) << "]";
+ ss << "[" << GetDims(*scope, output.second[i], true) << "]";
ss << "(" << GetLoD(*scope, output.second[i]) << ")";
}
if (i != output.second.size() - 1) {
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index f393105fe82bfad70246952deada8e296c851ef5..c1486b527d2e06d2b3f7e0f89458bf9a22564586 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include
+#include
#include
#ifdef PADDLE_WITH_CUDA
@@ -41,6 +42,8 @@ class ParallelExecutorPrivate {
#ifdef PADDLE_WITH_CUDA
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif
+
+ std::vector