diff --git a/.gitignore b/.gitignore
index ac56a3320ec85769d2c87c072512f5217eca0c24..fe0d13f4d9eab2c2a8e7001c9ecb69cce1333af1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,9 @@
+paddle/operators/check_t.save
+paddle/operators/check_tensor.ls
+paddle/operators/tensor.save
+python/paddle/v2/fluid/tests/book/image_classification_resnet.inference.model/
+python/paddle/v2/fluid/tests/book/image_classification_vgg.inference.model/
+python/paddle/v2/fluid/tests/book/label_semantic_roles.inference.model/
*.DS_Store
build/
build_doc/
@@ -27,5 +33,5 @@ CMakeFiles
cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
-paddle/pybind/pybind.h
+paddle/fluid/pybind/pybind.h
python/paddle/version.py
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a60453ff4e3bba6e6cb3b3de915dd69afd3a1ec3..3c36cffcb4eeaaf7f8cff5167777628dd2697e7d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,8 @@
# Contribute Code
+You are welcome to contribute to project PaddlePaddle. To contribute to PaddlePaddle, you have to agree with the
+[PaddlePaddle Contributor License Agreement](https://gist.github.com/wangkuiyi/0c22c7b1bd3bb7eb27d76f85c3a3e329).
+
We sincerely appreciate your contribution. This document explains our workflow and work style.
## Workflow
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 6bea7cf3022242ce48cc882915f7e71810937283..de94bd5008effef1bf0fd3a125d4aed56e1b7f81 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -181,7 +181,8 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
- list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL})
+ # nvcc 9 does not support -Os. Use Release flags instead
+ list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
endif()
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 33ef6860e1d38f4e87c4431addf43f9f8a655fc2..1cb54ba2164fafbfce9f28a3e894ae5e78a9cd68 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -179,20 +179,24 @@ function(cc_library TARGET_NAME)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
- if (cc_library_SRCS)
- if (cc_library_SHARED OR cc_library_shared) # build *.so
+ if(cc_library_SRCS)
+ if(cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
- if (cc_library_DEPS)
+ if(cc_library_DEPS)
# Don't need link libwarpctc.so
- if ("${cc_library_DEPS};" MATCHES "warpctc;")
+ if("${cc_library_DEPS};" MATCHES "warpctc;")
list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc)
endif()
+ # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
+ target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
+ if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
+ list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
+ endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
- target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
endif()
# cpplint code style
diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index e24613b94b422b7cdf9c6383c359fa92a4faf6ff..58c493fd7412cf9dbe507c9622d67dae33a5fb25 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -323,6 +323,12 @@ batch_norm
.. autofunction:: paddle.v2.fluid.layers.batch_norm
:noindex:
+layer_norm
+----------
+
+.. autofunction:: paddle.v2.fluid.layers.layer_norm
+ :noindex:
+
beam_search_decode
------------------
diff --git a/doc/getstarted/quickstart_cn.rst b/doc/getstarted/quickstart_cn.rst
index 51dd00f1e806e6423afe3ce53d80d53a187d2ca0..d511cead262dabafd095f68adb5ffc596a7fe596 100644
--- a/doc/getstarted/quickstart_cn.rst
+++ b/doc/getstarted/quickstart_cn.rst
@@ -1,6 +1,9 @@
快速开始
========
+快速安装
+--------
+
PaddlePaddle支持使用pip快速安装,目前支持CentOS 6以上, Ubuntu 14.04以及MacOS 10.12,并安装有Python2.7。
执行下面的命令完成快速安装,版本为cpu_avx_openblas:
@@ -16,6 +19,9 @@ PaddlePaddle支持使用pip快速安装,目前支持CentOS 6以上, Ubuntu 14.
更详细的安装和编译方法参考::ref:`install_steps` 。
+快速使用
+--------
+
创建一个 housing.py 并粘贴此Python代码:
.. code-block:: python
diff --git a/doc/getstarted/quickstart_en.rst b/doc/getstarted/quickstart_en.rst
index d1bcf82ea071e2c53760a5ccf6a5074a3ac0abd5..70f7fe0646068aa79cd72955c6848ac0250c2300 100644
--- a/doc/getstarted/quickstart_en.rst
+++ b/doc/getstarted/quickstart_en.rst
@@ -1,6 +1,9 @@
Quick Start
============
+Quick Install
+-------------
+
You can use pip to install PaddlePaddle with a single command, supports
CentOS 6 above, Ubuntu 14.04 above or MacOS 10.12, with Python 2.7 installed.
Simply run the following command to install, the version is cpu_avx_openblas:
@@ -17,6 +20,9 @@ If you need to install GPU version (cuda7.5_cudnn5_avx_openblas), run:
For more details about installation and build: :ref:`install_steps` .
+Quick Use
+---------
+
Create a new file called housing.py, and paste this Python
code:
diff --git a/doc/howto/cluster/index_cn.rst b/doc/howto/cluster/index_cn.rst
index c68b2655b65b192814b94f0013fa92b0733b9afa..a60521b4a9646bdc6d9f1bf6da482acc989d8bf3 100644
--- a/doc/howto/cluster/index_cn.rst
+++ b/doc/howto/cluster/index_cn.rst
@@ -1,10 +1,22 @@
分布式训练
==========
+本节将介绍如何使用PaddlePaddle在不同的集群框架下完成分布式训练。分布式训练架构如下图所示:
+
+.. image:: src/ps_cn.png
+ :width: 500
+
+- 数据分片(Data shard): 用于训练神经网络的数据,被切分成多个部分,每个部分分别给每个trainer使用。
+- 计算节点(Trainer): 每个trainer启动后读取切分好的一部分数据,开始神经网络的“前馈”和“后馈”计算,并和参数服务器通信。在完成一定量数据的训练后,上传计算得出的梯度(gradients),然后下载优化更新后的神经网络参数(parameters)。
+- 参数服务器(Parameter server):每个参数服务器只保存整个神经网络所有参数的一部分。参数服务器接收从计算节点上传的梯度,并完成参数优化更新,再将更新后的参数下发到每个计算节点。
+
+这样,通过计算节点和参数服务器的分布式协作,可以完成神经网络的SGD方法的训练。PaddlePaddle可以同时支持同步随机梯度下降(SGD)和异步随机梯度下降。
+
+在使用同步SGD训练神经网络时,PaddlePaddle使用同步屏障(barrier),使梯度的提交和参数的更新按照顺序方式执行。在异步SGD中,则并不会等待所有trainer提交梯度才更新参数,这样极大地提高了计算的并行性:参数服务器之间不相互依赖,并行地接收梯度和更新参数,参数服务器也不会等待计算节点全部都提交梯度之后才开始下一步,计算节点之间也不会相互依赖,并行地执行模型的训练。可以看出,虽然异步SGD方式会提高参数更新并行度, 但是并不能保证参数同步更新,在任意时间某一台参数服务器上保存的参数可能比另一台要更新,与同步SGD相比,梯度会有噪声。
+
.. toctree::
:maxdepth: 1
- introduction_cn.md
preparations_cn.md
cmd_argument_cn.md
multi_cluster/index_cn.rst
diff --git a/doc/howto/cluster/index_en.rst b/doc/howto/cluster/index_en.rst
index af957e06cd7930ce63569a1bafdde47a1d34eb69..2640a09dcc904619bc97c9bd3f3d81a9dc307663 100644
--- a/doc/howto/cluster/index_en.rst
+++ b/doc/howto/cluster/index_en.rst
@@ -1,10 +1,22 @@
Distributed Training
====================
+In this section, we'll explain how to run distributed training jobs with PaddlePaddle on different types of clusters. The diagram below shows the main architecture of a distributed training job:
+
+.. image:: src/ps_en.png
+ :width: 500
+
+- Data shard: training data will be split into multiple partitions, trainers use the partitions of the whole dataset to do the training job.
+- Trainer: each trainer reads the data shard, and trains the neural network. Then the trainer will upload calculated "gradients" to parameter servers, and wait for parameters to be optimized on the parameter server side. When that finishes, the trainer downloads optimized parameters and continues its training.
+- Parameter server: every parameter server stores part of the whole neural network model data. They will do optimization calculations when gradients are uploaded from trainers, and then send updated parameters to trainers.
+
+PaddlePaddle can support both synchronous stochastic gradient descent (SGD) and asynchronous SGD.
+
+When training with synchronous SGD, PaddlePaddle uses an internal "synchronize barrier" which makes gradients update and parameter download in strict order. On the other hand, asynchronous SGD won't wait for all trainers to finish upload at a single step, this will increase the parallelism of distributed training: parameter servers do not depend on each other, they'll do parameter optimization concurrently. Parameter servers will not wait for trainers, so trainers will also do their work concurrently. But asynchronous SGD will introduce more randomness and noises in the gradient.
+
.. toctree::
:maxdepth: 1
- introduction_en.md
preparations_en.md
cmd_argument_en.md
multi_cluster/index_en.rst
diff --git a/doc/howto/cluster/introduction_cn.md b/doc/howto/cluster/introduction_cn.md
deleted file mode 100644
index 562008a898414a6566d74d08cfeb18fb9d57582a..0000000000000000000000000000000000000000
--- a/doc/howto/cluster/introduction_cn.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## 概述
-
-本节将介绍如何使用PaddlePaddle在不同的集群框架下完成分布式训练。分布式训练架构如下图所示:
-
-
-
-- 数据分片(Data shard): 用于训练神经网络的数据,被切分成多个部分,每个部分分别给每个trainer使用。
-- 计算节点(Trainer): 每个trainer启动后读取切分好的一部分数据,开始神经网络的“前馈”和“后馈”计算,并和参数服务器通信。在完成一定量数据的训练后,上传计算得出的梯度(gradients),然后下载优化更新后的神经网络参数(parameters)。
-- 参数服务器(Parameter server):每个参数服务器只保存整个神经网络所有参数的一部分。参数服务器接收从计算节点上传的梯度,并完成参数优化更新,再将更新后的参数下发到每个计算节点。
-
-这样,通过计算节点和参数服务器的分布式协作,可以完成神经网络的SGD方法的训练。PaddlePaddle可以同时支持同步随机梯度下降(SGD)和异步随机梯度下降。
-
-在使用同步SGD训练神经网络时,PaddlePaddle使用同步屏障(barrier),使梯度的提交和参数的更新按照顺序方式执行。在异步SGD中,则并不会等待所有trainer提交梯度才更新参数,这样极大地提高了计算的并行性:参数服务器之间不相互依赖,并行地接收梯度和更新参数,参数服务器也不会等待计算节点全部都提交梯度之后才开始下一步,计算节点之间也不会相互依赖,并行地执行模型的训练。可以看出,虽然异步SGD方式会提高参数更新并行度, 但是并不能保证参数同步更新,在任意时间某一台参数服务器上保存的参数可能比另一台要更新,与同步SGD相比,梯度会有噪声。
diff --git a/doc/howto/cluster/introduction_en.md b/doc/howto/cluster/introduction_en.md
deleted file mode 100644
index eb70d7cf35ab729e0da4c6a3a2e732c26905f584..0000000000000000000000000000000000000000
--- a/doc/howto/cluster/introduction_en.md
+++ /dev/null
@@ -1,13 +0,0 @@
-## Introduction
-
-In this section, we'll explain how to run distributed training jobs with PaddlePaddle on different types of clusters. The diagram below shows the main architecture of a distributed trainning job:
-
-
-
-- Data shard: training data will be split into multiple partitions, trainers use the partitions of the whole dataset to do the training job.
-- Trainer: each trainer reads the data shard, and train the neural network. Then the trainer will upload calculated "gradients" to parameter servers, and wait for parameters to be optimized on the parameter server side. When that finishes, the trainer download optimized parameters and continues its training.
-- Parameter server: every parameter server stores part of the whole neural network model data. They will do optimization calculations when gradients are uploaded from trainers, and then send updated parameters to trainers.
-
-PaddlePaddle can support both synchronize stochastic gradient descent (SGD) and asynchronous SGD.
-
-When training with synchronize SGD, PaddlePaddle uses an internal "synchronize barrier" which makes gradients update and parameter download in strict order. On the other hand, asynchronous SGD won't wait for all trainers to finish upload at a single step, this will increase the parallelism of distributed training: parameter servers do not depend on each other, they'll do parameter optimization concurrently. Parameter servers will not wait for trainers, so trainers will also do their work concurrently. But asynchronous SGD will introduce more randomness and noises in the gradient.
diff --git a/doc/howto/cluster/src/ps_cn.png b/doc/howto/cluster/src/ps_cn.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9525739cc8bc6506adde642aafa0a85ae3ebebc
Binary files /dev/null and b/doc/howto/cluster/src/ps_cn.png differ
diff --git a/doc/howto/cluster/src/ps_en.png b/doc/howto/cluster/src/ps_en.png
new file mode 100644
index 0000000000000000000000000000000000000000..6537d3d56589ca9f19a77a50a970e4b5275e6ce0
Binary files /dev/null and b/doc/howto/cluster/src/ps_en.png differ
diff --git a/doc/howto/rnn/index_cn.rst b/doc/howto/rnn/index_cn.rst
index 9ecab5594cff47cde4700b7ce0f58013a960a16e..bcc8c2f46eb662ec3650e829a77992224dbbb8e7 100644
--- a/doc/howto/rnn/index_cn.rst
+++ b/doc/howto/rnn/index_cn.rst
@@ -1,4 +1,4 @@
-RNN相关模型
+RNN模型
===========
.. toctree::
diff --git a/doc/index_cn.rst b/doc/index_cn.rst
index 63a78428583477792e309a3b3d26af340caccfca..0f645db6fc5d0f84bbe0cbb335677752e3a355ea 100644
--- a/doc/index_cn.rst
+++ b/doc/index_cn.rst
@@ -8,5 +8,4 @@ PaddlePaddle 文档
build_and_install/index_cn.rst
howto/index_cn.rst
dev/index_cn.rst
- api/index_cn.rst
faq/index_cn.rst
diff --git a/doc/index_en.rst b/doc/index_en.rst
index 5631381be087017c26b2a6a3984b3c5bdb49f12d..166f56c28f464563a0b36007f58cebb58c286916 100644
--- a/doc/index_en.rst
+++ b/doc/index_en.rst
@@ -8,4 +8,3 @@ PaddlePaddle Documentation
build_and_install/index_en.rst
howto/index_en.rst
dev/index_en.rst
- api/index_en.rst
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index 3f9c132ef6ae03c7614e10484715676c8019821e..c7deba2ab475d3c4f2c95327af77af7031b591fd 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -19,12 +19,7 @@ else()
endif()
if(NOT ANDROID AND NOT IOS)
- add_subdirectory(memory)
- add_subdirectory(platform)
- add_subdirectory(framework)
- add_subdirectory(operators)
- add_subdirectory(pybind)
- add_subdirectory(inference)
+ add_subdirectory(fluid)
endif()
if(WITH_SWIG_PY)
diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6b4191518c45d0579f800ecb901dcd9667e17d5
--- /dev/null
+++ b/paddle/fluid/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_subdirectory(memory)
+add_subdirectory(platform)
+add_subdirectory(framework)
+add_subdirectory(operators)
+add_subdirectory(pybind)
+add_subdirectory(inference)
diff --git a/paddle/framework/.clang-format b/paddle/fluid/framework/.clang-format
similarity index 100%
rename from paddle/framework/.clang-format
rename to paddle/fluid/framework/.clang-format
diff --git a/paddle/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
similarity index 100%
rename from paddle/framework/CMakeLists.txt
rename to paddle/fluid/framework/CMakeLists.txt
diff --git a/paddle/fluid/framework/attribute.cc b/paddle/fluid/framework/attribute.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1d7e7366b0723c630b24d62c1f5d0a72cf42d770
--- /dev/null
+++ b/paddle/fluid/framework/attribute.cc
@@ -0,0 +1,74 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/attribute.h"
+
+#include
+
+namespace paddle {
+namespace framework {
+
+Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) {
+ switch (attr_desc.type()) {
+ case proto::AttrType::BOOLEAN: {
+ return attr_desc.b();
+ }
+ case proto::AttrType::INT: {
+ return attr_desc.i();
+ }
+ case proto::AttrType::FLOAT: {
+ return attr_desc.f();
+ }
+ case proto::AttrType::STRING: {
+ return attr_desc.s();
+ }
+ case proto::AttrType::BOOLEANS: {
+ std::vector val(attr_desc.bools_size());
+ for (int i = 0; i < attr_desc.bools_size(); ++i) {
+ val[i] = attr_desc.bools(i);
+ }
+ return val;
+ }
+ case proto::AttrType::INTS: {
+ std::vector val(attr_desc.ints_size());
+ for (int i = 0; i < attr_desc.ints_size(); ++i) {
+ val[i] = attr_desc.ints(i);
+ }
+ return val;
+ }
+ case proto::AttrType::FLOATS: {
+ std::vector val(attr_desc.floats_size());
+ for (int i = 0; i < attr_desc.floats_size(); ++i) {
+ val[i] = attr_desc.floats(i);
+ }
+ return val;
+ }
+ case proto::AttrType::STRINGS: {
+ std::vector val(attr_desc.strings_size());
+ for (int i = 0; i < attr_desc.strings_size(); ++i) {
+ val[i] = attr_desc.strings(i);
+ }
+ return val;
+ }
+ case proto::AttrType::LONG: {
+ return attr_desc.l();
+ }
+ default:
+ PADDLE_THROW("Unsupport attr type %d", attr_desc.type());
+ }
+ return boost::blank();
+}
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h
new file mode 100644
index 0000000000000000000000000000000000000000..16be42ae71497bcc755d10eee2d73d331ede7da6
--- /dev/null
+++ b/paddle/fluid/framework/attribute.h
@@ -0,0 +1,284 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+template
+inline proto::AttrType AttrTypeID() {
+ Attribute tmp = T();
+ return static_cast(tmp.which() - 1);
+}
+
+Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc);
+
+class AttrReader {
+ public:
+ explicit AttrReader(const AttributeMap& attrs) : attrs_(attrs) {}
+
+ template
+ inline const T& Get(const std::string& name) const {
+ PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap",
+ name);
+ return boost::get(attrs_.at(name));
+ }
+
+ private:
+ const AttributeMap& attrs_;
+};
+
+// check whether a value(attribute) fit a certain limit
+template
+class GreaterThanChecker {
+ public:
+ explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
+ void operator()(T& value) const {
+ PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails.");
+ }
+
+ private:
+ T lower_bound_;
+};
+
+template
+class EqualGreaterThanChecker {
+ public:
+ explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
+ void operator()(T& value) const {
+ PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails.");
+ }
+
+ private:
+ T lower_bound_;
+};
+
+// we can provide users more common Checker, like 'LessThanChecker',
+// 'BetweenChecker'...
+
+template
+class DefaultValueSetter {
+ public:
+ explicit DefaultValueSetter(T default_value)
+ : default_value_(default_value) {}
+ void operator()(T& value) const { value = default_value_; }
+
+ private:
+ T default_value_;
+};
+
+template
+class EnumInContainer {
+ public:
+ explicit EnumInContainer(const std::unordered_set& c) : container_(c) {}
+ void operator()(T& val) const {
+ PADDLE_ENFORCE(container_.find(val) != container_.end(),
+ "Value %s is not in enum container %s", val,
+ ContainerDebugString());
+ }
+
+ private:
+ std::string ContainerDebugString() const {
+ std::ostringstream sout;
+ sout << "[";
+ size_t cnt = 0;
+ for (auto& v : container_) {
+ sout << v;
+ ++cnt;
+ if (cnt != container_.size()) {
+ sout << " ,";
+ }
+ }
+ sout << "]";
+ return sout.str();
+ }
+
+ std::unordered_set container_;
+};
+
+template
+struct ExtractAttribute {
+ explicit ExtractAttribute(const std::string& attr_name)
+ : attr_name_(attr_name) {}
+
+ T* operator()(Attribute& attr) const {
+ T* attr_value = nullptr;
+ try {
+ attr_value = &boost::get(attr);
+ } catch (boost::bad_get& bad_get) {
+ PADDLE_THROW("Cannot get attribute %s by type %s, its type is %s",
+ attr_name_, typeid(T).name(), attr.type().name());
+ }
+ return attr_value;
+ }
+
+ const std::string& attr_name_;
+};
+
+// special handle bool
+// FIXME(yuyang18): Currently we cast bool into int in python binding. It is
+// hard to change the logic there. In another way, we should correct handle
+// if the user set `some_flag=1`.
+//
+// FIX ME anytime if there is a better solution.
+template <>
+struct ExtractAttribute {
+ explicit ExtractAttribute(const std::string& attr_name)
+ : attr_name_(attr_name) {}
+
+ bool* operator()(Attribute& attr) const {
+ if (attr.type() == typeid(int)) { // NOLINT
+ int val = boost::get(attr);
+ attr = static_cast(val);
+ } else if (attr.type() == typeid(float)) { // NOLINT
+ float val = boost::get(attr);
+ attr = static_cast(val);
+ }
+ bool* attr_value = nullptr;
+ try {
+ attr_value = &boost::get(attr);
+ } catch (boost::bad_get& bad_get) {
+ PADDLE_THROW("Cannot get attribute %s by type bool, its type is %s",
+ attr_name_, attr.type().name());
+ }
+ return attr_value;
+ }
+
+ const std::string& attr_name_;
+};
+
+template <>
+struct ExtractAttribute {
+ explicit ExtractAttribute(const std::string& attr_name)
+ : attr_name_(attr_name) {}
+
+ int64_t* operator()(Attribute& attr) const {
+ if (attr.type() == typeid(int)) { // NOLINT
+ int val = boost::get(attr);
+ attr = static_cast(val);
+ } else if (attr.type() == typeid(float)) { // NOLINT
+ int val = boost::get(attr);
+ attr = static_cast(val);
+ }
+ int64_t* attr_value = nullptr;
+ try {
+ attr_value = &boost::get(attr);
+ } catch (boost::bad_get& bad_get) {
+ PADDLE_THROW("Cannot get attribute %s by type int64_t, its type is %s",
+ attr_name_, attr.type().name());
+ }
+ return attr_value;
+ }
+
+ const std::string& attr_name_;
+};
+
+// check whether a certain attribute fit its limits
+// an attribute can have more than one limits
+template
+class TypedAttrChecker {
+ typedef std::function ValueChecker;
+
+ public:
+ explicit TypedAttrChecker(const std::string& attr_name)
+ : attr_name_(attr_name) {}
+
+ TypedAttrChecker& InEnum(const std::unordered_set& range) {
+ value_checkers_.push_back(EnumInContainer(range));
+ return *this;
+ }
+
+ TypedAttrChecker& GreaterThan(const T& lower_bound) {
+ value_checkers_.push_back(GreaterThanChecker(lower_bound));
+ return *this;
+ }
+
+ TypedAttrChecker& EqualGreaterThan(const T& lower_bound) {
+ value_checkers_.push_back(EqualGreaterThanChecker(lower_bound));
+ return *this;
+ }
+
+ // we can add more common limits, like LessThan(), Between()...
+
+ TypedAttrChecker& SetDefault(const T& default_value) {
+ PADDLE_ENFORCE(default_value_setter_.empty(),
+ "%s can't have more than one default value!", attr_name_);
+ default_value_setter_.push_back(DefaultValueSetter(default_value));
+ return *this;
+ }
+
+ // allow users provide their own checker
+ TypedAttrChecker& AddCustomChecker(const ValueChecker& checker) {
+ value_checkers_.push_back(checker);
+ return *this;
+ }
+
+ void operator()(AttributeMap& attr_map) const {
+ if (!attr_map.count(attr_name_)) {
+ // user do not set this attr
+ PADDLE_ENFORCE(!default_value_setter_.empty(),
+ "Attribute '%s' is required!", attr_name_);
+ // default_value_setter_ has no more than one element
+ T val;
+ (default_value_setter_[0])(val);
+ attr_map[attr_name_] = val;
+ }
+ Attribute& attr = attr_map.at(attr_name_);
+ ExtractAttribute extract_attr(attr_name_);
+ T* attr_value = extract_attr(attr);
+ for (const auto& checker : value_checkers_) {
+ checker(*attr_value);
+ }
+ }
+
+ private:
+ std::string attr_name_;
+ std::vector value_checkers_;
+ std::vector default_value_setter_;
+};
+
+// check whether op's all attributes fit their own limits
+class OpAttrChecker {
+ typedef std::function AttrChecker;
+
+ public:
+ template
+ TypedAttrChecker& AddAttrChecker(const std::string& attr_name) {
+ attr_checkers_.push_back(TypedAttrChecker(attr_name));
+ AttrChecker& checker = attr_checkers_.back();
+ return *(checker.target>());
+ }
+
+ void Check(AttributeMap& attr_map) const {
+ for (const auto& checker : attr_checkers_) {
+ checker(attr_map);
+ }
+ }
+
+ private:
+ std::vector attr_checkers_;
+};
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/backward.cc b/paddle/fluid/framework/backward.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c4795f4fc5c73034b23305162ea3b710480d8ebc
--- /dev/null
+++ b/paddle/fluid/framework/backward.cc
@@ -0,0 +1,585 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/backward.h"
+#include "paddle/fluid/operators/net_op.h"
+
+#include
+#include
+#include
+#include
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/net_op.h"
+
+namespace paddle {
+namespace framework {
+
+static std::unordered_set* g_ctrl_flow_ops_ = nullptr;
+// Control Flow operators's backward is significantly different from
+// computational operators. Hack Code here.
+// We should design a better way to backward CtrlFlowOps.
+static std::unordered_set& CtrlFlowOps() {
+ if (g_ctrl_flow_ops_ == nullptr) {
+ g_ctrl_flow_ops_ = new std::unordered_set{
+ "increment", "lod_rank_table", "less_than"};
+ }
+ return *g_ctrl_flow_ops_;
+}
+
+static inline std::unique_ptr CreateGradOp(
+ const OperatorBase& op, const std::unordered_set& no_grad_set,
+ std::unordered_map* grad_to_var) {
+ OpDesc op_desc;
+ op_desc.SetInputMap(op.Inputs());
+ op_desc.SetOutputMap(op.Outputs());
+ op_desc.SetType(op.Type());
+ op_desc.SetAttrMap(op.Attrs());
+ auto& info = OpInfoMap::Instance().Get(op.Type());
+ auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, {});
+ std::vector> grad_ops;
+ grad_ops.reserve(grad_descs.size());
+ std::transform(grad_descs.begin(), grad_descs.end(),
+ std::back_inserter(grad_ops),
+ [](const std::unique_ptr& grad_desc) {
+ return OpRegistry::CreateOp(*grad_desc);
+ });
+ PADDLE_ENFORCE(!grad_ops.empty());
+ if (grad_ops.size() == 1) {
+ return std::move(grad_ops[0]);
+ } else {
+ auto net_op = new operators::NetOp();
+ for (auto& grad_op : grad_ops) {
+ net_op->AppendOp(std::move(grad_op));
+ }
+ net_op->CompleteAddOp();
+ return std::unique_ptr(net_op);
+ }
+}
+
+template
+static void ForEachVarName(const Map& names, T callback) {
+ for (auto& name : names) {
+ for (auto& n : name.second) {
+ if (callback(n)) return;
+ }
+ }
+}
+
+// return whether all the names + suffixes in the set
+static bool AllInSet(
+ const std::map>& names,
+ const std::string& suffix, const std::unordered_set& set) {
+ bool all_in_set = true;
+ ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) {
+ all_in_set = set.find(n + suffix) != set.end();
+ return !all_in_set;
+ });
+ return all_in_set;
+}
+
+static std::unique_ptr NOP() {
+ auto net_op = new operators::NetOp();
+ net_op->SetType("@NOP@");
+ net_op->CompleteAddOp();
+ return std::unique_ptr(net_op);
+}
+
+// Get backward operator from a forward operator, a recursive implementation.
+//
+// no_grad_names the gradient variable names without gradient calculating.
+//
+// uniq_id is a unique index used inside recursively calling
+// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and
+// pass `uniq_id` through recursive calling.
+//
+// returns The backward operator. In a simple situation, it may be a simple
+// operator, in a complex situation, it maybe a NetOp.
+//
+// See Backward.h for details
+static std::unique_ptr BackwardRecursive(
+ const OperatorBase& forwardOp,
+ std::unordered_set& no_grad_names,
+ std::unordered_map* grad_to_var,
+ size_t& uniq_id) {
+ // If all input gradients of forwarding operator do not need to calculate,
+ // just return an NOP. Not return null ptr because NOP does not take
+ // too much time for calculation, but it is useful for simplifying logic.
+ if (AllInSet(forwardOp.Inputs() /*names*/, kGradVarSuffix /*suffix*/,
+ no_grad_names /*set*/)) {
+ return NOP();
+ }
+
+ // All output gradients of forwarding operator do not need to calculate.
+ // Then all input gradients cannot be computed at all, and we put them into
+ // `no_grad_names` set. Return an NOP.
+ if (AllInSet(forwardOp.Outputs() /*names*/, kGradVarSuffix /*suffix*/,
+ no_grad_names /*set*/)) {
+ ForEachVarName(forwardOp.Inputs(),
+ [&no_grad_names](const std::string& name) -> bool {
+ no_grad_names.insert(GradVarName(name));
+ return false;
+ });
+ return NOP();
+ }
+
+ // Returned gradient network
+ auto net = std::unique_ptr(new operators::NetOp());
+
+ if (forwardOp.IsNetOp()) {
+ // Because forwardOp is a net op, it can static_cast.
+ auto& forwardNet = static_cast(forwardOp);
+
+ // Map from output gradient variable name to operator's indices in
+ // backward net's ops_. That operator generates that variable.
+ std::unordered_map> dup_output_ops;
+
+ size_t local_op_id = 0;
+ // reversely travel forwardNet and collect all duplicate outputs.
+ for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
+ ++it, ++local_op_id) {
+ auto& fwd = *it;
+ auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
+ ForEachVarName(bwd->Outputs(),
+ [&dup_output_ops, local_op_id](const std::string& out) {
+ dup_output_ops[out].emplace_back(local_op_id);
+ return false;
+ });
+ net->AppendOp(std::move(bwd));
+ }
+ // Get unique ID for this method.
+ auto uid = uniq_id++;
+ // TODO(dzh): more comment
+ // multiple operators which have the same output (y for example) may
+ // overwrite the same y variable when backward, special operations are token
+ // to handle this case. For each duplicate output, rename it to an alias
+ // (original name with a offset), append an `add` op for its operator,
+ // and finally sum all the alias variable to the final output variable y.
+ using Pos = std::pair>;
+ std::list insert_position;
+ for (auto& dup_output_op : dup_output_ops) {
+ const std::string& name = dup_output_op.first;
+ // duplicate @Empty@ don't need to be added
+ if (name == kEmptyVarName) continue;
+
+ auto& dup_op = dup_output_op.second;
+ // no duplicate output
+ if (dup_op.size() == 1) continue;
+
+ // process the duplicate outputs
+ std::vector dup_outputs;
+ for (size_t i = 0; i < dup_op.size(); ++i) {
+ // rename each duplicate output to an alias
+ auto op_offset = dup_op[i];
+ dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" +
+ std::to_string(i));
+ net->ops_[op_offset]->Rename(name, dup_outputs.back());
+ }
+ // collect all the offset for each alias,
+ // insert a sum operator to add all aliases to output
+ insert_position.push_back(
+ {dup_op.back(),
+ OpRegistry::CreateOp("sum", {{"X", dup_outputs}}, {{"Out", {name}}},
+ AttributeMap{})});
+ }
+
+ // make sure the inserted `sum` ops follow the BFS order.
+ insert_position.sort(
+ [](const Pos& l, const Pos& r) { return l.first > r.first; });
+
+ for (auto& pos : insert_position) {
+ net->InsertOp(pos.first + 1, std::move(pos.second));
+ }
+ } else {
+ std::unique_ptr grad_op(
+ CreateGradOp(forwardOp, no_grad_names, grad_to_var));
+
+ ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
+ const std::string& grad_input) {
+ if (no_grad_names.count(grad_input)) {
+ // +1 for \0
+ std::string prefix = grad_input.substr(
+ 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
+ grad_op->Rename(grad_input, prefix + kZeroVarSuffix);
+
+ // If part of input gradient of that operator is not calculated, fill
+ // zero variables to that input gradient.
+ net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
+ {{"Out", {grad_input}}},
+ AttributeMap{}));
+ }
+ return false;
+ });
+
+ ForEachVarName(grad_op->Outputs(),
+ [&no_grad_names, &grad_op](const std::string& grad_output) {
+ if (no_grad_names.count(grad_output)) {
+ grad_op->Rename(grad_output, kEmptyVarName);
+ }
+ return false;
+ });
+
+ if (net->ops_.empty()) { // Current no aux op is added to network
+ return grad_op;
+ }
+ net->AppendOp(std::move(grad_op));
+ }
+ net->SetType("@GENERATED_BACKWARD@");
+ net->CompleteAddOp();
+ return std::unique_ptr<OperatorBase>(
+ static_cast<OperatorBase*>(net.release()));
+}
+
+// See header for comments
+std::unique_ptr<OperatorBase> Backward(
+ const OperatorBase& forwardOp,
+ const std::unordered_set<std::string>& no_grad_vars) {
+ std::unordered_set<std::string> no_grad_names;
+ no_grad_names.reserve(no_grad_vars.size() + 1);
+
+ no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
+
+ for (auto& name : no_grad_vars) {
+ no_grad_names.insert(name + kGradVarSuffix);
+ }
+ size_t uid = 0;
+ std::unordered_map<std::string, std::string> grad_to_var;
+ return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
+}
+
+// ==================================== //
+
+static bool AllGradInSet(const std::vector<std::string>& names,
+ const std::unordered_set<std::string>& set) {
+ for (const std::string& name : names) {
+ if (!set.count(GradVarName(name))) {
+ return false;
+ }
+ }
+ if (VLOG_IS_ON(10)) {
+ std::ostringstream sout;
+ sout << "All input {";
+ for (auto& name : names) {
+ sout << name << ",";
+ }
+ sout << "} is in {";
+ for (auto& name : set) {
+ sout << name << ",";
+ }
+ sout << "}";
+ VLOG(10) << sout.str();
+ }
+ return true;
+}
+
+static std::string FwdName(const std::string& grad_name) {
+ auto pos = grad_name.find("@GRAD");
+ if (pos == std::string::npos) {
+ return "";
+ } else {
+ return grad_name.substr(0, pos);
+ }
+}
+
+static void CreateGradVarInBlock(
+ size_t grad_op_start_index,
+ const std::unordered_map<std::string, std::string>& param_name_map,
+ BlockDesc* block_desc,
+ std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
+ auto ops = block_desc->AllOps();
+ for (size_t op_index = grad_op_start_index; op_index < ops.size();
+ ++op_index) {
+ std::unordered_set new_vars;
+ auto& ctrl_flow_ops = CtrlFlowOps();
+ ForEachVarName(ops[op_index]->Outputs(),
+ [&](const std::string& grad_var_name) {
+ if (ctrl_flow_ops.find(ops[op_index]->Type()) !=
+ ctrl_flow_ops.end()) {
+ if (block_desc->HasVarRecursive(grad_var_name)) {
+ return false;
+ }
+ } else {
+ if (block_desc->HasVar(grad_var_name)) {
+ return false;
+ }
+ }
+ if (grad_var_name == framework::kEmptyVarName) {
+ return false;
+ }
+ auto var = block_desc->Var(grad_var_name);
+ VLOG(10) << "Creating Variable " << grad_var_name;
+ new_vars.insert(var->Name());
+ auto it = param_name_map.find(grad_var_name);
+ if (it == param_name_map.end()) {
+ return false;
+ }
+ auto param_var_name = it->second;
+ auto& grad_record = (*grad_var_record)[param_var_name];
+ grad_record.name_ = grad_var_name;
+ grad_record.block_idx_ = block_desc->ID();
+ grad_record.op_idx_ = static_cast<int>(op_index);
+ return false; /* not break */
+ });
+ ops[op_index]->InferVarType(block_desc);
+ for (auto& arg : ops[op_index]->OutputArgumentNames()) {
+ if (new_vars.find(arg) == new_vars.end()) {
+ continue;
+ }
+ auto pname = FwdName(arg);
+ auto* param = block_desc->FindVarRecursive(pname);
+ auto* grad = block_desc->FindVar(arg);
+ if (param == nullptr) {
+ grad->SetDataType(proto::DataType::FP32);
+ } else {
+ grad->SetDataType(param->GetDataType());
+ }
+ }
+ ops[op_index]->InferShape(*block_desc);
+ }
+}
+
+std::vector<std::unique_ptr<OpDesc>> MakeOpGrad(
+ const OpDesc* op_desc, std::unordered_set<std::string>* no_grad_vars,
+ std::unordered_map<std::string, std::string>* grad_to_var,
+ const std::vector<BlockDesc*>& grad_block = std::vector<BlockDesc*>()) {
+ std::vector<std::unique_ptr<OpDesc>> grad_op_descs;
+ // All input gradients of forwarding operator do not need to calculate.
+ const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
+ if (AllGradInSet(inputs, *no_grad_vars)) {
+ VLOG(10) << "Drop operator " << op_desc->Type();
+ return grad_op_descs; // empty vector
+ }
+
+ // All output gradients of forwarding operator do not need to calculate.
+ const std::vector<std::string>& outputs = op_desc->OutputArgumentNames();
+
+ if (AllGradInSet(outputs, *no_grad_vars)) {
+ VLOG(10) << "Drop operator " << op_desc->Type();
+ // FIXME: Hack code here
+ auto& ctrl_flow_ops = CtrlFlowOps();
+ if (ctrl_flow_ops.find(op_desc->Type()) == ctrl_flow_ops.end()) {
+ // Only computational op need drop input's gradient.
+ for (const std::string& name : inputs) {
+ no_grad_vars->insert(GradVarName(name));
+ VLOG(10) << " Also drop " << GradVarName(name);
+ }
+ }
+
+ return grad_op_descs; // empty vector
+ }
+
+ grad_op_descs =
+ OpInfoMap::Instance()
+ .Get(op_desc->Type())
+ .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block);
+
+ std::list<std::unique_ptr<OpDesc>> pending_fill_zeros_ops;
+ for (auto& desc : grad_op_descs) {
+ for (const std::string& in_name : desc->InputArgumentNames()) {
+ if (no_grad_vars->count(in_name)) {
+ std::string prefix = in_name.substr(
+ 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
+ std::string new_name = prefix + kZeroVarSuffix;
+ desc->Rename(in_name, new_name);
+ std::unique_ptr<OpDesc> fill_zeros_op(
+ new OpDesc("fill_zeros_like", {{"X", {prefix}}},
+ {{"Out", {new_name}}}, AttributeMap{}));
+ pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
+ }
+ }
+ }
+
+ for (auto& p : pending_fill_zeros_ops) {
+ grad_op_descs.insert(grad_op_descs.begin(), std::move(p));
+ }
+ return grad_op_descs;
+}
+
+static BlockDesc* CreateStepBlock(
+ ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
+ std::unordered_map<std::string, std::string>* grad_to_var,
+ int step_block_idx);
+
+std::vector<std::unique_ptr<OpDesc>> MakeBlockBackward(
+ ProgramDesc& program_desc, int block_idx,
+ std::unordered_set<std::string>* no_grad_vars,
+ std::unordered_map<std::string, std::string>* grad_to_var) {
+ VLOG(5) << "MakeBlockBackward";
+ BlockDesc* cur_block = program_desc.MutableBlock(block_idx);
+ std::vector<OpDesc*> op_descs = cur_block->AllOps();
+ std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
+ size_t grad_desc_idx = 0;
+ std::vector<std::unique_ptr<OpDesc>> backward_descs;
+
+ for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
+ VLOG(5) << "Making backward " << (*it)->Type() << " op";
+ std::vector<std::unique_ptr<OpDesc>> op_grads;
+
+ if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" ||
+ (*it)->Type() == "parallel_do") {
+ int step_block_idx = (*it)->GetBlockAttr("sub_block");
+ BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
+ grad_to_var, step_block_idx);
+ op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
+ } else if ((*it)->Type() == "conditional_block") {
+ BlockDesc* backward_block =
+ CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
+ (*it)->GetBlockAttr("sub_block"));
+ op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
+ } else {
+ op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
+ }
+
+ if (VLOG_IS_ON(10)) {
+ std::ostringstream sout;
+ sout << "Made ";
+ for (auto& op_grad : op_grads) {
+ sout << op_grad->Type() << " ";
+ }
+ VLOG(10) << sout.str();
+ }
+
+ for (const auto& desc : op_grads) {
+ for (const std::string& out_name : desc->OutputArgumentNames()) {
+ if (out_name.find("@GRAD") == std::string::npos) {
+ // Not all outputs of a backward operator is a gradient. Only gradient
+ // need to be sum. Skip variables are not gradient.
+ continue;
+ }
+ dup_out_ops[out_name].emplace_back(grad_desc_idx);
+ }
+ ++grad_desc_idx;
+ }
+ std::transform(op_grads.begin(), op_grads.end(),
+ std::back_inserter(backward_descs),
+ [](std::unique_ptr& ptr) { return std::move(ptr); });
+ }
+
+ VLOG(5) << "Appending Sums";
+ // Check whether some variables are written more than once
+ std::list<std::pair<size_t, std::unique_ptr<OpDesc>>> pending_sum_ops;
+ for (const auto& dup : dup_out_ops) {
+ const std::string& out_name = dup.first;
+ const std::vector<size_t> dup_op = dup.second;
+ if (out_name != kEmptyVarName && dup_op.size() > 1) {
+ std::vector<std::string> sum_op_inputs;
+ std::string next_g_name = out_name;
+ for (size_t i = 0; i < dup_op.size(); ++i) {
+ VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name
+ << " duplicated";
+ std::string new_name = out_name + "@RENAME@" + std::to_string(i);
+ backward_descs[dup_op[i]]->RenameOutput(out_name, new_name);
+ backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name);
+ sum_op_inputs.emplace_back(new_name);
+ next_g_name = sum_op_inputs.back();
+ }
+ std::unique_ptr<OpDesc> sum_op(new OpDesc("sum", {{"X", sum_op_inputs}},
+ {{"Out", {out_name}}},
+ AttributeMap{}));
+ pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
+ }
+ }
+
+ pending_sum_ops.sort([](const std::pair<size_t, std::unique_ptr<OpDesc>>& a,
+ const std::pair<size_t, std::unique_ptr<OpDesc>>& b) {
+ return a.first > b.first;
+ });
+ for (auto& p : pending_sum_ops) {
+ backward_descs.insert(backward_descs.begin() + p.first + 1,
+ std::move(p.second));
+ }
+
+ VLOG(5) << "MakeBlockBackward Finished";
+
+ return backward_descs;
+}
+
+static BlockDesc* CreateStepBlock(
+ ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
+ std::unordered_map<std::string, std::string>* grad_to_var,
+ int step_block_idx) {
+ auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
+ no_grad_vars, grad_to_var);
+ BlockDesc* backward_block =
+ program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
+ for (auto& ptr : backward_block_op_descs) {
+ backward_block->AppendAllocatedOp(move(ptr));
+ }
+ return backward_block;
+}
+
+ParamGradInfoMap AppendBackward(
+ ProgramDesc& program_desc, const VarDesc& target,
+ const std::unordered_set<std::string>& no_grad_vars) {
+ std::unordered_set<std::string> no_grad_var_names;
+ no_grad_var_names.reserve(no_grad_vars.size() + 1);
+ no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
+ for (auto& name : no_grad_vars) {
+ no_grad_var_names.insert(GradVarName(name));
+ }
+
+ const int root_block_idx = 0;
+ auto root_block = program_desc.MutableBlock(root_block_idx);
+
+ std::string fill_one_op_out = GradVarName(target.Name());
+ bool is_scalar = target.GetShape() == std::vector<int64_t>{1};
+ PADDLE_ENFORCE(is_scalar, "target should be scalar");
+ VLOG(3) << "backward from loss=" << target.Name()
+ << " data_type=" << target.GetDataType();
+ std::unique_ptr<OpDesc> fill_one_op(
+ new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}},
+ {{"shape", std::vector<int>{1}},
+ {"value", static_cast<float>(1.0)},
+ {"dtype", target.GetDataType()}}));
+ // infer var type of fill_one_op
+ fill_one_op->InferVarType(root_block);
+
+ root_block->AppendAllocatedOp(std::move(fill_one_op));
+ size_t forward_op_num = root_block->OpSize();
+ size_t forward_block_num = program_desc.Size();
+
+ // Insert backward operators
+ std::unordered_map<std::string, std::string> grad_to_var;
+ auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
+ &no_grad_var_names, &grad_to_var);
+
+ for (auto& ptr : backward_op_descs) {
+ root_block->AppendAllocatedOp(std::move(ptr));
+ }
+ // Create Variable
+
+ // Create target gradient variable
+ std::unordered_map<std::string, GradVarInfo> retv;
+
+ auto var = root_block->Var(fill_one_op_out);
+ var->SetDataType(target.GetDataType());
+ var->SetShape(target.GetShape());
+ auto& target_grad = retv[target.Name()];
+ target_grad.name_ = fill_one_op_out;
+ target_grad.block_idx_ = root_block_idx;
+ target_grad.op_idx_ = static_cast<int>(forward_op_num);
+
+ // create grad_var for all blocks in this program
+ CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
+ for (size_t block_index = forward_block_num;
+ block_index < program_desc.Size(); ++block_index) {
+ CreateGradVarInBlock(0, grad_to_var, program_desc.MutableBlock(block_index),
+ &retv);
+ }
+ return retv;
+}
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/backward.h b/paddle/fluid/framework/backward.h
new file mode 100644
index 0000000000000000000000000000000000000000..2ea6922426e1dad0ca9b6e1287701bca0adef5c8
--- /dev/null
+++ b/paddle/fluid/framework/backward.h
@@ -0,0 +1,56 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+
+namespace paddle {
+namespace framework {
+
+// Create the backward operator from a forward operator.
+// TODO(yuyang18): Add more API reference comment.
+extern std::unique_ptr<OperatorBase> Backward(
+ const OperatorBase& forwardOp,
+ const std::unordered_set<std::string>& no_grad_vars);
+
+struct GradVarInfo {
+ GradVarInfo() {}
+ GradVarInfo(const std::string& name, int block_idx, int op_idx)
+ : name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
+
+ bool operator==(const GradVarInfo& b) const {
+ return name_ == b.name_ && block_idx_ == b.block_idx_ &&
+ op_idx_ == b.op_idx_;
+ }
+
+ std::string name_;
+ int block_idx_;
+ int op_idx_;
+};
+
+using ParamGradInfoMap = std::unordered_map<std::string, GradVarInfo>;
+
+ParamGradInfoMap AppendBackward(
+ ProgramDesc& program_desc, const VarDesc& target,
+ const std::unordered_set<std::string>& no_grad_vars);
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/backward_test.cc b/paddle/fluid/framework/backward_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f9604c68913f98abc4d52c84bc8fa2c02e1a6a31
--- /dev/null
+++ b/paddle/fluid/framework/backward_test.cc
@@ -0,0 +1,918 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/backward.h"
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/operators/net_op.h"
+
+USE_NO_KERNEL_OP(fill_constant);
+
+namespace paddle {
+namespace framework {
+
+using DeviceContext = platform::DeviceContext;
+
+class NoneOp : public framework::OperatorWithKernel {
+ public:
+ using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+ void InferShape(framework::InferShapeContext *ctx) const override {}
+};
+
+template <typename Place, typename T>
+class NoneKernel : public framework::OpKernel<T> {
+ public:
+ void Compute(const framework::ExecutionContext &context) const override {}
+};
+
+class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "Input X of Add");
+ AddInput("b", "Bias of Add");
+ AddOutput("Out", "Out of Add");
+ AddComment("Add Op");
+ }
+};
+
+class RowWiseAddGradMaker : public SingleGradOpDescMaker {
+ public:
+ using SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+ std::unique_ptr<OpDesc> Apply() const override {
+ auto grad_op = new OpDesc();
+ grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
+ grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
+ grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
+ grad_op->SetType("rowwise_add_grad");
+ return std::unique_ptr<OpDesc>(grad_op);
+ }
+};
+
+class MulOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "A");
+ AddInput("Y", "B");
+ AddOutput("Out", "Out");
+ AddAttr<int>("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
+ AddAttr<int>("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
+ AddComment("Mul");
+ }
+};
+
+class SigmoidOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "X");
+ AddOutput("Out", "Y");
+ AddComment("Sigmoid");
+ }
+};
+
+class NoGradOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "X input");
+ AddOutput("Out", "Y output");
+ AddComment("NoGradOp, same input output. no Grad");
+ }
+};
+
+class FcOp : public operators::NetOp {
+ public:
+ FcOp(const std::string &type, const VariableNameMap &inputs,
+ const VariableNameMap &outputs, const AttributeMap &attrs)
+ : NetOp(type, inputs, outputs, attrs) {
+ AppendOp(OpRegistry::CreateOp(
+ "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}},
+ {{"Out", {Output("mul_result")}}}, AttributeMap{}));
+ auto input_b = Inputs("b");
+ std::string before_act = "mul_result";
+ if (input_b.size() != 0) {
+ AppendOp(OpRegistry::CreateOp(
+ "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}},
+ {{"Out", {Output("add_result")}}}, AttributeMap{}));
+ before_act = "add_result";
+ } else {
+ auto out_varname = Output("add_result");
+ if (out_varname != kEmptyVarName) {
+ this->Rename(out_varname, kEmptyVarName);
+ }
+ }
+
+ AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}},
+ {{"Out", {Output("Out")}}}, AttributeMap{}));
+ CompleteAddOp(false);
+ }
+};
+
+class FcOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ FcOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "x");
+ AddInput("W", "w");
+ AddInput("b", "b");
+ AddOutput("mul_result", "").AsIntermediate();
+ AddOutput("add_result", "").AsIntermediate();
+ AddOutput("Out", "");
+ AddComment("");
+ }
+};
+
+class ManyOutputOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("x", "x");
+ AddOutput("y", "y");
+ AddOutput("z", "z");
+ AddComment("");
+ }
+};
+
+class FillZeroOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "x");
+ AddOutput("Out", "out");
+ AddComment("");
+ }
+};
+
+class SumOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+ SumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "the input tensors of sum operator.").AsDuplicable();
+ AddOutput("Out", "the output tensor of sum operator.");
+ AddComment("");
+ }
+};
+
+class MultInOutOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "x");
+ AddInput("H", "h");
+ AddOutput("Y", "y");
+ AddOutput("Z", "z");
+ AddComment("");
+ }
+};
+
+class MinusGradOpDescMaker : public GradOpDescMakerBase {
+ public:
+ using GradOpDescMakerBase::GradOpDescMakerBase;
+
+ std::vector<std::unique_ptr<OpDesc>> operator()() const override {
+ std::vector<std::unique_ptr<OpDesc>> retv;
+ auto x_g = InputGrad("X");
+ if (!x_g.empty()) {
+ auto *op_desc = new OpDesc();
+ op_desc->SetType("scale");
+ op_desc->SetInput("X", OutputGrad("Out"));
+ op_desc->SetOutput("Out", x_g);
+ op_desc->SetAttr("scale", 1.0f);
+ retv.emplace_back(op_desc);
+ }
+
+ auto y_g = InputGrad("Y");
+ if (!y_g.empty()) {
+ auto *op_desc = new OpDesc();
+ op_desc->SetType("scale");
+ op_desc->SetInput("X", OutputGrad("Out"));
+ op_desc->SetOutput("Out", y_g);
+ op_desc->SetAttr("scale", -1.0f);
+ retv.emplace_back(op_desc);
+ }
+ return retv;
+ }
+};
+
+class MinusOpMaker : public OpProtoAndCheckerMaker {
+ public:
+ MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "");
+ AddInput("Y", "");
+ AddOutput("Out", "");
+ AddComment("minus for unittest");
+ }
+};
+} // namespace framework
+} // namespace paddle
+
+namespace f = paddle::framework;
+namespace ops = paddle::operators;
+using EnforceNotMet = paddle::platform::EnforceNotMet;
+// rowwise_add
+REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
+ f::RowWiseAddGradMaker);
+REGISTER_OP_CPU_KERNEL(rowwise_add,
+ f::NoneKernel);
+REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
+ f::NoneKernel);
+// mul
+REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel);
+REGISTER_OP_CPU_KERNEL(mul_grad,
+ f::NoneKernel);
+// sigmoid
+REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sigmoid,
+ f::NoneKernel);
+REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
+// fill_zeros_like
+REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
+REGISTER_OP_CPU_KERNEL(fill_zeros_like,
+ f::NoneKernel);
+// sum
+REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel);
+REGISTER_OP_CPU_KERNEL(sum_grad,
+ f::NoneKernel);
+// fc
+REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
+// many_output_op
+REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
+ many_output_op_grad, f::NoneOp);
+// mult_in_out
+REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
+ f::NoneOp);
+REGISTER_OP_CPU_KERNEL(mult_in_out,
+ f::NoneKernel);
+REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
+ f::NoneKernel);
+// minus
+REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
+REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel);
+// scale
+REGISTER_OPERATOR(scale, f::NoneOp);
+REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel);
+
+TEST(Backward, simple_op_not_need_grad) {
+ auto fwd =
+ f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
+ {{"Out", {"out"}}}, f::AttributeMap{});
+ ASSERT_NE(fwd, nullptr);
+ auto gop = f::Backward(*fwd, {"x"});
+ ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName);
+
+ auto no_input_gop = f::Backward(*fwd, {"x", "b"});
+ ASSERT_NE(no_input_gop, nullptr);
+ ASSERT_TRUE(no_input_gop->IsNetOp());
+ ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size());
+}
+
+TEST(Backward, net_fc_backward_normal) {
+ std::shared_ptr fwd =
+ f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
+ {{"mul_result", {"mul_res"}},
+ {"add_result", {"add_re"}},
+ {"Out", {"out"}}},
+ f::AttributeMap{});
+ ASSERT_NE(fwd, nullptr);
+ std::shared_ptr gop =
+ f::Backward(*fwd, std::unordered_set{});
+ ASSERT_TRUE(gop->IsNetOp());
+ auto net = static_cast(gop.get());
+
+ ASSERT_NO_THROW(net->DebugString());
+
+ ASSERT_EQ(3UL, net->ops_.size());
+
+ f::OperatorBase &d_sigmoid = *net->ops_[0];
+ ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
+
+ f::OperatorBase &d_add = *net->ops_[1];
+ ASSERT_EQ("rowwise_add_grad", d_add.Type());
+
+ f::OperatorBase &d_mul = *net->ops_[2];
+ ASSERT_EQ("mul_grad", d_mul.Type());
+}
+
+TEST(Backward, net_fc_backward_not_have_b) {
+ std::shared_ptr fwd =
+ f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}},
+ {{"mul_result", {"mul_res"}},
+ {"add_result", {"add_res"}},
+ {"Out", {"tmp"}}},
+ f::AttributeMap{});
+ ASSERT_NE(fwd, nullptr);
+ std::shared_ptr gop =
+ f::Backward(*fwd, std::unordered_set{});
+ ASSERT_TRUE(gop->IsNetOp());
+ auto net = static_cast(gop.get());
+
+ ASSERT_NO_THROW(net->DebugString());
+
+ ASSERT_EQ(2UL, net->ops_.size());
+
+ f::OperatorBase &d_sigmoid = *net->ops_[0];
+ ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
+
+ f::OperatorBase &d_mul = *net->ops_[1];
+ ASSERT_EQ("mul_grad", d_mul.Type());
+}
+
+TEST(Backward, net_input_of_network_not_need_grad) {
+ ops::NetOp net;
+ net.AppendOp(f::OpRegistry::CreateOp(
+ "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}},
+ {{"mul_result", {"mul_tmp_0"}},
+ {"add_result", {"add_tmp_0"}},
+ {"Out", {"hidden0"}}},
+ f::AttributeMap{}));
+ net.AppendOp(f::OpRegistry::CreateOp(
+ "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}},
+ {{"mul_result", {"mul_tmp_1"}},
+ {"add_result", {"add_tmp_1"}},
+ {"Out", {"hidden1"}}},
+ f::AttributeMap{}));
+ net.CompleteAddOp();
+ auto bwd = Backward(net, {"x"}); // x@GRAD is not need.
+ ASSERT_TRUE(bwd->IsNetOp());
+ auto bwd_net = static_cast(bwd.get());
+
+ auto output_vars = bwd_net->OutputVars(true);
+ std::unordered_set all_outputs =
+ std::unordered_set(output_vars.begin(), output_vars.end());
+ all_outputs.erase(f::kEmptyVarName);
+
+ for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
+ ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end());
+ }
+
+ // Not Generated X
+ ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end());
+
+ ASSERT_EQ(2UL, bwd_net->ops_.size());
+ ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
+ auto first_fc_grad = static_cast(bwd_net->ops_[1].get());
+ ASSERT_EQ(3UL, first_fc_grad->ops_.size());
+ ASSERT_EQ(f::kEmptyVarName,
+ first_fc_grad->ops_[2]->Output(f::GradVarName("X")));
+}
+
+TEST(Backward, net_shared_weight) {
+ ops::NetOp net;
+ net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}},
+ {{"Out", {"out"}}}, f::AttributeMap{}));
+ net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}},
+ {{"Out", {"FinalOut"}}},
+ f::AttributeMap{}));
+ net.CompleteAddOp();
+
+ auto bwd = f::Backward(net, std::unordered_set{});
+ ASSERT_TRUE(bwd->IsNetOp());
+ auto bwd_net = static_cast(bwd.get());
+ ASSERT_EQ(3UL, bwd_net->ops_.size());
+ ASSERT_EQ("sum", bwd_net->ops_[2]->Type());
+}
+
+TEST(Backward, op_all_input_are_not_need) {
+ auto fwd =
+ f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
+ {{"Out", {"out"}}}, f::AttributeMap{});
+ auto backward = f::Backward(*fwd, {"x", "b"});
+ ASSERT_TRUE(backward->IsNetOp());
+ auto net = static_cast(backward.get());
+ ASSERT_TRUE(net->ops_.empty());
+}
+
+TEST(Backward, op_all_output_are_not_need) {
+ auto fwd =
+ f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
+ {{"Out", {"out"}}}, f::AttributeMap{});
+ auto backward = f::Backward(*fwd, {"out"});
+ ASSERT_TRUE(backward->IsNetOp());
+ auto net = static_cast(backward.get());
+ ASSERT_TRUE(net->ops_.empty());
+}
+
+TEST(Backward, op_part_of_output_are_not_need) {
+ auto fwd =
+ f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}},
+ {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{});
+ auto backward = f::Backward(*fwd, {"Z"});
+ ASSERT_TRUE(backward->IsNetOp());
+ auto net = static_cast(backward.get());
+ ASSERT_EQ(net->ops_.size(), 2UL);
+
+ auto &fill_zero = *net->ops_[0];
+ ASSERT_EQ("fill_zeros_like", fill_zero.Type());
+ ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
+ ASSERT_EQ("Z", fill_zero.Input("X"));
+ ASSERT_EQ(1UL, fill_zero.Outputs("Out").size());
+ ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out"));
+
+ auto &d_many_out = *net->ops_[1];
+ ASSERT_EQ("many_output_op_grad", d_many_out.Type());
+ ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG
+ ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix,
+ d_many_out.Input(f::GradVarName("z")));
+ ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y")));
+ ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x")));
+}
+
+TEST(Backward, op_part_of_input_are_not_need) {
+ auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}},
+ {{"Out", {"out"}}}, f::AttributeMap{});
+ auto backward = f::Backward(*fwd, {"a"});
+ auto &grad_mul = *backward;
+ ASSERT_EQ(grad_mul.Type(), "mul_grad");
+ ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL);
+ ASSERT_EQ(grad_mul.Outputs().size(), 2UL);
+ ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName);
+ ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b"));
+ ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
+ ASSERT_EQ(grad_mul.Input("X"), "a");
+ ASSERT_EQ(grad_mul.Input("Y"), "b");
+ ASSERT_EQ(grad_mul.Input("Out"), "out");
+}
+
+TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
+ ops::NetOp net;
+ net.AppendOp(f::OpRegistry::CreateOp(
+ "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}},
+ {{"mul_result", {"mul_out1"}},
+ {"add_result", {"add_out1"}},
+ {"Out", {"out1"}}},
+ f::AttributeMap{}));
+ net.AppendOp(f::OpRegistry::CreateOp(
+ "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}},
+ {{"mul_result", {"mul_out2"}},
+ {"add_result", {"tmp_out2"}},
+ {"Out", {"out2"}}},
+ f::AttributeMap{}));
+ net.AppendOp(f::OpRegistry::CreateOp(
+ "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}},
+ {{"mul_result", {"mul_out3"}},
+ {"add_result", {"tmp_out3"}},
+ {"Out", {"out3"}}},
+ f::AttributeMap{}));
+ net.CompleteAddOp();
+
+ auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
+ ASSERT_TRUE(backward->IsNetOp());
+ auto bwd_net = static_cast(backward.get());
+ ASSERT_EQ(bwd_net->ops_.size(), 3UL);
+ auto &grad_fc = *bwd_net->ops_[0];
+
+ const char *all = paddle::operators::NetOp::kAll;
+ EXPECT_EQ(grad_fc.Inputs(all).size(),
+ 2UL /* external input number */
+ + 1UL /* external output number*/
+ + 1UL /* number of gradient of external output*/
+ + 2UL /* internal variable number*/
+ );
+ EXPECT_EQ(grad_fc.Outputs(all).size(),
+ 2UL /* input number of mul*/
+ + 2UL /* input number of rowwise_add*/
+ + 1UL /* input number of sigmod */
+ - 1UL /* out2 is not needed*/);
+ EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
+ EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
+ EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
+ EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL);
+}
+
+TEST(Backward, simple_single_op) {
+ f::ProgramDesc program;
+ f::BlockDesc *block = program.MutableBlock(0);
+
+ f::OpDesc *op = block->AppendOp();
+ op->SetType("rowwise_add");
+ op->SetInput("X", {"x"});
+ op->SetInput("b", {"b"});
+ op->SetOutput("Out", {"out"});
+
+ auto target = f::VarDesc("out");
+ target.SetShape({1});
+ auto var_to_grad =
+ AppendBackward(program, target, std::unordered_set{});
+
+ ASSERT_EQ(block->AllOps().size(), 3UL);
+ f::OpDesc *fill_op = block->AllOps()[1];
+ EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+ f::OpDesc *grad_op = block->AllOps()[2];
+ EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
+ ASSERT_EQ(grad_op->InputNames().size(), 1UL);
+ ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
+ EXPECT_EQ(grad_op->Input(f::GradVarName("Out")),
+ std::vector({f::GradVarName("out")}));
+ EXPECT_EQ(grad_op->Output(f::GradVarName("X")),
+ std::vector({f::GradVarName("x")}));
+ EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
+ std::vector({f::GradVarName("b")}));
+
+ EXPECT_EQ(var_to_grad.size(), 3UL);
+ EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
+ EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
+
+ EXPECT_TRUE(block->HasVar(f::GradVarName("b")));
+ EXPECT_TRUE(block->HasVar(f::GradVarName("x")));
+}
+
+TEST(Backward, default_attribute) {
+ f::ProgramDesc program;
+ f::BlockDesc *block = program.MutableBlock(0);
+ f::OpDesc *op = block->AppendOp();
+ op->SetType("mul");
+ op->SetInput("X", {"x"});
+ op->SetInput("Y", {"y"});
+ op->SetOutput("Out", {"out"});
+ op->CheckAttrs();
+
+ auto target = f::VarDesc("out");
+ target.SetShape({1});
+ AppendBackward(program, target, std::unordered_set{});
+
+ ASSERT_EQ(block->AllOps().size(), 3UL);
+ EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
+ EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
+
+ f::OpDesc *fill_op = block->AllOps()[1];
+ EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+ f::OpDesc *grad_op = block->AllOps()[2];
+ ASSERT_EQ(grad_op->Type(), "mul_grad");
+ EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
+ EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
+}
+
+TEST(Backward, simple_mult_op) {  // chain: rowwise_add -> mul -> rowwise_add
+  f::ProgramDesc program;
+  f::BlockDesc *block = program.MutableBlock(0);
+  f::OpDesc *op1 = block->AppendOp();
+  op1->SetType("rowwise_add");
+  op1->SetInput("X", {"x1"});
+  op1->SetInput("b", {"b1"});
+  op1->SetOutput("Out", {"out1"});
+
+  f::OpDesc *op2 = block->AppendOp();
+  op2->SetType("mul");
+  op2->SetInput("X", {"out1"});
+  op2->SetInput("Y", {"y2"});
+  op2->SetOutput("Out", {"out2"});
+
+  f::OpDesc *op3 = block->AppendOp();
+  op3->SetType("rowwise_add");
+  op3->SetInput("X", {"out2"});
+  op3->SetInput("b", {"b3"});
+  op3->SetOutput("Out", {"out3"});
+
+  auto target = f::VarDesc("out3");
+  target.SetShape({1});
+  size_t forward_len = block->AllOps().size();
+  auto var_to_grad =
+      AppendBackward(program, target, std::unordered_set<std::string>{});
+
+  ASSERT_EQ(block->AllOps().size(), 6UL + 1);  // 3 fwd + fill_constant + 3 grad
+  f::OpDesc *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDesc *grad_op1 = block->AllOps()[6];  // grad ops appear in reverse order
+  EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
+  ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
+  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("x1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
+            std::vector<std::string>({f::GradVarName("b1")}));
+
+  f::OpDesc *grad_op2 = block->AllOps()[5];
+  EXPECT_EQ(grad_op2->Type(), "mul_grad");
+  ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
+  ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"out1"}));
+  EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
+  EXPECT_EQ(grad_op2->Input("Out"), std::vector<std::string>({"out2"}));
+  EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out2")}));
+  EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("out1")}));
+  EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
+            std::vector<std::string>({f::GradVarName("y2")}));
+
+  f::OpDesc *grad_op3 = block->AllOps()[4];
+  EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
+  ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
+  ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out3")}));
+  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("out2")}));
+  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
+            std::vector<std::string>({f::GradVarName("b3")}));
+
+  EXPECT_EQ(var_to_grad.size(), 7UL);  // op index recorded per grad var below
+  EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
+  EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
+  EXPECT_EQ(var_to_grad.at("out1"),
+            f::GradVarInfo(f::GradVarName("out1"), 0, 5));
+  EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
+  EXPECT_EQ(var_to_grad.at("out2"),
+            f::GradVarInfo(f::GradVarName("out2"), 0, 4));
+  EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
+
+  EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("out2")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
+}
+
+TEST(Backward, intermedia_var_no_grad) {  // "out3" is marked no-grad
+  f::ProgramDesc program;
+  f::BlockDesc *block = program.MutableBlock(0);
+  f::OpDesc *op1 = block->AppendOp();
+  op1->SetType("rowwise_add");
+  op1->SetInput("X", {"x1"});
+  op1->SetInput("b", {"b1"});
+  op1->SetOutput("Out", {"out1"});
+
+  f::OpDesc *op2 = block->AppendOp();
+  op2->SetType("mul");
+  op2->SetInput("X", {"x2"});
+  op2->SetInput("Y", {"y2"});
+  op2->SetOutput("Out", {"out2"});
+
+  f::OpDesc *op3 = block->AppendOp();
+  op3->SetType("rowwise_add");
+  op3->SetInput("X", {"out2"});
+  op3->SetInput("b", {"b3"});
+  op3->SetOutput("Out", {"out3"});
+
+  f::OpDesc *op4 = block->AppendOp();
+  op4->SetType("mul");
+  op4->SetInput("X", {"out1"});
+  op4->SetInput("Y", {"out3"});
+  op4->SetOutput("Out", {"out4"});
+
+  auto target = f::VarDesc("out4");
+  target.SetShape({1});
+  size_t forward_len = block->AllOps().size();
+  auto var_to_grad = AppendBackward(program, target, {"out3"});
+
+  ASSERT_EQ(block->AllOps().size(), 7UL);  // branch behind "out3" gets no grad
+  f::OpDesc *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDesc *grad_op1 = block->AllOps()[6];
+  EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
+  ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
+  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("x1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
+            std::vector<std::string>({f::GradVarName("b1")}));
+
+  f::OpDesc *grad_op4 = block->AllOps()[5];
+  EXPECT_EQ(grad_op4->Type(), "mul_grad");
+  ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
+  ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
+  EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"out3"}));
+  EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out4"}));
+  EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out4")}));
+  EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("out1")}));
+  EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
+
+  EXPECT_EQ(var_to_grad.size(), 4UL);
+  EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
+  EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
+  EXPECT_EQ(var_to_grad.at("out1"),
+            f::GradVarInfo(f::GradVarName("out1"), 0, 5));
+
+  EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
+}
+
+TEST(Backward, var_no_grad) {  // "z1" no-grad: its grad slot is zero-filled
+  f::ProgramDesc program;
+  f::BlockDesc *block = program.MutableBlock(0);
+  f::OpDesc *op1 = block->AppendOp();
+  op1->SetType("mult_in_out");
+  op1->SetInput("X", {"x1"});
+  op1->SetInput("H", {"h1"});
+  op1->SetOutput("Y", {"y1"});
+  op1->SetOutput("Z", {"z1"});
+
+  f::OpDesc *op2 = block->AppendOp();
+  op2->SetType("mult_in_out");
+  op2->SetInput("X", {"y1"});
+  op2->SetInput("H", {"z1"});
+  op2->SetOutput("Y", {"y2"});
+  op2->SetOutput("Z", {"z2"});
+
+  auto target = f::VarDesc("z2");
+  target.SetShape({1});
+  size_t forward_len = block->AllOps().size();
+  auto var_to_grad = AppendBackward(program, target, {"z1"});
+
+  ASSERT_EQ(block->AllOps().size(), 6UL);  // 2 fwd + fill + grad2 + zeros + grad1
+  f::OpDesc *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDesc *grad_op2 = block->AllOps()[3];
+  ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
+  ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
+  ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"y1"}));
+  EXPECT_EQ(grad_op2->Input("H"), std::vector<std::string>({"z1"}));
+  EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
+  EXPECT_EQ(grad_op2->Input("Z"), std::vector<std::string>({"z2"}));
+  EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")),
+            std::vector<std::string>({f::GradVarName("y2")}));
+  EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")),
+            std::vector<std::string>({f::GradVarName("z2")}));
+  EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("y1")}));
+  EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
+
+  f::OpDesc *fill_zero_op = block->AllOps()[4];  // feeds zeros for z1's grad
+  ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
+  ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
+  ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
+  EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
+  EXPECT_EQ(fill_zero_op->Output("Out"),
+            std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
+
+  f::OpDesc *grad_op1 = block->AllOps()[5];
+  ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
+  ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
+  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op1->Input("X"), std::vector<std::string>({"x1"}));
+  EXPECT_EQ(grad_op1->Input("H"), std::vector<std::string>({"h1"}));
+  EXPECT_EQ(grad_op1->Input("Y"), std::vector<std::string>({"y1"}));
+  EXPECT_EQ(grad_op1->Input("Z"), std::vector<std::string>({"z1"}));
+  EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")),
+            std::vector<std::string>({f::GradVarName("y1")}));
+  EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")),
+            std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("x1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
+            std::vector<std::string>({f::GradVarName("h1")}));
+
+  EXPECT_EQ(var_to_grad.size(), 4UL);
+  EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
+  EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
+  EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
+
+  EXPECT_TRUE(block->HasVar(f::GradVarName("y1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("h1")));
+}
+
+TEST(Backward, shared_var) {  // "out1" feeds two ops; its grads must be summed
+  f::ProgramDesc program;
+  f::BlockDesc *block = program.MutableBlock(0);
+  f::OpDesc *op1 = block->AppendOp();
+  op1->SetType("rowwise_add");
+  op1->SetInput("X", {"x1"});
+  op1->SetInput("b", {"b1"});
+  op1->SetOutput("Out", {"out1"});
+
+  f::OpDesc *op2 = block->AppendOp();
+  op2->SetType("mul");
+  op2->SetInput("X", {"out1"});
+  op2->SetInput("Y", {"y2"});
+  op2->SetOutput("Out", {"out2"});
+
+  f::OpDesc *op3 = block->AppendOp();
+  op3->SetType("rowwise_add");
+  op3->SetInput("X", {"out1"});
+  op3->SetInput("b", {"b3"});
+  op3->SetOutput("Out", {"out3"});
+
+  auto target = f::VarDesc("out3");
+  target.SetShape({1});
+  size_t forward_len = block->AllOps().size();
+  auto var_to_grad =
+      AppendBackward(program, target, std::unordered_set<std::string>{});
+
+  ASSERT_EQ(block->AllOps().size(), 8UL);  // 3 fwd + fill + 3 grad + sum
+  f::OpDesc *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDesc *grad_op3 = block->AllOps()[4];
+  ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
+  ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
+  ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out3")}));
+  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0"}));
+  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
+            std::vector<std::string>({f::GradVarName("b3")}));
+
+  f::OpDesc *grad_op4 = block->AllOps()[5];
+  ASSERT_EQ(grad_op4->Type(), "mul_grad");
+  ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
+  ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
+  EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"y2"}));
+  EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out2"}));
+  EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out2")}));
+  EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@1"}));
+  EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
+            std::vector<std::string>({f::GradVarName("y2")}));
+
+  f::OpDesc *sum_op = block->AllOps()[6];  // merges the two renamed partials
+  ASSERT_EQ(sum_op->Type(), "sum");
+  ASSERT_EQ(sum_op->InputNames().size(), 1UL);
+  ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
+  EXPECT_EQ(sum_op->Input("X"),
+            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0",
+                                      f::GradVarName("out1") + "@RENAME@1"}));
+  EXPECT_EQ(sum_op->Output("Out"),
+            std::vector<std::string>({f::GradVarName("out1")}));
+
+  f::OpDesc *grad_op1 = block->AllOps()[7];
+  ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
+  ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
+  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
+  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
+            std::vector<std::string>({f::GradVarName("out1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
+            std::vector<std::string>({f::GradVarName("x1")}));
+  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
+            std::vector<std::string>({f::GradVarName("b1")}));
+
+  EXPECT_EQ(var_to_grad.size(), 6UL);
+  EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
+  EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
+  EXPECT_EQ(var_to_grad.at("out1"),
+            f::GradVarInfo(f::GradVarName("out1"), 0, 6));
+  EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7));
+  EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7));
+
+  EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
+  EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
+}
+
+TEST(Backward, half_backward) {  // only the "a" branch receives a gradient
+  f::ProgramDesc program;
+  f::BlockDesc *block = program.MutableBlock(0);
+  auto *op1 = block->AppendOp();
+  op1->SetType("minus");
+  op1->SetInput("X", {"a"});
+  op1->SetInput("Y", {"b"});
+  op1->SetOutput("Out", {"out"});
+
+  auto target = f::VarDesc("out");
+  target.SetShape({1});
+  size_t forward_len = block->AllOps().size();
+  auto var_to_grad = AppendBackward(program, target, {"b"});  // "b" excluded
+  f::OpDesc *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+  auto ops = block->AllOps();
+  ASSERT_EQ(3UL, ops.size());  // minus + fill_constant + one grad op
+
+  EXPECT_EQ(var_to_grad.size(), 2UL);
+  EXPECT_EQ(var_to_grad.at("a"),
+            f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
+}
diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9550159155c28247797a6caa5fc01c64a0c5f99f
--- /dev/null
+++ b/paddle/fluid/framework/block_desc.cc
@@ -0,0 +1,190 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+
+namespace paddle {
+namespace framework {
+
+VarDesc *BlockDesc::Var(const std::string &name) {  // find-or-create local var
+  auto it = vars_.find(name);
+  if (it != vars_.end()) {
+    return it->second.get();  // already exists: nothing changed
+  }
+  need_update_ = true;  // block mutated; protobuf re-synced on next Flush()
+  auto *var = new VarDesc(name);
+  vars_[name].reset(var);
+  return var;
+}
+
+VarDesc *BlockDesc::FindVar(const std::string &name) const {  // local scope only
+  auto it = vars_.find(name);
+  if (it == vars_.end()) {
+    return nullptr;  // parent blocks are NOT searched here
+  }
+  return it->second.get();
+}
+
+bool BlockDesc::HasVar(const std::string &name) const {
+  return vars_.find(name) != vars_.end();  // local vars only, no parent lookup
+}
+
+VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
+  if (name == kEmptyVarName) return nullptr;  // empty placeholder never resolves
+
+  auto it = vars_.find(name);
+  if (it == vars_.end()) {
+    return Parent() == kNoneBlockIndex ? nullptr  // root block: search exhausted
+                                       : ParentBlock()->FindVarRecursive(name);
+  }
+  return it->second.get();
+}
+
+VarDesc &BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) {
+  VarDesc *res = FindVarRecursive(name_bytes);  // this block, then ancestors
+  if (res == nullptr) {
+    res = Var(name_bytes);  // not found anywhere: create it in *this* block
+  }
+  return *res;
+}
+
+bool BlockDesc::HasVarRecursive(const std::string &name) const {
+  return FindVarRecursive(name) != nullptr;  // checks this block and ancestors
+}
+
+std::vector<VarDesc *> BlockDesc::AllVars() const {  // non-owning pointers
+  std::vector<VarDesc *> res;
+  for (const auto &p : vars_) {
+    res.push_back(p.second.get());
+  }
+  return res;
+}
+
+OpDesc *BlockDesc::AppendOp() {  // create a new op at the end of this block
+  need_update_ = true;  // op list changed; re-sync protobuf on Flush()
+  ops_.emplace_back(new OpDesc(this));
+  return ops_.back().get();
+}
+
+void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) {
+  need_update_ = true;  // takes ownership of an op constructed elsewhere
+  ops_.emplace_back(std::move(op_desc));
+}
+
+OpDesc *BlockDesc::PrependOp() {  // create a new op at the front of this block
+  need_update_ = true;
+  ops_.emplace_front(new OpDesc(this));  // ops_ is a deque, so front-insert is O(1)
+  return ops_.front().get();
+}
+
+void BlockDesc::RemoveOp(size_t s, size_t e) {  // erase ops in range [s, e)
+  if (ops_.begin() + s == ops_.end() || ops_.begin() + e == ops_.end()) {
+    return;  // NOTE(review): this also rejects e == size(), so the trailing op
+             // can never be removed — confirm that is intended
+  }
+  need_update_ = true;
+  for (auto it = ops_.begin() + s; it != ops_.begin() + e; it++) {
+    auto names = (*it)->InputArgumentNames();
+    for (auto n : names) {
+      // TODO(typhoonzero): delete vars if no other op use it.
+      VLOG(3) << "deleting var " << n;
+    }
+  }
+  ops_.erase(ops_.begin() + s, ops_.begin() + e);
+}
+
+std::vector<OpDesc *> BlockDesc::AllOps() const {  // non-owning, program order
+  std::vector<OpDesc *> res;
+  for (const auto &op : ops_) {
+    res.push_back(op.get());
+  }
+  return res;
+}
+
+void BlockDesc::Flush() {  // sync in-memory ops/vars into the protobuf desc_
+  for (auto &op_desc : ops_) {
+    op_desc->Flush();
+  }
+
+  if (need_update_) {
+    auto &op_field = *this->desc_->mutable_ops();
+    this->ClearPBOps();  // drop stale entries first; desc_ does not own them
+    op_field.Reserve(static_cast<int>(ops_.size()));
+    for (auto &op_desc : ops_) {
+      op_field.AddAllocated(op_desc->Proto());
+    }
+    auto &var_field = *this->desc_->mutable_vars();
+    this->ClearPBVars();
+    var_field.Reserve(static_cast<int>(vars_.size()));
+    for (auto &var_desc : vars_) {
+      var_field.AddAllocated(var_desc.second->Proto());
+    }
+    need_update_ = false;
+  }
+}
+
+BlockDesc *BlockDesc::ParentBlock() const {  // nullptr for the root block
+  if (this->desc_->parent_idx() == kNoneBlockIndex) {
+    return nullptr;
+  }
+  return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx()));
+}
+
+proto::BlockDesc *BlockDesc::Proto() {
+  Flush();  // make sure the protobuf reflects all pending local edits
+  return desc_;
+}
+
+BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
+    : prog_(prog), desc_(desc), need_update_(false) {  // in sync with desc
+  for (const proto::VarDesc &var_desc : desc_->vars()) {
+    vars_[var_desc.name()].reset(new VarDesc(var_desc));  // copy from proto
+  }
+  for (const proto::OpDesc &op_desc : desc_->ops()) {
+    ops_.emplace_back(new OpDesc(op_desc, prog, this));
+  }
+}
+
+BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
+                     ProgramDesc *prog)
+    : prog_(prog), desc_(desc) {
+  need_update_ = true;  // deep copy: desc must be rebuilt on next Flush()
+  for (auto &op : other.ops_) {
+    ops_.emplace_back(new OpDesc(*op->Proto(), prog, this));  // clone via proto
+  }
+  for (auto &it : other.vars_) {
+    auto *var = new VarDesc(*it.second);
+    vars_[it.first].reset(var);
+  }
+}
+
+void BlockDesc::ClearPBOps() {  // detach op entries without destroying them
+  auto ops = this->desc_->mutable_ops();
+  while (!ops->empty()) {
+    // we do not own the OpDesc, so release the ownership.
+    ops->ReleaseLast();
+  }
+}
+
+void BlockDesc::ClearPBVars() {  // detach var entries without destroying them
+  auto vars = this->desc_->mutable_vars();
+  while (!vars->empty()) {
+    // we do not own the VarDesc, so release the ownership.
+    vars->ReleaseLast();
+  }
+}
+
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
new file mode 100644
index 0000000000000000000000000000000000000000..5f7eca3878ff6174090c7b0dd4904f5604ac8dc6
--- /dev/null
+++ b/paddle/fluid/framework/block_desc.h
@@ -0,0 +1,111 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/proto_desc.h"
+#include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/platform/macros.h"
+
+namespace paddle {
+namespace framework {
+
+class ProgramDesc;
+
+// Each Protobuf Message, we provide a XXXBind class. In that class, we optimize
+// read/write speed. Only when we want the protobuf message, the local changes
+// will be synchronized (by `Sync` method).
+
+class BlockDesc {
+ public:
+ BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc);
+
+ BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, ProgramDesc *prog);
+
+ ~BlockDesc() {
+ this->ClearPBVars();
+ this->ClearPBOps();
+ }
+
+ int32_t ID() const { return desc_->idx(); }
+
+ int32_t Parent() const { return desc_->parent_idx(); }
+
+ VarDesc *Var(const std::string &name_bytes);
+
+ VarDesc *FindVar(const std::string &name_bytes) const;
+
+ bool HasVar(const std::string &var_name) const;
+
+ VarDesc *FindVarRecursive(const std::string &name_bytes) const;
+
+ VarDesc &FindRecursiveOrCreateVar(const std::string &name_bytes);
+
+ bool HasVarRecursive(const std::string &var_name) const;
+
+  std::set<std::string> LocalVarNames() const {  // names declared in this block
+    std::set<std::string> var_names;
+    for (auto &var : vars_) {
+      var_names.insert(var.first);  // vars_ maps name -> owned VarDesc
+    }
+    return var_names;
+  }
+
+ std::vector AllVars() const;
+
+ BlockDesc *ParentBlock() const;
+
+ OpDesc *AppendOp();
+
+  void AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc);
+
+ OpDesc *PrependOp();
+
+ void RemoveOp(size_t s, size_t e);
+
+ std::vector