diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake index 5041504033e09a14546be4dfd6dfc52d366cd395..73713d93d5a52738651dda498fac5ea66e3589d2 100644 --- a/cmake/external/boost.cmake +++ b/cmake/external/boost.cmake @@ -26,7 +26,7 @@ set(BOOST_VER "1.41.0") if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL)) message(STATUS "use pre defined download url") set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE) - set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE) + set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE) endif() MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}") set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 5f8422ae76f4e2a22f3b3d5e650d8345c2622e7a..82c424fb79d5596c31891bc395699bf9ff4e7e7e 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -30,7 +30,7 @@ SET(MKLML_PROJECT "extern_mklml") IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL)) MESSAGE(STATUS "use pre defined download url") SET(MKLML_VER "mklml_lnx_2018.0.3.20180406" CACHE STRING "" FORCE) - SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) + SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) ENDIF() MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") diff --git a/doc/fluid/api/optimizer.rst b/doc/fluid/api/optimizer.rst index 7a92caf9b7139cf091eff834dbed3586b23ac3af..b90d481d9d91519d302ada7b3d22671382d71105 100644 --- a/doc/fluid/api/optimizer.rst +++ b/doc/fluid/api/optimizer.rst @@ -47,6 +47,28 @@ DecayedAdagrad :members: :noindex: +Adadelta +----------------- + +.. autoclass:: paddle.fluid.optimizer.Adadelta + :members: + :noindex: + +RMSProp +----------------- + +.. 
autoclass:: paddle.fluid.optimizer.RMSProp + :members: + :noindex: + +ModelAverage +----------------- + +.. autoclass:: paddle.fluid.optimizer.ModelAverage + :members: + :noindex: + + SGDOptimizer ------------ @@ -89,9 +111,16 @@ DecayedAdagradOptimizer :members: :noindex: -Adadelta --------------- +AdadeltaOptimizer +----------------- .. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer :members: :noindex: + +RMSPropOptimizer +----------------- + +.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer + :members: + :noindex: diff --git a/doc/fluid/howto/index_cn.rst b/doc/fluid/howto/index_cn.rst index 97aeaf167d329529f2b120b5a3d4085e0510fe16..b7c620179724ebe97a0a47b75a57b376b21ccf90 100644 --- a/doc/fluid/howto/index_cn.rst +++ b/doc/fluid/howto/index_cn.rst @@ -3,5 +3,6 @@ .. toctree:: :maxdepth: 1 - + optimization/index_cn.rst + inference/inference_support_in_fluid.md diff --git a/doc/fluid/howto/index_en.rst b/doc/fluid/howto/index_en.rst index fd21e167ce3a46da167db1e9d7013804f730e047..f3ca41cdbf1d40ec8afaf045233a38755d8a777a 100644 --- a/doc/fluid/howto/index_en.rst +++ b/doc/fluid/howto/index_en.rst @@ -5,3 +5,4 @@ HOW TO :maxdepth: 1 optimization/index_en.rst + inference/inference_support_in_fluid.md diff --git a/doc/fluid/howto/inference/inference_support_in_fluid.md b/doc/fluid/howto/inference/inference_support_in_fluid.md new file mode 100644 index 0000000000000000000000000000000000000000..d272cd3e3bdac49b9ed1a21531de1b0be03d881e --- /dev/null +++ b/doc/fluid/howto/inference/inference_support_in_fluid.md @@ -0,0 +1,361 @@ +# Fluid Inference使用指南 + +## 目录: + +- Python Inference API +- 编译Fluid Inference库 +- Inference C++ API +- Inference实例 +- Inference计算优化 + +## Python Inference API **[改进中]** +- 保存Inference模型 ([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L295)) + + ```python + def save_inference_model(dirname, + feeded_var_names, + target_vars, + executor, + main_program=None, + model_filename=None, + 
params_filename=None): + ``` + Inference模型和参数将会保存到`dirname`目录下: + - 序列化的模型 + - `model_filename`为`None`,保存到`dirname/__model__` + - `model_filename`非`None`,保存到`dirname/model_filename` + - 参数 + - `params_filename`为`None`,单独保存到各个独立的文件,各文件以参数变量的名字命名 + - `params_filename`非`None`,保存到`dirname/params_filename` + +- 两种存储格式 + - 参数保存到各个独立的文件 + - 如,设置`model_filename`为`None`、`params_filename`为`None` + + ```bash + $ cd recognize_digits_conv.inference.model + $ ls + $ __model__ batch_norm_1.w_0 batch_norm_1.w_2 conv2d_2.w_0 conv2d_3.w_0 fc_1.w_0 batch_norm_1.b_0 batch_norm_1.w_1 conv2d_2.b_0 conv2d_3.b_0 fc_1.b_0 + ``` + - 参数保存到同一个文件 + - 如,设置`model_filename`为`None`、`params_filename`为`__params__` + + ```bash + $ cd recognize_digits_conv.inference.model + $ ls + $ __model__ __params__ + ``` +- 加载Inference模型([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L380)) + ```python + def load_inference_model(dirname, + executor, + model_filename=None, + params_filename=None): + ... + return [program, feed_target_names, fetch_targets] + ``` + + +## 编译Fluid Inference库 + + - **不需要额外的CMake选项** + - 1、 配置CMake命令,更多配置请参考[源码编译PaddlePaddle](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/build_from_source_cn.html) + ```bash + $ git clone https://github.com/PaddlePaddle/Paddle.git + $ cd Paddle + $ mkdir build + $ cd build + $ cmake -DCMAKE_INSTALL_PREFIX=your/path/to/paddle_inference_lib \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_PYTHON=ON \ + -DWITH_MKL=OFF \ + -DWITH_GPU=OFF \ + .. + ``` + + - 2、 编译PaddlePaddle + ```bash + $ make + ``` + + - 3、 部署。执行如下命令将PaddlePaddle Fluid Inference库部署到`your/path/to/paddle_inference_lib`目录。 + ```bash + $ make inference_lib_dist + ``` + +- 目录结构 + + ```bash + $ cd your/path/to/paddle_inference_lib + $ tree + . 
+ |-- paddle + | `-- fluid + | |-- framework + | |-- inference + | | |-- io.h + | | `-- libpaddle_fluid.so + | |-- memory + | |-- platform + | `-- string + |-- third_party + | |-- eigen3 + | `-- install + | |-- gflags + | |-- glog + | `-- protobuf + `-- ... + ``` + + 假设`PADDLE_ROOT=your/path/to/paddle_inference_lib`。 + + + +## 链接Fluid Inference库 +- 示例项目([链接](https://github.com/luotao1/fluid_inference_example.git)) + + - GCC配置 + ```bash + $ g++ -o a.out -std=c++11 main.cc \ + -I${PADDLE_ROOT}/ \ + -I${PADDLE_ROOT}/third_party/install/gflags/include \ + -I${PADDLE_ROOT}/third_party/install/glog/include \ + -I${PADDLE_ROOT}/third_party/install/protobuf/include \ + -I${PADDLE_ROOT}/third_party/eigen3 \ + -L${PADDLE_ROOT}/paddle/fluid/inference -lpaddle_fluid \ + -lrt -ldl -lpthread + ``` + + - CMake配置 + ```cmake + include_directories(${PADDLE_ROOT}/) + include_directories(${PADDLE_ROOT}/third_party/install/gflags/include) + include_directories(${PADDLE_ROOT}/third_party/install/glog/include) + include_directories(${PADDLE_ROOT}/third_party/install/protobuf/include) + include_directories(${PADDLE_ROOT}/third_party/eigen3) + target_link_libraries(${TARGET_NAME} + ${PADDLE_ROOT}/paddle/fluid/inference/libpaddle_fluid.so + -lrt -ldl -lpthread) + ``` + + - 设置环境变量: + `export LD_LIBRARY_PATH=${PADDLE_ROOT}/paddle/fluid/inference:$LD_LIBRARY_PATH` + + + +## C++ Inference API + +- 推断流程([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_helper.h#L91)) + + - 1、 初始化设备 + ```cpp + #include "paddle/fluid/framework/init.h" + paddle::framework::InitDevices(false); + ``` + + - 2、 定义place,executor,scope + ```cpp + auto place = paddle::platform::CPUPlace(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + ``` + + - 3、 加载模型 + ```cpp + #include "paddle/fluid/inference/io.h" + auto inference_program = paddle::inference::Load(executor, *scope, dirname); + // or + auto inference_program = 
paddle::inference::Load(executor, + *scope, + dirname + "/" + model_filename, + dirname + "/" + params_filename); + ``` + + - 4、 获取`feed_target_names`和`fetch_target_names` + ```cpp + const std::vector& feed_target_names = inference_program->GetFeedTargetNames(); + const std::vector& fetch_target_names = inference_program->GetFetchTargetNames(); + ``` + + - 5、 准备`feed`数据 + ```cpp + #include "paddle/fluid/framework/lod_tensor.h" + std::vector cpu_feeds; + ... + std::map feed_targets; + for (size_t i = 0; i < feed_target_names.size(); ++i) { + // Please make sure that cpu_feeds[i] is right for feed_target_names[i] + feed_targets[feed_target_names[i]] = cpu_feeds[i]; + } + ``` + + - 6、 定义`Tensor`来`fetch`结果 + ```cpp + std::vector cpu_fetchs; + std::map fetch_targets; + for (size_t i = 0; i < fetch_target_names.size(); ++i) { + fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; + } + ``` + + - 7、 执行`inference_program` + ```cpp + executor.Run(*inference_program, scope, feed_targets, fetch_targets); + ``` + + - 8、 使用`fetch`数据 + ```cpp + for (size_t i = 0; i < cpu_fetchs.size(); ++i) { + std::cout << "lod_i: " << cpu_fetchs[i]->lod(); + std::cout << "dims_i: " << cpu_fetchs[i]->dims(); + std::cout << "result:"; + float* output_ptr = cpu_fetchs[i]->data(); + for (int j = 0; j < cpu_fetchs[i]->numel(); ++j) { + std::cout << " " << output_ptr[j]; + } + std::cout << std::endl; + } + ``` + 针对不同的数据,4. 
- 8.可执行多次。 + + - 9、 释放内存 + ```cpp + delete scope; + ``` + + +- 接口说明 + + ```cpp + void Run(const ProgramDesc& program, Scope* scope, + std::map& feed_targets, + std::map& fetch_targets, + bool create_vars = true, + const std::string& feed_holder_name = "feed", + const std::string& fetch_holder_name = "fetch"); + ``` + - 使用Python API `save_inference_model`保存的`program`里面包含了`feed_op`和`fetch_op`,用户提供的`feed_targets`、`fetch_targets`必须和`inference_program`中的`feed_op`、`fetch_op`保持一致。 + - 用户提供的`feed_holder_name`和`fetch_holder_name`也必须和`inference_program`中`feed_op`、`fetch_op`保持一致,可使用`SetFeedHolderName`和`SetFetchHolderName`接口重新设置`inference_program` + - 默认情况下,除了`persistable`属性设置为`True`的`Variable`之外,每次执行`executor.Run`会创建一个局部`Scope`,并且在这个局部`Scope`中创建和销毁所有的`Variable`,以最小化空闲时的内存占用。 + - `persistable`属性为`True`的`Variable`有: + - Operators的参数`w`、`b`等 + - `feed_op`的输入变量 + - `fetch_op`的输出变量 + + +- **不在每次执行时创建和销毁变量 + ([PR](https://github.com/PaddlePaddle/Paddle/pull/9301))** + - 执行`inference_program` + ```cpp + // Call once + executor.CreateVariables(*inference_program, scope, 0); + // Call as many times as you like + executor.Run( + *inference_program, scope, feed_targets, fetch_targets, false); + ``` + - **优点** + - 节省了频繁创建、销毁变量的时间(约占每次`Run`总时间的1% ~ 12%) + - 执行结束后可获取所有Operators的计算结果 + - **缺点** + - 空闲时也会占用大量的内存 + - 在同一个`Scope`中,相同的变量名是共用同一块内存的,容易引起意想不到的错误 + + +- **不在每次执行时创建Op([PR](https://github.com/PaddlePaddle/Paddle/pull/9630))** + - 执行`inference_program` + ```cpp + // Call once + auto ctx = executor.Prepare(*inference_program, 0); + // Call as many times as you like if you have no need to change the inference_program + executor.RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets); + ``` + - **优点** + - 节省了频繁创建、销毁Op的时间 + - **缺点** + - 一旦修改了`inference_program`,则需要重新创建`ctx` + + +- **多线程共享Parameters([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_multi_thread_helper.h))** + - 主线程 + - 1、 初始化设备 + - 2、 定义`place`,`executor`,`scope` + - 3、 
加载模型,得到`inference_program` + - 从线程 + - **复制`inference_program`得到`copy_program`,修改`copy_program`的`feed_holder_name`和`fetch_holder_name`** + ```cpp + auto copy_program = std::unique_ptr( + new paddle::framework::ProgramDesc(*inference_program)); + std::string feed_holder_name = "feed_" + paddle::string::to_string(thread_id); + std::string fetch_holder_name = "fetch_" + paddle::string::to_string(thread_id); + copy_program->SetFeedHolderName(feed_holder_name); + copy_program->SetFetchHolderName(fetch_holder_name); + ``` + - 4、 获取`copy_program`的`feed_target_names`和`fetch_target_names` + - 5、 准备feed数据,定义Tensor来fetch结果 + - 6、 执行`copy_program` + ```cpp + executor->Run(*copy_program, scope, feed_targets, fetch_targets, true, feed_holder_name, fetch_holder_name); + ``` + - 7、 使用fetch数据 + - 主线程 + - 8、 释放资源 + + +- 基本概念 + - 数据相关: + - [Tensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/tensor.md),一个N维数组,数据可以是任意类型(int,float,double等) + - [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md),带LoD(Level-of-Detail)即序列信息的Tensor + - [Scope](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md),记录了变量Variable + - 执行相关: + - [Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/executor.md),无状态执行器,只跟设备相关 + - Place + - CPUPlace,CPU设备 + - CUDAPlace,CUDA GPU设备 + - 神经网络表示: + - [Program](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/program.md). + + 详细介绍请参考[**Paddle Fluid开发者指南**](https://github.com/lcy-seso/learning_notes/blob/master/Fluid/developer's_guid_for_Fluid/Developer's_Guide_to_Paddle_Fluid.md) + + + +## Inference实例 + + 1. fit a line: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_fit_a_line.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc) + 1. 
image classification: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_image_classification.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_image_classification.cc) + 1. label semantic roles: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_label_semantic_roles.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc) + 1. recognize digits: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc) + 1. recommender system: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recommender_system.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recommender_system.cc) + 1. understand sentiment: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_understand_sentiment.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc) + 1. word2vec: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_word2vec.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_word2vec.cc) + + +## Inference计算优化 +- 使用Python推理优化工具([inference_transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/inference_transpiler.py)) + ```python + class InferenceTranspiler: + def transpile(self, program, place, scope=None): + ... + if scope is None: + scope = global_scope() + ... 
+ ``` + - 使用`InferenceTranspiler`将会直接修改`program`。 + - 使用`InferenceTranspiler`会修改参数的值,请确保`program`的参数在`scope`内。 +- 支持的优化 + - 融合batch_norm op的计算 +- 使用示例([链接](https://github.com/Xreki/Xreki.github.io/blob/master/fluid/inference/inference_transpiler.py)) + ```python + import paddle.fluid as fluid + # NOTE: Applying the inference transpiler will change the inference_program. + t = fluid.InferenceTranspiler() + t.transpile(inference_program, place, inference_scope) + ``` + + + + +## 内存使用优化 +- 使用Python内存优化工具([memory_optimization_transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/memory_optimization_transpiler.py)) + ```python + fluid.memory_optimize(inference_program) + ``` diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 47929ef7490e5edb246625cb0b3ba507039df27a..9faf5bb3036775a2ba0c08d3d6ea17ffa73753c6 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -1,2 +1,17 @@ -cc_library(analysis SRCS dot.cc node.cc node.h) +set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) +cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc + DEPS paddle_fluid) cc_test(test_node SRCS node_tester.cc DEPS analysis) +cc_test(test_dot SRCS dot_tester.cc DEPS analysis) + +set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) + +cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid + ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) +set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec) + +cc_test(test_subgraph_splitter + SRCS subgraph_splitter_tester.cc + DEPS analysis paddle_fluid tensor + ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) +set_tests_properties(test_subgraph_splitter PROPERTIES
DEPENDS test_word2vec) diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc new file mode 100644 index 0000000000000000000000000000000000000000..4220451e3caee62caa51af5bc33d6dd3fd891018 --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -0,0 +1,205 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/dot.h" + +namespace paddle { +namespace inference { +namespace analysis { + +// It is a better idea that the inputs and outputs of this graph are set manually +// before, but there must be a Pass that helps to prune the unnecessary ops that +// do not contribute to the given targets, so in this pass, analyzing it to get the +// inputs and outputs is OK.
+void DataFlowGraph::Build() { + inputs.clear(); + outputs.clear(); + std::unordered_set ins; + std::unordered_set outs; + for (auto &node : nodes.nodes()) { + for (auto *in : node->inlinks) { + ins.insert(in); + } + for (auto *out : node->outlinks) { + outs.insert(out); + } + } + + // The nodes that in ins but not in outs is the graph's inputs + // similarly, the nodes that in outs but not in ins is the graphs' outputs + for (auto *in : ins) { + if (!outs.count(in)) { + inputs.push_back(in); + } + } + for (auto *out : outs) { + if (!outs.count(out)) { + outputs.push_back(out); + } + } +} + +std::string DataFlowGraph::DotString() const { + Dot dot; + + // Add nodes + for (size_t i = 0; i < nodes.size(); i++) { + const Node &node = nodes.Get(i); + switch (node.type()) { + case Node::Type::kValue: + dot.AddNode(node.repr(), node.dot_attrs()); + break; + case Node::Type::kFunction: + dot.AddNode(node.repr(), node.dot_attrs()); + break; + case Node::Type::kFunctionBlock: + dot.AddNode(node.repr(), node.dot_attrs()); + break; + default: + PADDLE_THROW("unsupported Node type %d", static_cast(node.type())); + } + } + + // Add edges + for (size_t i = 0; i < nodes.size(); i++) { + const Node &node = nodes.Get(i); + for (auto &in : node.inlinks) { + dot.AddEdge(in->repr(), node.repr(), {}); + } + } + return dot.Build(); +} + +// +// NodesBFSIterator +// + +GraphTraits::NodesBFSIterator::NodesBFSIterator( + const std::vector &source) + : queue_(source.begin(), source.end()) {} + +// GraphTraits::NodesBFSIterator::NodesBFSIterator( +// GraphTraits::NodesBFSIterator &&other) noexcept +// : queue_(std::move(other.queue_)), +// visited_(std::move(other.visited_)) {} + +GraphTraits::NodesBFSIterator::NodesBFSIterator( + const GraphTraits::NodesBFSIterator &other) + : queue_(other.queue_), visited_(other.visited_) {} + +Node &GraphTraits::NodesBFSIterator::operator*() { + PADDLE_ENFORCE(!queue_.empty()); + return *queue_.front(); +} + +Node 
*GraphTraits::NodesBFSIterator::operator->() { + PADDLE_ENFORCE(!queue_.empty()); + return queue_.front(); +} + +GraphTraits::NodesBFSIterator & +GraphTraits::NodesBFSIterator::operator=( + const GraphTraits::NodesBFSIterator &other) { + queue_ = other.queue_; + visited_ = other.visited_; + return *this; +} + +GraphTraits::NodesBFSIterator + &GraphTraits::NodesBFSIterator::operator++() { + PADDLE_ENFORCE(!queue_.empty()); + auto *cur = queue_.front(); + visited_.insert(cur); + queue_.pop_front(); + for (auto *output : cur->outlinks) { + if (!visited_.count(output)) { + queue_.push_back(output); + visited_.insert(output); + } + } + return *this; +} + +bool GraphTraits::NodesBFSIterator::operator==( + const GraphTraits::NodesBFSIterator &other) { + if (queue_.empty()) return other.queue_.empty(); + if ((!queue_.empty()) && (!other.queue_.empty())) { + return queue_.front() == other.queue_.front() && + visited_.size() == other.visited_.size(); // here need to check the + // equality of queue and + // visited. Just a light but week implementation. 
+ } + return false; +} + +// +// NodesDFSIterator +// +GraphTraits::NodesDFSIterator::NodesDFSIterator( + const std::vector &source) { + for (auto *x : source) stack_.push(x); +} + +// GraphTraits::NodesDFSIterator::NodesDFSIterator( +// GraphTraits::NodesDFSIterator &&other) noexcept +// : stack_(std::move(other.stack_)), +// visited_(std::move(other.visited_)) {} + +GraphTraits::NodesDFSIterator::NodesDFSIterator( + const GraphTraits::NodesDFSIterator &other) + : stack_(other.stack_), visited_(other.visited_) {} + +Node &GraphTraits::NodesDFSIterator::operator*() { + PADDLE_ENFORCE(!stack_.empty()); + return *stack_.top(); +} + +GraphTraits::NodesDFSIterator + &GraphTraits::NodesDFSIterator::operator++() { + if (stack_.empty()) return *this; + visited_.insert(stack_.top()); + auto *cur = stack_.top(); + stack_.pop(); + for (auto *x : cur->outlinks) { + if (!visited_.count(x)) { + stack_.push(x); + visited_.insert(x); + } + } + return *this; +} +bool GraphTraits::NodesDFSIterator::operator==( + const GraphTraits::NodesDFSIterator &other) { + if (stack_.empty()) return other.stack_.empty(); + if ((!stack_.empty()) && (!other.stack_.empty())) { + return stack_.top() == other.stack_.top(); + } + return false; +} + +GraphTraits::NodesDFSIterator & +GraphTraits::NodesDFSIterator::operator=( + const GraphTraits::NodesDFSIterator &other) { + stack_ = other.stack_; + visited_ = other.visited_; + return *this; +} +Node *GraphTraits::NodesDFSIterator::operator->() { + return stack_.top(); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h new file mode 100644 index 0000000000000000000000000000000000000000..9f6ce40ede25248a4f779b379c132806a4ec06ba --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -0,0 +1,159 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * Data flow graph is an pass that build the basic graph. It contains a graph + * and the iterators that enable the iteration over the graph. + */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/inference/analysis/graph_traits.h" +#include "paddle/fluid/inference/analysis/node.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * DataFlowGraph - A container of Value and Function Nodes. + */ +struct DataFlowGraph { + NodeMap nodes; + std::vector inputs; + std::vector outputs; + + // Extract inputs and outputs of the graph. + void Build(); + + // Output a DOT graph file for debug. + std::string DotString() const; +}; + +/* + * An graph trait help to traverse the graph using BFS. + * The BFS start from a graph's inputs, the graph should be fully-connected, so + * that the iterator can reach the end. + */ +template <> +struct GraphTraits { + // BFS iterator on nodes. + struct NodesBFSIterator + : public std::iterator { + NodesBFSIterator() = default; + explicit NodesBFSIterator(const std::vector &source); + // NodesBFSIterator(NodesBFSIterator &&other) noexcept; + // NOTE Heavy to use. 
+ NodesBFSIterator(const NodesBFSIterator &other); + + Node &operator*(); + NodesBFSIterator &operator++(); + Node *operator->(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesBFSIterator &operator=(const NodesBFSIterator &other); + bool operator==(const NodesBFSIterator &other); + bool operator!=(const NodesBFSIterator &other) { return !(*this == other); } + + private: + std::deque queue_; + std::unordered_set visited_; + }; + + // DFS iterator on nodes. + struct NodesDFSIterator + : public std::iterator { + NodesDFSIterator() = default; + explicit NodesDFSIterator(const std::vector &source); + // NodesDFSIterator(NodesDFSIterator &&other) noexcept; + NodesDFSIterator(const NodesDFSIterator &other); + + Node &operator*(); + NodesDFSIterator &operator++(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesDFSIterator &operator=(const NodesDFSIterator &other); + bool operator==(const NodesDFSIterator &other); + bool operator!=(const NodesDFSIterator &other) { return !(*this == other); } + Node *operator->(); + + private: + std::stack stack_; + std::unordered_set visited_; + }; + + explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {} + + // default use BFS to visit the nodes. 
+ iterator_range nodes() { + return iterator_range(nodes_bfs_begin(), nodes_bfs_end()); + } + iterator_range nodes_in_BFS() { + return iterator_range(nodes_bfs_begin(), nodes_bfs_end()); + } + iterator_range nodes_in_DFS() { + return iterator_range(nodes_dfs_begin(), nodes_dfs_end()); + } + + private: + NodesBFSIterator nodes_bfs_begin() { + return NodesBFSIterator(graph_->inputs); + } + NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } + NodesDFSIterator nodes_dfs_begin() { + return NodesDFSIterator(graph_->inputs); + } + NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } + + private: + DataFlowGraph *graph_; +}; + +// Extract the inputs and outputs of a graph. The inputs and outputs of a +// sub-graph is the inputs nodes and output nodes that doesn't inside the +// sub-graph. +std::pair< + std::vector, + std::vector< + Node *>> static ExtractInputAndOutputOfSubGraph(std::vector + &graph) { + std::unordered_set nodes(graph.begin(), graph.end()); + std::unordered_set inputs; + std::unordered_set outputs; + for (auto &node : graph) { + for (auto *in : node->inlinks) { + if (!nodes.count(in) && in->type() == Node::Type::kValue) { + inputs.insert(in); + } + } + for (auto *out : node->outlinks) { + if (!nodes.count(out) && out->type() == Node::Type::kValue) { + outputs.insert(out); + } + } + } + return std::make_pair(std::vector(inputs.begin(), inputs.end()), + std::vector(outputs.begin(), outputs.end())); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..15eddca1c760a44afb986796b08e2b8533695d60 --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST(DataFlowGraph, BFS) { + auto desc = LoadProgramDesc(); + auto dfg = ProgramDescToDFG(desc); + dfg.Build(); + + for (auto* in : dfg.inputs) { + LOG(INFO) << "inputs: " << in->name() << " " + << static_cast(in->type()); + } + for (auto* out : dfg.outputs) { + LOG(INFO) << "outputs: " << out->name() << " " + << static_cast(out->type()); + } + + GraphTraits trait(&dfg); + auto nodes = trait.nodes(); + int count = 0; + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + LOG(INFO) << "visiting " << it->name(); + ++count; + } + ASSERT_EQ(count, dfg.nodes.size()); +} + +TEST(DataFlowGraph, DFS) { + auto desc = LoadProgramDesc(); + auto dfg = ProgramDescToDFG(desc); + dfg.Build(); + GraphTraits trait(&dfg); + auto nodes = trait.nodes_in_DFS(); + int count = 0; + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + LOG(INFO) << "visiting " << it->name(); + ++count; + } + ASSERT_EQ(count, dfg.nodes.size()); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc new file mode 100644 index 
0000000000000000000000000000000000000000..60f159da9140516284449a0274906df004b23ac5 --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" + +#include +#include +#include +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" +#include "paddle/fluid/inference/io.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, Test) { + framework::proto::ProgramDesc new_desc; + DataFlowGraph graph; + + FluidToDataFlowGraphPass pass0; + DataFlowGraphToFluidPass pass1; + pass0.Initialize(desc); + pass1.Initialize(&new_desc); + + pass0.Run(&graph); + pass1.Run(&graph); + + pass0.Finalize(); + pass1.Finalize(); + + LOG(INFO) << graph.nodes.size(); +} + +} // analysis +} // inference +} // paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..52851a9acb7f90826d520cc944d04fe1c90a22e0 --- /dev/null +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2018 
PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include + +namespace paddle { +namespace inference { +namespace analysis { + +FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {} + +bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); } + +bool FluidToDataFlowGraphPass::Initialize( + const framework::proto::ProgramDesc &desc) { + desc_ = &desc; + return true; +} + +bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); } + +void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { + // insert vars + std::unordered_map var2id; + auto &main_block = desc_->blocks(framework::kRootBlockIndex); + for (int i = 0; i < main_block.vars_size(); i++) { + const auto &var = main_block.vars(i); + auto *v = graph->nodes.Create(Node::Type::kValue); + v->SetName(var.name()); + v->SetExtraInfo(const_cast(static_cast(&var))); + var2id[var.name()] = v->id(); + } + for (int i = 0; i < main_block.ops_size(); i++) { + const auto &op = main_block.ops(i); + auto *o = graph->nodes.Create(Node::Type::kFunction); + o->SetName(op.type()); + static_cast(o)->SetFuncType(op.type()); + // Link to the original protobuf message's memory, make it easier to + // generate from a data flow graph to fluid ProgramDesc. 
+ o->SetExtraInfo(const_cast(static_cast(&op))); + // set inputs and outputs + // TODO(Superjomn) make sure the InputNames is the real variable name. + for (int j = 0; j < op.inputs_size(); j++) { + auto &in_var = op.inputs(j); + for (int k = 0; k < in_var.arguments_size(); k++) { + auto *in = graph->nodes.GetMutable(var2id.at(in_var.arguments(k))); + in->outlinks.push_back(o); + o->inlinks.push_back(in); + } + } + for (int j = 0; j < op.outputs_size(); j++) { + auto &out_var = op.outputs(j); + for (int k = 0; k < out_var.arguments_size(); k++) { + auto *out = graph->nodes.GetMutable(var2id[out_var.arguments(k)]); + out->inlinks.push_back(o); + o->outlinks.push_back(out); + } + } + } + // Analysis and extract the inputs and outputs of this graph. + graph->Build(); +} + +Pass *FluidToDataFlowGraphPass::CreatePrinterPass( + std::ostream &os, const std::string &banner) const { + return nullptr; +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..cd0d4fabaafe844bcc5bb8bfc2586971197d9167 --- /dev/null +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +/* + * This file implements the transformation from data flow graph to fluid + * ProgramDesc. + */ + +#pragma once + +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Transform a FluidDesc to a data flow graph. + */ +class FluidToDataFlowGraphPass final : public DataFlowGraphPass { + public: + FluidToDataFlowGraphPass(); + bool Initialize() override; + bool Initialize(const framework::proto::ProgramDesc &desc) override; + bool Finalize() override; + + void Run(DataFlowGraph *graph) override; + + Pass *CreatePrinterPass(std::ostream &os, + const std::string &banner) const override; + + private: + framework::proto::ProgramDesc const *desc_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..851c98bef305fa9e20dced5f7c26e9d1b6ddf4f2 --- /dev/null +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" + +#include +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, Init) { + FluidToDataFlowGraphPass pass; + pass.Initialize(); + pass.Initialize(desc); + DataFlowGraph graph; + pass.Run(&graph); + ASSERT_GT(graph.nodes.size(), 0); + pass.Finalize(); + LOG(INFO) << '\n' << graph.DotString(); +} + +} // analysis +} // inference +} // paddle diff --git a/paddle/fluid/inference/analysis/graph_traits.cc b/paddle/fluid/inference/analysis/graph_traits.cc new file mode 100644 index 0000000000000000000000000000000000000000..272dbb799f3759a3f6e34e93bc115cf2cea6ec3b --- /dev/null +++ b/paddle/fluid/inference/analysis/graph_traits.cc @@ -0,0 +1,15 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/fluid/inference/analysis/graph_traits.h" diff --git a/paddle/fluid/inference/analysis/graph_traits.h b/paddle/fluid/inference/analysis/graph_traits.h new file mode 100644 index 0000000000000000000000000000000000000000..aed2b1e8e27d94b430201d70ecf09d4acc33c8fa --- /dev/null +++ b/paddle/fluid/inference/analysis/graph_traits.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the GraphTraits template class that should be specified + * by classes that want to be iteratable by generic graph iterators. + * + * This file also defines the marker class Inverse that is used to iterate over + * graphs in a graph defined, inverse ordering... + */ + +#pragma once + +#include "paddle/fluid/inference/analysis/helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * This class should be specialized by different graph types... + * That's why the base class is empty. + */ +template +struct GraphTraits { + // using NodesBFSIterator = xxx + + // NodesBFSIterator nodes_begin(); + // NodesBFSIterator nodes_end(); +}; + +/* + * Inverse - This class is used as a marker class to tell the graph iterator to + * iterate in a graph defined Inverse order. + */ +template +struct Inverse { + const GraphType &graph; + + explicit Inverse(const GraphType &graph) : graph(graph) {} +}; + +/* + * Provide a partial specialization of GraphTraits so that the inverse of an + * inverse turns into the original graph. + */ +template +struct GraphTraits>> : GraphTraits {}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index b2d06c5d63ff139186710cd963e07b4ba245f9f3..a79e9cbda105f3bcaf100b3f6ea2c2634e0e2451 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -1,74 +1,107 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include -#include -#include - -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace inference { -namespace analysis { - -template -class iterator_range { - IteratorT begin_, end_; - - public: - template - explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {} - - iterator_range(const IteratorT &begin, const IteratorT &end) - : begin_(begin), end_(end) {} - - const IteratorT &begin() const { return begin_; } - const IteratorT &end() const { return end_; } -}; - -/* - * An registry helper class, with its records keeps the order they registers. - */ -template -class OrderedRegistry { - public: - T *Register(const std::string &name, T *x) { - PADDLE_ENFORCE(!dic_.count(name)); - dic_[name] = data_.size(); - data_.emplace_back(std::unique_ptr(x)); - return data_.back().get(); - } - - T *Lookup(const std::string &name) { - auto it = dic_.find(name); - if (it == dic_.end()) return nullptr; - return data_[it->second].get(); - } - - protected: - std::unordered_map dic_; - std::vector> data_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle - -#define PADDLE_DISALLOW_COPY_AND_ASSIGN(type__) \ - \ - type__(const type__ &) = delete; \ - \ - void operator=(const type__ &) = delete; +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace inference { +namespace analysis { + +#define SET_TYPE(type__) dic_[typeid(type__).hash_code()] = #type__; +/* + * Map typeid to representation. + */ +struct DataTypeNamer { + static const DataTypeNamer &Global() { + static auto *x = new DataTypeNamer(); + return *x; + } + + template + const std::string &repr() const { + auto x = typeid(T).hash_code(); + PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); + return dic_.at(x); + } + + const std::string &repr(size_t &hash) const { + PADDLE_ENFORCE(dic_.count(hash), "unknown type for representation"); + return dic_.at(hash); + } + + private: + DataTypeNamer() { + SET_TYPE(int); + SET_TYPE(bool); + SET_TYPE(float); + } + + std::unordered_map dic_; +}; +#undef SET_TYPE + +template +class iterator_range { + IteratorT begin_, end_; + + public: + template + explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {} + + iterator_range(const IteratorT &begin, const IteratorT &end) + : begin_(begin), end_(end) {} + + const IteratorT &begin() const { return begin_; } + const IteratorT &end() const { return end_; } +}; + +/* + * An registry helper class, with its records keeps the order they registers. 
+ */ +template +class OrderedRegistry { + public: + T *Register(const std::string &name, T *x) { + PADDLE_ENFORCE(!dic_.count(name)); + dic_[name] = data_.size(); + data_.emplace_back(std::unique_ptr(x)); + return data_.back().get(); + } + + T *Lookup(const std::string &name) { + auto it = dic_.find(name); + if (it == dic_.end()) return nullptr; + return data_[it->second].get(); + } + + protected: + std::unordered_map dic_; + std::vector> data_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle + +#define PADDLE_DISALLOW_COPY_AND_ASSIGN(type__) \ + \ + type__(const type__ &) = delete; \ + \ + void operator=(const type__ &) = delete; diff --git a/paddle/fluid/inference/analysis/node.h b/paddle/fluid/inference/analysis/node.h index 07cb7669f98237399c4165947a03c67ce2a86aa8..7972ca25c92186a8c55a76de645f4fdbb089e8d3 100644 --- a/paddle/fluid/inference/analysis/node.h +++ b/paddle/fluid/inference/analysis/node.h @@ -117,7 +117,10 @@ class Node { type_hash_ = typeid(T).hash_code(); data_.resize(sizeof(T)); } - PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), "type not matched"); + PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), + "type not matched, origin is %s, want %s", + DataTypeNamer::Global().repr(type_hash_), + DataTypeNamer::Global().repr()); PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); return *reinterpret_cast(&data_[0]); } @@ -127,6 +130,10 @@ class Node { size_t type_hash_{std::numeric_limits::max()}; }; + bool IsFunction() const { return type_ == Node::Type::kFunction; } + bool IsValue() const { return type_ == Node::Type::kValue; } + bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; } + virtual ~Node() {} friend class NodeMap; diff --git a/paddle/fluid/inference/analysis/pass.cc b/paddle/fluid/inference/analysis/pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..b48a4fd83497a068392fd941028fef542e45d763 --- /dev/null +++ 
b/paddle/fluid/inference/analysis/pass.cc @@ -0,0 +1,15 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/pass.h" \ No newline at end of file diff --git a/paddle/fluid/inference/analysis/pass.h b/paddle/fluid/inference/analysis/pass.h new file mode 100644 index 0000000000000000000000000000000000000000..5c89b1304d84abc9a4942f12da46b4bfe76f44f5 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include + +#include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/analysis/node.h" + +namespace paddle { +namespace inference { +namespace analysis { + +class Pass { + public: + Pass() = default; + virtual ~Pass() {} + // Virtual method overridden by subclasses to do only necessary initialization + // before any pass is run. + virtual bool Initialize() { return false; } + // There is some passes such as FlowToDataFlowGraphPass that needs a + // ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it + // only couple with the proto file. + virtual bool Initialize(const framework::proto::ProgramDesc &desc) { + return false; + } + // There are some Passes such as DataFlowGraphToFluidPass that will output a + // ProgramDesc. + virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; } + + // Virtual method overriden by subclasses to do any necessary clean up after + // all passes have run. + virtual bool Finalize() { return false; } + + // Get a Pass appropriate to print the Node this pass operates on. + virtual Pass *CreatePrinterPass(std::ostream &os, + const std::string &banner) const = 0; + + // Run on a single Node. + virtual void Run(Node *x) { LOG(FATAL) << "not valid"; } + // Run on a single Function. + virtual void Run(Function *x) { LOG(FATAL) << "not valid"; } + // Run on a single FunctionBlock. + virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; } + // Run on a single DataFlowGraph. + virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; } +}; + +// NodePass process on any Node types. +class NodePass : public Pass { + public: + virtual void Run(Node *node) = 0; +}; + +// NodePass process on any Function node types. 
+class FunctionPass : public Pass { + public: + virtual void Run(Function *node) = 0; +}; + +// NodePass process on any FunctionBlock node types. +class FunctionBlockPass : public Pass { + public: + virtual void Run(FunctionBlock *node) = 0; +}; + +// GraphPass processes on any GraphType. +class DataFlowGraphPass : public Pass { + public: + virtual void Run(DataFlowGraph *graph) = 0; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc new file mode 100644 index 0000000000000000000000000000000000000000..43ccac96c84e987ad1f494af3e314c810fc1ffe3 --- /dev/null +++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc @@ -0,0 +1,154 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" + +namespace paddle { +namespace inference { +namespace analysis { + +const char *SubGraphSplitter::kMarkerAttrName = + "_sub_graph_splitter_inside_sub_graph"; + +std::vector> SubGraphSplitter::operator()() { + MarkNodesInsideSubGraph(); + return ExtractSubGraphs(); +} + +// Mark the output variables inside a subgraph with the func. 
+inline void MarkOutLinksInSubGraph(const Function *func) { + for (auto *var : func->outlinks) { + var->attr(SubGraphSplitter::kMarkerAttrName).Bool() = true; + } +} + +void SubGraphSplitter::MarkNodesInsideSubGraph() { + for (auto &node : GraphTraits(graph_).nodes()) { + if (node_inside_subgraph_teller_(&node)) { + node.attr(kMarkerAttrName).Bool() = true; + if (node.type() == Node::Type::kFunction) { + // If a function is inside the sub-graph, mark all the output variables + // to be inside too, so that two marked functions will be inside a same + // sub-graph, lets take a example: A_function->var->B_function, if + // A_function is marked, var should also be marked, so that B_function + // will be in the same sub-graph with A_function if B_function is + // marked. + MarkOutLinksInSubGraph(static_cast(&node)); + } + } + } +} + +const char *kUnionFindParent = "_sub_graph_splitter_union_find_parent_"; + +// Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node +// a's output is node b, that is a and b is in the same sub-graph. The UF +// algorithm will group them to the same cluster. +using node_map_t = std::unordered_map; +// Find the ancestor id of a node. +int UnionFindGetAncestor(const node_map_t &node_map, size_t id) { + int tmp = id; + do { + tmp = node_map.at(tmp)->attr(kUnionFindParent).Int32(); + } while (node_map.at(tmp)->attr(kUnionFindParent).Int32() != tmp); + return tmp; +} +// Make this two node share the same ancestor. +// TODO(Superjom) bad performance, make a balanced tree latter. 
+void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) { + int a_ancestor = UnionFindGetAncestor(node_map, a); + int b_ancestor = UnionFindGetAncestor(node_map, b); + node_map.at(b_ancestor)->attr(kUnionFindParent).Int32() = a_ancestor; + node_map.at(a)->attr(kUnionFindParent).Int32() = a_ancestor; + node_map.at(b)->attr(kUnionFindParent).Int32() = a_ancestor; +} + +std::vector> SubGraphSplitter::ExtractSubGraphs() { + std::vector marked_nodes; + for (auto &node : GraphTraits(graph_).nodes()) { + if (node.attr(kMarkerAttrName).Bool()) { + marked_nodes.push_back(&node); + } + } + // extract sub-graphs in the marked node set, use Union Find algorithm. + node_map_t node_map; // id to ptr + for (auto *n : marked_nodes) { + // n's parent == n.id means it is the ancestor + n->attr(kUnionFindParent).Int32() = n->id(); + node_map[n->id()] = n; + } + std::unordered_set visited; + for (auto *n : marked_nodes) { + for (auto *out : n->outlinks) { + if (node_map.count(out->id())) { + UnionFindCombine(node_map, n->id(), out->id()); + } + } + } + + std::unordered_map> clusters; + for (auto *n : marked_nodes) { + if (n->type() == Node::Type::kFunction) { + clusters[UnionFindGetAncestor(node_map, + n->attr(kUnionFindParent).Int32())] + .push_back(n); + } + } + std::vector> result; + std::for_each(clusters.begin(), clusters.end(), + [&](const decltype(clusters)::value_type &it) { + result.push_back(it.second); + }); + + return result; +} + +void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } + +void SubGraphFuse::ReplaceNodesWithSubGraphs() { + auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); + for (auto &subgraph : subgraphs) { + // replace this sub-graph with the first node. Two steps: 1. Create a Block + // Node that contains this subgraph 2. Mark the nodes inside the sub-graph + // as deleted. 3. Replace the deleted node with the new Block Node. 
+ auto *block_node = graph_->nodes.Create(Node::Type::kFunctionBlock); + auto io = ExtractInputAndOutputOfSubGraph(subgraph); + block_node->inlinks = std::move(io.first); + block_node->outlinks = std::move(io.second); + for (auto *node : subgraph) { + // TODO(Superjomn) need a unified mechanism to treat deleted node in each + // pass. + node->SetDeleted(); + } + + std::unordered_map + delelte_node_map; // deleted node to BlockNode + for (auto *n : block_node->inlinks) { + n->inlinks.clear(); + } + for (auto *n : block_node->outlinks) { + n->outlinks.clear(); + } + for (auto *n : block_node->inlinks) { + n->outlinks.push_back(block_node); + } + for (auto *n : block_node->outlinks) { + n->inlinks.push_back(n); + } + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.h b/paddle/fluid/inference/analysis/subgraph_splitter.h new file mode 100644 index 0000000000000000000000000000000000000000..ed90a0dcf31e154c4d82be08ce35e2f11d11c139 --- /dev/null +++ b/paddle/fluid/inference/analysis/subgraph_splitter.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the the class to partition a graph. 
+ */ + +#pragma once + +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/node.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Detect the nodes in a sub-graph that meet some conditions. This class doesn't + * modify the graph. + */ +class SubGraphSplitter { + public: + static const char *kMarkerAttrName; + // Tell whether a node is inside a sub-graph. + using NodeInsideSubgraphTeller = std::function; + + SubGraphSplitter(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller) + : graph_(graph), node_inside_subgraph_teller_(teller) {} + + std::vector> operator()(); + + protected: + // Mark the nodes inside the accepted sub-graph using + // node_inside_subgraph_teller. + void MarkNodesInsideSubGraph(); + + // Merge the marked nodes into sub-graphs and return the sub-graphs. + std::vector> ExtractSubGraphs(); + + private: + DataFlowGraph *graph_; + NodeInsideSubgraphTeller node_inside_subgraph_teller_; +}; + +/* + * SubGraphFuse - Replace some nodes with the sub-graph node they are inside. To + * some extent, the TensorRT engine is just a fusion op for a model. + */ +class SubGraphFuse { + public: + using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller; + + SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller) + : graph_(graph), node_inside_subgraph_teller_(teller) {} + + // The main method which run all the logic. + void operator()(); + + protected: + // Remove the nodes inside sub-graphs and replace with the SubGraphNode. 
+ void ReplaceNodesWithSubGraphs(); + + private: + DataFlowGraph *graph_; + NodeInsideSubgraphTeller node_inside_subgraph_teller_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..6f695965afc9b462963660cabf988bfd81f0ba5c --- /dev/null +++ b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, Split) { + auto desc = LoadProgramDesc(); + auto dfg = ProgramDescToDFG(desc); + LOG(INFO) << "spliter\n" << dfg.DotString(); + + SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) { + if (node->type() != Node::Type::kFunction) return false; + const auto* func = static_cast(node); + if (func->func_type() == "elementwise_add" || func->func_type() == "relu" || + func->func_type() == "conv2d" || func->func_type() == "mul" || + func->func_type() == "sigmoid" || func->func_type() == "softmax") { + LOG(INFO) << "sub-graph marked " << node->repr(); + return true; + } + return false; + }; + ASSERT_GT(dfg.nodes.size(), 5UL); + + auto subgraphs = SubGraphSplitter(&dfg, teller)(); + + // Check the number of the marked nodes. + int marked_nodes = 0; + for (auto& node : dfg.nodes.nodes()) { + if (node->IsFunction() && + node->attr(SubGraphSplitter::kMarkerAttrName).Bool()) { + ++marked_nodes; + } + } + EXPECT_EQ(marked_nodes, 6); + + // For human debug. + for (auto& subgraph : subgraphs) { + LOG(INFO) << "subgraph size " << subgraph.size(); + for (auto* node : subgraph) { + LOG(INFO) << "node " << node->repr(); + } + } + + ASSERT_EQ(subgraphs.size(), 1UL); + // The last sub-graph has 5 Functions. + ASSERT_EQ(subgraphs.back().size(), 6UL); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..f63550dba3549ad300ab54f5eab63770848f9833 --- /dev/null +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" +#include "paddle/fluid/inference/io.h" + +namespace paddle { +namespace inference { +namespace analysis { + +DEFINE_string(inference_model_dir, "", "inference test model dir"); + +static framework::proto::ProgramDesc LoadProgramDesc( + const std::string& model_dir = FLAGS_inference_model_dir) { + // TODO(Superjomn) update latter. 
+ auto place = paddle::platform::CPUPlace(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + auto program = Load(&executor, scope, model_dir); + return *program->Proto(); +} + +static DataFlowGraph ProgramDescToDFG( + const framework::proto::ProgramDesc& desc) { + DataFlowGraph graph; + FluidToDataFlowGraphPass pass; + pass.Initialize(desc); + pass.Run(&graph); + pass.Finalize(); + return graph; +} + +class DFG_Tester : public ::testing::Test { + protected: + void SetUp() override { desc = LoadProgramDesc(FLAGS_inference_model_dir); } + + framework::proto::ProgramDesc desc; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index 07c43554bc6a0d71d688a5a5772d0ab3d2de319a..e6ee598db04dd9e0075b39a50d1d4e878d73086d 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -58,12 +58,13 @@ void GetTensorPayload(framework::Variable* var, if (platform::is_gpu_place(ctx.GetPlace())) { #ifdef PADDLE_WITH_CUDA PADDLE_ENFORCE(platform::is_gpu_place(tensor.place())); - platform::CPUPlace cpu; + platform::CUDAPinnedPlace cuda_pinned; auto& gpu_dev_ctx = static_cast(ctx); auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type()); - *payload = memory::Alloc(cpu, copy_size); + *payload = memory::Alloc(cuda_pinned, copy_size); - memory::Copy(cpu, *payload, boost::get(tensor.place()), + memory::Copy(cuda_pinned, *payload, + boost::get(tensor.place()), reinterpret_cast(tensor.data()), copy_size, gpu_dev_ctx.stream()); ctx.Wait(); @@ -90,11 +91,11 @@ void GetSelectedRowsPayload(framework::Variable* var, auto* tensor = slr->mutable_value(); if (platform::is_gpu_place(ctx.GetPlace())) { #ifdef PADDLE_WITH_CUDA - platform::CPUPlace cpu; + platform::CUDAPinnedPlace cuda_pinned; auto& gpu_dev_ctx = static_cast(ctx); 
auto copy_size = tensor->numel() * framework::SizeOfType(tensor->type()); - *payload = memory::Alloc(cpu, copy_size); - memory::Copy(cpu, *payload, + *payload = memory::Alloc(cuda_pinned, copy_size); + memory::Copy(cuda_pinned, *payload, boost::get(tensor->place()), reinterpret_cast(tensor->data()), copy_size, gpu_dev_ctx.stream()); @@ -145,8 +146,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, // GPU data is copied to CPU buffer when sending, // free the buffer when possible. destroy_callback = [](void* backing) { - platform::CPUPlace cpu; - memory::Free(cpu, backing); + platform::CUDAPinnedPlace cuda_pinned; + memory::Free(cuda_pinned, backing); }; } diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 57eb5d9a0e73a51d9e2cef7ad7539c1b9da2c4ea..48ac320089aad1f5fa5fe3f327cf28e2c90ad1a1 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include // for removing the port file #include #include #include // NOLINT @@ -77,12 +78,14 @@ ListenAndServOp::ListenAndServOp(const std::string &type, void ListenAndServOp::Stop() { rpc_service_->Push(LISTEN_TERMINATE_MESSAGE); server_thread_->join(); + auto file_path = string::Sprintf("/tmp/paddle.%d.port", ::getpid()); + remove(file_path.c_str()); } -void ListenAndServOp::SavePort(const std::string &file_path) const { +void ListenAndServOp::SavePort() const { // NOTE: default write file to /tmp/paddle.selected_port selected_port_ = rpc_service_->GetSelectedPort(); - + auto file_path = string::Sprintf("/tmp/paddle.%d.port", ::getpid()); std::ofstream port_file; port_file.open(file_path); port_file << selected_port_.load(); @@ -331,7 +334,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, // Write to a file of server selected port for python use. std::string file_path = string::Sprintf("/tmp/paddle.%d.selected_port", static_cast(::getpid())); - SavePort(file_path); + SavePort(); if (sync_mode) { RunSyncLoop(&executor, program, &recv_scope, prefetch_block); } else { diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h index f52a55c5c2d6902df6cb7e0a0d7242c6e86dc786..8af061eaf2bec4a9edd264c8c77ac69e228b0669 100644 --- a/paddle/fluid/operators/listen_and_serv_op.h +++ b/paddle/fluid/operators/listen_and_serv_op.h @@ -48,8 +48,7 @@ class ListenAndServOp : public framework::OperatorBase { void RunAsyncLoop(framework::Executor* executor, framework::ProgramDesc* program) const; - void SavePort( - const std::string& file_path = "/tmp/paddle.selected_port") const; + void SavePort() const; void WaitServerReady(); diff --git a/paddle/fluid/operators/reduce_op.cc b/paddle/fluid/operators/reduce_op.cc index eb8c21179db690e20db29c21892fd6258dd75579..e293fd5e410b2a34b3c71ea674607ba9d7654535 100644 --- a/paddle/fluid/operators/reduce_op.cc +++ b/paddle/fluid/operators/reduce_op.cc @@ -14,6 +14,7 @@ limitations 
under the License. */ #include "paddle/fluid/operators/reduce_op.h" +#include #include #include @@ -34,11 +35,14 @@ class ReduceOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto x_rank = x_dims.size(); PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported."); - int dim = ctx->Attrs().Get("dim"); - if (dim < 0) dim = x_rank + dim; - PADDLE_ENFORCE_LT( - dim, x_rank, - "The dim should be in the range [-rank(input), rank(input))."); + auto dims = ctx->Attrs().Get>("dim"); + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + PADDLE_ENFORCE_LT( + dims[i], x_rank, + "The dim should be in the range [-rank(input), rank(input))."); + } + sort(dims.begin(), dims.end()); bool reduce_all = ctx->Attrs().Get("reduce_all"); bool keep_dim = ctx->Attrs().Get("keep_dim"); if (reduce_all) { @@ -49,14 +53,22 @@ class ReduceOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", {1}); } else { auto dims_vector = vectorize(x_dims); - if (keep_dim || x_rank == 1) { - dims_vector[dim] = 1; + if (keep_dim) { + for (size_t i = 0; i < dims.size(); ++i) { + dims_vector[dims[i]] = 1; + } } else { - dims_vector.erase(dims_vector.begin() + dim); + const int kDelFlag = -2; + for (size_t i = 0; i < dims.size(); ++i) { + dims_vector[dims[i]] = kDelFlag; + } + dims_vector.erase( + remove(dims_vector.begin(), dims_vector.end(), kDelFlag), + dims_vector.end()); } auto out_dims = framework::make_ddim(dims_vector); ctx->SetOutputDim("Out", out_dims); - if (dim != 0) { + if (dims[0] != 0) { // Only pass LoD when not reducing on the first dim. 
ctx->ShareLoD("X", /*->*/ "Out"); } @@ -75,11 +87,14 @@ class ReduceGradOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto x_rank = x_dims.size(); PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported."); - int dim = ctx->Attrs().Get("dim"); - if (dim < 0) dim = x_rank + dim; - PADDLE_ENFORCE_LT( - dim, x_rank, - "The dim should be in the range [-rank(input), rank(input))."); + auto dims = ctx->Attrs().Get>("dim"); + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + PADDLE_ENFORCE_LT( + dims[i], x_rank, + "The dim should be in the range [-rank(input), rank(input))."); + } + sort(dims.begin(), dims.end()); auto x_grad_name = framework::GradVarName("X"); if (ctx->HasOutput(x_grad_name)) { ctx->SetOutputDim(x_grad_name, x_dims); @@ -95,13 +110,13 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor) The input tensor. Tensors with rank at most 6 are " "supported."); AddOutput("Out", "(Tensor) The result tensor."); - AddAttr( + AddAttr>( "dim", - "(int, default 0) The dimension to reduce. " + "(list, default {0}) The dimensions to reduce. " "Must be in the range [-rank(input), rank(input)). " - "If `dim < 0`, the dim to reduce is `rank + dim`. " + "If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. " "Note that reducing on the first dim will make the LoD info lost.") - .SetDefault(0); + .SetDefault({0}); AddAttr("keep_dim", "(bool, default false) " "If true, retain the reduced dimension with length 1.") diff --git a/paddle/fluid/operators/reduce_op.h b/paddle/fluid/operators/reduce_op.h index e42b4bfe42df05346020d4f48519fecf39aa37d2..cd19cc1460a6b4d4201f21f6f27f988c1547b88a 100644 --- a/paddle/fluid/operators/reduce_op.h +++ b/paddle/fluid/operators/reduce_op.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once +#include #include "glog/logging.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -109,6 +110,11 @@ struct ProdGradFunctor { } }; +#define HANDLE_DIM(NDIM, RDIM) \ + if (ndim == NDIM && rdim == RDIM) { \ + ReduceCompute(context); \ + } + template class ReduceKernel : public framework::OpKernel { public: @@ -127,32 +133,29 @@ class ReduceKernel : public framework::OpKernel { Functor functor; functor(place, &x, &out, reduce_dim); } else { - int rank = context.Input("X")->dims().size(); - switch (rank) { - case 1: - ReduceCompute<1>(context); - break; - case 2: - ReduceCompute<2>(context); - break; - case 3: - ReduceCompute<3>(context); - break; - case 4: - ReduceCompute<4>(context); - break; - case 5: - ReduceCompute<5>(context); - break; - case 6: - ReduceCompute<6>(context); - break; - } + int ndim = context.Input("X")->dims().size(); + int rdim = context.Attr>("dim").size(); + HANDLE_DIM(6, 5); + HANDLE_DIM(6, 4); + HANDLE_DIM(6, 3); + HANDLE_DIM(6, 2); + HANDLE_DIM(6, 1); + HANDLE_DIM(5, 4); + HANDLE_DIM(5, 3); + HANDLE_DIM(5, 2); + HANDLE_DIM(5, 1); + HANDLE_DIM(4, 3); + HANDLE_DIM(4, 2); + HANDLE_DIM(4, 1); + HANDLE_DIM(3, 2); + HANDLE_DIM(3, 1); + HANDLE_DIM(2, 1); + HANDLE_DIM(1, 1); } } private: - template + template void ReduceCompute(const framework::ExecutionContext& context) const { auto* input = context.Input("X"); auto* output = context.Output("Out"); @@ -160,18 +163,26 @@ class ReduceKernel : public framework::OpKernel { auto x = EigenTensor::From(*input); auto x_rank = static_cast(x.dimensions().size()); - int dim = static_cast(context.Attr("dim")); - if (dim < 0) dim = x_rank + dim; - auto reduce_dim = Eigen::array({{dim}}); + auto dims = context.Attr>("dim"); + auto reduce_dim = Eigen::array(); + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + reduce_dim[i] = dims[i]; + } // construct the squeezed output tensor bool keep_dim = 
context.Attr("keep_dim"); - DDim dims = output->dims(); - auto dims_vector = vectorize(dims); + DDim out_dims = output->dims(); if (keep_dim && x_rank > 1) { - dims_vector.erase(dims_vector.begin() + dim); - dims = framework::make_ddim(dims_vector); + const int kDelFlag = -2; + auto dims_vector = vectorize(out_dims); + for (size_t i = 0; i < dims.size(); ++i) { + dims_vector[dims[i]] = kDelFlag; + } + dims_vector.erase( + remove(dims_vector.begin(), dims_vector.end(), kDelFlag), + dims_vector.end()); + out_dims = framework::make_ddim(dims_vector); } - auto& place = *context.template device_context().eigen_device(); Functor functor; @@ -180,7 +191,7 @@ class ReduceKernel : public framework::OpKernel { auto out = EigenScalar::From(*output); functor(place, &x, &out, reduce_dim); } else { - auto out = EigenTensor::From(*output, dims); + auto out = EigenTensor::From(*output, out_dims); functor(place, &x, &out, reduce_dim); } } @@ -245,21 +256,29 @@ class ReduceGradKernel : public framework::OpKernel { auto x = EigenTensor::From(*input0); auto x_grad = EigenTensor::From(*output); auto x_rank = static_cast(x.dimensions().size()); - int dim = static_cast(context.Attr("dim")); - if (dim < 0) dim = x_rank + dim; - DDim dims = input0->dims(); - dims[dim] = 1; - auto x_reduce = EigenTensor::From(*input1, dims); - auto x_reduce_grad = EigenTensor::From(*input2, dims); - + auto dims = context.Attr>("dim"); + auto x_dims = input0->dims(); + auto reduced_dims_v = vectorize(x_dims); Eigen::array broadcast_dim; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; - broadcast_dim[dim] = input0->dims()[dim]; + + int broad_cats_times = 1; + for (size_t i = 0; i < dims.size(); ++i) { + if (dims[i] < 0) dims[i] = x_rank + dims[i]; + reduced_dims_v[dims[i]] = 1; + broadcast_dim[dims[i]] = x_dims[dims[i]]; + broad_cats_times *= x_dims[dims[i]]; + } + auto reduced_dims = framework::make_ddim(reduced_dims_v); + auto x_reduce = EigenTensor::From(*input1, reduced_dims); + auto x_reduce_grad = 
EigenTensor::From(*input2, reduced_dims); + auto& place = *context.template device_context().eigen_device(); + Functor functor; functor(place, &x, &x_reduce, &x_grad, &x_reduce_grad, broadcast_dim, - broadcast_dim[dim]); + broad_cats_times); } }; diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 6936962cbebf1ad5b6d80d4bf63207f2636dc869..8d8cfec4ca55571bd64f1788e6983d7381e85fc5 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -433,7 +433,7 @@ EOF EOF if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&" + NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.1.2-1+cuda${CUDA_MAJOR} libnccl-dev=2.1.2-1+cuda${CUDA_MAJOR} &&" else NCCL_DEPS="" fi diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 75f7ec2f853fb6389d0f78e81aa63e40b1c25dc5..1f2e483a0968308063710d3081fe0ddc7b559d75 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2082,11 +2082,11 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the sum is performed. If + dim (list|int|None): The dimensions along which the sum is performed. If :attr:`None`, sum all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the - range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, - the dimension to reduce is :math:`rank + dim`. + range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`, + the dimension to reduce is :math:`rank + dim[i]`. keep_dim (bool|False): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. 
@@ -2107,15 +2107,25 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_sum(x, dim=0) # [0.3, 0.5, 1.1, 1.6] fluid.layers.reduce_sum(x, dim=-1) # [1.9, 1.6] fluid.layers.reduce_sum(x, dim=1, keep_dim=True) # [[1.9], [1.6]] + + # x is a Tensor variable with shape [2, 2, 2] and elements as below: + # [[[1, 2], [3, 4]], + # [[5, 6], [7, 8]]] + # Each example is followed by the corresponding output tensor. + fluid.layers.reduce_sum(x, dim=[1, 2]) # [10, 26] + fluid.layers.reduce_sum(x, dim=[0, 1]) # [16, 20] + """ helper = LayerHelper('reduce_sum', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] helper.append_op( type='reduce_sum', inputs={'X': input}, outputs={'Out': out}, attrs={ - 'dim': dim if dim != None else 0, + 'dim': dim if dim != None else [0], 'keep_dim': keep_dim, 'reduce_all': True if dim == None else False }) @@ -2128,11 +2138,11 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the mean is computed. If + dim (list|int|None): The dimensions along which the mean is computed. If :attr:`None`, compute the mean over all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the range :math:`[-rank(input), rank(input))`. If - :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. + :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true.
@@ -2153,15 +2163,24 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_mean(x, dim=0) # [0.15, 0.25, 0.55, 0.8] fluid.layers.reduce_mean(x, dim=-1) # [0.475, 0.4] fluid.layers.reduce_mean(x, dim=1, keep_dim=True) # [[0.475], [0.4]] + + # x is a Tensor variable with shape [2, 2, 2] and elements as below: + # [[[1.0, 2.0], [3.0, 4.0]], + # [[5.0, 6.0], [7.0, 8.0]]] + # Each example is followed by the corresponding output tensor. + fluid.layers.reduce_mean(x, dim=[1, 2]) # [2.5, 6.5] + fluid.layers.reduce_mean(x, dim=[0, 1]) # [4.0, 5.0] """ helper = LayerHelper('reduce_mean', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] helper.append_op( type='reduce_mean', inputs={'X': input}, outputs={'Out': out}, attrs={ - 'dim': dim if dim != None else 0, + 'dim': dim if dim != None else [0], 'keep_dim': keep_dim, 'reduce_all': True if dim == None else False }) @@ -2174,11 +2193,11 @@ def reduce_max(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the maximum is computed. + dim (list|int|None): The dimensions along which the maximum is computed. If :attr:`None`, compute the maximum over all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the range :math:`[-rank(input), rank(input))`. - If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. + If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true.
@@ -2199,15 +2218,24 @@ def reduce_max(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_max(x, dim=0) # [0.2, 0.3, 0.6, 0.9] fluid.layers.reduce_max(x, dim=-1) # [0.9, 0.7] fluid.layers.reduce_max(x, dim=1, keep_dim=True) # [[0.9], [0.7]] + + # x is a Tensor variable with shape [2, 2, 2] and elements as below: + # [[[1.0, 2.0], [3.0, 4.0]], + # [[5.0, 6.0], [7.0, 8.0]]] + # Each example is followed by the corresponding output tensor. + fluid.layers.reduce_max(x, dim=[1, 2]) # [4.0, 8.0] + fluid.layers.reduce_max(x, dim=[0, 1]) # [7.0, 8.0] """ helper = LayerHelper('reduce_max', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] helper.append_op( type='reduce_max', inputs={'X': input}, outputs={'Out': out}, attrs={ - 'dim': dim if dim != None else 0, + 'dim': dim if dim != None else [0], 'keep_dim': keep_dim, 'reduce_all': True if dim == None else False }) @@ -2220,11 +2248,11 @@ def reduce_min(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the minimum is computed. + dim (list|int|None): The dimensions along which the minimum is computed. If :attr:`None`, compute the minimum over all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the range :math:`[-rank(input), rank(input))`. - If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. + If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true.
@@ -2245,15 +2273,24 @@ def reduce_min(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_min(x, dim=0) # [0.1, 0.2, 0.5, 0.7] fluid.layers.reduce_min(x, dim=-1) # [0.2, 0.1] fluid.layers.reduce_min(x, dim=1, keep_dim=True) # [[0.2], [0.1]] + + # x is a Tensor variable with shape [2, 2, 2] and elements as below: + # [[[1.0, 2.0], [3.0, 4.0]], + # [[5.0, 6.0], [7.0, 8.0]]] + # Each example is followed by the corresponding output tensor. + fluid.layers.reduce_min(x, dim=[1, 2]) # [1.0, 5.0] + fluid.layers.reduce_min(x, dim=[0, 1]) # [1.0, 2.0] """ helper = LayerHelper('reduce_min', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] helper.append_op( type='reduce_min', inputs={'X': input}, outputs={'Out': out}, attrs={ - 'dim': dim if dim != None else 0, + 'dim': dim if dim != None else [0], 'keep_dim': keep_dim, 'reduce_all': True if dim == None else False }) @@ -2266,11 +2303,11 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the product is performed. If + dim (list|int|None): The dimensions along which the product is performed. If :attr:`None`, multipy all elements of :attr:`input` and return a Tensor variable with a single element, otherwise must be in the - range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, - the dimension to reduce is :math:`rank + dim`. + range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`, + the dimension to reduce is :math:`rank + dim[i]`. keep_dim (bool|False): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true.
@@ -2292,15 +2329,24 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): fluid.layers.reduce_prod(x, dim=-1) # [0.027, 0.0084] fluid.layers.reduce_prod(x, dim=1, keep_dim=True) # [[0.027], [0.0084]] + + # x is a Tensor variable with shape [2, 2, 2] and elements as below: + # [[[1.0, 2.0], [3.0, 4.0]], + # [[5.0, 6.0], [7.0, 8.0]]] + # Each example is followed by the corresponding output tensor. + fluid.layers.reduce_prod(x, dim=[1, 2]) # [24.0, 1680.0] + fluid.layers.reduce_prod(x, dim=[0, 1]) # [105.0, 384.0] """ helper = LayerHelper('reduce_prod', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] helper.append_op( type='reduce_prod', inputs={'X': input}, outputs={'Out': out}, attrs={ - 'dim': dim if dim != None else 0, + 'dim': dim if dim != None else [0], 'keep_dim': keep_dim, 'reduce_all': True if dim == None else False }) @@ -2403,7 +2449,6 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): if len(x.shape) == 1: axis = 0 - helper = LayerHelper("l2_normalize", **locals()) square = helper.create_tmp_variable(dtype=x.dtype) @@ -2415,7 +2460,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): inputs={"X": square}, outputs={"Out": reduced_sum}, attrs={ - "dim": 1 if axis is None else axis, + "dim": [1] if axis is None else [axis], "keep_dim": True, "reduce_all": False }) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 0fc48055220ed84c4ab146ad01b05f393e01078e..a0deaca78eed75e9faee7bb38493afc181eb212f 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -28,8 +28,8 @@ from contextlib import contextmanager __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', - 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'Adadelta', 'ModelAverage', - 'Optimizer' + 'AdamaxOptimizer',
'DecayedAdagradOptimizer', 'RMSPropOptimizer', + 'Adadelta', 'ModelAverage', 'Optimizer' ] diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..89179fc586cde99318a17bab287441c0f2d6c369 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -0,0 +1,149 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +HID_DIM = 512 +BATCH_SIZE = 128 + + +def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM) + return net + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def train(use_cuda, train_program, save_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer=optimizer) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller 
value to increase CI speed + trainer.save_params(save_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): + sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(save_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, save_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=save_dirname, + place=place) + + def create_random_lodtensor(lod, place, low, high): + data = np.random.random_integers(low, high, + [lod[-1], 1]).astype("int64") + res = fluid.LoDTensor() + res.set(data, place) + res.set_lod([lod]) + return res + + lod = [0, 4, 10] + tensor_words = create_random_lodtensor( + lod, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + save_path = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, save_path) + infer(use_cuda, inference_program, save_path) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py 
b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py new file mode 100644 index 0000000000000000000000000000000000000000..7db097b3b377c763ceed9fa909672088effe50cf --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -0,0 +1,164 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from functools import partial +import numpy as np + +CLASS_DIM = 2 +EMB_DIM = 128 +BATCH_SIZE = 128 +LSTM_SIZE = 128 + + +def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size): + emb = fluid.layers.embedding( + input=data, size=[input_dim, emb_dim], is_sparse=True) + sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh') + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + word = rnn.step_input(sentence) + prev_hidden = rnn.memory(value=0.0, shape=[lstm_size]) + prev_cell = rnn.memory(value=0.0, shape=[lstm_size]) + + def gate_common(ipt, hidden, size): + gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True) + gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False) + return gate0 + gate1 + + forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + 
output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden, + lstm_size)) + + cell = forget_gate * prev_cell + input_gate * cell_gate + hidden = output_gate * fluid.layers.tanh(x=cell) + rnn.update_memory(prev_cell, cell) + rnn.update_memory(prev_hidden, hidden) + rnn.output(hidden) + + last = fluid.layers.sequence_last_step(rnn()) + prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax") + return prediction + + +def inference_program(word_dict): + data = fluid.layers.data( + name="words", shape=[1], dtype="int64", lod_level=1) + + dict_dim = len(word_dict) + pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE) + return pred + + +def train_program(word_dict): + prediction = inference_program(word_dict) + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return [avg_cost, accuracy] + + +def train(use_cuda, train_program, save_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) + + word_dict = paddle.dataset.imdb.word_dict() + trainer = fluid.Trainer( + train_func=partial(train_program, word_dict), + place=place, + optimizer=optimizer) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) + avg_cost, acc = trainer.test( + reader=test_reader, feed_order=['words', 'label']) + + print("avg_cost: %s" % avg_cost) + print("acc : %s" % acc) + + if acc > 0.2: # Smaller value to increase CI speed + trainer.save_params(save_dirname) + trainer.stop() + + else: + print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( + event.epoch + 1, avg_cost, acc)) + if math.isnan(avg_cost): 
+ sys.exit("got NaN loss, training failed.") + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(save_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=25000), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=['words', 'label']) + + +def infer(use_cuda, inference_program, save_dirname=None): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + word_dict = paddle.dataset.imdb.word_dict() + + inferencer = fluid.Inferencer( + infer_func=partial(inference_program, word_dict), + param_path=save_dirname, + place=place) + + def create_random_lodtensor(lod, place, low, high): + data = np.random.random_integers(low, high, + [lod[-1], 1]).astype("int64") + res = fluid.LoDTensor() + res.set(data, place) + res.set_lod([lod]) + return res + + lod = [0, 4, 10] + tensor_words = create_random_lodtensor( + lod, place, low=0, high=len(word_dict) - 1) + results = inferencer.infer({'words': tensor_words}) + print("infer results: ", results) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + save_path = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, save_path) + infer(use_cuda, inference_program, save_path) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 7893dc11d9604830df1276a49b1e22b739a3f7f9..2314bb2ed8a4eeb34752fd5d040f8a8476798aa6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -36,7 +36,7 @@ class 
TestSendOp(unittest.TestCase): p.start() time.sleep(10) - with open("/tmp/paddle.%d.selected_port" % p.pid, "r") as fn: + with open("/tmp/paddle.%d.port" % p.pid, "r") as fn: selected_port = int(fn.readlines()[0]) self.init_client(place, selected_port) diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index 9b0cc3534dc551e7fdf7ef8111cad1c172f8bfa4..865c2b7df085aa6a6cb0d6eb461c342ce08695cd 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -34,8 +34,10 @@ class TestMeanOp(OpTest): def setUp(self): self.op_type = "reduce_mean" self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")} - self.attrs = {'dim': 1} - self.outputs = {'Out': self.inputs['X'].mean(axis=self.attrs['dim'])} + self.attrs = {'dim': [1]} + self.outputs = { + 'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim'])) + } def test_check_output(self): self.check_output() @@ -50,8 +52,10 @@ class TestMaxOp(OpTest): def setUp(self): self.op_type = "reduce_max" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} - self.attrs = {'dim': -1} - self.outputs = {'Out': self.inputs['X'].max(axis=self.attrs['dim'])} + self.attrs = {'dim': [-1]} + self.outputs = { + 'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim'])) + } def test_check_output(self): self.check_output() @@ -63,8 +67,10 @@ class TestMinOp(OpTest): def setUp(self): self.op_type = "reduce_min" self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} - self.attrs = {'dim': 2} - self.outputs = {'Out': self.inputs['X'].min(axis=self.attrs['dim'])} + self.attrs = {'dim': [2]} + self.outputs = { + 'Out': self.inputs['X'].min(axis=tuple(self.attrs['dim'])) + } def test_check_output(self): self.check_output() @@ -87,9 +93,10 @@ class TestKeepDimReduce(OpTest): def setUp(self): self.op_type = "reduce_sum" self.inputs = {'X': np.random.random((5, 6, 
10)).astype("float64")} - self.attrs = {'dim': -2, 'keep_dim': True} + self.attrs = {'dim': [-2], 'keep_dim': True} self.outputs = { - 'Out': self.inputs['X'].sum(axis=self.attrs['dim'], keepdims=True) + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True) } def test_check_output(self): @@ -126,5 +133,67 @@ class TestReduceAll(OpTest): self.check_grad(['X'], 'Out') +## reduction in multi dims +class TestReduceMeanOpMultiAxises(OpTest): + def setUp(self): + self.op_type = "reduce_mean" + self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")} + self.attrs = {'dim': [1, 2]} + self.outputs = {'Out': self.inputs['X'].mean(axis=(1, 2))} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestReduceMaxOpMultiAxises(OpTest): + """Remove Max with subgradient from gradient check to confirm the success of CI.""" + + def setUp(self): + self.op_type = "reduce_max" + self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} + self.attrs = {'dim': [-2, -1]} + self.outputs = { + 'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim'])) + } + + def test_check_output(self): + self.check_output() + + +class TestReduceMinOpMultiAxises(OpTest): + """Remove Min with subgradient from gradient check to confirm the success of CI.""" + + def setUp(self): + self.op_type = "reduce_min" + self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} + self.attrs = {'dim': [1, 2]} + self.outputs = { + 'Out': self.inputs['X'].min(axis=tuple(self.attrs['dim'])) + } + + def test_check_output(self): + self.check_output() + + +class TestKeepDimReduceSumMultiAxises(OpTest): + def setUp(self): + self.op_type = "reduce_sum" + self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")} + self.attrs = {'dim': [-2, -1], 'keep_dim': True} + self.outputs = { + 'Out': + self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True) + } + + def 
test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + if __name__ == '__main__': unittest.main()