提交 fcbf19bf 编写于 作者: Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/refine_parallel_executor
......@@ -26,7 +26,7 @@ set(BOOST_VER "1.41.0")
if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
message(STATUS "use pre defined download url")
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
endif()
MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
......
......@@ -30,7 +30,7 @@ SET(MKLML_PROJECT "extern_mklml")
IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
MESSAGE(STATUS "use pre defined download url")
SET(MKLML_VER "mklml_lnx_2018.0.3.20180406" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
ENDIF()
MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
......
......@@ -47,6 +47,28 @@ DecayedAdagrad
:members:
:noindex:
Adadelta
-----------------
.. autoclass:: paddle.fluid.optimizer.Adadelta
:members:
:noindex:
RMSProp
-----------------
.. autoclass:: paddle.fluid.optimizer.RMSProp
:members:
:noindex:
ModelAverage
-----------------
.. autoclass:: paddle.fluid.optimizer.ModelAverage
:members:
:noindex:
SGDOptimizer
------------
......@@ -89,9 +111,16 @@ DecayedAdagradOptimizer
:members:
:noindex:
Adadelta
--------------
AdadeltaOptimizer
-----------------
.. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer
:members:
:noindex:
RMSPropOptimizer
-----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
......@@ -5,3 +5,4 @@
:maxdepth: 1
optimization/index_cn.rst
inference/inference_support_in_fluid.md
......@@ -5,3 +5,4 @@ HOW TO
:maxdepth: 1
optimization/index_en.rst
inference/inference_support_in_fluid.md
# Fluid Inference使用指南
## 目录:
- Python Inference API
- 编译Fluid Inference库
- Inference C++ API
- Inference实例
- Inference计算优化
## Python Inference API **[改进中]**
- 保存Inference模型 ([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L295))
```python
def save_inference_model(dirname,
feeded_var_names,
target_vars,
executor,
main_program=None,
model_filename=None,
params_filename=None):
```
Inference模型和参数将会保存到`dirname`目录下:
- 序列化的模型
- `model_filename``None`,保存到`dirname/__model__`
- `model_filename``None`,保存到`dirname/model_filename`
- 参数
- `params_filename``None`,单独保存到各个独立的文件,各文件以参数变量的名字命名
- `params_filename``None`,保存到`dirname/params_filename`
- 两种存储格式
- 参数保存到各个独立的文件
- 如,设置`model_filename``None``params_filename``None`
```bash
$ cd recognize_digits_conv.inference.model
$ ls
$ __model__ batch_norm_1.w_0 batch_norm_1.w_2 conv2d_2.w_0 conv2d_3.w_0 fc_1.w_0 batch_norm_1.b_0 batch_norm_1.w_1 conv2d_2.b_0 conv2d_3.b_0 fc_1.b_0
```
- 参数保存到同一个文件
- 如,设置`model_filename``None``params_filename``__params__`
```bash
$ cd recognize_digits_conv.inference.model
$ ls
$ __model__ __params__
```
- 加载Inference模型([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L380))
```python
def load_inference_model(dirname,
executor,
model_filename=None,
params_filename=None):
...
return [program, feed_target_names, fetch_targets]
```
## 编译Fluid Inference库
- **不需要额外的CMake选项**
- 1、 配置CMake命令,更多配置请参考[源码编译PaddlePaddle](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/build_from_source_cn.html)
```bash
$ git clone https://github.com/PaddlePaddle/Paddle.git
$ cd Paddle
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=your/path/to/paddle_inference_lib \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_PYTHON=ON \
-DWITH_MKL=OFF \
-DWITH_GPU=OFF \
..
```
- 2、 编译PaddlePaddle
```bash
$ make
```
- 3、 部署。执行如下命令将PaddlePaddle Fluid Inference库部署到`your/path/to/paddle_inference_lib`目录。
```bash
$ make inference_lib_dist
```
- 目录结构
```bash
$ cd your/path/to/paddle_inference_lib
$ tree
.
|-- paddle
| `-- fluid
| |-- framework
| |-- inference
| | |-- io.h
| | `-- libpaddle_fluid.so
| |-- memory
| |-- platform
| `-- string
|-- third_party
| |-- eigen3
| `-- install
| |-- gflags
| |-- glog
| `-- protobuf
`-- ...
```
假设`PADDLE_ROOT=your/path/to/paddle_inference_lib`
## 链接Fluid Inference库
- 示例项目([链接](https://github.com/luotao1/fluid_inference_example.git))
- GCC配置
```bash
$ g++ -o a.out -std=c++11 main.cc \
-I${PADDLE_ROOT}/ \
-I${PADDLE_ROOT}/third_party/install/gflags/include \
-I${PADDLE_ROOT}/third_party/install/glog/include \
-I${PADDLE_ROOT}/third_party/install/protobuf/include \
-I${PADDLE_ROOT}/third_party/eigen3 \
-L${PADDLE_ROOT}/paddle/fluid/inference -lpaddle_fluid \
-lrt -ldl -lpthread
```
- CMake配置
```cmake
include_directories(${PADDLE_ROOT}/)
include_directories(${PADDLE_ROOT}/third_party/install/gflags/include)
include_directories(${PADDLE_ROOT}/third_party/install/glog/include)
include_directories(${PADDLE_ROOT}/third_party/install/protobuf/include)
include_directories(${PADDLE_ROOT}/third_party/eigen3)
target_link_libraries(${TARGET_NAME}
${PADDLE_ROOT}/paddle/fluid/inference/libpaddle_fluid.so
-lrt -ldl -lpthread)
```
- 设置环境变量:
`export LD_LIBRARY_PATH=${PADDLE_ROOT}/paddle/fluid/inference:$LD_LIBRARY_PATH`
## C++ Inference API
- 推断流程([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_helper.h#L91))
- 1、 初始化设备
```cpp
#include "paddle/fluid/framework/init.h"
paddle::framework::InitDevices(false);
```
- 2、 定义place,executor,scope
```cpp
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto* scope = new paddle::framework::Scope();
```
- 3、 加载模型
```cpp
#include "paddle/fluid/inference/io.h"
auto inference_program = paddle::inference::Load(executor, *scope, dirname);
// or
auto inference_program = paddle::inference::Load(executor,
*scope,
dirname + "/" + model_filename,
dirname + "/" + params_filename);
```
- 4、 获取`feed_target_names``fetch_target_names`
```cpp
const std::vector<std::string>& feed_target_names = inference_program->GetFeedTargetNames();
const std::vector<std::string>& fetch_target_names = inference_program->GetFetchTargetNames();
```
- 5、 准备`feed`数据
```cpp
#include "paddle/fluid/framework/lod_tensor.h"
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
...
std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
for (size_t i = 0; i < feed_target_names.size(); ++i) {
// Please make sure that cpu_feeds[i] is right for feed_target_names[i]
feed_targets[feed_target_names[i]] = cpu_feeds[i];
}
```
- 6、 定义`Tensor``fetch`结果
```cpp
std::vector<paddle::framework::LoDTensor*> cpu_fetchs;
std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
for (size_t i = 0; i < fetch_target_names.size(); ++i) {
fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
}
```
- 7、 执行`inference_program`
```cpp
executor.Run(*inference_program, scope, feed_targets, fetch_targets);
```
- 8、 使用`fetch`数据
```cpp
for (size_t i = 0; i < cpu_fetchs.size(); ++i) {
std::cout << "lod_i: " << cpu_fetchs[i]->lod();
std::cout << "dims_i: " << cpu_fetchs[i]->dims();
std::cout << "result:";
float* output_ptr = cpu_fetchs[i]->data<float>();
for (int j = 0; j < cpu_fetchs[i]->numel(); ++j) {
std::cout << " " << output_ptr[j];
}
std::cout << std::endl;
}
```
针对不同的数据,4. - 8.可执行多次。
- 9、 释放内存
```cpp
delete scope;
```
- 接口说明
```cpp
void Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>& feed_targets,
std::map<std::string, LoDTensor*>& fetch_targets,
bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
```
- 使用Python API `save_inference_model`保存的`program`里面包含了`feed_op``fetch_op`,用户提供的`feed_targets``fetch_targets`必须和`inference_program`中的`feed_op``fetch_op`保持一致。
- 用户提供的`feed_holder_name``fetch_holder_name`也必须和`inference_program``feed_op``fetch_op`保持一致,可使用`SetFeedHolderName``SetFetchHolderName`接口重新设置`inferece_program`
- 默认情况下,除了`persistable`属性设置为`True``Variable`之外,每次执行`executor.Run`会创建一个局部`Scope`,并且在这个局部`Scope`中创建和销毁所有的`Variable`,以最小化空闲时的内存占用。
- `persistable`属性为`True``Variable`有:
- Operators的参数`w``b`
- `feed_op`的输入变量
- `fetch_op`的输出变量
- **不在每次执行时创建和销毁变量
([PR](https://github.com/PaddlePaddle/Paddle/pull/9301))**
- 执行`inference_program`
```cpp
// Call once
executor.CreateVariables(*inference_program, scope, 0);
// Call as many times as you like
executor.Run(
*inference_program, scope, feed_targets, fetch_targets, false);
```
- **优点**
- 节省了频繁创建、销毁变量的时间(约占每次`Run`总时间的1% ~ 12%)
- 执行结束后可获取所有Operators的计算结果
- **缺点**
- 空闲时也会占用大量的内存
- 在同一个`Scope`中,相同的变量名是公用同一块内存的,容易引起意想不到的错误
- **不在每次执行时创建Op([PR](https://github.com/PaddlePaddle/Paddle/pull/9630))**
- 执行`inference_program`
```cpp
// Call once
auto ctx = executor.Prepare(*inference_program, 0);
// Call as many times as you like if you have no need to change the inference_program
executor.RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets);
```
- **优点**
- 节省了频繁创建、销毁Op的时间
- **缺点**
- 一旦修改了`inference_program`,则需要重新创建`ctx`
- **多线程共享Parameters([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_multi_thread_helper.h))**
- 主线程
- 1、 初始化设备
- 2、 定义`place``executor``scope`
- 3、 加载模型,得到`inference_program`
- 从线程
- **复制`inference_program`得到`copy_program`,修改`copy_program`的`feed_holder_name`和`fetch_holder_name`**
```cpp
auto copy_program = std::unique_ptr<paddle::framework::ProgramDesc>(
new paddle::framework::ProgramDesc(*inference_program));
std::string feed_holder_name = "feed_" + paddle::string::to_string(thread_id);
std::string fetch_holder_name = "fetch_" + paddle::string::to_string(thread_id);
copy_program->SetFeedHolderName(feed_holder_name);
copy_program->SetFetchHolderName(fetch_holder_name);
```
- 4、 获取`copy_program``feed_target_names``fetch_target_names`
- 5、 准备feed数据,定义Tensor来fetch结果
- 6、 执行`copy_program`
```cpp
executor->Run(*copy_program, scope, feed_targets, fetch_targets, true, feed_holder_name, fetch_holder_name);
```
- 7、 使用fetch数据
- 主线程
- 8、 释放资源
- 基本概念
- 数据相关:
- [Tensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/tensor.md),一个N维数组,数据可以是任意类型(int,float,double等)
- [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md),带LoD(Level-of-Detail)即序列信息的Tensor
- [Scope](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md),记录了变量Variable
- 执行相关:
- [Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/executor.md),无状态执行器,只跟设备相关
- Place
- CPUPlace,CPU设备
- CUDAPlace,CUDA GPU设备
- 神经网络表示:
- [Program](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/program.md).
详细介绍请参考[**Paddle Fluid开发者指南**](https://github.com/lcy-seso/learning_notes/blob/master/Fluid/developer's_guid_for_Fluid/Developer's_Guide_to_Paddle_Fluid.md)
## Inference实例
1. fit a line: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_fit_a_line.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc)
1. image classification: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_image_classification.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_image_classification.cc)
1. label semantic roles: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_label_semantic_roles.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc)
1. recognize digits: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc)
1. recommender system: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recommender_system.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recommender_system.cc)
1. understand sentiment: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_understand_sentiment.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc)
1. word2vec: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_word2vec.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_word2vec.cc)
## Inference计算优化
- 使用Python推理优化工具([inference_transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/inference_transpiler.py))
```python
class InferenceTranspiler:
def transpile(self, program, place, scope=None):
...
if scope is None:
scope = global_scope()
...
```
- 使用`InferenceTranspiler`将会直接修改`program`
- 使用`InferenceTranspiler`会修改参数的值,请确保`program`的参数在`scope`内。
- 支持的优化
- 融合batch_norm op的计算
- 使用示例([链接](https://github.com/Xreki/Xreki.github.io/blob/master/fluid/inference/inference_transpiler.py))
```python
import paddle.fluid as fluid
# NOTE: Applying the inference transpiler will change the inference_program.
t = fluid.InferenceTranspiler()
t.transpile(inference_program, place, inference_scope)
```
## 内存使用优化
- 使用Python内存优化工具([memory_optimization_transipiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/memory_optimization_transpiler.py))
```python
fluid.memory_optimize(inference_program)
```
cc_library(analysis SRCS dot.cc node.cc node.h)
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc
DEPS paddle_fluid)
cc_test(test_node SRCS node_tester.cc DEPS analysis)
cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec)
cc_test(test_subgraph_splitter
SRCS subgraph_splitter_tester.cc
DEPS analysis paddle_fluid tensor
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_subgraph_splitter PROPERTIES DEPENDS test_word2vec)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/dot.h"
namespace paddle {
namespace inference {
namespace analysis {
// It is a better idea that the inputs and outputs of this graph is set manully
// before, but there must be a Pass that helps to prune the unnecessary ops that
// do not contribute to the given targets, so in this pass, analysis and get the
// inputs and outputs is OK.
void DataFlowGraph::Build() {
inputs.clear();
outputs.clear();
std::unordered_set<Node *> ins;
std::unordered_set<Node *> outs;
for (auto &node : nodes.nodes()) {
for (auto *in : node->inlinks) {
ins.insert(in);
}
for (auto *out : node->outlinks) {
outs.insert(out);
}
}
// The nodes that in ins but not in outs is the graph's inputs
// similarly, the nodes that in outs but not in ins is the graphs' outputs
for (auto *in : ins) {
if (!outs.count(in)) {
inputs.push_back(in);
}
}
for (auto *out : outs) {
if (!outs.count(out)) {
outputs.push_back(out);
}
}
}
std::string DataFlowGraph::DotString() const {
Dot dot;
// Add nodes
for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i);
switch (node.type()) {
case Node::Type::kValue:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunction:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunctionBlock:
dot.AddNode(node.repr(), node.dot_attrs());
break;
default:
PADDLE_THROW("unsupported Node type %d", static_cast<int>(node.type()));
}
}
// Add edges
for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i);
for (auto &in : node.inlinks) {
dot.AddEdge(in->repr(), node.repr(), {});
}
}
return dot.Build();
}
//
// NodesBFSIterator
//
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
const std::vector<Node *> &source)
: queue_(source.begin(), source.end()) {}
// GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
// GraphTraits<DataFlowGraph>::NodesBFSIterator &&other) noexcept
// : queue_(std::move(other.queue_)),
// visited_(std::move(other.visited_)) {}
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other)
: queue_(other.queue_), visited_(other.visited_) {}
Node &GraphTraits<DataFlowGraph>::NodesBFSIterator::operator*() {
PADDLE_ENFORCE(!queue_.empty());
return *queue_.front();
}
Node *GraphTraits<DataFlowGraph>::NodesBFSIterator::operator->() {
PADDLE_ENFORCE(!queue_.empty());
return queue_.front();
}
GraphTraits<DataFlowGraph>::NodesBFSIterator &
GraphTraits<DataFlowGraph>::NodesBFSIterator::operator=(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
queue_ = other.queue_;
visited_ = other.visited_;
return *this;
}
GraphTraits<DataFlowGraph>::NodesBFSIterator
&GraphTraits<DataFlowGraph>::NodesBFSIterator::operator++() {
PADDLE_ENFORCE(!queue_.empty());
auto *cur = queue_.front();
visited_.insert(cur);
queue_.pop_front();
for (auto *output : cur->outlinks) {
if (!visited_.count(output)) {
queue_.push_back(output);
visited_.insert(output);
}
}
return *this;
}
bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
if (queue_.empty()) return other.queue_.empty();
if ((!queue_.empty()) && (!other.queue_.empty())) {
return queue_.front() == other.queue_.front() &&
visited_.size() == other.visited_.size(); // here need to check the
// equality of queue and
// visited. Just a light but week implementation.
}
return false;
}
//
// NodesDFSIterator
//
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
const std::vector<Node *> &source) {
for (auto *x : source) stack_.push(x);
}
// GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
// GraphTraits<DataFlowGraph>::NodesDFSIterator &&other) noexcept
// : stack_(std::move(other.stack_)),
// visited_(std::move(other.visited_)) {}
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other)
: stack_(other.stack_), visited_(other.visited_) {}
Node &GraphTraits<DataFlowGraph>::NodesDFSIterator::operator*() {
PADDLE_ENFORCE(!stack_.empty());
return *stack_.top();
}
GraphTraits<DataFlowGraph>::NodesDFSIterator
&GraphTraits<DataFlowGraph>::NodesDFSIterator::operator++() {
if (stack_.empty()) return *this;
visited_.insert(stack_.top());
auto *cur = stack_.top();
stack_.pop();
for (auto *x : cur->outlinks) {
if (!visited_.count(x)) {
stack_.push(x);
visited_.insert(x);
}
}
return *this;
}
bool GraphTraits<DataFlowGraph>::NodesDFSIterator::operator==(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
if (stack_.empty()) return other.stack_.empty();
if ((!stack_.empty()) && (!other.stack_.empty())) {
return stack_.top() == other.stack_.top();
}
return false;
}
GraphTraits<DataFlowGraph>::NodesDFSIterator &
GraphTraits<DataFlowGraph>::NodesDFSIterator::operator=(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
stack_ = other.stack_;
visited_ = other.visited_;
return *this;
}
Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return stack_.top();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* Data flow graph is an pass that build the basic graph. It contains a graph
* and the iterators that enable the iteration over the graph.
*/
#pragma once
#include <deque>
#include <stack>
#include <unordered_set>
#include "paddle/fluid/inference/analysis/graph_traits.h"
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* DataFlowGraph - A container of Value and Function Nodes.
*/
struct DataFlowGraph {
NodeMap nodes;
std::vector<Node *> inputs;
std::vector<Node *> outputs;
// Extract inputs and outputs of the graph.
void Build();
// Output a DOT graph file for debug.
std::string DotString() const;
};
/*
* An graph trait help to traverse the graph using BFS.
* The BFS start from a graph's inputs, the graph should be fully-connected, so
* that the iterator can reach the end.
*/
template <>
struct GraphTraits<DataFlowGraph> {
// BFS iterator on nodes.
struct NodesBFSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesBFSIterator() = default;
explicit NodesBFSIterator(const std::vector<Node *> &source);
// NodesBFSIterator(NodesBFSIterator &&other) noexcept;
// NOTE Heavy to use.
NodesBFSIterator(const NodesBFSIterator &other);
Node &operator*();
NodesBFSIterator &operator++();
Node *operator->();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesBFSIterator &operator=(const NodesBFSIterator &other);
bool operator==(const NodesBFSIterator &other);
bool operator!=(const NodesBFSIterator &other) { return !(*this == other); }
private:
std::deque<Node *> queue_;
std::unordered_set<Node *> visited_;
};
// DFS iterator on nodes.
struct NodesDFSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesDFSIterator() = default;
explicit NodesDFSIterator(const std::vector<Node *> &source);
// NodesDFSIterator(NodesDFSIterator &&other) noexcept;
NodesDFSIterator(const NodesDFSIterator &other);
Node &operator*();
NodesDFSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesDFSIterator &operator=(const NodesDFSIterator &other);
bool operator==(const NodesDFSIterator &other);
bool operator!=(const NodesDFSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::stack<Node *> stack_;
std::unordered_set<Node *> visited_;
};
explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {}
// default use BFS to visit the nodes.
iterator_range<NodesBFSIterator> nodes() {
return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
}
iterator_range<NodesBFSIterator> nodes_in_BFS() {
return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
}
iterator_range<NodesDFSIterator> nodes_in_DFS() {
return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
}
private:
NodesBFSIterator nodes_bfs_begin() {
return NodesBFSIterator(graph_->inputs);
}
NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
NodesDFSIterator nodes_dfs_begin() {
return NodesDFSIterator(graph_->inputs);
}
NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
private:
DataFlowGraph *graph_;
};
// Extract the inputs and outputs of a graph. The inputs and outputs of a
// sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph.
std::pair<
std::vector<Node *>,
std::vector<
Node *>> static ExtractInputAndOutputOfSubGraph(std::vector<Node *>
&graph) {
std::unordered_set<Node *> nodes(graph.begin(), graph.end());
std::unordered_set<Node *> inputs;
std::unordered_set<Node *> outputs;
for (auto &node : graph) {
for (auto *in : node->inlinks) {
if (!nodes.count(in) && in->type() == Node::Type::kValue) {
inputs.insert(in);
}
}
for (auto *out : node->outlinks) {
if (!nodes.count(out) && out->type() == Node::Type::kValue) {
outputs.insert(out);
}
}
}
return std::make_pair(std::vector<Node *>(inputs.begin(), inputs.end()),
std::vector<Node *>(outputs.begin(), outputs.end()));
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST(DataFlowGraph, BFS) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
dfg.Build();
for (auto* in : dfg.inputs) {
LOG(INFO) << "inputs: " << in->name() << " "
<< static_cast<int>(in->type());
}
for (auto* out : dfg.outputs) {
LOG(INFO) << "outputs: " << out->name() << " "
<< static_cast<int>(out->type());
}
GraphTraits<DataFlowGraph> trait(&dfg);
auto nodes = trait.nodes();
int count = 0;
for (auto it = nodes.begin(); it != nodes.end(); ++it) {
LOG(INFO) << "visiting " << it->name();
++count;
}
ASSERT_EQ(count, dfg.nodes.size());
}
TEST(DataFlowGraph, DFS) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
dfg.Build();
GraphTraits<DataFlowGraph> trait(&dfg);
auto nodes = trait.nodes_in_DFS();
int count = 0;
for (auto it = nodes.begin(); it != nodes.end(); ++it) {
LOG(INFO) << "visiting " << it->name();
++count;
}
ASSERT_EQ(count, dfg.nodes.size());
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Test) {
framework::proto::ProgramDesc new_desc;
DataFlowGraph graph;
FluidToDataFlowGraphPass pass0;
DataFlowGraphToFluidPass pass1;
pass0.Initialize(desc);
pass1.Initialize(&new_desc);
pass0.Run(&graph);
pass1.Run(&graph);
pass0.Finalize();
pass1.Finalize();
LOG(INFO) << graph.nodes.size();
}
} // analysis
} // inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include <vector>
namespace paddle {
namespace inference {
namespace analysis {
FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {}
bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); }
bool FluidToDataFlowGraphPass::Initialize(
const framework::proto::ProgramDesc &desc) {
desc_ = &desc;
return true;
}
bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); }
void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
// insert vars
std::unordered_map<std::string, size_t> var2id;
auto &main_block = desc_->blocks(framework::kRootBlockIndex);
for (int i = 0; i < main_block.vars_size(); i++) {
const auto &var = main_block.vars(i);
auto *v = graph->nodes.Create(Node::Type::kValue);
v->SetName(var.name());
v->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&var)));
var2id[var.name()] = v->id();
}
for (int i = 0; i < main_block.ops_size(); i++) {
const auto &op = main_block.ops(i);
auto *o = graph->nodes.Create(Node::Type::kFunction);
o->SetName(op.type());
static_cast<Function *>(o)->SetFuncType(op.type());
// Link to the original protobuf message's memory, make it easier to
// generate from a data flow graph to fluid ProgramDesc.
o->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&op)));
// set inputs and outputs
// TODO(Superjomn) make sure the InputNames is the real variable name.
for (int j = 0; j < op.inputs_size(); j++) {
auto &in_var = op.inputs(j);
for (int k = 0; k < in_var.arguments_size(); k++) {
auto *in = graph->nodes.GetMutable(var2id.at(in_var.arguments(k)));
in->outlinks.push_back(o);
o->inlinks.push_back(in);
}
}
for (int j = 0; j < op.outputs_size(); j++) {
auto &out_var = op.outputs(j);
for (int k = 0; k < out_var.arguments_size(); k++) {
auto *out = graph->nodes.GetMutable(var2id[out_var.arguments(k)]);
out->inlinks.push_back(o);
o->outlinks.push_back(out);
}
}
}
// Analysis and extract the inputs and outputs of this graph.
graph->Build();
}
Pass *FluidToDataFlowGraphPass::CreatePrinterPass(
std::ostream &os, const std::string &banner) const {
return nullptr;
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from data flow graph to fluid
* ProgramDesc.
*/
#pragma once
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Transform a FluidDesc to a data flow graph.
*/
class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
public:
FluidToDataFlowGraphPass();
bool Initialize() override;
bool Initialize(const framework::proto::ProgramDesc &desc) override;
bool Finalize() override;
void Run(DataFlowGraph *graph) override;
Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override;
private:
framework::proto::ProgramDesc const *desc_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Init) {
FluidToDataFlowGraphPass pass;
pass.Initialize();
pass.Initialize(desc);
DataFlowGraph graph;
pass.Run(&graph);
ASSERT_GT(graph.nodes.size(), 0);
pass.Finalize();
LOG(INFO) << '\n' << graph.DotString();
}
} // analysis
} // inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/graph_traits.h"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the GraphTraits<X> template class that should be specified
* by classes that want to be iteratable by generic graph iterators.
*
* This file also defines the marker class Inverse that is used to iterate over
* graphs in a graph defined, inverse ordering...
*/
#pragma once
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* This class should be specialized by different graph types...
* That's why the base class is empty.
*/
template <typename GraphType>
struct GraphTraits {
// using NodesBFSIterator = xxx
// NodesBFSIterator nodes_begin();
// NodesBFSIterator nodes_end();
};
/*
* Inverse - This class is used as a marker class to tell the graph iterator to
* iterate in a graph defined Inverse order.
*/
template <typename GraphType>
struct Inverse {
const GraphType &graph;
explicit Inverse(const GraphType &graph) : graph(graph) {}
};
/*
* Provide a partial specialization of GraphTraits so that the inverse of an
* inverse turns into the original graph.
*/
template <typename GraphType>
struct GraphTraits<Inverse<Inverse<GraphType>>> : GraphTraits<GraphType> {};
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -24,6 +24,39 @@ namespace paddle {
namespace inference {
namespace analysis {
#define SET_TYPE(type__) dic_[typeid(type__).hash_code()] = #type__;
/*
* Map typeid to representation.
*/
struct DataTypeNamer {
static const DataTypeNamer &Global() {
static auto *x = new DataTypeNamer();
return *x;
}
template <typename T>
const std::string &repr() const {
auto x = typeid(T).hash_code();
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
return dic_.at(x);
}
const std::string &repr(size_t &hash) const {
PADDLE_ENFORCE(dic_.count(hash), "unknown type for representation");
return dic_.at(hash);
}
private:
DataTypeNamer() {
SET_TYPE(int);
SET_TYPE(bool);
SET_TYPE(float);
}
std::unordered_map<decltype(typeid(int).hash_code()), std::string> dic_;
};
#undef SET_TYPE
template <typename IteratorT>
class iterator_range {
IteratorT begin_, end_;
......
......@@ -117,7 +117,10 @@ class Node {
type_hash_ = typeid(T).hash_code();
data_.resize(sizeof(T));
}
PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), "type not matched");
PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(),
"type not matched, origin is %s, want %s",
DataTypeNamer::Global().repr(type_hash_),
DataTypeNamer::Global().repr<T>());
PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error");
return *reinterpret_cast<T *>(&data_[0]);
}
......@@ -127,6 +130,10 @@ class Node {
size_t type_hash_{std::numeric_limits<size_t>::max()};
};
bool IsFunction() const { return type_ == Node::Type::kFunction; }
bool IsValue() const { return type_ == Node::Type::kValue; }
bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; }
virtual ~Node() {}
friend class NodeMap;
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/pass.h"
\ No newline at end of file
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <iosfwd>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
namespace analysis {
class Pass {
public:
Pass() = default;
virtual ~Pass() {}
// Virtual method overridden by subclasses to do only necessary initialization
// before any pass is run.
virtual bool Initialize() { return false; }
// There is some passes such as FlowToDataFlowGraphPass that needs a
// ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it
// only couple with the proto file.
virtual bool Initialize(const framework::proto::ProgramDesc &desc) {
return false;
}
// There are some Passes such as DataFlowGraphToFluidPass that will output a
// ProgramDesc.
virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; }
// Virtual method overriden by subclasses to do any necessary clean up after
// all passes have run.
virtual bool Finalize() { return false; }
// Get a Pass appropriate to print the Node this pass operates on.
virtual Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const = 0;
// Run on a single Node.
virtual void Run(Node *x) { LOG(FATAL) << "not valid"; }
// Run on a single Function.
virtual void Run(Function *x) { LOG(FATAL) << "not valid"; }
// Run on a single FunctionBlock.
virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; }
// Run on a single DataFlowGraph.
virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; }
};
// NodePass process on any Node types.
class NodePass : public Pass {
public:
virtual void Run(Node *node) = 0;
};
// NodePass process on any Function node types.
class FunctionPass : public Pass {
public:
virtual void Run(Function *node) = 0;
};
// NodePass process on any FunctionBlock node types.
class FunctionBlockPass : public Pass {
public:
virtual void Run(FunctionBlock *node) = 0;
};
// GraphPass processes on any GraphType.
class DataFlowGraphPass : public Pass {
public:
virtual void Run(DataFlowGraph *graph) = 0;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace paddle {
namespace inference {
namespace analysis {
const char *SubGraphSplitter::kMarkerAttrName =
"_sub_graph_splitter_inside_sub_graph";
std::vector<std::vector<Node *>> SubGraphSplitter::operator()() {
MarkNodesInsideSubGraph();
return ExtractSubGraphs();
}
// Mark the output variables inside a subgraph with the func.
inline void MarkOutLinksInSubGraph(const Function *func) {
for (auto *var : func->outlinks) {
var->attr(SubGraphSplitter::kMarkerAttrName).Bool() = true;
}
}
void SubGraphSplitter::MarkNodesInsideSubGraph() {
for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes()) {
if (node_inside_subgraph_teller_(&node)) {
node.attr(kMarkerAttrName).Bool() = true;
if (node.type() == Node::Type::kFunction) {
// If a function is inside the sub-graph, mark all the output variables
// to be inside too, so that two marked functions will be inside a same
// sub-graph, lets take a example: A_function->var->B_function, if
// A_function is marked, var should also be marked, so that B_function
// will be in the same sub-graph with A_function if B_function is
// marked.
MarkOutLinksInSubGraph(static_cast<const Function *>(&node));
}
}
}
}
const char *kUnionFindParent = "_sub_graph_splitter_union_find_parent_";
// Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node
// a's output is node b, that is a and b is in the same sub-graph. The UF
// algorithm will group them to the same cluster.
using node_map_t = std::unordered_map<int, Node *>;
// Find the ancestor id of a node.
int UnionFindGetAncestor(const node_map_t &node_map, size_t id) {
int tmp = id;
do {
tmp = node_map.at(tmp)->attr(kUnionFindParent).Int32();
} while (node_map.at(tmp)->attr(kUnionFindParent).Int32() != tmp);
return tmp;
}
// Make this two node share the same ancestor.
// TODO(Superjom) bad performance, make a balanced tree latter.
void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) {
int a_ancestor = UnionFindGetAncestor(node_map, a);
int b_ancestor = UnionFindGetAncestor(node_map, b);
node_map.at(b_ancestor)->attr(kUnionFindParent).Int32() = a_ancestor;
node_map.at(a)->attr(kUnionFindParent).Int32() = a_ancestor;
node_map.at(b)->attr(kUnionFindParent).Int32() = a_ancestor;
}
std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
std::vector<Node *> marked_nodes;
for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes()) {
if (node.attr(kMarkerAttrName).Bool()) {
marked_nodes.push_back(&node);
}
}
// extract sub-graphs in the marked node set, use Union Find algorithm.
node_map_t node_map; // id to ptr
for (auto *n : marked_nodes) {
// n's parent == n.id means it is the ancestor
n->attr(kUnionFindParent).Int32() = n->id();
node_map[n->id()] = n;
}
std::unordered_set<Node *> visited;
for (auto *n : marked_nodes) {
for (auto *out : n->outlinks) {
if (node_map.count(out->id())) {
UnionFindCombine(node_map, n->id(), out->id());
}
}
}
std::unordered_map<int /*ancestor*/, std::vector<Node *>> clusters;
for (auto *n : marked_nodes) {
if (n->type() == Node::Type::kFunction) {
clusters[UnionFindGetAncestor(node_map,
n->attr(kUnionFindParent).Int32())]
.push_back(n);
}
}
std::vector<std::vector<Node *>> result;
std::for_each(clusters.begin(), clusters.end(),
[&](const decltype(clusters)::value_type &it) {
result.push_back(it.second);
});
return result;
}
void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); }
void SubGraphFuse::ReplaceNodesWithSubGraphs() {
auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)();
for (auto &subgraph : subgraphs) {
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// as deleted. 3. Replace the deleted node with the new Block Node.
auto *block_node = graph_->nodes.Create(Node::Type::kFunctionBlock);
auto io = ExtractInputAndOutputOfSubGraph(subgraph);
block_node->inlinks = std::move(io.first);
block_node->outlinks = std::move(io.second);
for (auto *node : subgraph) {
// TODO(Superjomn) need a unified mechanism to treat deleted node in each
// pass.
node->SetDeleted();
}
std::unordered_map<Node *, Node *>
delelte_node_map; // deleted node to BlockNode
for (auto *n : block_node->inlinks) {
n->inlinks.clear();
}
for (auto *n : block_node->outlinks) {
n->outlinks.clear();
}
for (auto *n : block_node->inlinks) {
n->outlinks.push_back(block_node);
}
for (auto *n : block_node->outlinks) {
n->inlinks.push_back(n);
}
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the the class to partition a graph.
*/
#pragma once
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Detect the nodes in a sub-graph that meet some conditions. This class doesn't
* modify the graph.
*/
class SubGraphSplitter {
public:
static const char *kMarkerAttrName;
// Tell whether a node is inside a sub-graph.
using NodeInsideSubgraphTeller = std::function<bool(const Node *)>;
SubGraphSplitter(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
: graph_(graph), node_inside_subgraph_teller_(teller) {}
std::vector<std::vector<Node *>> operator()();
protected:
// Mark the nodes inside the accepted sub-graph using
// node_inside_subgraph_teller.
void MarkNodesInsideSubGraph();
// Merge the marked nodes into sub-graphs and return the sub-graphs.
std::vector<std::vector<Node *>> ExtractSubGraphs();
private:
DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
/*
* SubGraphFuse - Replace some nodes with the sub-graph node they are inside. To
* some extent, the TensorRT engine is just a fusion op for a model.
*/
class SubGraphFuse {
public:
using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller;
SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
: graph_(graph), node_inside_subgraph_teller_(teller) {}
// The main method which run all the logic.
void operator()();
protected:
// Remove the nodes inside sub-graphs and replace with the SubGraphNode.
void ReplaceNodesWithSubGraphs();
private:
DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Split) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
LOG(INFO) << "spliter\n" << dfg.DotString();
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
ASSERT_GT(dfg.nodes.size(), 5UL);
auto subgraphs = SubGraphSplitter(&dfg, teller)();
// Check the number of the marked nodes.
int marked_nodes = 0;
for (auto& node : dfg.nodes.nodes()) {
if (node->IsFunction() &&
node->attr(SubGraphSplitter::kMarkerAttrName).Bool()) {
++marked_nodes;
}
}
EXPECT_EQ(marked_nodes, 6);
// For human debug.
for (auto& subgraph : subgraphs) {
LOG(INFO) << "subgraph size " << subgraph.size();
for (auto* node : subgraph) {
LOG(INFO) << "node " << node->repr();
}
}
ASSERT_EQ(subgraphs.size(), 1UL);
// The last sub-graph has 5 Functions.
ASSERT_EQ(subgraphs.back().size(), 6UL);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace paddle {
namespace inference {
namespace analysis {
DEFINE_string(inference_model_dir, "", "inference test model dir");
static framework::proto::ProgramDesc LoadProgramDesc(
const std::string& model_dir = FLAGS_inference_model_dir) {
// TODO(Superjomn) update latter.
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto* scope = new paddle::framework::Scope();
auto program = Load(&executor, scope, model_dir);
return *program->Proto();
}
static DataFlowGraph ProgramDescToDFG(
const framework::proto::ProgramDesc& desc) {
DataFlowGraph graph;
FluidToDataFlowGraphPass pass;
pass.Initialize(desc);
pass.Run(&graph);
pass.Finalize();
return graph;
}
class DFG_Tester : public ::testing::Test {
protected:
void SetUp() override { desc = LoadProgramDesc(FLAGS_inference_model_dir); }
framework::proto::ProgramDesc desc;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -58,12 +58,13 @@ void GetTensorPayload(framework::Variable* var,
if (platform::is_gpu_place(ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE(platform::is_gpu_place(tensor.place()));
platform::CPUPlace cpu;
platform::CUDAPinnedPlace cuda_pinned;
auto& gpu_dev_ctx = static_cast<const platform::CUDADeviceContext&>(ctx);
auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
*payload = memory::Alloc(cpu, copy_size);
*payload = memory::Alloc(cuda_pinned, copy_size);
memory::Copy(cpu, *payload, boost::get<platform::CUDAPlace>(tensor.place()),
memory::Copy(cuda_pinned, *payload,
boost::get<platform::CUDAPlace>(tensor.place()),
reinterpret_cast<const void*>(tensor.data<void>()), copy_size,
gpu_dev_ctx.stream());
ctx.Wait();
......@@ -90,11 +91,11 @@ void GetSelectedRowsPayload(framework::Variable* var,
auto* tensor = slr->mutable_value();
if (platform::is_gpu_place(ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
platform::CPUPlace cpu;
platform::CUDAPinnedPlace cuda_pinned;
auto& gpu_dev_ctx = static_cast<const platform::CUDADeviceContext&>(ctx);
auto copy_size = tensor->numel() * framework::SizeOfType(tensor->type());
*payload = memory::Alloc(cpu, copy_size);
memory::Copy(cpu, *payload,
*payload = memory::Alloc(cuda_pinned, copy_size);
memory::Copy(cuda_pinned, *payload,
boost::get<platform::CUDAPlace>(tensor->place()),
reinterpret_cast<const void*>(tensor->data<void>()), copy_size,
gpu_dev_ctx.stream());
......@@ -145,8 +146,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
// GPU data is copied to CPU buffer when sending,
// free the buffer when possible.
destroy_callback = [](void* backing) {
platform::CPUPlace cpu;
memory::Free(cpu, backing);
platform::CUDAPinnedPlace cuda_pinned;
memory::Free(cuda_pinned, backing);
};
}
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdio.h> // for removing the port file
#include <fstream>
#include <ostream>
#include <thread> // NOLINT
......@@ -77,12 +78,14 @@ ListenAndServOp::ListenAndServOp(const std::string &type,
void ListenAndServOp::Stop() {
rpc_service_->Push(LISTEN_TERMINATE_MESSAGE);
server_thread_->join();
auto file_path = string::Sprintf("/tmp/paddle.%d.port", ::getpid());
remove(file_path.c_str());
}
void ListenAndServOp::SavePort(const std::string &file_path) const {
void ListenAndServOp::SavePort() const {
// NOTE: default write file to /tmp/paddle.selected_port
selected_port_ = rpc_service_->GetSelectedPort();
auto file_path = string::Sprintf("/tmp/paddle.%d.port", ::getpid());
std::ofstream port_file;
port_file.open(file_path);
port_file << selected_port_.load();
......@@ -331,7 +334,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// Write to a file of server selected port for python use.
std::string file_path = string::Sprintf("/tmp/paddle.%d.selected_port",
static_cast<int>(::getpid()));
SavePort(file_path);
SavePort();
if (sync_mode) {
RunSyncLoop(&executor, program, &recv_scope, prefetch_block);
} else {
......
......@@ -48,8 +48,7 @@ class ListenAndServOp : public framework::OperatorBase {
void RunAsyncLoop(framework::Executor* executor,
framework::ProgramDesc* program) const;
void SavePort(
const std::string& file_path = "/tmp/paddle.selected_port") const;
void SavePort() const;
void WaitServerReady();
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/reduce_op.h"
#include <algorithm>
#include <string>
#include <vector>
......@@ -34,11 +35,14 @@ class ReduceOp : public framework::OperatorWithKernel {
auto x_dims = ctx->GetInputDim("X");
auto x_rank = x_dims.size();
PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported.");
int dim = ctx->Attrs().Get<int>("dim");
if (dim < 0) dim = x_rank + dim;
auto dims = ctx->Attrs().Get<std::vector<int>>("dim");
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) dims[i] = x_rank + dims[i];
PADDLE_ENFORCE_LT(
dim, x_rank,
dims[i], x_rank,
"The dim should be in the range [-rank(input), rank(input)).");
}
sort(dims.begin(), dims.end());
bool reduce_all = ctx->Attrs().Get<bool>("reduce_all");
bool keep_dim = ctx->Attrs().Get<bool>("keep_dim");
if (reduce_all) {
......@@ -49,14 +53,22 @@ class ReduceOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", {1});
} else {
auto dims_vector = vectorize(x_dims);
if (keep_dim || x_rank == 1) {
dims_vector[dim] = 1;
if (keep_dim) {
for (size_t i = 0; i < dims.size(); ++i) {
dims_vector[dims[i]] = 1;
}
} else {
dims_vector.erase(dims_vector.begin() + dim);
const int kDelFlag = -2;
for (size_t i = 0; i < dims.size(); ++i) {
dims_vector[dims[i]] = kDelFlag;
}
dims_vector.erase(
remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
dims_vector.end());
}
auto out_dims = framework::make_ddim(dims_vector);
ctx->SetOutputDim("Out", out_dims);
if (dim != 0) {
if (dims[0] != 0) {
// Only pass LoD when not reducing on the first dim.
ctx->ShareLoD("X", /*->*/ "Out");
}
......@@ -75,11 +87,14 @@ class ReduceGradOp : public framework::OperatorWithKernel {
auto x_dims = ctx->GetInputDim("X");
auto x_rank = x_dims.size();
PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported.");
int dim = ctx->Attrs().Get<int>("dim");
if (dim < 0) dim = x_rank + dim;
auto dims = ctx->Attrs().Get<std::vector<int>>("dim");
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) dims[i] = x_rank + dims[i];
PADDLE_ENFORCE_LT(
dim, x_rank,
dims[i], x_rank,
"The dim should be in the range [-rank(input), rank(input)).");
}
sort(dims.begin(), dims.end());
auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
......@@ -95,13 +110,13 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) The input tensor. Tensors with rank at most 6 are "
"supported.");
AddOutput("Out", "(Tensor) The result tensor.");
AddAttr<int>(
AddAttr<std::vector<int>>(
"dim",
"(int, default 0) The dimension to reduce. "
"(list<int>, default {0}) The dimensions to reduce. "
"Must be in the range [-rank(input), rank(input)). "
"If `dim < 0`, the dim to reduce is `rank + dim`. "
"If `dim[i] < 0`, the dims[i] to reduce is `rank + dims[i]`. "
"Note that reducing on the first dim will make the LoD info lost.")
.SetDefault(0);
.SetDefault({0});
AddAttr<bool>("keep_dim",
"(bool, default false) "
"If true, retain the reduced dimension with length 1.")
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
......@@ -109,6 +110,11 @@ struct ProdGradFunctor {
}
};
#define HANDLE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
ReduceCompute<NDIM, RDIM>(context); \
}
template <typename DeviceContext, typename T, typename Functor>
class ReduceKernel : public framework::OpKernel<T> {
public:
......@@ -127,32 +133,29 @@ class ReduceKernel : public framework::OpKernel<T> {
Functor functor;
functor(place, &x, &out, reduce_dim);
} else {
int rank = context.Input<Tensor>("X")->dims().size();
switch (rank) {
case 1:
ReduceCompute<1>(context);
break;
case 2:
ReduceCompute<2>(context);
break;
case 3:
ReduceCompute<3>(context);
break;
case 4:
ReduceCompute<4>(context);
break;
case 5:
ReduceCompute<5>(context);
break;
case 6:
ReduceCompute<6>(context);
break;
}
int ndim = context.Input<Tensor>("X")->dims().size();
int rdim = context.Attr<std::vector<int>>("dim").size();
HANDLE_DIM(6, 5);
HANDLE_DIM(6, 4);
HANDLE_DIM(6, 3);
HANDLE_DIM(6, 2);
HANDLE_DIM(6, 1);
HANDLE_DIM(5, 4);
HANDLE_DIM(5, 3);
HANDLE_DIM(5, 2);
HANDLE_DIM(5, 1);
HANDLE_DIM(4, 3);
HANDLE_DIM(4, 2);
HANDLE_DIM(4, 1);
HANDLE_DIM(3, 2);
HANDLE_DIM(3, 1);
HANDLE_DIM(2, 1);
HANDLE_DIM(1, 1);
}
}
private:
template <size_t D>
template <size_t D, size_t R_D>
void ReduceCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<Tensor>("X");
auto* output = context.Output<Tensor>("Out");
......@@ -160,18 +163,26 @@ class ReduceKernel : public framework::OpKernel<T> {
auto x = EigenTensor<T, D>::From(*input);
auto x_rank = static_cast<int>(x.dimensions().size());
int dim = static_cast<int>(context.Attr<int>("dim"));
if (dim < 0) dim = x_rank + dim;
auto reduce_dim = Eigen::array<int, 1>({{dim}});
auto dims = context.Attr<std::vector<int>>("dim");
auto reduce_dim = Eigen::array<int, R_D>();
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) dims[i] = x_rank + dims[i];
reduce_dim[i] = dims[i];
}
// construct the squeezed output tensor
bool keep_dim = context.Attr<bool>("keep_dim");
DDim dims = output->dims();
auto dims_vector = vectorize(dims);
DDim out_dims = output->dims();
if (keep_dim && x_rank > 1) {
dims_vector.erase(dims_vector.begin() + dim);
dims = framework::make_ddim(dims_vector);
const int kDelFlag = -2;
auto dims_vector = vectorize(out_dims);
for (size_t i = 0; i < dims.size(); ++i) {
dims_vector[dims[i]] = kDelFlag;
}
dims_vector.erase(
remove(dims_vector.begin(), dims_vector.end(), kDelFlag),
dims_vector.end());
out_dims = framework::make_ddim(dims_vector);
}
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
Functor functor;
......@@ -180,7 +191,7 @@ class ReduceKernel : public framework::OpKernel<T> {
auto out = EigenScalar<T>::From(*output);
functor(place, &x, &out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - 1)>::From(*output, dims);
auto out = EigenTensor<T, (D - R_D)>::From(*output, out_dims);
functor(place, &x, &out, reduce_dim);
}
}
......@@ -245,21 +256,29 @@ class ReduceGradKernel : public framework::OpKernel<T> {
auto x = EigenTensor<T, D>::From(*input0);
auto x_grad = EigenTensor<T, D>::From(*output);
auto x_rank = static_cast<int>(x.dimensions().size());
int dim = static_cast<int>(context.Attr<int>("dim"));
if (dim < 0) dim = x_rank + dim;
DDim dims = input0->dims();
dims[dim] = 1;
auto x_reduce = EigenTensor<T, D>::From(*input1, dims);
auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims);
auto dims = context.Attr<std::vector<int>>("dim");
auto x_dims = input0->dims();
auto reduced_dims_v = vectorize(x_dims);
Eigen::array<int, D> broadcast_dim;
for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
broadcast_dim[dim] = input0->dims()[dim];
int broad_cats_times = 1;
for (size_t i = 0; i < dims.size(); ++i) {
if (dims[i] < 0) dims[i] = x_rank + dims[i];
reduced_dims_v[dims[i]] = 1;
broadcast_dim[dims[i]] = x_dims[dims[i]];
broad_cats_times *= x_dims[dims[i]];
}
auto reduced_dims = framework::make_ddim(reduced_dims_v);
auto x_reduce = EigenTensor<T, D>::From(*input1, reduced_dims);
auto x_reduce_grad = EigenTensor<T, D>::From(*input2, reduced_dims);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
Functor functor;
functor(place, &x, &x_reduce, &x_grad, &x_reduce_grad, broadcast_dim,
broadcast_dim[dim]);
broad_cats_times);
}
};
......
......@@ -433,7 +433,7 @@ EOF
EOF
if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.1.2-1+cuda${CUDA_MAJOR} libnccl-dev=2.1.2-1+cuda${CUDA_MAJOR} &&"
else
NCCL_DEPS=""
fi
......
......@@ -2082,11 +2082,11 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the sum is performed. If
dim (list|int|None): The dimensions along which the sum is performed. If
:attr:`None`, sum all elements of :attr:`input` and return a
Tensor variable with a single element, otherwise must be in the
range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
the dimension to reduce is :math:`rank + dim`.
range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`,
the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool|False): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
......@@ -2107,15 +2107,25 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_sum(x, dim=0) # [0.3, 0.5, 1.1, 1.6]
fluid.layers.reduce_sum(x, dim=-1) # [1.9, 1.6]
fluid.layers.reduce_sum(x, dim=1, keep_dim=True) # [[1.9], [1.6]]
# x is a Tensor variable with shape [2, 2, 2] and elements as below:
# [[[1, 2], [3, 4]],
# [[5, 6], [7, 8]]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_sum(x, dim=[1, 2]) # [10, 26]
fluid.layers.reduce_sum(x, dim=[0, 1]) # [16, 20]
"""
helper = LayerHelper('reduce_sum', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_sum',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
......@@ -2128,11 +2138,11 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the mean is computed. If
dim (list|int|None): The dimensions along which the mean is computed. If
:attr:`None`, compute the mean over all elements of :attr:`input`
and return a Tensor variable with a single element, otherwise
must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
:math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
......@@ -2153,15 +2163,24 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_mean(x, dim=0) # [0.15, 0.25, 0.55, 0.8]
fluid.layers.reduce_mean(x, dim=-1) # [0.475, 0.4]
fluid.layers.reduce_mean(x, dim=1, keep_dim=True) # [[0.475], [0.4]]
# x is a Tensor variable with shape [2, 2, 2] and elements as below:
# [[[1.0, 2.0], [3.0, 4.0]],
# [[5.0, 6.0], [7.0, 8.0]]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_mean(x, dim=[1, 2]) # [2.5, 6.5]
fluid.layers.reduce_mean(x, dim=[0, 1]) # [4.0, 5.0]
"""
helper = LayerHelper('reduce_mean', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_mean',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
......@@ -2174,11 +2193,11 @@ def reduce_max(input, dim=None, keep_dim=False, name=None):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the maximum is computed.
dim (list|int|None): The dimension along which the maximum is computed.
If :attr:`None`, compute the maximum over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
......@@ -2199,15 +2218,24 @@ def reduce_max(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_max(x, dim=0) # [0.2, 0.3, 0.6, 0.9]
fluid.layers.reduce_max(x, dim=-1) # [0.9, 0.7]
fluid.layers.reduce_max(x, dim=1, keep_dim=True) # [[0.9], [0.7]]
# x is a Tensor variable with shape [2, 2, 2] and elements as below:
# [[[1.0, 2.0], [3.0, 4.0]],
# [[5.0, 6.0], [7.0, 8.0]]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_max(x, dim=[1, 2]) # [4.0, 8.0]
fluid.layers.reduce_max(x, dim=[0, 1]) # [7.0, 8.0]
"""
helper = LayerHelper('reduce_max', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_max',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
......@@ -2220,11 +2248,11 @@ def reduce_min(input, dim=None, keep_dim=False, name=None):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the minimum is computed.
dim (list|int|None): The dimensions along which the minimum is computed.
If :attr:`None`, compute the minimum over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
......@@ -2245,15 +2273,24 @@ def reduce_min(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_min(x, dim=0) # [0.1, 0.2, 0.5, 0.7]
fluid.layers.reduce_min(x, dim=-1) # [0.2, 0.1]
fluid.layers.reduce_min(x, dim=1, keep_dim=True) # [[0.2], [0.1]]
# x is a Tensor variable with shape [2, 2, 2] and elements as below:
# [[[1.0, 2.0], [3.0, 4.0]],
# [[5.0, 6.0], [7.0, 8.0]]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_min(x, dim=[1, 2]) # [1.0, 5.0]
fluid.layers.reduce_min(x, dim=[0, 1]) # [1.0, 2.0]
"""
helper = LayerHelper('reduce_min', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_min',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
......@@ -2266,11 +2303,11 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the product is performed. If
dim (list|int|None): The dimensions along which the product is performed. If
:attr:`None`, multipy all elements of :attr:`input` and return a
Tensor variable with a single element, otherwise must be in the
range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
the dimension to reduce is :math:`rank + dim`.
range :math:`[-rank(input), rank(input))`. If :math:`dim[i] < 0`,
the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool|False): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
......@@ -2292,15 +2329,24 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_prod(x, dim=-1) # [0.027, 0.0084]
fluid.layers.reduce_prod(x, dim=1,
keep_dim=True) # [[0.027], [0.0084]]
# x is a Tensor variable with shape [2, 2, 2] and elements as below:
# [[[1.0, 2.0], [3.0, 4.0]],
# [[5.0, 6.0], [7.0, 8.0]]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_prod(x, dim=[1, 2]) # [24.0, 1680.0]
fluid.layers.reduce_prod(x, dim=[0, 1]) # [105.0, 384.0]
"""
helper = LayerHelper('reduce_prod', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_prod',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
......@@ -2403,7 +2449,6 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
if len(x.shape) == 1:
axis = 0
helper = LayerHelper("l2_normalize", **locals())
square = helper.create_tmp_variable(dtype=x.dtype)
......@@ -2415,7 +2460,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
inputs={"X": square},
outputs={"Out": reduced_sum},
attrs={
"dim": 1 if axis is None else axis,
"dim": [1] if axis is None else [axis],
"keep_dim": True,
"reduce_all": False
})
......
......@@ -28,8 +28,8 @@ from contextlib import contextmanager
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad',
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'Adadelta', 'ModelAverage',
'Optimizer'
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
'Adadelta', 'ModelAverage', 'Optimizer'
]
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from functools import partial
import numpy as np
CLASS_DIM = 2
EMB_DIM = 128
HID_DIM = 512
BATCH_SIZE = 128
def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=3,
act="tanh",
pool_type="sqrt")
conv_4 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=4,
act="tanh",
pool_type="sqrt")
prediction = fluid.layers.fc(input=[conv_3, conv_4],
size=class_dim,
act="softmax")
return prediction
def inference_program(word_dict):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
return net
def train_program(word_dict):
prediction = inference_program(word_dict)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['words', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=save_dirname,
place=place)
def create_random_lodtensor(lod, place, low, high):
data = np.random.random_integers(low, high,
[lod[-1], 1]).astype("int64")
res = fluid.LoDTensor()
res.set(data, place)
res.set_lod([lod])
return res
lod = [0, 4, 10]
tensor_words = create_random_lodtensor(
lod, place, low=0, high=len(word_dict) - 1)
results = inferencer.infer({'words': tensor_words})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from functools import partial
import numpy as np
CLASS_DIM = 2
EMB_DIM = 128
BATCH_SIZE = 128
LSTM_SIZE = 128
def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
rnn = fluid.layers.DynamicRNN()
with rnn.block():
word = rnn.step_input(sentence)
prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
def gate_common(ipt, hidden, size):
gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
return gate0 + gate1
forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell = forget_gate * prev_cell + input_gate * cell_gate
hidden = output_gate * fluid.layers.tanh(x=cell)
rnn.update_memory(prev_cell, cell)
rnn.update_memory(prev_hidden, hidden)
rnn.output(hidden)
last = fluid.layers.sequence_last_step(rnn())
prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
return prediction
def inference_program(word_dict):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
dict_dim = len(word_dict)
pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE)
return pred
def train_program(word_dict):
prediction = inference_program(word_dict)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer=optimizer)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
reader=test_reader, feed_order=['words', 'label'])
print("avg_cost: %s" % avg_cost)
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_dirname)
trainer.stop()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=25000),
batch_size=BATCH_SIZE)
trainer.train(
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=save_dirname,
place=place)
def create_random_lodtensor(lod, place, low, high):
data = np.random.random_integers(low, high,
[lod[-1], 1]).astype("int64")
res = fluid.LoDTensor()
res.set(data, place)
res.set_lod([lod])
return res
lod = [0, 4, 10]
tensor_words = create_random_lodtensor(
lod, place, low=0, high=len(word_dict) - 1)
results = inferencer.infer({'words': tensor_words})
print("infer results: ", results)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
if __name__ == '__main__':
for use_cuda in (False, True):
main(use_cuda=use_cuda)
......@@ -36,7 +36,7 @@ class TestSendOp(unittest.TestCase):
p.start()
time.sleep(10)
with open("/tmp/paddle.%d.selected_port" % p.pid, "r") as fn:
with open("/tmp/paddle.%d.port" % p.pid, "r") as fn:
selected_port = int(fn.readlines()[0])
self.init_client(place, selected_port)
......
......@@ -34,8 +34,10 @@ class TestMeanOp(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")}
self.attrs = {'dim': 1}
self.outputs = {'Out': self.inputs['X'].mean(axis=self.attrs['dim'])}
self.attrs = {'dim': [1]}
self.outputs = {
'Out': self.inputs['X'].mean(axis=tuple(self.attrs['dim']))
}
def test_check_output(self):
self.check_output()
......@@ -50,8 +52,10 @@ class TestMaxOp(OpTest):
def setUp(self):
self.op_type = "reduce_max"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': -1}
self.outputs = {'Out': self.inputs['X'].max(axis=self.attrs['dim'])}
self.attrs = {'dim': [-1]}
self.outputs = {
'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim']))
}
def test_check_output(self):
self.check_output()
......@@ -63,8 +67,10 @@ class TestMinOp(OpTest):
def setUp(self):
self.op_type = "reduce_min"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': 2}
self.outputs = {'Out': self.inputs['X'].min(axis=self.attrs['dim'])}
self.attrs = {'dim': [2]}
self.outputs = {
'Out': self.inputs['X'].min(axis=tuple(self.attrs['dim']))
}
def test_check_output(self):
self.check_output()
......@@ -87,9 +93,10 @@ class TestKeepDimReduce(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': -2, 'keep_dim': True}
self.attrs = {'dim': [-2], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].sum(axis=self.attrs['dim'], keepdims=True)
'Out':
self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True)
}
def test_check_output(self):
......@@ -126,5 +133,67 @@ class TestReduceAll(OpTest):
self.check_grad(['X'], 'Out')
## reduction in multi dims
class TestReduceMeanOpMultiAxises(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((5, 6, 2, 10)).astype("float64")}
self.attrs = {'dim': [1, 2]}
self.outputs = {'Out': self.inputs['X'].mean(axis=(1, 2))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMaxOpMultiAxises(OpTest):
"""Remove Max with subgradient from gradient check to confirm the success of CI."""
def setUp(self):
self.op_type = "reduce_max"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': [-2, -1]}
self.outputs = {
'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim']))
}
def test_check_output(self):
self.check_output()
class TestReduceMinOpMultiAxises(OpTest):
"""Remove Min with subgradient from gradient check to confirm the success of CI."""
def setUp(self):
self.op_type = "reduce_min"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': [1, 2]}
self.outputs = {
'Out': self.inputs['X'].min(axis=tuple(self.attrs['dim']))
}
def test_check_output(self):
self.check_output()
class TestKeepDimReduceSumMultiAxises(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((5, 6, 10)).astype("float64")}
self.attrs = {'dim': [-2, -1], 'keep_dim': True}
self.outputs = {
'Out':
self.inputs['X'].sum(axis=tuple(self.attrs['dim']), keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册