diff --git a/Dockerfile b/Dockerfile
index 402adee2ea2822250ebc8f6229fd6a44545d58e5..634be18a51bf61e96a8bf6f263b6674a7932d6e4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
-RUN wget -qO- http://paddlepaddledeps.bj.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
+RUN wget -qO- http://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
tar -xz -C /usr/local && \
cp -rf /usr/local/TensorRT/include /usr && \
cp -rf /usr/local/TensorRT/lib /usr
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index bc36683a9facc253e7b9feb0c5a56e79491fb9b0..f61770514eb05a99c140cdb18575c89aa5235c14 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
set(module "framework")
if (NOT WIN32)
-copy(framework_lib DEPS framework_py_proto
- SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
- DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
-)
-else()
-copy(framework_lib
+set(framework_lib_deps framework_py_proto)
+endif(NOT WIN32)
+copy(framework_lib DEPS ${framework_lib_deps}
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
- DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
+ ${src_dir}/${module}/ir/*.h
+ DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
)
-endif(NOT WIN32)
set(module "memory")
copy(memory_lib
@@ -161,7 +158,8 @@ set(module "inference")
copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci
- DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
+ ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
+ DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
)
set(module "platform")
diff --git a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
index fa2b930be0d26d816566599cece8afbedc1157e0..6e5f77fec8a894c390ced8c93ee344fd8d27370e 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
@@ -60,6 +60,7 @@
Figure 3. Encoder-decoder framework
+
#### Encoder
The encoding stage consists of three steps:
@@ -81,7 +82,7 @@
During training for the machine translation task, the goal of the decoding stage is to maximize the probability of the next correct target-language word. The idea is:
1. At every time step, compute the next hidden state `$z_{i+1}$` from the encoding of the source sentence (also called the context vector) `$c$`, the `$i$`-th word `$u_i$` of the true target-language sequence, and the RNN hidden state `$z_i$` at time `$i$`. The formula is:
$$z_{i+1}=\phi_{\theta '} \left ( c,u_i,z_i \right )$$
-where `$\phi _{\theta '}$` is a nonlinear activation function; `$c=q\mathbf{h}$` is the context vector of the source sentence, and when the [attention mechanism](#注意力机制) is not used, if the output of the [encoder](#编码器) is the last element of the encoded source sentence, we can define `$c=h_T$`; `$u_i$` is the `$i$`-th word of the target-language sequence, and `$u_0$` is the start token `<s>` of the target-language sequence, marking the start of decoding; `$z_i$` is the hidden state of the decoder RNN at time `$i$`, and `$z_0$` is an all-zero vector.
+where `$\phi _{\theta '}$` is a nonlinear activation function; `$c=q\mathbf{h}$` is the context vector of the source sentence, and when the attention mechanism is not used, if the output of the [encoder](#编码器) is the last element of the encoded source sentence, we can define `$c=h_T$`; `$u_i$` is the `$i$`-th word of the target-language sequence, and `$u_0$` is the start token `<s>` of the target-language sequence, marking the start of decoding; `$z_i$` is the hidden state of the decoder RNN at time `$i$`, and `$z_0$` is an all-zero vector.
2. Normalize `$z_{i+1}$` with `softmax` to obtain the probability distribution `$p_{i+1}$` over the `$i+1$`-th word of the target-language sequence. The distribution is:
$$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
The generation process of the machine translation task is, put simply, translating source-language sentences with the pre-trained model. The decoding stage during generation differs somewhat from the training process described above; see [Beam Search Algorithm](#柱搜索算法) for details.
+
### Beam Search Algorithm
Beam search ([beam search](http://en.wikipedia.org/wiki/Beam_search)) is a heuristic graph-search algorithm that expands only the most promising nodes within a limited set when searching a graph or tree. It is typically used in systems whose solution space is very large (such as machine translation and speech recognition), because memory cannot hold all expanded solutions of the graph or tree. For example, when translating "`你好`" in a machine translation task, even if the target-language dictionary contains only 3 words (`<s>`, `<e>`, `hello`), an unbounded number of sentences can be generated (the number of times `hello` repeats is not fixed); to find the better translations among them, we can use beam search.
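To make the search loop concrete, here is a minimal, self-contained C++ sketch of the beam search described above. The `step` callback standing in for the trained decoder (it would return log-probabilities over the target vocabulary) is a hypothetical placeholder, not part of the tutorial's code.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical scorer: given the partial translation, return log P(next word)
// for every word id in the target vocabulary. In the tutorial this role is
// played by the decoder RNN conditioned on the source context vector c.
using StepFn = std::vector<float> (*)(const std::vector<int>& prefix);

struct Hypothesis {
  std::vector<int> tokens;  // generated word ids, starting with <s>
  float log_prob;           // accumulated log-probability of the prefix
};

// Expand every kept hypothesis with every vocabulary word, keep the beam_size
// best candidates, and stop once the best hypothesis has emitted <e> (eos_id)
// or max_len steps have been taken.
std::vector<int> BeamSearch(StepFn step, int vocab_size, int bos_id, int eos_id,
                            std::size_t beam_size, int max_len) {
  std::vector<Hypothesis> beam = {Hypothesis{{bos_id}, 0.0f}};
  for (int t = 0; t < max_len; ++t) {
    std::vector<Hypothesis> candidates;
    for (const auto& hyp : beam) {
      if (hyp.tokens.back() == eos_id) {
        candidates.push_back(hyp);  // finished hypotheses are carried over
        continue;
      }
      const std::vector<float> log_probs = step(hyp.tokens);
      for (int w = 0; w < vocab_size; ++w) {
        Hypothesis next = hyp;
        next.tokens.push_back(w);
        next.log_prob += log_probs[w];
        candidates.push_back(std::move(next));
      }
    }
    const std::size_t keep = std::min(beam_size, candidates.size());
    std::partial_sort(candidates.begin(), candidates.begin() + keep,
                      candidates.end(),
                      [](const Hypothesis& a, const Hypothesis& b) {
                        return a.log_prob > b.log_prob;
                      });
    candidates.resize(keep);
    beam.swap(candidates);
    if (beam.front().tokens.back() == eos_id) break;  // best beam finished
  }
  return beam.front().tokens;
}
```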
diff --git a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
index 9900dfb9a67dc6f8940bd7dd3abfa15ac8a3488f..8477cf32146c33947ced447c8bdd287a3e1e71f5 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
The network input `input_dim` denotes the size of the dictionary, and `class_dim` denotes the number of classes. Here, we use the [`sequence_conv_pool`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py) API to implement the convolution and pooling operations.
+
+
### Stacked Bidirectional LSTM
A code snippet of the stacked bidirectional network `stacked_lstm_net` is shown below:
diff --git a/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md b/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
index 2c68cdac4f10319359b74bc92569dfd3f65380b5..904d99fe2ffc9ead69a86c9763568a5c098348d5 100644
--- a/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
```
-The results above can be obtained by running `calculate_dis.py`, which loads the words in the dictionary and their corresponding trained embedding features; the usage is described in detail in [应用模型](#应用模型).
+The results above can be obtained by running `calculate_dis.py`, which loads the words in the dictionary and their corresponding trained embedding features; the usage is described in detail in [模型应用](#模型应用).
## Model Overview
@@ -189,6 +189,7 @@ dream that one day
Finally, each input is converted into a sequence of integer indices according to the position of its words in the dictionary, and this serves as the input to PaddlePaddle.
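As a small illustration of that conversion (not part of the tutorial code), the word-to-index mapping can be sketched as follows:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative only: turn a tokenized sentence into the integer index
// sequence PaddlePaddle consumes, falling back to unk_id for unknown words.
std::vector<int> WordsToIds(const std::vector<std::string>& words,
                            const std::unordered_map<std::string, int>& dict,
                            int unk_id) {
  std::vector<int> ids;
  ids.reserve(words.size());
  for (const std::string& w : words) {
    auto it = dict.find(w);
    ids.push_back(it == dict.end() ? unk_id : it->second);
  }
  return ids;
}

// Example: with dict = {"i": 0, "have": 1, "a": 2, "dream": 3}, the sentence
// "i have a dream" becomes the index sequence [0, 1, 2, 3].
```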
+
## Implementation
The model structure of this configuration is shown in the figure below:
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
```
+
## Model Application
After the model has been trained, we can use it to make some predictions.
diff --git a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
index e6f89b23a95d1a07565f3e0a285e9c3f921930df..ac36c4ecf6b9b716fe5f0dbe2346e64918c22242 100644
--- a/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
+++ b/doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
@@ -102,7 +102,7 @@ The Softmax regression model uses the simplest two-layer neural network, with only an input layer
Pooling is a form of nonlinear down-sampling. Its main purpose is to reduce computation by reducing the number of network parameters, and it also controls overfitting to some extent. A pooling layer is usually added after a convolutional layer. Pooling includes max pooling, average pooling, and so on. Max pooling partitions the input layer into regions with non-overlapping rectangles and takes the maximum value within each rectangle as the output, as shown in Figure 6.
-For more detailed knowledge of convolutional neural networks, refer to the [Stanford open course]( http://cs231n.github.io/convolutional-networks/ ) and the [image classification](https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md) tutorial.
+For more detailed knowledge of convolutional neural networks, refer to the [Stanford open course]( http://cs231n.github.io/convolutional-networks/ ) and the [image classification]( https://github.com/PaddlePaddle/book/tree/develop/03.image_classification ) tutorial.
### Common Activation Functions
- sigmoid activation function: $ f(x) = sigmoid(x) = \frac{1}{1+e^{-x}} $
diff --git a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
index a2f30823a6fcd379f94e6e98d043b0d00681827f..84987ea5daee9abd0fe2fe71bdfde62ea3388ab5 100644
--- a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
+++ b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
pip install --upgrade dist/visualdl-*.whl
```
-If you run into other problems while packaging and installing, or just want to run Visual DL without installing it, see [here](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/how_to_dev_frontend_en.md)
+If you run into other problems while packaging and installing, or just want to run Visual DL without installing it, see [here](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/develop/how_to_dev_frontend_cn.md)
## SDK
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index bfc649017f19d67660bd11d590134cf56772bb27..f5235f70ad79616801110644999d511eeda33a32 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -1,20 +1,35 @@
+set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
+file(WRITE ${pass_file} "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n")
+file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
+function(pass_library TARGET)
+ set(options "")
+ set(oneValueArgs "")
+ set(multiValueArgs SRCS DEPS)
+ cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+ cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass)
+ file(APPEND ${pass_file} "USE_PASS(${TARGET});\n")
+ set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE)
+endfunction()
+
cc_library(node SRCS node.cc DEPS proto_desc)
cc_library(graph SRCS graph.cc DEPS node)
cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
-cc_library(graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper)
-cc_library(graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper)
cc_library(graph_traits SRCS graph_traits.cc DEPS graph)
cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits)
-cc_library(fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(attention_lstm_fuse_pass SRCS attention_lstm_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass)
-cc_library(fc_lstm_fuse_pass SRCS fc_lstm_fuse_pass.cc DEPS graph graph_pattern_detector)
-cc_library(seq_concat_fc_fuse_pass SRCS seq_concat_fc_fuse_pass.cc DEPS graph graph_pattern_detector)
+
+pass_library(graph_to_program_pass)
+pass_library(graph_viz_pass)
+pass_library(fc_fuse_pass)
+pass_library(attention_lstm_fuse_pass)
+pass_library(infer_clean_graph_pass)
+pass_library(fc_lstm_fuse_pass)
+pass_library(seq_concat_fc_fuse_pass)
+set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library")
cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
-cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass graph_pattern_detector graph pass graph_traits framework_proto)
+cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
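For reference, given the seven `pass_library()` calls above, the generated `paddle_inference_pass.h` would contain roughly the following; this is reconstructed from the `file(WRITE ...)` / `file(APPEND ...)` commands in this CMakeLists.txt, with the `USE_PASS` lines appearing in the order the passes are declared:

```cpp
// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!

#include "paddle/fluid/framework/ir/pass.h"
USE_PASS(graph_to_program_pass);
USE_PASS(graph_viz_pass);
USE_PASS(fc_fuse_pass);
USE_PASS(attention_lstm_fuse_pass);
USE_PASS(infer_clean_graph_pass);
USE_PASS(fc_lstm_fuse_pass);
USE_PASS(seq_concat_fc_fuse_pass);
```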
diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
index 0278ade6763ec614701674691797d766878a378e..d7580a1cfd689c122ea1e7db5918e2cd8711718f 100644
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
auto* cell_init = graph->RetriveNode(6);
auto* hidden_init = graph->RetriveNode(8);
-#define LINK_TO(node0, node1) \
- node0->outputs.push_back(node1); \
- node1->inputs.push_back(node0);
-
auto* lstm_op = graph->CreateOpNode(&op_desc);
PrepareParameters(graph, param);
- LINK_TO(X, lstm_op);
- LINK_TO(cell_init, lstm_op);
- LINK_TO(hidden_init, lstm_op);
- LINK_TO(lstm_op, LSTMOUT);
+ IR_NODE_LINK_TO(X, lstm_op);
+ IR_NODE_LINK_TO(cell_init, lstm_op);
+ IR_NODE_LINK_TO(hidden_init, lstm_op);
+ IR_NODE_LINK_TO(lstm_op, LSTMOUT);
GraphSafeRemoveNodes(graph, marked_nodes);
}
diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc
index 513742bab69d465aac1bfb7bcef2fe89108c14a0..5a4ebd6f3de555acccd72c61bd377ffd8ce69780 100644
--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -21,74 +21,26 @@ namespace paddle {
namespace framework {
namespace ir {
-bool VarOutLinksToOp(Node* node, const std::string& op_type) {
- for (auto* out : node->outputs) {
- if (out->IsOp() && out->Op()->Type() == op_type) {
- return true;
- }
- }
- return false;
-}
-
-void BuildFCPattern(PDPattern* pattern) {
- // Create Operators
- auto* mul_op = pattern->NewNode("mul")->assert_is_op("mul");
- auto* elementwise_add_op =
- pattern->NewNode("elementwise_add")->assert_is_op("elementwise_add");
- // Create variables
- // w
- auto* mul_weight_var = pattern->NewNode("mul_weight")
- ->AsInput()
- ->assert_is_op_nth_input("mul", "Y", 0);
- // x
- auto* mul_tmp_var = pattern->NewNode("mul_tmp_var")
- ->AsInput()
- ->assert_is_op_nth_input("mul", "X", 0);
- // intermediate variable, will be removed in the IR after fuse.
- auto* mul_out_var = pattern->NewNode("mul_out")
- ->AsIntermediate()
- ->assert_is_only_output_of_op("mul")
- ->assert_is_op_input("elementwise_add");
- // bias
- auto* elementwise_add_tmp_var = pattern->NewNode("elementwise_add_tmpvar")
- ->assert_is_op_input("elementwise_add")
- ->AsInput();
- // output
- auto* elementwise_add_out_var = pattern->NewNode("elementwise_add_out")
- ->AsOutput()
- ->assert_is_op_output("elementwise_add");
-
- mul_op->LinksFrom({mul_weight_var, mul_tmp_var}).LinksTo({mul_out_var});
- elementwise_add_op->LinksFrom({mul_out_var, elementwise_add_tmp_var})
- .LinksTo({elementwise_add_out_var});
-}
-
-// Replace the node `from` in the links to `to`
-bool LinksReplace(std::vector<Node*>* links, Node* from, Node* to) {
- for (auto*& n : *links) {
- if (n == from) {
- n = to;
- return true;
- }
- }
- return false;
-}
-
std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
PADDLE_ENFORCE(graph.get());
- FusePassBase::Init("fc", graph.get());
+ FusePassBase::Init("fc_fuse", graph.get());
std::unordered_set nodes2delete;
GraphPatternDetector gpd;
- BuildFCPattern(gpd.mutable_pattern());
-
-#define GET_NODE(id) \
- PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(#id)), \
- "pattern has no Node called %s", #id); \
- auto* id = subgraph.at(gpd.pattern().RetrieveNode(#id)); \
- PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
+ // BuildFCPattern(gpd.mutable_pattern());
+ auto* x = gpd.mutable_pattern()
+ ->NewNode("fc_fuse/x")
+ ->AsInput()
+ ->assert_is_op_input("mul", "X");
+ patterns::FC(gpd.mutable_pattern(), "fc_fuse", x, true /*with bias*/);
+
+#define GET_NODE(id) \
+ PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode("fc_fuse/" #id)), \
+ "pattern has no Node called %s", #id); \
+ auto* id = subgraph.at(gpd.pattern().RetrieveNode("fc_fuse/" #id)); \
+ PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", "fc_fuse/" #id);
int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
@@ -98,43 +50,33 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
// scenario.
// FC's fusion is simple, just op fuse, no need to process the
// parameters.
- GET_NODE(mul_tmp_var); // x
- GET_NODE(mul_weight); // Y
- GET_NODE(elementwise_add_tmpvar); // bias
- GET_NODE(elementwise_add_out); // Out
- GET_NODE(mul); // MUL op
- GET_NODE(elementwise_add); // ELEMENT_ADD op
- GET_NODE(mul_out); // tmp
+ GET_NODE(x); // x
+ GET_NODE(w); // Y
+ GET_NODE(fc_bias); // bias
+ GET_NODE(fc_out); // Out
+ GET_NODE(mul); // MUL op
+ GET_NODE(elementwise_add); // ELEMENT_ADD op
+ GET_NODE(mul_out); // tmp
#undef GET_NODE
// Create an FC Node.
OpDesc desc;
- std::string fc_x_in = mul_tmp_var->Name();
- std::string fc_Y_in = mul_weight->Name();
- std::string fc_bias_in = elementwise_add_tmpvar->Name();
- std::string fc_out = elementwise_add_out->Name();
+ std::string fc_x_in = x->Name();
+ std::string fc_Y_in = w->Name();
+ std::string fc_bias_in = fc_bias->Name();
+ std::string fc_out_out = fc_out->Name();
desc.SetInput("Input", std::vector({fc_x_in}));
desc.SetInput("W", std::vector({fc_Y_in}));
desc.SetInput("Bias", std::vector({fc_bias_in}));
- desc.SetOutput("Out", std::vector({fc_out}));
+ desc.SetOutput("Out", std::vector({fc_out_out}));
desc.SetType("fc");
auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied.
- fc_node->inputs =
- std::vector<Node*>({mul_tmp_var, mul_weight, elementwise_add_tmpvar});
- fc_node->outputs.push_back(elementwise_add_out);
-
- // Update link relatons
- PADDLE_ENFORCE(LinksReplace(&mul_tmp_var->outputs, mul, fc_node));
- PADDLE_ENFORCE(LinksReplace(&mul_weight->outputs, mul, fc_node));
- PADDLE_ENFORCE(LinksReplace(&elementwise_add_tmpvar->outputs,
- elementwise_add, fc_node));
- PADDLE_ENFORCE(
- LinksReplace(&elementwise_add_out->inputs, elementwise_add, fc_node));
+ GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out});
- // Drop old nodes
- graph->RemoveNode(mul);
- graph->RemoveNode(elementwise_add);
- graph->RemoveNode(mul_out); // tmp variable
+ IR_NODE_LINK_TO(x, fc_node);
+ IR_NODE_LINK_TO(w, fc_node);
+ IR_NODE_LINK_TO(fc_bias, fc_node);
+ IR_NODE_LINK_TO(fc_node, fc_out);
found_fc_count++;
};
diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
index c404a6c44ccea8287ddfad976889a9f80cf6bad9..00f5e7fad2ef5d42eb0de9703389e910090d93c1 100644
--- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
@@ -121,15 +121,11 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
#undef TMP_NEW
#undef TMP_NAME
-#define LINK_TO(a, b) \
- a->outputs.push_back(b); \
- b->inputs.push_back(a);
- LINK_TO(input_n, op);
- LINK_TO(weight_x_n, op);
- LINK_TO(weight_h_n, op);
- LINK_TO(bias_n, op);
- LINK_TO(op, hidden_n);
-#undef LINK_TO
+ IR_NODE_LINK_TO(input_n, op);
+ IR_NODE_LINK_TO(weight_x_n, op);
+ IR_NODE_LINK_TO(weight_h_n, op);
+ IR_NODE_LINK_TO(bias_n, op);
+ IR_NODE_LINK_TO(op, hidden_n);
return op;
};
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index f651ab635eadc9f248964e91dceebf3aa9c42926..a4da69a0a2ea44806b68a27647213759ebd387b1 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -111,6 +111,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
return false;
}
}
+ for (auto& item : pdnodes2nodes_) {
+ for (auto& n : item.second) {
+ GetMarkedNodes(const_cast<Graph*>(&graph)).insert(n);
+ }
+ }
VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
return !pdnodes2nodes_.empty();
@@ -278,7 +283,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
for (const auto& subgraph : *subgraphs) {
bool valid = true;
for (auto& item : subgraph) {
- if (node_set.count(item.second)) {
+ if (item.first->IsIntermediate() && node_set.count(item.second)) {
valid = false;
break;
}
@@ -334,22 +339,22 @@ PDNode& PDNode::LinksFrom(const std::vector& others) {
}
PDNode* PDNode::assert_is_op() {
- asserts_.emplace_back([this](Node* x) { return x && x->IsOp(); });
+ asserts_.emplace_back([](Node* x) { return x && x->IsOp(); });
return this;
}
PDNode* PDNode::assert_is_op(const std::string& op_type) {
- asserts_.emplace_back([this, op_type](Node* x) {
+ asserts_.emplace_back([op_type](Node* x) {
return x && x->IsOp() && x->Op()->Type() == op_type;
});
return this;
}
PDNode* PDNode::assert_is_var() {
- asserts_.emplace_back([this](Node* x) { return x && x->IsVar(); });
+ asserts_.emplace_back([](Node* x) { return x && x->IsVar(); });
return this;
}
PDNode* PDNode::assert_var_not_persistable() {
assert_is_var();
- asserts_.emplace_back([this](Node* x) { return !x->Var()->Persistable(); });
+ asserts_.emplace_back([](Node* x) { return !x->Var()->Persistable(); });
return this;
}
PDNode* PDNode::assert_is_persistable_var() {
@@ -491,14 +496,16 @@ void GraphSafeRemoveNodes(Graph* graph,
for (auto it = node->inputs.begin(); it != node->inputs.end();) {
if (nodes.count(*it)) {
it = const_cast<Node*>(node)->inputs.erase(it);
- } else
+ } else {
it++;
+ }
}
for (auto it = node->outputs.begin(); it != node->outputs.end();) {
if (nodes.count(*it)) {
it = const_cast<Node*>(node)->outputs.erase(it);
- } else
+ } else {
it++;
+ }
}
}
}
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 024ce8ce55616cc5e0eaced4a27a6e1fb004af2c..9d67c4a6997dfe19561f37bf3ea76eba8b59ff35 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -245,6 +245,8 @@ class GraphPatternDetector {
void UniquePatterns(std::vector* subgraphs);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
+ // The intermediate PDNodes will be removed, so they can't be shared by
+ // multiple patterns.
void RemoveOverlappedMatch(std::vector* subgraphs);
// Validate whether the intermediate nodes are linked by external nodes.
@@ -295,6 +297,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
} // namespace patterns
+#define IR_NODE_LINK_TO(a, b) \
+ a->outputs.push_back(b); \
+ b->inputs.push_back(a);
+
} // namespace ir
} // namespace framework
} // namespace paddle
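A minimal standalone sketch of what `IR_NODE_LINK_TO` does, using a simplified stand-in `Node` type (the real `ir::Node` carries much more state): the macro just records the edge on both endpoints' adjacency lists.

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for ir::Node; only the adjacency lists matter here.
struct Node {
  std::vector<Node*> inputs;
  std::vector<Node*> outputs;
};

#define IR_NODE_LINK_TO(a, b) \
  a->outputs.push_back(b);    \
  b->inputs.push_back(a);

int main() {
  Node var, op;
  Node* v = &var;
  Node* o = &op;
  IR_NODE_LINK_TO(v, o);  // record the edge var -> op on both endpoints
  assert(v->outputs.front() == o);
  assert(o->inputs.front() == v);
  return 0;
}
```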
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
index 7e5c86b033a7c69a306491cf4bf8d099018c5f19..6c466fb21fb46e09961dc874e9e39655f83d17c6 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
return node->IsOp() && (node->Name() == "op2" || node->Name() == "op3");
},
"OP0");
- auto* any_var = x.mutable_pattern()->NewNode(
- [](Node* node) { return node->IsVar(); }, "VAR");
+ auto* any_var = x.mutable_pattern()
+ ->NewNode([](Node* node) { return node->IsVar(); }, "VAR")
+ ->AsIntermediate();
auto* any_op1 = x.mutable_pattern()->NewNode(
[](Node* node) { return node->IsOp(); }, "OP1");
diff --git a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
index f885567da1965b997b2063e06c839af95b43e1e1..7713ed1eab88ee4fa16d52e7425075ae66f721a3 100644
--- a/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
+++ b/paddle/fluid/framework/ir/infer_clean_graph_pass.cc
@@ -13,42 +13,41 @@
// limitations under the License.
#include
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
-class InferCleanGraphPass : public Pass {
+class InferCleanGraphPass : public FusePassBase {
public:
virtual ~InferCleanGraphPass() {}
protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const {
+ FusePassBase::Init("original_graph", graph.get());
PADDLE_ENFORCE(graph.get());
auto is_valid_node = [](Node* x) {
return x && IsControlDepVar(*x) && x->IsVar() && !x->Var();
};
- std::unordered_set invalid_nodes;
+ std::unordered_set<const Node*> invalid_nodes;
+ int valid_op = 0;
for (auto* node : graph->Nodes()) {
if (is_valid_node(node)) {
invalid_nodes.insert(node);
+ } else if (node->IsOp()) {
+ // Collect all the operators to help track the number of operators.
+ ++valid_op;
}
}
- // remove nodes from the graph.
- for (auto* node : invalid_nodes) {
- graph->RemoveNode(node);
- }
+ GraphSafeRemoveNodes(graph.get(), invalid_nodes);
- // clean edges.
- for (auto* node : graph->Nodes()) {
- CleanEdges(&node->inputs, invalid_nodes);
- CleanEdges(&node->outputs, invalid_nodes);
- }
+ AddStatis(valid_op);
return graph;
}
diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
index a776a898a5ee13b4dde12460dce71433268fb9d4..e1a441d09aaa3647c4b2a582210a2c7e2b64e0da 100644
--- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
op_desc.SetAttr("fc_activation", act->Op()->Type());
auto* op_node = graph->CreateOpNode(&op_desc);
-// Add links
-#define NODE_LINKS(a, b) \
- a->outputs.push_back(b); \
- b->inputs.push_back(a);
- NODE_LINKS(fc_w, op_node);
- NODE_LINKS(fc_bias, op_node);
- NODE_LINKS(concat_in0, op_node);
- NODE_LINKS(sequence_expand0_in, op_node);
- NODE_LINKS(sequence_expand1_in, op_node);
- NODE_LINKS(op_node, fc_out);
+ // Add links
+ IR_NODE_LINK_TO(fc_w, op_node);
+ IR_NODE_LINK_TO(fc_bias, op_node);
+ IR_NODE_LINK_TO(concat_in0, op_node);
+ IR_NODE_LINK_TO(sequence_expand0_in, op_node);
+ IR_NODE_LINK_TO(sequence_expand1_in, op_node);
+ IR_NODE_LINK_TO(op_node, fc_out);
// Clean nodes.
std::unordered_set<const Node*> marked_nodes;
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
marked_nodes.erase(sequence_expand0_in);
marked_nodes.erase(sequence_expand1_in);
marked_nodes.erase(fc_out);
-
GraphSafeRemoveNodes(graph, marked_nodes);
});
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 86392078b356df774fbc47aed9214e9f10fe33be..2006e3b24f71d0ae32b4e2ae34f1a1e4d3a82f91 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library(paddle_fluid_api
SRCS io.cc
- DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} graph_to_program_pass)
+ DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
#endif()
# Create static library
-cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api paddle_inference_api)
+cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api paddle_inference_api analysis_predictor)
if(NOT APPLE)
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym")
@@ -32,6 +32,7 @@ endif()
# Create shared library
cc_library(paddle_fluid_shared SHARED
SRCS io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
DEPS ${fluid_modules} paddle_fluid_api)
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
index cc0dd0d492d42e9552c9ce081e268330599104f0..dadc8a53706fb9edff884dcf6d49168bfef3aa30 100644
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
endif()
cc_test(${TARGET}
SRCS "${analysis_test_SRCS}"
- DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detector pass ${analysis_test_EXTRA_DEPS}
+ DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS}
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model ${mem_opt} ${analysis_test_ARGS})
set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
endif(WITH_TESTING)
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
endif()
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
- EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
- analysis_predictor
- # ir
- fc_fuse_pass
- fc_lstm_fuse_pass
- seq_concat_fc_fuse_pass
- graph_viz_pass
- infer_clean_graph_pass
- graph_pattern_detector
- infer_clean_graph_pass
- attention_lstm_fuse_pass
- paddle_inference_api
- pass
+ EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
ARGS --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
--infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
-inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc EXTRA_DEPS paddle_inference_api)
-inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc EXTRA_DEPS paddle_fluid)
+inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
+inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc)
inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc
index ec1f3979a74bd86ee7402bca441e95d3d177d113..59e103e1179240a100d492a2475573c8188bebe7 100644
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
@@ -327,9 +328,20 @@ void TestDituRNNPrediction(const std::string &model_path,
LOG(INFO) << "fused " << item.first << " " << item.second;
}
- ASSERT_TRUE(fuse_statis.count("fc"));
- EXPECT_EQ(fuse_statis.at("fc"), 1);
- EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 1);
+ int num_ops = 0;
+ for (auto &node :
+ analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
+ if (node->IsFunction()) {
+ ++num_ops;
+ }
+ }
+ LOG(INFO) << "has num ops: " << num_ops;
+
+ ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+ EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+ EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM
+ EXPECT_EQ(num_ops,
+ 13); // After graph optimization, only 13 operators exist.
}
}
@@ -357,10 +369,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
} // namespace analysis
} // namespace inference
} // namespace paddle
-
-USE_PASS(fc_fuse_pass);
-USE_PASS(seq_concat_fc_fuse_pass);
-USE_PASS(fc_lstm_fuse_pass);
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
-USE_PASS(attention_lstm_fuse_pass);
diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc b/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
index 6a13c60e7b2ebf645b12d5ddf83ef6ab3a2e83bd..367c25805d05f8d10fb8341158760ac6356a5c48 100644
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
@@ -16,6 +16,7 @@
#include
#include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace paddle {
namespace inference {
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
} // namespace analysis
} // namespace inference
} // namespace paddle
-
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
-USE_PASS(attention_lstm_fuse_pass);
-USE_PASS(fc_lstm_fuse_pass);
-USE_PASS(seq_concat_fc_fuse_pass);
-USE_PASS(fc_fuse_pass);
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index adfe4392448557a30cd834022b9a5d21d9086b95..3a43c72e33b3d5d8910b554021bb1c6a626edd93 100644
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -18,10 +18,7 @@ if(APPLE)
endif(APPLE)
-set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
- graph_viz_pass fc_fuse_pass
- infer_clean_graph_pass
- )
+set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager ${GLOB_PASS_LIB})
if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 33862232bdaae817b9ca72879605386c32ed3e8b..e87abd2feeff7769eb223f83a7a28f5cb3337cdb 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle {
@@ -133,7 +134,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
} // namespace paddle
-
-USE_PASS(fc_fuse_pass);
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index bdc9a15d543818da94ac2acf34ecabbbbae3291e..ce4728ab8046f886fc40af3644d855a4f971fb71 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -16,6 +16,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/paddle/fluid/inference/paddle_fluid.map b/paddle/fluid/inference/paddle_fluid.map
index 5203784dc1fcb672eb6a26d9dfd3ffbe02e08038..7e5cae04b81e6ce759b92f6c4b921ecf974e8260 100644
--- a/paddle/fluid/inference/paddle_fluid.map
+++ b/paddle/fluid/inference/paddle_fluid.map
@@ -1,6 +1,7 @@
{
global:
*paddle*;
+ *Pass*;
local:
*;
};
diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..0dee1781623d5a62830545c0952e5aadbe37accb
--- /dev/null
+++ b/paddle/fluid/operators/detection/bbox_util.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/tensor.h"
+
+namespace paddle {
+namespace operators {
+
+/*
+ * transform that computes target bounding-box regression deltas
+ * given proposal boxes and ground-truth boxes.
+ */
+template <typename T>
+inline void BoxToDelta(const int box_num, const framework::Tensor& ex_boxes,
+ const framework::Tensor& gt_boxes, const T* weights,
+ const bool normalized, framework::Tensor* box_delta) {
+ auto ex_boxes_et = framework::EigenTensor<T, 2>::From(ex_boxes);
+ auto gt_boxes_et = framework::EigenTensor<T, 2>::From(gt_boxes);
+ auto trg = framework::EigenTensor<T, 2>::From(*box_delta);
+ T ex_w, ex_h, ex_ctr_x, ex_ctr_y, gt_w, gt_h, gt_ctr_x, gt_ctr_y;
+ for (int64_t i = 0; i < box_num; ++i) {
+ ex_w = ex_boxes_et(i, 2) - ex_boxes_et(i, 0) + (normalized == false);
+ ex_h = ex_boxes_et(i, 3) - ex_boxes_et(i, 1) + (normalized == false);
+ ex_ctr_x = ex_boxes_et(i, 0) + 0.5 * ex_w;
+ ex_ctr_y = ex_boxes_et(i, 1) + 0.5 * ex_h;
+
+ gt_w = gt_boxes_et(i, 2) - gt_boxes_et(i, 0) + (normalized == false);
+ gt_h = gt_boxes_et(i, 3) - gt_boxes_et(i, 1) + (normalized == false);
+ gt_ctr_x = gt_boxes_et(i, 0) + 0.5 * gt_w;
+ gt_ctr_y = gt_boxes_et(i, 1) + 0.5 * gt_h;
+
+ trg(i, 0) = (gt_ctr_x - ex_ctr_x) / ex_w;
+ trg(i, 1) = (gt_ctr_y - ex_ctr_y) / ex_h;
+ trg(i, 2) = std::log(gt_w / ex_w);
+ trg(i, 3) = std::log(gt_h / ex_h);
+
+ if (weights) {
+ trg(i, 0) = trg(i, 0) / weights[0];
+ trg(i, 1) = trg(i, 1) / weights[1];
+ trg(i, 2) = trg(i, 2) / weights[2];
+ trg(i, 3) = trg(i, 3) / weights[3];
+ }
+ }
+}
+
+template <typename T>
+void Gather(const T* in, const int in_stride, const int* index, const int num,
+ T* out) {
+ const int stride_bytes = in_stride * sizeof(T);
+ for (int i = 0; i < num; ++i) {
+ int id = index[i];
+ memcpy(out + i * in_stride, in + id * in_stride, stride_bytes);
+ }
+}
+
+} // namespace operators
+} // namespace paddle
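Written out as equations, for an example (proposal/anchor) box and its matched ground-truth box in corner format $(x_1, y_1, x_2, y_2)$, `BoxToDelta` above computes (shown for the unnormalized case, i.e. the `+1` on widths and heights, with the optional weights omitted):

$$
\begin{aligned}
w_{ex} &= x_2^{ex} - x_1^{ex} + 1, \quad h_{ex} = y_2^{ex} - y_1^{ex} + 1, \\
c_x^{ex} &= x_1^{ex} + 0.5\,w_{ex}, \quad c_y^{ex} = y_1^{ex} + 0.5\,h_{ex}, \\
t_x &= (c_x^{gt} - c_x^{ex}) / w_{ex}, \quad t_y = (c_y^{gt} - c_y^{ex}) / h_{ex}, \\
t_w &= \log(w_{gt} / w_{ex}), \quad t_h = \log(h_{gt} / h_{ex}),
\end{aligned}
$$

and, when a 4-element `weights` array is supplied, each $t_\ast$ is additionally divided by its corresponding weight.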
diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
index 0571c46f6be99c9a06b7dd2abb310eeda506ecd5..be06dc19743cfa6f093bcb3f4e9f91af315d4211 100644
--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#include
#include
#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/math_function.h"
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
}
}
-template <typename T>
-void BoxToDelta(int box_num, const Tensor& ex_boxes, const Tensor& gt_boxes,
- const std::vector<float>& weights, Tensor* box_delta) {
- auto ex_boxes_et = framework::EigenTensor<T, 2>::From(ex_boxes);
- auto gt_boxes_et = framework::EigenTensor<T, 2>::From(gt_boxes);
- auto box_delta_et = framework::EigenTensor<T, 2>::From(*box_delta);
- T ex_w, ex_h, ex_ctr_x, ex_ctr_y, gt_w, gt_h, gt_ctr_x, gt_ctr_y;
- for (int64_t i = 0; i < box_num; ++i) {
- ex_w = ex_boxes_et(i, 2) - ex_boxes_et(i, 0) + 1;
- ex_h = ex_boxes_et(i, 3) - ex_boxes_et(i, 1) + 1;
- ex_ctr_x = ex_boxes_et(i, 0) + 0.5 * ex_w;
- ex_ctr_y = ex_boxes_et(i, 1) + 0.5 * ex_h;
-
- gt_w = gt_boxes_et(i, 2) - gt_boxes_et(i, 0) + 1;
- gt_h = gt_boxes_et(i, 3) - gt_boxes_et(i, 1) + 1;
- gt_ctr_x = gt_boxes_et(i, 0) + 0.5 * gt_w;
- gt_ctr_y = gt_boxes_et(i, 1) + 0.5 * gt_h;
-
- box_delta_et(i, 0) = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0];
- box_delta_et(i, 1) = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1];
- box_delta_et(i, 2) = log(gt_w / ex_w) / ex_w / weights[2];
- box_delta_et(i, 3) = log(gt_h / ex_h) / ex_h / weights[3];
- }
-}
-
template <typename T>
std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& context, Tensor* iou,
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
Tensor* sampled_labels, Tensor* sampled_gts) {
int fg_num = fg_inds.size();
int bg_num = bg_inds.size();
- int gt_num = fg_num + bg_num;
Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
int* gt_box_inds_data =
- gt_box_inds_t.mutable_data<int>({gt_num}, context.GetPlace());
+ gt_box_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
int* gt_label_inds_data =
gt_label_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data);
@@ -303,18 +278,20 @@ std::vector SampleRoisForOneImage(
// Gather boxes and labels
Tensor sampled_boxes, sampled_labels, sampled_gts;
- int boxes_num = fg_inds.size() + bg_inds.size();
+ int fg_num = fg_inds.size();
+ int bg_num = bg_inds.size();
+ int boxes_num = fg_num + bg_num;
framework::DDim bbox_dim({boxes_num, kBoxDim});
sampled_boxes.mutable_data<T>(bbox_dim, context.GetPlace());
sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
- sampled_gts.mutable_data<T>(bbox_dim, context.GetPlace());
+ sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
GatherBoxesLabels(context, boxes, *gt_boxes, *gt_classes, fg_inds, bg_inds,
gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts);
// Compute targets
Tensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
- BoxToDelta(boxes_num, sampled_boxes, sampled_gts, bbox_reg_weights,
+ BoxToDelta(fg_num, sampled_boxes, sampled_gts, nullptr, false,
&bbox_targets_single);
// Scale rois
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel {
auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back();
auto gt_boxes_lod = gt_boxes->lod().back();
- for (size_t i = 0; i < n; ++i) {
+ for (int i = 0; i < n; ++i) {
Tensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice =
diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc
index fcdcafae7273afa6887ee531dfc37ef833b92d68..ebe6830eccd87a156768eb0d4b96220bcc9f4edc 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cc
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel {
rpn_rois->mutable_data({bbox_deltas->numel() / 4, 4},
context.GetPlace());
- rpn_roi_probs->mutable_data({scores->numel() / 4, 1},
- context.GetPlace());
+ rpn_roi_probs->mutable_data({scores->numel(), 1}, context.GetPlace());
Tensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data({num, h_bbox, w_bbox, c_bbox},
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel {
CPUGather(ctx, proposals, keep, &bbox_sel);
CPUGather(ctx, scores_sel, keep, &scores_filter);
if (nms_thresh <= 0) {
- return std::make_pair(bbox_sel, scores_sel);
+ return std::make_pair(bbox_sel, scores_filter);
}
Tensor keep_nms = NMS(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
index 177ff7cf187bc9daf69889e99ca57ae18766de90..88757f25cd9a5789758640de2d9cae0b12350b25 100644
--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#include
#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
auto in_dims = ctx->GetInputDim("DistMat");
PADDLE_ENFORCE_EQ(in_dims.size(), 2,
"The rank of Input(DistMat) must be 2.");
+
+ ctx->SetOutputDim("LocationIndex", {-1});
+ ctx->SetOutputDim("ScoreIndex", {-1});
+ ctx->SetOutputDim("TargetLabel", {-1, 1});
+ ctx->SetOutputDim("TargetBBox", {-1, 4});
+ }
+
+ protected:
+ framework::OpKernelType GetExpectedKernelType(
+ const framework::ExecutionContext& ctx) const override {
+ return framework::OpKernelType(
+ framework::ToDataType(
+ ctx.Input("DistMat")->type()),
+ platform::CPUPlace());
}
};
template <typename T>
class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
+ void Compute(const framework::ExecutionContext& context) const override {
+ auto* anchor_t = context.Input<Tensor>("Anchor"); // (H*W*A) * 4
+ auto* gt_bbox_t = context.Input<LoDTensor>("GtBox");
+ auto* dist_t = context.Input<LoDTensor>("DistMat");
+
+ auto* loc_index_t = context.Output("LocationIndex");
+ auto* score_index_t = context.Output("ScoreIndex");
+ auto* tgt_bbox_t = context.Output("TargetBBox");
+ auto* tgt_lbl_t = context.Output("TargetLabel");
+
+ auto lod = dist_t->lod().back();
+ int64_t batch_num = static_cast<int64_t>(lod.size() - 1);
+ int64_t anchor_num = dist_t->dims()[1];
+ PADDLE_ENFORCE_EQ(anchor_num, anchor_t->dims()[0]);
+
+ int rpn_batch_size = context.Attr<int>("rpn_batch_size_per_im");
+ float pos_threshold = context.Attr<float>("rpn_positive_overlap");
+ float neg_threshold = context.Attr<float>("rpn_negative_overlap");
+ float fg_fraction = context.Attr<float>("fg_fraction");
+
+ int fg_num_per_batch = static_cast<int>(rpn_batch_size * fg_fraction);
+
+ int64_t max_num = batch_num * anchor_num;
+ auto place = context.GetPlace();
+
+ tgt_bbox_t->mutable_data<T>({max_num, 4}, place);
+ auto* loc_index = loc_index_t->mutable_data<int>({max_num}, place);
+ auto* score_index = score_index_t->mutable_data<int>({max_num}, place);
+
+ Tensor tmp_tgt_lbl;
+ auto* tmp_lbl_data = tmp_tgt_lbl.mutable_data<int64_t>({max_num}, place);
+ auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
+ math::SetConstant<platform::CPUDeviceContext, int64_t> iset;
+ iset(dev_ctx, &tmp_tgt_lbl, static_cast<int64_t>(-1));
+
+ std::random_device rnd;
+ std::minstd_rand engine;
+ int seed =
+ context.Attr("fix_seed") ? context.Attr("seed") : rnd();
+ engine.seed(seed);
+
+ int fg_num = 0;
+ int bg_num = 0;
+ for (int i = 0; i < batch_num; ++i) {
+ Tensor dist = dist_t->Slice(lod[i], lod[i + 1]);
+ Tensor gt_bbox = gt_bbox_t->Slice(lod[i], lod[i + 1]);
+ auto fg_bg_gt = SampleFgBgGt(dev_ctx, dist, pos_threshold, neg_threshold,
+ rpn_batch_size, fg_num_per_batch, engine,
+ tmp_lbl_data + i * anchor_num);
+
+ int cur_fg_num = fg_bg_gt[0].size();
+ int cur_bg_num = fg_bg_gt[1].size();
+ std::transform(fg_bg_gt[0].begin(), fg_bg_gt[0].end(), loc_index,
+ [i, anchor_num](int d) { return d + i * anchor_num; });
+ memcpy(score_index, loc_index, cur_fg_num * sizeof(int));
+ std::transform(fg_bg_gt[1].begin(), fg_bg_gt[1].end(),
+ score_index + cur_fg_num,
+ [i, anchor_num](int d) { return d + i * anchor_num; });
+
+ // get target bbox deltas
+ if (cur_fg_num) {
+ Tensor fg_gt;
+ T* gt_data = fg_gt.mutable_data<T>({cur_fg_num, 4}, place);
+ Tensor tgt_bbox = tgt_bbox_t->Slice(fg_num, fg_num + cur_fg_num);
+ T* tgt_data = tgt_bbox.data<T>();
+ Gather<T>(anchor_t->data<T>(), 4,
+ reinterpret_cast<int*>(&fg_bg_gt[0][0]), cur_fg_num,
+ tgt_data);
+ Gather<T>(gt_bbox.data<T>(), 4, reinterpret_cast<int*>(&fg_bg_gt[2][0]),
+ cur_fg_num, gt_data);
+ BoxToDelta(cur_fg_num, tgt_bbox, fg_gt, nullptr, false, &tgt_bbox);
+ }
+
+ loc_index += cur_fg_num;
+ score_index += cur_fg_num + cur_bg_num;
+ fg_num += cur_fg_num;
+ bg_num += cur_bg_num;
+ }
+
+ int lbl_num = fg_num + bg_num;
+ PADDLE_ENFORCE_LE(fg_num, max_num);
+ PADDLE_ENFORCE_LE(lbl_num, max_num);
+
+ tgt_bbox_t->Resize({fg_num, 4});
+ loc_index_t->Resize({fg_num});
+ score_index_t->Resize({lbl_num});
+ auto* lbl_data = tgt_lbl_t->mutable_data<int64_t>({lbl_num, 1}, place);
+ Gather<int64_t>(tmp_lbl_data, 1, score_index_t->data<int>(), lbl_num,
+ lbl_data);
+ }
+
+ private:
void ScoreAssign(const T* dist_data, const Tensor& anchor_to_gt_max,
const int row, const int col, const float pos_threshold,
- const float neg_threshold, int64_t* target_label_data,
+ const float neg_threshold, int64_t* target_label,
std::vector* fg_inds, std::vector* bg_inds) const {
- int fg_offset = fg_inds->size();
- int bg_offset = bg_inds->size();
+ float epsilon = 0.0001;
for (int64_t i = 0; i < row; ++i) {
const T* v = dist_data + i * col;
- T max_dist = *std::max_element(v, v + col);
+ T max = *std::max_element(v, v + col);
for (int64_t j = 0; j < col; ++j) {
- T val = dist_data[i * col + j];
- if (val == max_dist) target_label_data[j] = 1;
+ if (std::abs(max - v[j]) < epsilon) {
+ target_label[j] = 1;
+ }
}
}
- // Pick the fg/bg and count the number
+ // Pick the fg/bg
+ const T* anchor_to_gt_max_data = anchor_to_gt_max.data();
for (int64_t j = 0; j < col; ++j) {
- if (anchor_to_gt_max.data()[j] > pos_threshold) {
- target_label_data[j] = 1;
- } else if (anchor_to_gt_max.data()[j] < neg_threshold) {
- target_label_data[j] = 0;
+ if (anchor_to_gt_max_data[j] >= pos_threshold) {
+ target_label[j] = 1;
+ } else if (anchor_to_gt_max_data[j] < neg_threshold) {
+ target_label[j] = 0;
}
- if (target_label_data[j] == 1) {
- fg_inds->push_back(fg_offset + j);
- } else if (target_label_data[j] == 0) {
- bg_inds->push_back(bg_offset + j);
+ if (target_label[j] == 1) {
+ fg_inds->push_back(j);
+ } else if (target_label[j] == 0) {
+ bg_inds->push_back(j);
}
}
}
- void ReservoirSampling(const int num, const int offset,
- std::minstd_rand engine,
+ void ReservoirSampling(const int num, std::minstd_rand engine,
std::vector* inds) const {
std::uniform_real_distribution<float> uniform(0, 1);
- const int64_t size = static_cast<int64_t>(inds->size() - offset);
- if (size > num) {
- for (int64_t i = num; i < size; ++i) {
+ size_t len = inds->size();
+ if (len > static_cast<size_t>(num)) {
+ for (size_t i = num; i < len; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num)
- std::iter_swap(inds->begin() + rng_ind + offset,
- inds->begin() + i + offset);
+ std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
}
+ inds->resize(num);
}
}
- void RpnTargetAssign(const framework::ExecutionContext& ctx,
- const Tensor& dist, const float pos_threshold,
- const float neg_threshold, const int rpn_batch_size,
- const int fg_num, std::minstd_rand engine,
- std::vector* fg_inds, std::vector* bg_inds,
- int64_t* target_label_data) const {
+ // std::vector> RpnTargetAssign(
+ std::vector<std::vector<int>> SampleFgBgGt(
+ const platform::CPUDeviceContext& ctx, const Tensor& dist,
+ const float pos_threshold, const float neg_threshold,
+ const int rpn_batch_size, const int fg_num, std::minstd_rand engine,
+ int64_t* target_label) const {
auto* dist_data = dist.data<T>();
- int64_t row = dist.dims()[0];
- int64_t col = dist.dims()[1];
- int fg_offset = fg_inds->size();
- int bg_offset = bg_inds->size();
+ int row = dist.dims()[0];
+ int col = dist.dims()[1];
+
+ std::vector<int> fg_inds;
+ std::vector<int> bg_inds;
+ std::vector<int> gt_inds;
// Calculate the max IoU between anchors and gt boxes
- Tensor anchor_to_gt_max;
- anchor_to_gt_max.mutable_data<T>(
- framework::make_ddim({static_cast<int64_t>(col), 1}),
- platform::CPUPlace());
- auto& place = *ctx.template device_context<platform::CPUDeviceContext>()
- .eigen_device();
- auto x = EigenMatrix<T>::From(dist);
- auto x_col_max = EigenMatrix<T>::From(anchor_to_gt_max);
- x_col_max.device(place) =
- x.maximum(Eigen::DSizes<int, 1>(0))
- .reshape(Eigen::DSizes<int, 2>(static_cast<int>(col), 1));
+ // Map from anchor to the gt box that has the highest overlap
+ auto place = ctx.GetPlace();
+ Tensor anchor_to_gt_max, anchor_to_gt_argmax;
+ anchor_to_gt_max.mutable_data<T>({col}, place);
+ int* argmax = anchor_to_gt_argmax.mutable_data<int>({col}, place);
+
+ auto x = framework::EigenMatrix<T>::From(dist);
+ auto x_col_max = framework::EigenVector<T>::Flatten(anchor_to_gt_max);
+ auto x_col_argmax =
+ framework::EigenVector<int>::Flatten(anchor_to_gt_argmax);
+ x_col_max = x.maximum(Eigen::DSizes<int, 1>(0));
+ x_col_argmax = x.argmax(0).template cast<int>();
+
// Follow the Faster RCNN's implementation
ScoreAssign(dist_data, anchor_to_gt_max, row, col, pos_threshold,
- neg_threshold, target_label_data, fg_inds, bg_inds);
+ neg_threshold, target_label, &fg_inds, &bg_inds);
// Reservoir Sampling
- ReservoirSampling(fg_num, fg_offset, engine, fg_inds);
- int bg_num = rpn_batch_size - (fg_inds->size() - fg_offset);
- ReservoirSampling(bg_num, bg_offset, engine, bg_inds);
- }
+ ReservoirSampling(fg_num, engine, &fg_inds);
+ int fg_num2 = static_cast<int>(fg_inds.size());
+ int bg_num = rpn_batch_size - fg_num2;
+ ReservoirSampling(bg_num, engine, &bg_inds);
- void Compute(const framework::ExecutionContext& context) const override {
- auto* dist = context.Input("DistMat");
- auto* loc_index = context.Output("LocationIndex");
- auto* score_index = context.Output("ScoreIndex");
- auto* tgt_lbl = context.Output("TargetLabel");
-
- auto col = dist->dims()[1];
- int64_t n = dist->lod().size() == 0UL
- ? 1
- : static_cast<int64_t>(dist->lod().back().size() - 1);
- if (dist->lod().size()) {
- PADDLE_ENFORCE_EQ(dist->lod().size(), 1UL,
- "Only support 1 level of LoD.");
+ gt_inds.reserve(fg_num2);
+ for (int i = 0; i < fg_num2; ++i) {
+ gt_inds.emplace_back(argmax[fg_inds[i]]);
}
- int rpn_batch_size = context.Attr<int>("rpn_batch_size_per_im");
- float pos_threshold = context.Attr<float>("rpn_positive_overlap");
- float neg_threshold = context.Attr<float>("rpn_negative_overlap");
- float fg_fraction = context.Attr<float>("fg_fraction");
-
- int fg_num = static_cast<int>(rpn_batch_size * fg_fraction);
-
- int64_t* target_label_data =
- tgt_lbl->mutable_data<int64_t>({n * col, 1}, context.GetPlace());
+ std::vector<std::vector<int>> fg_bg_gt;
+ fg_bg_gt.emplace_back(fg_inds);
+ fg_bg_gt.emplace_back(bg_inds);
+ fg_bg_gt.emplace_back(gt_inds);
- auto& dev_ctx = context.device_context();
- math::SetConstant<platform::CPUDeviceContext, int64_t> iset;
- iset(dev_ctx, tgt_lbl, static_cast<int64_t>(-1));
-
- std::vector<int> fg_inds;
- std::vector<int> bg_inds;
- std::random_device rnd;
- std::minstd_rand engine;
- int seed =
- context.Attr("fix_seed") ? context.Attr("seed") : rnd();
- engine.seed(seed);
-
- if (n == 1) {
- RpnTargetAssign(context, *dist, pos_threshold, neg_threshold,
- rpn_batch_size, fg_num, engine, &fg_inds, &bg_inds,
- target_label_data);
- } else {
- auto lod = dist->lod().back();
- for (size_t i = 0; i < lod.size() - 1; ++i) {
- Tensor one_ins = dist->Slice(lod[i], lod[i + 1]);
- RpnTargetAssign(context, one_ins, pos_threshold, neg_threshold,
- rpn_batch_size, fg_num, engine, &fg_inds, &bg_inds,
- target_label_data + i * col);
- }
- }
- int* loc_index_data = loc_index->mutable_data<int>(
- {static_cast<int64_t>(fg_inds.size())}, context.GetPlace());
- int* score_index_data = score_index->mutable_data<int>(
- {static_cast<int64_t>(fg_inds.size() + bg_inds.size())},
- context.GetPlace());
- memcpy(loc_index_data, reinterpret_cast(&fg_inds[0]),
- fg_inds.size() * sizeof(int));
- memcpy(score_index_data, reinterpret_cast(&fg_inds[0]),
- fg_inds.size() * sizeof(int));
- memcpy(score_index_data + fg_inds.size(),
- reinterpret_cast(&bg_inds[0]), bg_inds.size() * sizeof(int));
+ return fg_bg_gt;
}
};
class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
+ AddInput("Anchor",
+ "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
+ AddInput("GtBox", "(LoDTensor) input groud-truth bbox with shape [K, 4].");
AddInput(
"DistMat",
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex",
"(Tensor), The indexes of foreground and background anchors in all "
"RPN anchors(The rest anchors are ignored). The shape of the "
- "ScoreIndex is [F + B], F and B depend on the value of input "
- "tensor and attributes.");
- AddOutput("TargetLabel",
- "(Tensor), The target labels of each anchor with shape "
- "[K * M, 1], "
- "K and M is the same as they are in DistMat.");
+ "ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
+ " number.");
+ AddOutput("TargetBBox",
+ "(Tensor), The target bbox deltas with shape "
+ "[F, 4], F is the sampled foreground number.");
+ AddOutput(
+ "TargetLabel",
+ "(Tensor), The target labels of each anchor with shape "
+ "[F + B, 1], F and B are sampled foreground and backgroud number.");
AddComment(R"DOC(
Given the IoU between the ground-truth bboxes and the anchors, this operator
assigns classification and regression targets to each prediction.
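For context on the `ReservoirSampling` helper introduced above: element `i` (for `i >= num`) overwrites a random slot of the first `num` entries with probability `num / i`, which by induction leaves every original index equally likely to survive in the kept prefix. A standalone sketch of the same idea on a plain `std::vector<int>` (illustrative only, not the operator code):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

// Keep at most `num` of the indices in `inds`, each original element equally
// likely to survive; mirrors the in-place ReservoirSampling helper above.
void ReservoirSample(int num, std::minstd_rand* engine, std::vector<int>* inds) {
  std::uniform_real_distribution<float> uniform(0, 1);
  const std::size_t len = inds->size();
  if (len <= static_cast<std::size_t>(num)) return;  // nothing to drop
  for (std::size_t i = num; i < len; ++i) {
    // Element i replaces a random kept slot with probability num / i.
    const int rng_ind = static_cast<int>(std::floor(uniform(*engine) * i));
    if (rng_ind < num) {
      std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
    }
  }
  inds->resize(num);  // only the first `num` (randomly chosen) survive
}
```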
diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h
index f18d09d33e9052929b1ff9b36bb2b371fb513d37..451ec61ba1f7239d92c6dfbad0b2961e74e1bc17 100644
--- a/paddle/fluid/operators/gru_unit_op.h
+++ b/paddle/fluid/operators/gru_unit_op.h
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel {
gate_data, frame_size * 3);
// calculate activited gate
- Eigen::array<int, 2> extents = {batch_size, frame_size};
- Eigen::array<int, 2> u_offsets = {0, 0};
+ Eigen::array<int, 2> extents{{batch_size, frame_size}};
+ Eigen::array<int, 2> u_offsets{{0, 0}};
ActCompute(context.Attr("gate_activation"), place,
g.slice(u_offsets, extents), g.slice(u_offsets, extents));
auto u = g.slice(u_offsets, extents); // update gate
- Eigen::array<int, 2> r_offsets = {0, frame_size};
+ Eigen::array<int, 2> r_offsets{{0, frame_size}};
ActCompute(context.Attr("gate_activation"), place,
g.slice(r_offsets, extents), g.slice(r_offsets, extents));
auto r = g.slice(r_offsets, extents); // reset gate
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel {
weight_data + frame_size * frame_size * 2, frame_size, 1,
gate_data + frame_size * 2, frame_size * 3);
- Eigen::array<int, 2> c_offsets = {0, frame_size * 2};
+ Eigen::array<int, 2> c_offsets{{0, frame_size * 2}};
ActCompute(context.Attr("activation"), place,
g.slice(c_offsets, extents), g.slice(c_offsets, extents));
auto c = g.slice(c_offsets, extents); // output candidate
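For readers unfamiliar with the Eigen tensor API used here: the change above only switches the `Eigen::array` offsets/extents from copy-initialization (`= {...}`) to direct brace initialization (`{{...}}`); the slicing itself is unchanged. A minimal sketch of the pattern, assuming Eigen's unsupported Tensor module is available:

```cpp
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  const int batch_size = 2, frame_size = 3;
  // Gate pre-activations laid out as [batch_size, 3 * frame_size]:
  // update gate | reset gate | output candidate, as in gru_unit_op.h.
  Eigen::Tensor<float, 2> g(batch_size, 3 * frame_size);
  g.setRandom();

  Eigen::array<int, 2> extents{{batch_size, frame_size}};
  Eigen::array<int, 2> u_offsets{{0, 0}};
  Eigen::array<int, 2> r_offsets{{0, frame_size}};
  Eigen::array<int, 2> c_offsets{{0, frame_size * 2}};

  // Each slice is a [batch_size, frame_size] view of one gate.
  Eigen::Tensor<float, 2> u = g.slice(u_offsets, extents);
  Eigen::Tensor<float, 2> r = g.slice(r_offsets, extents);
  Eigen::Tensor<float, 2> c = g.slice(c_offsets, extents);
  (void)u; (void)r; (void)c;
  return 0;
}
```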
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel {
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
- Eigen::array<int, 2> extents = {batch_size, frame_size};
- Eigen::array<int, 2> u_offsets = {0, 0};
+ Eigen::array<int, 2> extents{{batch_size, frame_size}};
+ Eigen::array<int, 2> u_offsets{{0, 0}};
auto u = g.slice(u_offsets, extents); // update gate
- Eigen::array<int, 2> r_offsets = {0, frame_size};
+ Eigen::array<int, 2> r_offsets{{0, frame_size}};
auto r = g.slice(r_offsets, extents); // reset gate
- Eigen::array<int, 2> c_offsets = {0, frame_size * 2};
+ Eigen::array<int, 2> c_offsets{{0, frame_size * 2}};
auto c = g.slice(c_offsets, extents); // output candidate
// backward for unactivated update gate
diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu
index 50450b62f7b1c0b2b5abf01a43581a0e2d2cd01e..46e20285db6d7acd39dead3994409645adddf494 100644
--- a/paddle/fluid/operators/roi_pool_op.cu
+++ b/paddle/fluid/operators/roi_pool_op.cu
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
template <typename T>
__global__ void GPUROIPoolForward(
- const int nthreads, const T* input_data, const int64_t* input_rois,
+ const int nthreads, const T* input_data, const T* input_rois,
const float spatial_scale, const int channels, const int height,
const int width, const int pooled_height, const int pooled_width,
int* roi_batch_id_data, T* output_data, int64_t* argmax_data) {
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
int c = (i / pooled_width / pooled_height) % channels;
int n = i / pooled_width / pooled_height / channels;
- const int64_t* offset_input_rois = input_rois + n * kROISize;
+ const T* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = roi_batch_id_data[n];
int roi_start_w = round(offset_input_rois[0] * spatial_scale);
int roi_start_h = round(offset_input_rois[1] * spatial_scale);
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
template <typename T>
__global__ void GPUROIPoolBackward(
- const int nthreads, const int64_t* input_rois, const T* output_grad,
+ const int nthreads, const T* input_rois, const T* output_grad,
const int64_t* argmax_data, const int num_rois, const float spatial_scale,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, int* roi_batch_id_data,
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel {
GPUROIPoolForward<
T><<>>(
- output_size, in->data<T>(), rois->data<int64_t>(), spatial_scale,
- channels, height, width, pooled_height, pooled_width,
+ output_size, in->data<T>(), rois->data<T>(), spatial_scale, channels,
+ height, width, pooled_height, pooled_width,
roi_batch_id_list_gpu.data<int>(), out->mutable_data<T>(ctx.GetPlace()),
argmax->mutable_data<int64_t>(ctx.GetPlace()));
}
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel {
if (output_grad_size > 0) {
GPUROIPoolBackward<
T><<>>(
- output_grad_size, rois->data<int64_t>(), out_grad->data<T>(),
+ output_grad_size, rois->data<T>(), out_grad->data<T>(),
argmax->data<int64_t>(), rois_num, spatial_scale, channels, height,
width, pooled_height, pooled_width,
roi_batch_id_list_gpu.data<int>(),
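Since the ROI tensor now carries the input's float type T instead of int64_t, the kernels read possibly fractional box coordinates and map them onto the feature map by scaling with spatial_scale and rounding, mirroring the CPU kernel and the Python test. A small sketch of that coordinate mapping only (the max pooling over each bin is omitted):

```python
import numpy as np

spatial_scale = 1.0 / 16
roi = np.array([0, 7.5, 3.2, 64.0, 48.9], dtype='float32')  # [batch_id, x1, y1, x2, y2]

roi_batch_id = int(roi[0])
roi_start_w = int(round(roi[1] * spatial_scale))
roi_start_h = int(round(roi[2] * spatial_scale))
roi_end_w = int(round(roi[3] * spatial_scale))
roi_end_h = int(round(roi[4] * spatial_scale))

# Bins are at least one feature-map cell wide/tall.
roi_height = max(roi_end_h - roi_start_h + 1, 1)
roi_width = max(roi_end_w - roi_start_w + 1, 1)
```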
diff --git a/paddle/fluid/operators/roi_pool_op.h b/paddle/fluid/operators/roi_pool_op.h
index c4f739b2c6b2d62ebebcc15fd627ebad040e7b3f..07de7c9f0e070cef7c6f38f8d564ab76910842db 100644
--- a/paddle/fluid/operators/roi_pool_op.h
+++ b/paddle/fluid/operators/roi_pool_op.h
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel {
T* output_data = out->mutable_data<T>(ctx.GetPlace());
int64_t* argmax_data = argmax->mutable_data<int64_t>(ctx.GetPlace());
- const int64_t* rois_data = rois->data<int64_t>();
+ const T* rois_data = rois->data<T>();
for (int n = 0; n < rois_num; ++n) {
int roi_batch_id = roi_batch_id_data[n];
int roi_start_w = round(rois_data[0] * spatial_scale);
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel {
}
}
- const int64_t* rois_data = rois->data<int64_t>();
+ const T* rois_data = rois->data<T>();
const T* out_grad_data = out_grad->data();
const int64_t* argmax_data = argmax->data();
T* in_grad_data = in_grad->mutable_data(ctx.GetPlace());
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index 5757b2798e43dc70b406462a74b4f74eedcf56fa..1bc1dbbecaccd328d84cd3364a50c8f828d823c0 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
"""
helper = LayerHelper('rpn_target_assign', **locals())
- # 1. Compute the regression target bboxes
- target_bbox = box_coder(
- prior_box=anchor_box,
- prior_box_var=anchor_var,
- target_box=gt_box,
- code_type='encode_center_size',
- box_normalized=False)
- # 2. Compute overlaps between the prior boxes and the gt boxes overlaps
+ # Compute overlaps between the prior boxes and the gt boxes overlaps
iou = iou_similarity(x=gt_box, y=anchor_box)
- # 3. Assign target label to anchors
- loc_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
- score_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
- target_label = helper.create_tmp_variable(dtype=anchor_box.dtype)
+ # Assign target label to anchors
+ loc_index = helper.create_tmp_variable(dtype='int32')
+ score_index = helper.create_tmp_variable(dtype='int32')
+ target_label = helper.create_tmp_variable(dtype='int64')
+ target_bbox = helper.create_tmp_variable(dtype=anchor_box.dtype)
helper.append_op(
type="rpn_target_assign",
- inputs={'DistMat': iou},
+ inputs={'Anchor': anchor_box,
+ 'GtBox': gt_box,
+ 'DistMat': iou},
outputs={
'LocationIndex': loc_index,
'ScoreIndex': score_index,
- 'TargetLabel': target_label
+ 'TargetLabel': target_label,
+ 'TargetBBox': target_bbox,
},
attrs={
'rpn_batch_size_per_im': rpn_batch_size_per_im,
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
'fg_fraction': fg_fraction
})
- # 4. Reshape and gather the target entry
- scores = nn.reshape(x=scores, shape=(-1, 2))
- loc = nn.reshape(x=loc, shape=(-1, 4))
- target_label = nn.reshape(x=target_label, shape=(-1, 1))
- target_bbox = nn.reshape(x=target_bbox, shape=(-1, 4))
+ loc_index.stop_gradient = True
+ score_index.stop_gradient = True
+ target_label.stop_gradient = True
+ target_bbox.stop_gradient = True
+ scores = nn.reshape(x=scores, shape=(-1, 1))
+ loc = nn.reshape(x=loc, shape=(-1, 4))
predicted_scores = nn.gather(scores, score_index)
predicted_location = nn.gather(loc, loc_index)
- target_label = nn.gather(target_label, score_index)
- target_bbox = nn.gather(target_bbox, loc_index)
+
return predicted_scores, predicted_location, target_label, target_bbox
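After this change the layer leaves target encoding entirely to the op: TargetLabel and TargetBBox are returned as-is (with stop_gradient set), and only the predictions are reshaped and then gathered by the int32 index outputs. In plain numpy terms the gather step looks roughly like the following (toy shapes, purely illustrative; in the layer these are LoD tensors handled by nn.reshape and nn.gather):

```python
import numpy as np

scores = np.random.rand(2, 10, 1).astype('float32')   # e.g. [N, anchor_num, 1]
loc = np.random.rand(2, 10, 4).astype('float32')      # e.g. [N, anchor_num, 4]
score_index = np.array([0, 3, 12], dtype='int32')     # ScoreIndex from the op
loc_index = np.array([0, 3], dtype='int32')           # LocationIndex from the op

predicted_scores = scores.reshape(-1, 1)[score_index]  # [F + B, 1]
predicted_location = loc.reshape(-1, 4)[loc_index]     # [F, 4]
```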
diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py
index ec0bf3ff8d64345111537780aaa5367ed0e1f8ff..e2564763d19d180f7c6933429dddf58c77be7bb8 100644
--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
gt_box = layers.data(
name='gt_box', shape=[4], lod_level=1, dtype='float32')
- predicted_scores, predicted_location, target_label, target_bbox = layers.rpn_target_assign(
+ pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign(
loc=loc,
scores=scores,
anchor_box=anchor_box,
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3)
- self.assertIsNotNone(predicted_scores)
- self.assertIsNotNone(predicted_location)
- self.assertIsNotNone(target_label)
- self.assertIsNotNone(target_bbox)
- assert predicted_scores.shape[1] == 2
- assert predicted_location.shape[1] == 4
- assert predicted_location.shape[1] == target_bbox.shape[1]
-
- print(str(program))
+ self.assertIsNotNone(pred_scores)
+ self.assertIsNotNone(pred_loc)
+ self.assertIsNotNone(tgt_lbl)
+ self.assertIsNotNone(tgt_bbox)
+ assert pred_scores.shape[1] == 1
+ assert pred_loc.shape[1] == 4
+ assert pred_loc.shape[1] == tgt_bbox.shape[1]
class TestGenerateProposals(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
index 764f83b534c8a183dbf21511f0b05741c13c9528..36ebc8fb6ea9efdcd1807f5c8917ab1428b3381e 100644
--- a/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
@@ -37,7 +37,7 @@ def fusion_gru(
h0,
wh,
np.zeros(
- (1, wh.shape[1]), dtype='float64'),
+ (1, wh.shape[1]), dtype='float32'),
is_reverse,
act_state,
act_gate)
@@ -62,15 +62,15 @@ class TestFusionGRUOp(OpTest):
T = sum(self.lod[0])
N = len(self.lod[0])
- x = np.random.rand(T, self.M).astype('float64')
- wx = np.random.rand(self.M, 3 * self.D).astype('float64')
- wh = np.random.rand(self.D, 3 * self.D).astype('float64')
+ x = np.random.rand(T, self.M).astype('float32')
+ wx = np.random.rand(self.M, 3 * self.D).astype('float32')
+ wh = np.random.rand(self.D, 3 * self.D).astype('float32')
bias = np.random.rand(
- 1, 3 * self.D).astype('float64') if self.with_bias else np.zeros(
- (1, 3 * self.D), dtype='float64')
+ 1, 3 * self.D).astype('float32') if self.with_bias else np.zeros(
+ (1, 3 * self.D), dtype='float32')
h0 = np.random.rand(
- N, self.D).astype('float64') if self.with_h0 else np.zeros(
- (N, self.D), dtype='float64')
+ N, self.D).astype('float32') if self.with_h0 else np.zeros(
+ (N, self.D), dtype='float32')
_, _, _, hidden = fusion_gru(
x, self.lod, h0, wx, wh, bias, self.is_reverse,
@@ -93,7 +93,9 @@ class TestFusionGRUOp(OpTest):
}
def test_check_output(self):
- self.check_output(atol=1e-8)
+ for use_seq in {True, False}:
+ self.attrs['use_seq'] = use_seq
+ self.check_output()
class TestFusionGRUOpNoInitial(TestFusionGRUOp):
diff --git a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
index 5805bdf461998e90611dec05b079cd55feda520d..1f1eb37667e304351a6a85edde09e7da32cf1630 100644
--- a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
@@ -114,7 +114,9 @@ class TestFusionLSTMOp(OpTest):
}
def test_check_output(self):
- self.check_output()
+ for use_seq in {True, False}:
+ self.attrs['use_seq'] = use_seq
+ self.check_output()
class TestFusionLSTMOpInit(TestFusionLSTMOp):
diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
index ce766fffbce98a6a2cee4c508d6db85ee0163401..6dc101b6dad8813893c6a891da0e16f952bb4c2d 100644
--- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
+++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
- dw = (np.log(gt_w / ex_w)) / ex_w / weights[2]
- dh = (np.log(gt_h / ex_h)) / ex_h / weights[3]
+ dw = (np.log(gt_w / ex_w)) / weights[2]
+ dh = (np.log(gt_h / ex_h)) / weights[3]
targets = np.vstack([dx, dy, dw, dh]).transpose()
return targets
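The corrected helper matches the usual box-delta parameterisation: the centre offsets are normalised by the anchor width/height, while the log size ratios are scaled only by their weights. A minimal restatement working from precomputed centres and sizes (so it makes no assumption about the helper's width/height convention):

```python
import numpy as np

def encode_delta(ex_ctr, ex_size, gt_ctr, gt_size, weights=(1., 1., 1., 1.)):
    # ex_ctr/gt_ctr = (x, y) centres, ex_size/gt_size = (w, h).
    dx = (gt_ctr[0] - ex_ctr[0]) / ex_size[0] / weights[0]
    dy = (gt_ctr[1] - ex_ctr[1]) / ex_size[1] / weights[1]
    dw = np.log(gt_size[0] / ex_size[0]) / weights[2]  # only the weight scales the log term
    dh = np.log(gt_size[1] / ex_size[1]) / weights[3]
    return np.array([dx, dy, dw, dh])
```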
diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
index ed7f467835f32242a9650f226b4a5ad9d6d87af4..ad4cd2e803bfae4c3fbc04503331b9a786b25d17 100644
--- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
+++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
for i in range(self.rois_num):
roi = self.rois[i]
- roi_batch_id = roi[0]
+ roi_batch_id = int(roi[0])
roi_start_w = int(cpt.round(roi[1] * self.spatial_scale))
roi_start_h = int(cpt.round(roi[2] * self.spatial_scale))
roi_end_w = int(cpt.round(roi[3] * self.spatial_scale))
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
roi = [bno, x1, y1, x2, y2]
rois.append(roi)
self.rois_num = len(rois)
- self.rois = np.array(rois).astype("int64")
+ self.rois = np.array(rois).astype("float32")
def setUp(self):
self.op_type = "roi_pool"
diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
index 08c462d9036cacab81dab7c9ea16664c9159479f..bd548009b3ada9512e4b5f7d7b61b67b0717a39b 100644
--- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
@@ -18,12 +18,17 @@ import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
+from test_anchor_generator_op import anchor_generator_in_python
+from test_generate_proposal_labels import _generate_groundtruth
+from test_generate_proposal_labels import _bbox_overlaps, _box_to_delta
-def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
- rpn_negative_overlap, fg_fraction):
- iou = np.transpose(iou)
+def rpn_target_assign(gt_anchor_iou, rpn_batch_size_per_im,
+ rpn_positive_overlap, rpn_negative_overlap, fg_fraction):
+ iou = np.transpose(gt_anchor_iou)
anchor_to_gt_max = iou.max(axis=1)
+ anchor_to_gt_argmax = iou.argmax(axis=1)
+
gt_to_anchor_argmax = iou.argmax(axis=0)
gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])]
anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0]
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1)
bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
+ tgt_lbl[bg_inds] = 0
if len(bg_inds) > num_bg:
enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
tgt_lbl[enable_inds] = 0
bg_inds = np.where(tgt_lbl == 0)[0]
+ tgt_lbl[bg_inds] = 0
loc_index = fg_inds
score_index = np.hstack((fg_inds, bg_inds))
tgt_lbl = np.expand_dims(tgt_lbl, axis=1)
- return loc_index, score_index, tgt_lbl
+
+ gt_inds = anchor_to_gt_argmax[fg_inds]
+
+ return loc_index, score_index, tgt_lbl, gt_inds
+
+
+def get_anchor(n, c, h, w):
+ input_feat = np.random.random((n, c, h, w)).astype('float32')
+ anchors, _ = anchor_generator_in_python(
+ input_feat=input_feat,
+ anchor_sizes=[32., 64.],
+ aspect_ratios=[0.5, 1.0],
+ variances=[1.0, 1.0, 1.0, 1.0],
+ stride=[16.0, 16.0],
+ offset=0.5)
+ return anchors
+
+
+def rpn_blob(anchor, gt_boxes, iou, lod, rpn_batch_size_per_im,
+ rpn_positive_overlap, rpn_negative_overlap, fg_fraction):
+
+ loc_indexes = []
+ score_indexes = []
+ tmp_tgt_labels = []
+ tgt_bboxes = []
+ anchor_num = anchor.shape[0]
+
+ batch_size = len(lod) - 1
+ for i in range(batch_size):
+ b, e = lod[i], lod[i + 1]
+ iou_slice = iou[b:e, :]
+ bboxes_slice = gt_boxes[b:e, :]
+
+ loc_idx, score_idx, tgt_lbl, gt_inds = rpn_target_assign(
+ iou_slice, rpn_batch_size_per_im, rpn_positive_overlap,
+ rpn_negative_overlap, fg_fraction)
+
+ fg_bboxes = bboxes_slice[gt_inds]
+ fg_anchors = anchor[loc_idx]
+ box_deltas = _box_to_delta(fg_anchors, fg_bboxes, [1., 1., 1., 1.])
+
+ if i == 0:
+ loc_indexes = loc_idx
+ score_indexes = score_idx
+ tmp_tgt_labels = tgt_lbl
+ tgt_bboxes = box_deltas
+ else:
+ loc_indexes = np.concatenate(
+ [loc_indexes, loc_idx + i * anchor_num])
+ score_indexes = np.concatenate(
+ [score_indexes, score_idx + i * anchor_num])
+ tmp_tgt_labels = np.concatenate([tmp_tgt_labels, tgt_lbl])
+ tgt_bboxes = np.vstack([tgt_bboxes, box_deltas])
+
+ tgt_labels = tmp_tgt_labels[score_indexes]
+ return loc_indexes, score_indexes, tgt_bboxes, tgt_labels
class TestRpnTargetAssignOp(OpTest):
def setUp(self):
- iou = np.random.random((10, 8)).astype("float32")
- self.op_type = "rpn_target_assign"
- self.inputs = {'DistMat': iou}
- self.attrs = {
- 'rpn_batch_size_per_im': 256,
- 'rpn_positive_overlap': 0.95,
- 'rpn_negative_overlap': 0.3,
- 'fg_fraction': 0.25,
- 'fix_seed': True
- }
- loc_index, score_index, tgt_lbl = rpn_target_assign(iou, 256, 0.95, 0.3,
- 0.25)
- self.outputs = {
- 'LocationIndex': loc_index,
- 'ScoreIndex': score_index,
- 'TargetLabel': tgt_lbl,
- }
+ n, c, h, w = 2, 4, 14, 14
+ anchor = get_anchor(n, c, h, w)
+ gt_num = 10
+ anchor = anchor.reshape(-1, 4)
+ anchor_num = anchor.shape[0]
- def test_check_output(self):
- self.check_output()
+ im_shapes = [[64, 64], [64, 64]]
+ gt_box, lod = _generate_groundtruth(im_shapes, 3, 4)
+ bbox = np.vstack([v['boxes'] for v in gt_box])
+ iou = _bbox_overlaps(bbox, anchor)
+
+ anchor = anchor.astype('float32')
+ bbox = bbox.astype('float32')
+ iou = iou.astype('float32')
+
+ loc_index, score_index, tgt_bbox, tgt_lbl = rpn_blob(
+ anchor, bbox, iou, [0, 4, 8], 25600, 0.95, 0.03, 0.25)
-class TestRpnTargetAssignOp2(OpTest):
- def setUp(self):
- iou = np.random.random((10, 20)).astype("float32")
self.op_type = "rpn_target_assign"
- self.inputs = {'DistMat': iou}
+ self.inputs = {
+ 'Anchor': anchor,
+ 'GtBox': (bbox, [[4, 4]]),
+ 'DistMat': (iou, [[4, 4]]),
+ }
self.attrs = {
- 'rpn_batch_size_per_im': 128,
- 'rpn_positive_overlap': 0.5,
- 'rpn_negative_overlap': 0.5,
- 'fg_fraction': 0.5,
+ 'rpn_batch_size_per_im': 25600,
+ 'rpn_positive_overlap': 0.95,
+ 'rpn_negative_overlap': 0.03,
+ 'fg_fraction': 0.25,
'fix_seed': True
}
- loc_index, score_index, tgt_lbl = rpn_target_assign(iou, 128, 0.5, 0.5,
- 0.5)
self.outputs = {
- 'LocationIndex': loc_index,
- 'ScoreIndex': score_index,
- 'TargetLabel': tgt_lbl,
+ 'LocationIndex': loc_index.astype('int32'),
+ 'ScoreIndex': score_index.astype('int32'),
+ 'TargetBBox': tgt_bbox.astype('float32'),
+ 'TargetLabel': tgt_lbl.astype('int64'),
}
def test_check_output(self):