Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
db5e3dd7
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
db5e3dd7
编写于
9月 04, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_CudnnHolder_bug
上级
7b577b92
90b5be85
变更
38
隐藏空白更改
内联
并排
Showing
38 changed file
with
558 addition
and
445 deletion
+558
-445
Dockerfile
Dockerfile
+1
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+7
-9
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
...s/beginners_guide/basics/machine_translation/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
.../beginners_guide/basics/understand_sentiment/README.cn.md
+2
-0
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
...uid/new_docs/beginners_guide/basics/word2vec/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
...beginners_guide/quick_start/recognize_digits/README.cn.md
+1
-1
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+23
-8
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+4
-8
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+30
-88
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+5
-9
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+14
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+6
-0
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
+3
-2
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
+11
-12
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+7
-11
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+3
-2
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+4
-16
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+15
-10
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+1
-7
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+1
-4
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-4
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+1
-0
paddle/fluid/inference/paddle_fluid.map
paddle/fluid/inference/paddle_fluid.map
+1
-0
paddle/fluid/operators/detection/bbox_util.h
paddle/fluid/operators/detection/bbox_util.h
+66
-0
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+8
-31
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+2
-3
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+179
-112
paddle/fluid/operators/gru_unit_op.h
paddle/fluid/operators/gru_unit_op.h
+8
-8
paddle/fluid/operators/roi_pool_op.cu
paddle/fluid/operators/roi_pool_op.cu
+6
-6
paddle/fluid/operators/roi_pool_op.h
paddle/fluid/operators/roi_pool_op.h
+2
-2
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+18
-21
python/paddle/fluid/tests/test_detection.py
python/paddle/fluid/tests/test_detection.py
+8
-10
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
+11
-9
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
+3
-1
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
...le/fluid/tests/unittests/test_generate_proposal_labels.py
+2
-2
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
...paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
+95
-36
未找到文件。
Dockerfile
浏览文件 @
db5e3dd7
...
...
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
RUN
wget
-qO-
http://paddlepaddledeps.
bj
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
RUN
wget
-qO-
http://paddlepaddledeps.
cdn
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
tar
-xz
-C
/usr/local
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
cp
-rf
/usr/local/TensorRT/lib /usr
...
...
cmake/inference_lib.cmake
浏览文件 @
db5e3dd7
...
...
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/paddle/fluid"
)
set
(
module
"framework"
)
if
(
NOT WIN32
)
copy
(
framework_lib DEPS framework_py_proto
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
)
else
()
copy
(
framework_lib
set
(
framework_lib_deps framework_py_proto
)
endif
(
NOT WIN32
)
copy
(
framework_lib DEPS
${
framework_lib_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
src_dir
}
/
${
module
}
/ir/*.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/ir
)
endif
(
NOT WIN32
)
set
(
module
"memory"
)
copy
(
memory_lib
...
...
@@ -161,7 +158,8 @@ set(module "inference")
copy
(
inference_lib DEPS
${
inference_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
src_dir
}
/
${
module
}
/api/paddle_inference_api.h
${
src_dir
}
/
${
module
}
/api/demo_ci
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
)
set
(
module
"platform"
)
...
...
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
浏览文件 @
db5e3dd7
...
...
@@ -60,6 +60,7 @@
图3. 编码器-解码器框架
</div>
<a
name=
"编码器"
></a>
#### 编码器
编码阶段分为三步:
...
...
@@ -81,7 +82,7 @@
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
1.
每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)
`$c$`
、真实目标语言序列的第
`$i$`
个词
`$u_i$`
和
`$i$`
时刻RNN的隐层状态
`$z_i$`
,计算出下一个隐层状态
`$z_{i+1}$`
。计算公式如下:
$$z_{i+1}=
\p
hi_{
\t
heta '}
\l
eft ( c,u_i,z_i
\r
ight )$$
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
[
注意力机制
](
#注意力机制
)
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
注意力机制
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
2.
将
`$z_{i+1}$`
通过
`softmax`
归一化,得到目标语言序列的第
`$i+1$`
个单词的概率分布
`$p_{i+1}$`
。概率分布公式如下:
$$p
\l
eft ( u_{i+1}|u_{
<
i+1},
\m
athbf{x}
\r
ight )=softmax(W_sz_{i+1}+b_z)$$
...
...
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见
[
柱搜索算法
](
#柱搜索算法
)
。
<a
name=
"柱搜索算法"
></a>
### 柱搜索算法
柱搜索(
[
beam search
](
http://en.wikipedia.org/wiki/Beam_search
)
)是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“
`<s>你好<e>`
”,就算目标语言字典中只有3个词(
`<s>`
,
`<e>`
,
`hello`
),也可能生成无限句话(
`hello`
循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
...
...
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
浏览文件 @
db5e3dd7
...
...
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
网络的输入
`input_dim`
表示的是词典的大小,
`class_dim`
表示类别数。这里,我们使用
[
`sequence_conv_pool`
](
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py
)
API实现了卷积和池化操作。
<a
name=
"栈值双向LSTM"
></a>
### 栈式双向LSTM
栈式双向神经网络
`stacked_lstm_net`
的代码片段如下:
...
...
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
浏览文件 @
db5e3dd7
...
...
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
```
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
应用模型
](
#应用模型
)
中详细描述用法。
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
模型应用
](
#模型应用
)
中详细描述用法。
## 模型概览
...
...
@@ -189,6 +189,7 @@ dream that one day <e>
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
<a
name=
"训练模型"
></a>
## 编程实现
本配置的模型结构如下图所示:
...
...
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
```
<a
name=
"模型应用"
></a>
## 模型应用
在模型训练后,我们可以用它做一些预测。
...
...
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
浏览文件 @
db5e3dd7
...
...
@@ -102,7 +102,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图6所示。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md
)
教程。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/tree/develop/03.image_classification
)
教程。
### 常见激活函数介绍
-
sigmoid激活函数: $ f(x) = sigmoid(x) =
\f
rac{1}{1+e^{-x}} $
...
...
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
浏览文件 @
db5e3dd7
...
...
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
pip install --upgrade dist/visualdl-
*
.whl
```
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
how_to_dev_frontend_e
n.md)
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
develop/how_to_dev_frontend_c
n.md)
## SDK
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
db5e3dd7
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
function
(
pass_library TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
graph SRCS graph.cc DEPS node
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
cc_library
(
attention_lstm_fuse_pass SRCS attention_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
cc_library
(
infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass
)
cc_library
(
fc_lstm_fuse_pass SRCS fc_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
cc_library
(
seq_concat_fc_fuse_pass SRCS seq_concat_fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
graph_to_program_pass
)
pass_library
(
graph_viz_pass
)
pass_library
(
fc_fuse_pass
)
pass_library
(
attention_lstm_fuse_pass
)
pass_library
(
infer_clean_graph_pass
)
pass_library
(
fc_lstm_fuse_pass
)
pass_library
(
seq_concat_fc_fuse_pass
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass
graph_pattern_detector graph pass graph_traits
framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
db5e3dd7
...
...
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
#define LINK_TO(node0, node1) \
node0->outputs.push_back(node1); \
node1->inputs.push_back(node0);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
PrepareParameters
(
graph
,
param
);
LINK_TO
(
X
,
lstm_op
);
LINK_TO
(
cell_init
,
lstm_op
);
LINK_TO
(
hidden_init
,
lstm_op
);
LINK_TO
(
lstm_op
,
LSTMOUT
);
IR_NODE_
LINK_TO
(
X
,
lstm_op
);
IR_NODE_
LINK_TO
(
cell_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
hidden_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
lstm_op
,
LSTMOUT
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
...
...
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
db5e3dd7
...
...
@@ -21,74 +21,26 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
bool
VarOutLinksToOp
(
Node
*
node
,
const
std
::
string
&
op_type
)
{
for
(
auto
*
out
:
node
->
outputs
)
{
if
(
out
->
IsOp
()
&&
out
->
Op
()
->
Type
()
==
op_type
)
{
return
true
;
}
}
return
false
;
}
void
BuildFCPattern
(
PDPattern
*
pattern
)
{
// Create Operators
auto
*
mul_op
=
pattern
->
NewNode
(
"mul"
)
->
assert_is_op
(
"mul"
);
auto
*
elementwise_add_op
=
pattern
->
NewNode
(
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
// Create variables
// w
auto
*
mul_weight_var
=
pattern
->
NewNode
(
"mul_weight"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"Y"
,
0
);
// x
auto
*
mul_tmp_var
=
pattern
->
NewNode
(
"mul_tmp_var"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"X"
,
0
);
// intermediate variable, will be removed in the IR after fuse.
auto
*
mul_out_var
=
pattern
->
NewNode
(
"mul_out"
)
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"mul"
)
->
assert_is_op_input
(
"elementwise_add"
);
// bias
auto
*
elementwise_add_tmp_var
=
pattern
->
NewNode
(
"elementwise_add_tmpvar"
)
->
assert_is_op_input
(
"elementwise_add"
)
->
AsInput
();
// output
auto
*
elementwise_add_out_var
=
pattern
->
NewNode
(
"elementwise_add_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"elementwise_add"
);
mul_op
->
LinksFrom
({
mul_weight_var
,
mul_tmp_var
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
elementwise_add_tmp_var
})
.
LinksTo
({
elementwise_add_out_var
});
}
// Replace the node `from` in the links to `to`
bool
LinksReplace
(
std
::
vector
<
Node
*>*
links
,
Node
*
from
,
Node
*
to
)
{
for
(
auto
*&
n
:
*
links
)
{
if
(
n
==
from
)
{
n
=
to
;
return
true
;
}
}
return
false
;
}
std
::
unique_ptr
<
ir
::
Graph
>
FCFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
"fc"
,
graph
.
get
());
FusePassBase
::
Init
(
"fc
_fuse
"
,
graph
.
get
());
std
::
unordered_set
<
Node
*>
nodes2delete
;
GraphPatternDetector
gpd
;
BuildFCPattern
(
gpd
.
mutable_pattern
());
#define GET_NODE(id) \
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(#id)), \
"pattern has no Node called %s", #id); \
auto* id = subgraph.at(gpd.pattern().RetrieveNode(#id)); \
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
// BuildFCPattern(gpd.mutable_pattern());
auto
*
x
=
gpd
.
mutable_pattern
()
->
NewNode
(
"fc_fuse/x"
)
->
AsInput
()
->
assert_is_op_input
(
"mul"
,
"X"
);
patterns
::
FC
(
gpd
.
mutable_pattern
(),
"fc_fuse"
,
x
,
true
/*with bias*/
);
#define GET_NODE(id) \
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode("fc_fuse/" #id)), \
"pattern has no Node called %s", #id); \
auto* id = subgraph.at(gpd.pattern().RetrieveNode("fc_fuse/" #id)); \
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", "fc_fuse/" #id);
int
found_fc_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
...
...
@@ -98,43 +50,33 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
// scenerio.
// FC's fusion is simple, just op fuse, no need to process the
// parameters.
GET_NODE
(
mul_tmp_var
);
// x
GET_NODE
(
mul_weight
);
// Y
GET_NODE
(
elementwise_add_tmpvar
);
// bias
GET_NODE
(
elementwise_add_out
);
// Out
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
mul_out
);
// tmp
GET_NODE
(
x
);
// x
GET_NODE
(
w
);
// Y
GET_NODE
(
fc_bias
);
// bias
GET_NODE
(
fc_out
);
// Out
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
mul_out
);
// tmp
#undef GET_NODE
// Create an FC Node.
OpDesc
desc
;
std
::
string
fc_x_in
=
mul_tmp_var
->
Name
();
std
::
string
fc_Y_in
=
mul_weight
->
Name
();
std
::
string
fc_bias_in
=
elementwise_add_tmpvar
->
Name
();
std
::
string
fc_out
=
elementwise_add
_out
->
Name
();
std
::
string
fc_x_in
=
x
->
Name
();
std
::
string
fc_Y_in
=
w
->
Name
();
std
::
string
fc_bias_in
=
fc_bias
->
Name
();
std
::
string
fc_out
_out
=
fc
_out
->
Name
();
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
fc_x_in
}));
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
_out
}));
desc
.
SetType
(
"fc"
);
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
fc_node
->
inputs
=
std
::
vector
<
Node
*>
({
mul_tmp_var
,
mul_weight
,
elementwise_add_tmpvar
});
fc_node
->
outputs
.
push_back
(
elementwise_add_out
);
// Update link relatons
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_tmp_var
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_weight
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_tmpvar
->
outputs
,
elementwise_add
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_out
->
inputs
,
elementwise_add
,
fc_node
));
GraphSafeRemoveNodes
(
graph
.
get
(),
{
mul
,
elementwise_add
,
mul_out
});
// Drop old nodes
graph
->
RemoveNode
(
mul
);
graph
->
RemoveNode
(
elementwise_add
);
graph
->
RemoveNode
(
mul_out
);
// tmp variable
IR_NODE_LINK_TO
(
x
,
fc_node
);
IR_NODE_LINK_TO
(
w
,
fc_node
);
IR_NODE_LINK_TO
(
fc_bias
,
fc_node
);
IR_NODE_LINK_TO
(
fc_node
,
fc_out
);
found_fc_count
++
;
};
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
db5e3dd7
...
...
@@ -121,15 +121,11 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
#undef TMP_NEW
#undef TMP_NAME
#define LINK_TO(a, b) \
a->outputs.push_back(b); \
b->inputs.push_back(a);
LINK_TO
(
input_n
,
op
);
LINK_TO
(
weight_x_n
,
op
);
LINK_TO
(
weight_h_n
,
op
);
LINK_TO
(
bias_n
,
op
);
LINK_TO
(
op
,
hidden_n
);
#undef LINK_TO
IR_NODE_LINK_TO
(
input_n
,
op
);
IR_NODE_LINK_TO
(
weight_x_n
,
op
);
IR_NODE_LINK_TO
(
weight_h_n
,
op
);
IR_NODE_LINK_TO
(
bias_n
,
op
);
IR_NODE_LINK_TO
(
op
,
hidden_n
);
return
op
;
};
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
db5e3dd7
...
...
@@ -111,6 +111,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
return
false
;
}
}
for
(
auto
&
item
:
pdnodes2nodes_
)
{
for
(
auto
&
n
:
item
.
second
)
{
GetMarkedNodes
(
const_cast
<
Graph
*>
(
&
graph
)).
insert
(
n
);
}
}
VLOG
(
3
)
<<
pdnodes2nodes_
.
size
()
<<
" nodes marked"
;
return
!
pdnodes2nodes_
.
empty
();
...
...
@@ -278,7 +283,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
bool
valid
=
true
;
for
(
auto
&
item
:
subgraph
)
{
if
(
node_set
.
count
(
item
.
second
))
{
if
(
item
.
first
->
IsIntermediate
()
&&
node_set
.
count
(
item
.
second
))
{
valid
=
false
;
break
;
}
...
...
@@ -334,22 +339,22 @@ PDNode& PDNode::LinksFrom(const std::vector<PDNode*>& others) {
}
PDNode
*
PDNode
::
assert_is_op
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
return
this
;
}
PDNode
*
PDNode
::
assert_is_op
(
const
std
::
string
&
op_type
)
{
asserts_
.
emplace_back
([
this
,
op_type
](
Node
*
x
)
{
asserts_
.
emplace_back
([
op_type
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
()
&&
x
->
Op
()
->
Type
()
==
op_type
;
});
return
this
;
}
PDNode
*
PDNode
::
assert_is_var
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
return
this
;
}
PDNode
*
PDNode
::
assert_var_not_persistable
()
{
assert_is_var
();
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
return
this
;
}
PDNode
*
PDNode
::
assert_is_persistable_var
()
{
...
...
@@ -491,14 +496,16 @@ void GraphSafeRemoveNodes(Graph* graph,
for
(
auto
it
=
node
->
inputs
.
begin
();
it
!=
node
->
inputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
inputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
}
}
for
(
auto
it
=
node
->
outputs
.
begin
();
it
!=
node
->
outputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
outputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
}
}
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
db5e3dd7
...
...
@@ -245,6 +245,8 @@ class GraphPatternDetector {
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
// The intermediate PDNodes will be removed, so can't shared by multiple
// patterns.
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Validate whether the intermediate nodes are linked by external nodes.
...
...
@@ -295,6 +297,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
}
// namespace patterns
#define IR_NODE_LINK_TO(a, b) \
a->outputs.push_back(b); \
b->inputs.push_back(a);
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
浏览文件 @
db5e3dd7
...
...
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
},
"OP0"
);
auto
*
any_var
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
);
auto
*
any_var
=
x
.
mutable_pattern
()
->
NewNode
([](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
)
->
AsIntermediate
();
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
...
...
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
浏览文件 @
db5e3dd7
...
...
@@ -13,42 +13,41 @@
// limitations under the License.
#include <algorithm>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
graph_pattern_detector
.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
InferCleanGraphPass
:
public
Pass
{
class
InferCleanGraphPass
:
public
FusePassBase
{
public:
virtual
~
InferCleanGraphPass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
"original_graph"
,
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
auto
is_valid_node
=
[](
Node
*
x
)
{
return
x
&&
IsControlDepVar
(
*
x
)
&&
x
->
IsVar
()
&&
!
x
->
Var
();
};
std
::
unordered_set
<
Node
*>
invalid_nodes
;
std
::
unordered_set
<
const
Node
*>
invalid_nodes
;
int
valid_op
=
0
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
is_valid_node
(
node
))
{
invalid_nodes
.
insert
(
node
);
}
else
if
(
node
->
IsOp
())
{
// Collect all the operators to help tracking number of operators.
++
valid_op
;
}
}
// remove nodes from the graph.
for
(
auto
*
node
:
invalid_nodes
)
{
graph
->
RemoveNode
(
node
);
}
GraphSafeRemoveNodes
(
graph
.
get
(),
invalid_nodes
);
// clean edges.
for
(
auto
*
node
:
graph
->
Nodes
())
{
CleanEdges
(
&
node
->
inputs
,
invalid_nodes
);
CleanEdges
(
&
node
->
outputs
,
invalid_nodes
);
}
AddStatis
(
valid_op
);
return
graph
;
}
...
...
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
浏览文件 @
db5e3dd7
...
...
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
op_desc
.
SetAttr
(
"fc_activation"
,
act
->
Op
()
->
Type
());
auto
*
op_node
=
graph
->
CreateOpNode
(
&
op_desc
);
// Add links
#define NODE_LINKS(a, b) \
a->outputs.push_back(b); \
b->inputs.push_back(a);
NODE_LINKS
(
fc_w
,
op_node
);
NODE_LINKS
(
fc_bias
,
op_node
);
NODE_LINKS
(
concat_in0
,
op_node
);
NODE_LINKS
(
sequence_expand0_in
,
op_node
);
NODE_LINKS
(
sequence_expand1_in
,
op_node
);
NODE_LINKS
(
op_node
,
fc_out
);
// Add links
IR_NODE_LINK_TO
(
fc_w
,
op_node
);
IR_NODE_LINK_TO
(
fc_bias
,
op_node
);
IR_NODE_LINK_TO
(
concat_in0
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand0_in
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand1_in
,
op_node
);
IR_NODE_LINK_TO
(
op_node
,
fc_out
);
// Clean nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
...
...
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
marked_nodes
.
erase
(
sequence_expand0_in
);
marked_nodes
.
erase
(
sequence_expand1_in
);
marked_nodes
.
erase
(
fc_out
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
});
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
db5e3dd7
...
...
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library
(
paddle_fluid_api
SRCS io.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
graph_to_program_pass
)
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
get_property
(
fluid_modules GLOBAL PROPERTY FLUID_MODULES
)
...
...
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
#endif()
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
analysis_predictor
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
...
...
@@ -32,6 +32,7 @@ endif()
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
DEPS
${
fluid_modules
}
paddle_fluid_api
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
db5e3dd7
...
...
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
endif
()
cc_test
(
${
TARGET
}
SRCS
"
${
analysis_test_SRCS
}
"
DEPS analysis
graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detector pass
${
analysis_test_EXTRA_DEPS
}
DEPS analysis
pass
${
GLOB_PASS_LIB
}
${
analysis_test_EXTRA_DEPS
}
ARGS --inference_model_dir=
${
PYTHON_TESTS_DIR
}
/book/word2vec.inference.model
${
mem_opt
}
${
analysis_test_ARGS
}
)
set_tests_properties
(
${
TARGET
}
PROPERTIES DEPENDS test_word2vec
)
endif
(
WITH_TESTING
)
...
...
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
endif
()
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
analysis_predictor
# ir
fc_fuse_pass
fc_lstm_fuse_pass
seq_concat_fc_fuse_pass
graph_viz_pass
infer_clean_graph_pass
graph_pattern_detector
infer_clean_graph_pass
attention_lstm_fuse_pass
paddle_inference_api
pass
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
ARGS --infer_ditu_rnn_model=
${
DITU_INSTALL_DIR
}
/model
--infer_ditu_rnn_data=
${
DITU_INSTALL_DIR
}
/data.txt
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
EXTRA_DEPS paddle_inference_api
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
EXTRA_DEPS paddle_fluid
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
db5e3dd7
...
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -327,9 +328,20 @@ void TestDituRNNPrediction(const std::string &model_path,
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
1
);
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exists.
}
}
...
...
@@ -357,10 +369,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
浏览文件 @
db5e3dd7
...
...
@@ -16,6 +16,7 @@
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_fuse_pass
);
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
db5e3dd7
...
...
@@ -18,10 +18,7 @@ if(APPLE)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
graph_viz_pass fc_fuse_pass
infer_clean_graph_pass
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
${
GLOB_PASS_LIB
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
db5e3dd7
...
...
@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace
paddle
{
...
...
@@ -133,7 +134,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
paddle/fluid/inference/api/helper.h
浏览文件 @
db5e3dd7
...
...
@@ -16,6 +16,7 @@
#include <sys/time.h>
#include <algorithm>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
...
...
paddle/fluid/inference/paddle_fluid.map
浏览文件 @
db5e3dd7
{
global:
*paddle*;
*Pass*;
local:
*;
};
paddle/fluid/operators/detection/bbox_util.h
0 → 100644
浏览文件 @
db5e3dd7
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
namespace
paddle
{
namespace
operators
{
/*
* transform that computes target bounding-box regression deltas
* given proposal boxes and ground-truth boxes.
*/
template
<
typename
T
>
inline
void
BoxToDelta
(
const
int
box_num
,
const
framework
::
Tensor
&
ex_boxes
,
const
framework
::
Tensor
&
gt_boxes
,
const
T
*
weights
,
const
bool
normalized
,
framework
::
Tensor
*
box_delta
)
{
auto
ex_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
ex_boxes
);
auto
gt_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
gt_boxes
);
auto
trg
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
*
box_delta
);
T
ex_w
,
ex_h
,
ex_ctr_x
,
ex_ctr_y
,
gt_w
,
gt_h
,
gt_ctr_x
,
gt_ctr_y
;
for
(
int64_t
i
=
0
;
i
<
box_num
;
++
i
)
{
ex_w
=
ex_boxes_et
(
i
,
2
)
-
ex_boxes_et
(
i
,
0
)
+
(
normalized
==
false
);
ex_h
=
ex_boxes_et
(
i
,
3
)
-
ex_boxes_et
(
i
,
1
)
+
(
normalized
==
false
);
ex_ctr_x
=
ex_boxes_et
(
i
,
0
)
+
0.5
*
ex_w
;
ex_ctr_y
=
ex_boxes_et
(
i
,
1
)
+
0.5
*
ex_h
;
gt_w
=
gt_boxes_et
(
i
,
2
)
-
gt_boxes_et
(
i
,
0
)
+
(
normalized
==
false
);
gt_h
=
gt_boxes_et
(
i
,
3
)
-
gt_boxes_et
(
i
,
1
)
+
(
normalized
==
false
);
gt_ctr_x
=
gt_boxes_et
(
i
,
0
)
+
0.5
*
gt_w
;
gt_ctr_y
=
gt_boxes_et
(
i
,
1
)
+
0.5
*
gt_h
;
trg
(
i
,
0
)
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
;
trg
(
i
,
1
)
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
;
trg
(
i
,
2
)
=
std
::
log
(
gt_w
/
ex_w
);
trg
(
i
,
3
)
=
std
::
log
(
gt_h
/
ex_h
);
if
(
weights
)
{
trg
(
i
,
0
)
=
trg
(
i
,
0
)
/
weights
[
0
];
trg
(
i
,
1
)
=
trg
(
i
,
1
)
/
weights
[
1
];
trg
(
i
,
2
)
=
trg
(
i
,
2
)
/
weights
[
2
];
trg
(
i
,
3
)
=
trg
(
i
,
3
)
/
weights
[
3
];
}
}
}
template
<
typename
T
>
void
Gather
(
const
T
*
in
,
const
int
in_stride
,
const
int
*
index
,
const
int
num
,
T
*
out
)
{
const
int
stride_bytes
=
in_stride
*
sizeof
(
T
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
id
=
index
[
i
];
memcpy
(
out
+
i
*
in_stride
,
in
+
id
*
in_stride
,
stride_bytes
);
}
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
db5e3dd7
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/math_function.h"
...
...
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
}
}
template
<
typename
T
>
void
BoxToDelta
(
int
box_num
,
const
Tensor
&
ex_boxes
,
const
Tensor
&
gt_boxes
,
const
std
::
vector
<
float
>&
weights
,
Tensor
*
box_delta
)
{
auto
ex_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
ex_boxes
);
auto
gt_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
gt_boxes
);
auto
box_delta_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
*
box_delta
);
T
ex_w
,
ex_h
,
ex_ctr_x
,
ex_ctr_y
,
gt_w
,
gt_h
,
gt_ctr_x
,
gt_ctr_y
;
for
(
int64_t
i
=
0
;
i
<
box_num
;
++
i
)
{
ex_w
=
ex_boxes_et
(
i
,
2
)
-
ex_boxes_et
(
i
,
0
)
+
1
;
ex_h
=
ex_boxes_et
(
i
,
3
)
-
ex_boxes_et
(
i
,
1
)
+
1
;
ex_ctr_x
=
ex_boxes_et
(
i
,
0
)
+
0.5
*
ex_w
;
ex_ctr_y
=
ex_boxes_et
(
i
,
1
)
+
0.5
*
ex_h
;
gt_w
=
gt_boxes_et
(
i
,
2
)
-
gt_boxes_et
(
i
,
0
)
+
1
;
gt_h
=
gt_boxes_et
(
i
,
3
)
-
gt_boxes_et
(
i
,
1
)
+
1
;
gt_ctr_x
=
gt_boxes_et
(
i
,
0
)
+
0.5
*
gt_w
;
gt_ctr_y
=
gt_boxes_et
(
i
,
1
)
+
0.5
*
gt_h
;
box_delta_et
(
i
,
0
)
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
];
box_delta_et
(
i
,
1
)
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
];
box_delta_et
(
i
,
2
)
=
log
(
gt_w
/
ex_w
)
/
ex_w
/
weights
[
2
];
box_delta_et
(
i
,
3
)
=
log
(
gt_h
/
ex_h
)
/
ex_h
/
weights
[
3
];
}
}
template
<
typename
T
>
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
platform
::
CPUDeviceContext
&
context
,
Tensor
*
iou
,
...
...
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
Tensor
*
sampled_labels
,
Tensor
*
sampled_gts
)
{
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
gt_num
=
fg_num
+
bg_num
;
Tensor
fg_inds_t
,
bg_inds_t
,
gt_box_inds_t
,
gt_label_inds_t
;
int
*
fg_inds_data
=
fg_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
int
*
bg_inds_data
=
bg_inds_t
.
mutable_data
<
int
>
({
bg_num
},
context
.
GetPlace
());
int
*
gt_box_inds_data
=
gt_box_inds_t
.
mutable_data
<
int
>
({
gt
_num
},
context
.
GetPlace
());
gt_box_inds_t
.
mutable_data
<
int
>
({
fg
_num
},
context
.
GetPlace
());
int
*
gt_label_inds_data
=
gt_label_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
std
::
copy
(
fg_inds
.
begin
(),
fg_inds
.
end
(),
fg_inds_data
);
...
...
@@ -303,18 +278,20 @@ std::vector<Tensor> SampleRoisForOneImage(
// Gather boxes and labels
Tensor
sampled_boxes
,
sampled_labels
,
sampled_gts
;
int
boxes_num
=
fg_inds
.
size
()
+
bg_inds
.
size
();
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
boxes_num
=
fg_num
+
bg_num
;
framework
::
DDim
bbox_dim
({
boxes_num
,
kBoxDim
});
sampled_boxes
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_labels
.
mutable_data
<
int
>
({
boxes_num
},
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
{
fg_num
,
kBoxDim
}
,
context
.
GetPlace
());
GatherBoxesLabels
<
T
>
(
context
,
boxes
,
*
gt_boxes
,
*
gt_classes
,
fg_inds
,
bg_inds
,
gt_inds
,
&
sampled_boxes
,
&
sampled_labels
,
&
sampled_gts
);
// Compute targets
Tensor
bbox_targets_single
;
bbox_targets_single
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
BoxToDelta
<
T
>
(
boxes_num
,
sampled_boxes
,
sampled_gts
,
bbox_reg_weights
,
BoxToDelta
<
T
>
(
fg_num
,
sampled_boxes
,
sampled_gts
,
nullptr
,
false
,
&
bbox_targets_single
);
// Scale rois
...
...
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
auto
rpn_rois_lod
=
rpn_rois
->
lod
().
back
();
auto
gt_classes_lod
=
gt_classes
->
lod
().
back
();
auto
gt_boxes_lod
=
gt_boxes
->
lod
().
back
();
for
(
size_
t
i
=
0
;
i
<
n
;
++
i
)
{
for
(
in
t
i
=
0
;
i
<
n
;
++
i
)
{
Tensor
rpn_rois_slice
=
rpn_rois
->
Slice
(
rpn_rois_lod
[
i
],
rpn_rois_lod
[
i
+
1
]);
Tensor
gt_classes_slice
=
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
db5e3dd7
...
...
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
context
.
GetPlace
());
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
()
/
4
,
1
},
context
.
GetPlace
());
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
(),
1
},
context
.
GetPlace
());
Tensor
bbox_deltas_swap
,
scores_swap
;
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
...
...
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
CPUGather
<
T
>
(
ctx
,
proposals
,
keep
,
&
bbox_sel
);
CPUGather
<
T
>
(
ctx
,
scores_sel
,
keep
,
&
scores_filter
);
if
(
nms_thresh
<=
0
)
{
return
std
::
make_pair
(
bbox_sel
,
scores_
sel
);
return
std
::
make_pair
(
bbox_sel
,
scores_
filter
);
}
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
db5e3dd7
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
...
...
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
auto
in_dims
=
ctx
->
GetInputDim
(
"DistMat"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
"The rank of Input(DistMat) must be 2."
);
ctx
->
SetOutputDim
(
"LocationIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"ScoreIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"TargetLabel"
,
{
-
1
,
1
});
ctx
->
SetOutputDim
(
"TargetBBox"
,
{
-
1
,
4
});
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"DistMat"
)
->
type
()),
platform
::
CPUPlace
());
}
};
template
<
typename
T
>
class
RpnTargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor_t
=
context
.
Input
<
Tensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_bbox_t
=
context
.
Input
<
Tensor
>
(
"GtBox"
);
auto
*
dist_t
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
auto
*
loc_index_t
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
auto
*
score_index_t
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox_t
=
context
.
Output
<
Tensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl_t
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
lod
=
dist_t
->
lod
().
back
();
int64_t
batch_num
=
static_cast
<
int64_t
>
(
lod
.
size
()
-
1
);
int64_t
anchor_num
=
dist_t
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
anchor_num
,
anchor_t
->
dims
()[
0
]);
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
int
fg_num_per_batch
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
max_num
=
batch_num
*
anchor_num
;
auto
place
=
context
.
GetPlace
();
tgt_bbox_t
->
mutable_data
<
T
>
({
max_num
,
4
},
place
);
auto
*
loc_index
=
loc_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
auto
*
score_index
=
score_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
Tensor
tmp_tgt_lbl
;
auto
*
tmp_lbl_data
=
tmp_tgt_lbl
.
mutable_data
<
int64_t
>
({
max_num
},
place
);
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
&
tmp_tgt_lbl
,
static_cast
<
int64_t
>
(
-
1
));
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
int
fg_num
=
0
;
int
bg_num
=
0
;
for
(
int
i
=
0
;
i
<
batch_num
;
++
i
)
{
Tensor
dist
=
dist_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
Tensor
gt_bbox
=
gt_bbox_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
auto
fg_bg_gt
=
SampleFgBgGt
(
dev_ctx
,
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num_per_batch
,
engine
,
tmp_lbl_data
+
i
*
anchor_num
);
int
cur_fg_num
=
fg_bg_gt
[
0
].
size
();
int
cur_bg_num
=
fg_bg_gt
[
1
].
size
();
std
::
transform
(
fg_bg_gt
[
0
].
begin
(),
fg_bg_gt
[
0
].
end
(),
loc_index
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
memcpy
(
score_index
,
loc_index
,
cur_fg_num
*
sizeof
(
int
));
std
::
transform
(
fg_bg_gt
[
1
].
begin
(),
fg_bg_gt
[
1
].
end
(),
score_index
+
cur_fg_num
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
// get target bbox deltas
if
(
cur_fg_num
)
{
Tensor
fg_gt
;
T
*
gt_data
=
fg_gt
.
mutable_data
<
T
>
({
cur_fg_num
,
4
},
place
);
Tensor
tgt_bbox
=
tgt_bbox_t
->
Slice
(
fg_num
,
fg_num
+
cur_fg_num
);
T
*
tgt_data
=
tgt_bbox
.
data
<
T
>
();
Gather
<
T
>
(
anchor_t
->
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
0
][
0
]),
cur_fg_num
,
tgt_data
);
Gather
<
T
>
(
gt_bbox
.
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
2
][
0
]),
cur_fg_num
,
gt_data
);
BoxToDelta
<
T
>
(
cur_fg_num
,
tgt_bbox
,
fg_gt
,
nullptr
,
false
,
&
tgt_bbox
);
}
loc_index
+=
cur_fg_num
;
score_index
+=
cur_fg_num
+
cur_bg_num
;
fg_num
+=
cur_fg_num
;
bg_num
+=
cur_bg_num
;
}
int
lbl_num
=
fg_num
+
bg_num
;
PADDLE_ENFORCE_LE
(
fg_num
,
max_num
);
PADDLE_ENFORCE_LE
(
lbl_num
,
max_num
);
tgt_bbox_t
->
Resize
({
fg_num
,
4
});
loc_index_t
->
Resize
({
fg_num
});
score_index_t
->
Resize
({
lbl_num
});
auto
*
lbl_data
=
tgt_lbl_t
->
mutable_data
<
int64_t
>
({
lbl_num
,
1
},
place
);
Gather
<
int64_t
>
(
tmp_lbl_data
,
1
,
score_index_t
->
data
<
int
>
(),
lbl_num
,
lbl_data
);
}
private:
void
ScoreAssign
(
const
T
*
dist_data
,
const
Tensor
&
anchor_to_gt_max
,
const
int
row
,
const
int
col
,
const
float
pos_threshold
,
const
float
neg_threshold
,
int64_t
*
target_label
_data
,
const
float
neg_threshold
,
int64_t
*
target_label
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
)
const
{
int
fg_offset
=
fg_inds
->
size
();
int
bg_offset
=
bg_inds
->
size
();
float
epsilon
=
0.0001
;
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
const
T
*
v
=
dist_data
+
i
*
col
;
T
max
_dist
=
*
std
::
max_element
(
v
,
v
+
col
);
T
max
=
*
std
::
max_element
(
v
,
v
+
col
);
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
T
val
=
dist_data
[
i
*
col
+
j
];
if
(
val
==
max_dist
)
target_label_data
[
j
]
=
1
;
if
(
std
::
abs
(
max
-
v
[
j
])
<
epsilon
)
{
target_label
[
j
]
=
1
;
}
}
}
// Pick the fg/bg and count the number
// Pick the fg/bg
const
T
*
anchor_to_gt_max_data
=
anchor_to_gt_max
.
data
<
T
>
();
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
if
(
anchor_to_gt_max
.
data
<
T
>
()[
j
]
>
pos_threshold
)
{
target_label
_data
[
j
]
=
1
;
}
else
if
(
anchor_to_gt_max
.
data
<
T
>
()
[
j
]
<
neg_threshold
)
{
target_label
_data
[
j
]
=
0
;
if
(
anchor_to_gt_max
_data
[
j
]
>=
pos_threshold
)
{
target_label
[
j
]
=
1
;
}
else
if
(
anchor_to_gt_max
_data
[
j
]
<
neg_threshold
)
{
target_label
[
j
]
=
0
;
}
if
(
target_label
_data
[
j
]
==
1
)
{
fg_inds
->
push_back
(
fg_offset
+
j
);
}
else
if
(
target_label
_data
[
j
]
==
0
)
{
bg_inds
->
push_back
(
bg_offset
+
j
);
if
(
target_label
[
j
]
==
1
)
{
fg_inds
->
push_back
(
j
);
}
else
if
(
target_label
[
j
]
==
0
)
{
bg_inds
->
push_back
(
j
);
}
}
}
void
ReservoirSampling
(
const
int
num
,
const
int
offset
,
std
::
minstd_rand
engine
,
void
ReservoirSampling
(
const
int
num
,
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
inds
)
const
{
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
const
int64_t
size
=
static_cast
<
int64_t
>
(
inds
->
size
()
-
offset
);
if
(
size
>
num
)
{
for
(
int64_t
i
=
num
;
i
<
size
;
++
i
)
{
size_t
len
=
inds
->
size
(
);
if
(
len
>
static_cast
<
size_t
>
(
num
)
)
{
for
(
size_t
i
=
num
;
i
<
len
;
++
i
)
{
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
if
(
rng_ind
<
num
)
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
inds
->
begin
()
+
i
+
offset
);
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
,
inds
->
begin
()
+
i
);
}
inds
->
resize
(
num
);
}
}
void
RpnTargetAssign
(
const
framework
::
ExecutionContext
&
ctx
,
const
Tensor
&
dist
,
const
float
pos_threshold
,
const
float
neg_threshold
,
const
int
rpn_batch_size
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
,
int64_t
*
target_label_data
)
const
{
// std::vector<std::vector<int>> RpnTargetAssign(
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
platform
::
CPUDeviceContext
&
ctx
,
const
Tensor
&
dist
,
const
float
pos_threshold
,
const
float
neg_threshold
,
const
int
rpn_batch_size
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
int64_t
*
target_label
)
const
{
auto
*
dist_data
=
dist
.
data
<
T
>
();
int64_t
row
=
dist
.
dims
()[
0
];
int64_t
col
=
dist
.
dims
()[
1
];
int
fg_offset
=
fg_inds
->
size
();
int
bg_offset
=
bg_inds
->
size
();
int
row
=
dist
.
dims
()[
0
];
int
col
=
dist
.
dims
()[
1
];
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
vector
<
int
>
gt_inds
;
// Calculate the max IoU between anchors and gt boxes
Tensor
anchor_to_gt_max
;
anchor_to_gt_max
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
col
),
1
}),
platform
::
CPUPlace
());
auto
&
place
=
*
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>()
.
eigen_device
();
auto
x
=
EigenMatrix
<
T
>::
From
(
dist
);
auto
x_col_max
=
EigenMatrix
<
T
>::
From
(
anchor_to_gt_max
);
x_col_max
.
device
(
place
)
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
))
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
static_cast
<
int64_t
>
(
col
),
1
));
// Map from anchor to gt box that has highest overlap
auto
place
=
ctx
.
GetPlace
();
Tensor
anchor_to_gt_max
,
anchor_to_gt_argmax
;
anchor_to_gt_max
.
mutable_data
<
T
>
({
col
},
place
);
int
*
argmax
=
anchor_to_gt_argmax
.
mutable_data
<
int
>
({
col
},
place
);
auto
x
=
framework
::
EigenMatrix
<
T
>::
From
(
dist
);
auto
x_col_max
=
framework
::
EigenVector
<
T
>::
Flatten
(
anchor_to_gt_max
);
auto
x_col_argmax
=
framework
::
EigenVector
<
int
>::
Flatten
(
anchor_to_gt_argmax
);
x_col_max
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
x_col_argmax
=
x
.
argmax
(
0
).
template
cast
<
int
>();
// Follow the Faster RCNN's implementation
ScoreAssign
(
dist_data
,
anchor_to_gt_max
,
row
,
col
,
pos_threshold
,
neg_threshold
,
target_label
_data
,
fg_inds
,
bg_inds
);
neg_threshold
,
target_label
,
&
fg_inds
,
&
bg_inds
);
// Reservoir Sampling
ReservoirSampling
(
fg_num
,
fg_offset
,
engine
,
fg_inds
);
int
bg_num
=
rpn_batch_size
-
(
fg_inds
->
size
()
-
fg_offset
);
ReservoirSampling
(
bg_num
,
bg_offset
,
engine
,
bg_inds
)
;
}
ReservoirSampling
(
fg_num
,
engine
,
&
fg_inds
);
int
fg_num2
=
static_cast
<
int
>
(
fg_inds
.
size
()
);
int
bg_num
=
rpn_batch_size
-
fg_num2
;
ReservoirSampling
(
bg_num
,
engine
,
&
bg_inds
);
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
dist
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
auto
*
loc_index
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_lbl
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
col
=
dist
->
dims
()[
1
];
int64_t
n
=
dist
->
lod
().
size
()
==
0UL
?
1
:
static_cast
<
int64_t
>
(
dist
->
lod
().
back
().
size
()
-
1
);
if
(
dist
->
lod
().
size
())
{
PADDLE_ENFORCE_EQ
(
dist
->
lod
().
size
(),
1UL
,
"Only support 1 level of LoD."
);
gt_inds
.
reserve
(
fg_num2
);
for
(
int
i
=
0
;
i
<
fg_num2
;
++
i
)
{
gt_inds
.
emplace_back
(
argmax
[
fg_inds
[
i
]]);
}
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
int
fg_num
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
*
target_label_data
=
tgt_lbl
->
mutable_data
<
int64_t
>
({
n
*
col
,
1
},
context
.
GetPlace
());
std
::
vector
<
std
::
vector
<
int
>>
fg_bg_gt
;
fg_bg_gt
.
emplace_back
(
fg_inds
);
fg_bg_gt
.
emplace_back
(
bg_inds
);
fg_bg_gt
.
emplace_back
(
gt_inds
);
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
tgt_lbl
,
static_cast
<
int
>
(
-
1
));
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
if
(
n
==
1
)
{
RpnTargetAssign
(
context
,
*
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
);
}
else
{
auto
lod
=
dist
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
Tensor
one_ins
=
dist
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
RpnTargetAssign
(
context
,
one_ins
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
+
i
*
col
);
}
}
int
*
loc_index_data
=
loc_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
())},
context
.
GetPlace
());
int
*
score_index_data
=
score_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
()
+
bg_inds
.
size
())},
context
.
GetPlace
());
memcpy
(
loc_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
+
fg_inds
.
size
(),
reinterpret_cast
<
int
*>
(
&
bg_inds
[
0
]),
bg_inds
.
size
()
*
sizeof
(
int
));
return
fg_bg_gt
;
}
};
class
RpnTargetAssignOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBox"
,
"(LoDTensor) input groud-truth bbox with shape [K, 4]."
);
AddInput
(
"DistMat"
,
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
...
...
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex"
,
"(Tensor), The indexes of foreground and background anchors in all "
"RPN anchors(The rest anchors are ignored). The shape of the "
"ScoreIndex is [F + B], F and B depend on the value of input "
"tensor and attributes."
);
AddOutput
(
"TargetLabel"
,
"(Tensor<int64_t>), The target labels of each anchor with shape "
"[K * M, 1], "
"K and M is the same as they are in DistMat."
);
"ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
" number."
);
AddOutput
(
"TargetBBox"
,
"(Tensor<int64_t>), The target bbox deltas with shape "
"[F, 4], F is the sampled foreground number."
);
AddOutput
(
"TargetLabel"
,
"(Tensor<int64_t>), The target labels of each anchor with shape "
"[F + B, 1], F and B are sampled foreground and backgroud number."
);
AddComment
(
R"DOC(
This operator can be, for given the IoU between the ground truth bboxes and the
anchors, to assign classification and regression targets to each prediction.
...
...
paddle/fluid/operators/gru_unit_op.h
浏览文件 @
db5e3dd7
...
...
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
gate_data
,
frame_size
*
3
);
// calculate activited gate
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
));
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
));
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
...
...
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
1
,
gate_data
+
frame_size
*
2
,
frame_size
*
3
);
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
));
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
...
...
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
int
batch_size
=
input
->
dims
()[
0
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// backward for unactivated update gate
...
...
paddle/fluid/operators/roi_pool_op.cu
浏览文件 @
db5e3dd7
...
...
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
template
<
typename
T
>
__global__
void
GPUROIPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
...
...
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
int
c
=
(
i
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
i
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
offset_input_rois
[
0
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
...
...
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
template
<
typename
T
>
__global__
void
GPUROIPoolBackward
(
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
output_grad
,
const
int64_t
*
argmax_data
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
...
...
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
GPUROIPoolForward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
int64_t
>
(),
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
}
...
...
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
if
(
output_grad_size
>
0
)
{
GPUROIPoolBackward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
int64_t
>
(),
out_grad
->
data
<
T
>
(),
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
argmax
->
data
<
int64_t
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
...
...
paddle/fluid/operators/roi_pool_op.h
浏览文件 @
db5e3dd7
...
...
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
*
argmax_data
=
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
rois_data
[
0
]
*
spatial_scale
);
...
...
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
}
}
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
int64_t
*
argmax_data
=
argmax
->
data
<
int64_t
>
();
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
db5e3dd7
...
...
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
"""
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
# 1. Compute the regression target bboxes
target_bbox
=
box_coder
(
prior_box
=
anchor_box
,
prior_box_var
=
anchor_var
,
target_box
=
gt_box
,
code_type
=
'encode_center_size'
,
box_normalized
=
False
)
# 2. Compute overlaps between the prior boxes and the gt boxes overlaps
# Compute overlaps between the prior boxes and the gt boxes overlaps
iou
=
iou_similarity
(
x
=
gt_box
,
y
=
anchor_box
)
# 3. Assign target label to anchors
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
# Assign target label to anchors
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
'int64'
)
target_bbox
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
helper
.
append_op
(
type
=
"rpn_target_assign"
,
inputs
=
{
'DistMat'
:
iou
},
inputs
=
{
'Anchor'
:
anchor_box
,
'GtBox'
:
gt_box
,
'DistMat'
:
iou
},
outputs
=
{
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
target_label
'TargetLabel'
:
target_label
,
'TargetBBox'
:
target_bbox
,
},
attrs
=
{
'rpn_batch_size_per_im'
:
rpn_batch_size_per_im
,
...
...
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
'fg_fraction'
:
fg_fraction
})
# 4. Reshape and gather the target entry
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
2
))
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
target_label
=
nn
.
reshape
(
x
=
target_label
,
shape
=
(
-
1
,
1
))
target_bbox
=
nn
.
reshape
(
x
=
target_bbox
,
shape
=
(
-
1
,
4
))
loc_index
.
stop_gradient
=
True
score_index
.
stop_gradient
=
True
target_label
.
stop_gradient
=
True
target_bbox
.
stop_gradient
=
True
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
1
))
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
predicted_scores
=
nn
.
gather
(
scores
,
score_index
)
predicted_location
=
nn
.
gather
(
loc
,
loc_index
)
target_label
=
nn
.
gather
(
target_label
,
score_index
)
target_bbox
=
nn
.
gather
(
target_bbox
,
loc_index
)
return
predicted_scores
,
predicted_location
,
target_label
,
target_bbox
...
...
python/paddle/fluid/tests/test_detection.py
浏览文件 @
db5e3dd7
...
...
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
gt_box
=
layers
.
data
(
name
=
'gt_box'
,
shape
=
[
4
],
lod_level
=
1
,
dtype
=
'float32'
)
pred
icted_scores
,
predicted_location
,
target_label
,
targe
t_bbox
=
layers
.
rpn_target_assign
(
pred
_scores
,
pred_loc
,
tgt_lbl
,
tg
t_bbox
=
layers
.
rpn_target_assign
(
loc
=
loc
,
scores
=
scores
,
anchor_box
=
anchor_box
,
...
...
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
rpn_positive_overlap
=
0.7
,
rpn_negative_overlap
=
0.3
)
self
.
assertIsNotNone
(
predicted_scores
)
self
.
assertIsNotNone
(
predicted_location
)
self
.
assertIsNotNone
(
target_label
)
self
.
assertIsNotNone
(
target_bbox
)
assert
predicted_scores
.
shape
[
1
]
==
2
assert
predicted_location
.
shape
[
1
]
==
4
assert
predicted_location
.
shape
[
1
]
==
target_bbox
.
shape
[
1
]
print
(
str
(
program
))
self
.
assertIsNotNone
(
pred_scores
)
self
.
assertIsNotNone
(
pred_loc
)
self
.
assertIsNotNone
(
tgt_lbl
)
self
.
assertIsNotNone
(
tgt_bbox
)
assert
pred_scores
.
shape
[
1
]
==
1
assert
pred_loc
.
shape
[
1
]
==
4
assert
pred_loc
.
shape
[
1
]
==
tgt_bbox
.
shape
[
1
]
class
TestGenerateProposals
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
浏览文件 @
db5e3dd7
...
...
@@ -37,7 +37,7 @@ def fusion_gru(
h0
,
wh
,
np
.
zeros
(
(
1
,
wh
.
shape
[
1
]),
dtype
=
'float
64
'
),
(
1
,
wh
.
shape
[
1
]),
dtype
=
'float
32
'
),
is_reverse
,
act_state
,
act_gate
)
...
...
@@ -62,15 +62,15 @@ class TestFusionGRUOp(OpTest):
T
=
sum
(
self
.
lod
[
0
])
N
=
len
(
self
.
lod
[
0
])
x
=
np
.
random
.
rand
(
T
,
self
.
M
).
astype
(
'float
64
'
)
wx
=
np
.
random
.
rand
(
self
.
M
,
3
*
self
.
D
).
astype
(
'float
64
'
)
wh
=
np
.
random
.
rand
(
self
.
D
,
3
*
self
.
D
).
astype
(
'float
64
'
)
x
=
np
.
random
.
rand
(
T
,
self
.
M
).
astype
(
'float
32
'
)
wx
=
np
.
random
.
rand
(
self
.
M
,
3
*
self
.
D
).
astype
(
'float
32
'
)
wh
=
np
.
random
.
rand
(
self
.
D
,
3
*
self
.
D
).
astype
(
'float
32
'
)
bias
=
np
.
random
.
rand
(
1
,
3
*
self
.
D
).
astype
(
'float
64
'
)
if
self
.
with_bias
else
np
.
zeros
(
(
1
,
3
*
self
.
D
),
dtype
=
'float
64
'
)
1
,
3
*
self
.
D
).
astype
(
'float
32
'
)
if
self
.
with_bias
else
np
.
zeros
(
(
1
,
3
*
self
.
D
),
dtype
=
'float
32
'
)
h0
=
np
.
random
.
rand
(
N
,
self
.
D
).
astype
(
'float
64
'
)
if
self
.
with_h0
else
np
.
zeros
(
(
N
,
self
.
D
),
dtype
=
'float
64
'
)
N
,
self
.
D
).
astype
(
'float
32
'
)
if
self
.
with_h0
else
np
.
zeros
(
(
N
,
self
.
D
),
dtype
=
'float
32
'
)
_
,
_
,
_
,
hidden
=
fusion_gru
(
x
,
self
.
lod
,
h0
,
wx
,
wh
,
bias
,
self
.
is_reverse
,
...
...
@@ -93,7 +93,9 @@ class TestFusionGRUOp(OpTest):
}
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-8
)
for
use_seq
in
{
True
,
False
}:
self
.
attrs
[
'use_seq'
]
=
use_seq
self
.
check_output
()
class
TestFusionGRUOpNoInitial
(
TestFusionGRUOp
):
...
...
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
浏览文件 @
db5e3dd7
...
...
@@ -114,7 +114,9 @@ class TestFusionLSTMOp(OpTest):
}
def
test_check_output
(
self
):
self
.
check_output
()
for
use_seq
in
{
True
,
False
}:
self
.
attrs
[
'use_seq'
]
=
use_seq
self
.
check_output
()
class
TestFusionLSTMOpInit
(
TestFusionLSTMOp
):
...
...
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
浏览文件 @
db5e3dd7
...
...
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
dx
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
]
dy
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
ex_w
/
weights
[
2
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
ex_h
/
weights
[
3
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
weights
[
2
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
weights
[
3
]
targets
=
np
.
vstack
([
dx
,
dy
,
dw
,
dh
]).
transpose
()
return
targets
...
...
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
浏览文件 @
db5e3dd7
...
...
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
for
i
in
range
(
self
.
rois_num
):
roi
=
self
.
rois
[
i
]
roi_batch_id
=
roi
[
0
]
roi_batch_id
=
int
(
roi
[
0
])
roi_start_w
=
int
(
cpt
.
round
(
roi
[
1
]
*
self
.
spatial_scale
))
roi_start_h
=
int
(
cpt
.
round
(
roi
[
2
]
*
self
.
spatial_scale
))
roi_end_w
=
int
(
cpt
.
round
(
roi
[
3
]
*
self
.
spatial_scale
))
...
...
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
roi
=
[
bno
,
x1
,
y1
,
x2
,
y2
]
rois
.
append
(
roi
)
self
.
rois_num
=
len
(
rois
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
int64
"
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
float32
"
)
def
setUp
(
self
):
self
.
op_type
=
"roi_pool"
...
...
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
浏览文件 @
db5e3dd7
...
...
@@ -18,12 +18,17 @@ import unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
test_anchor_generator_op
import
anchor_generator_in_python
from
test_generate_proposal_labels
import
_generate_groundtruth
from
test_generate_proposal_labels
import
_bbox_overlaps
,
_box_to_delta
def
rpn_target_assign
(
iou
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
):
iou
=
np
.
transpose
(
iou
)
def
rpn_target_assign
(
gt_anchor_iou
,
rpn_batch_size_per_im
,
rpn_
positive_overlap
,
rpn_
negative_overlap
,
fg_fraction
):
iou
=
np
.
transpose
(
gt_anchor_
iou
)
anchor_to_gt_max
=
iou
.
max
(
axis
=
1
)
anchor_to_gt_argmax
=
iou
.
argmax
(
axis
=
1
)
gt_to_anchor_argmax
=
iou
.
argmax
(
axis
=
0
)
gt_to_anchor_max
=
iou
[
gt_to_anchor_argmax
,
np
.
arange
(
iou
.
shape
[
1
])]
anchors_with_max_overlap
=
np
.
where
(
iou
==
gt_to_anchor_max
)[
0
]
...
...
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
num_bg
=
rpn_batch_size_per_im
-
np
.
sum
(
tgt_lbl
==
1
)
bg_inds
=
np
.
where
(
anchor_to_gt_max
<
rpn_negative_overlap
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
if
len
(
bg_inds
)
>
num_bg
:
enable_inds
=
bg_inds
[
np
.
random
.
randint
(
len
(
bg_inds
),
size
=
num_bg
)]
tgt_lbl
[
enable_inds
]
=
0
bg_inds
=
np
.
where
(
tgt_lbl
==
0
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
loc_index
=
fg_inds
score_index
=
np
.
hstack
((
fg_inds
,
bg_inds
))
tgt_lbl
=
np
.
expand_dims
(
tgt_lbl
,
axis
=
1
)
return
loc_index
,
score_index
,
tgt_lbl
gt_inds
=
anchor_to_gt_argmax
[
fg_inds
]
return
loc_index
,
score_index
,
tgt_lbl
,
gt_inds
def
get_anchor
(
n
,
c
,
h
,
w
):
input_feat
=
np
.
random
.
random
((
n
,
c
,
h
,
w
)).
astype
(
'float32'
)
anchors
,
_
=
anchor_generator_in_python
(
input_feat
=
input_feat
,
anchor_sizes
=
[
32.
,
64.
],
aspect_ratios
=
[
0.5
,
1.0
],
variances
=
[
1.0
,
1.0
,
1.0
,
1.0
],
stride
=
[
16.0
,
16.0
],
offset
=
0.5
)
return
anchors
def
rpn_blob
(
anchor
,
gt_boxes
,
iou
,
lod
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
):
loc_indexes
=
[]
score_indexes
=
[]
tmp_tgt_labels
=
[]
tgt_bboxes
=
[]
anchor_num
=
anchor
.
shape
[
0
]
batch_size
=
len
(
lod
)
-
1
for
i
in
range
(
batch_size
):
b
,
e
=
lod
[
i
],
lod
[
i
+
1
]
iou_slice
=
iou
[
b
:
e
,
:]
bboxes_slice
=
gt_boxes
[
b
:
e
,
:]
loc_idx
,
score_idx
,
tgt_lbl
,
gt_inds
=
rpn_target_assign
(
iou_slice
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
)
fg_bboxes
=
bboxes_slice
[
gt_inds
]
fg_anchors
=
anchor
[
loc_idx
]
box_deltas
=
_box_to_delta
(
fg_anchors
,
fg_bboxes
,
[
1.
,
1.
,
1.
,
1.
])
if
i
==
0
:
loc_indexes
=
loc_idx
score_indexes
=
score_idx
tmp_tgt_labels
=
tgt_lbl
tgt_bboxes
=
box_deltas
else
:
loc_indexes
=
np
.
concatenate
(
[
loc_indexes
,
loc_idx
+
i
*
anchor_num
])
score_indexes
=
np
.
concatenate
(
[
score_indexes
,
score_idx
+
i
*
anchor_num
])
tmp_tgt_labels
=
np
.
concatenate
([
tmp_tgt_labels
,
tgt_lbl
])
tgt_bboxes
=
np
.
vstack
([
tgt_bboxes
,
box_deltas
])
tgt_labels
=
tmp_tgt_labels
[
score_indexes
]
return
loc_indexes
,
score_indexes
,
tgt_bboxes
,
tgt_labels
class
TestRpnTargetAssignOp
(
OpTest
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
8
)).
astype
(
"float32"
)
self
.
op_type
=
"rpn_target_assign"
self
.
inputs
=
{
'DistMat'
:
iou
}
self
.
attrs
=
{
'rpn_batch_size_per_im'
:
256
,
'rpn_positive_overlap'
:
0.95
,
'rpn_negative_overlap'
:
0.3
,
'fg_fraction'
:
0.25
,
'fix_seed'
:
True
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
256
,
0.95
,
0.3
,
0.25
)
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
tgt_lbl
,
}
n
,
c
,
h
,
w
=
2
,
4
,
14
,
14
anchor
=
get_anchor
(
n
,
c
,
h
,
w
)
gt_num
=
10
anchor
=
anchor
.
reshape
(
-
1
,
4
)
anchor_num
=
anchor
.
shape
[
0
]
def
test_check_output
(
self
):
self
.
check_output
()
im_shapes
=
[[
64
,
64
],
[
64
,
64
]]
gt_box
,
lod
=
_generate_groundtruth
(
im_shapes
,
3
,
4
)
bbox
=
np
.
vstack
([
v
[
'boxes'
]
for
v
in
gt_box
])
iou
=
_bbox_overlaps
(
bbox
,
anchor
)
anchor
=
anchor
.
astype
(
'float32'
)
bbox
=
bbox
.
astype
(
'float32'
)
iou
=
iou
.
astype
(
'float32'
)
loc_index
,
score_index
,
tgt_bbox
,
tgt_lbl
=
rpn_blob
(
anchor
,
bbox
,
iou
,
[
0
,
4
,
8
],
25600
,
0.95
,
0.03
,
0.25
)
class
TestRpnTargetAssignOp2
(
OpTest
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
20
)).
astype
(
"float32"
)
self
.
op_type
=
"rpn_target_assign"
self
.
inputs
=
{
'DistMat'
:
iou
}
self
.
inputs
=
{
'Anchor'
:
anchor
,
'GtBox'
:
(
bbox
,
[[
4
,
4
]]),
'DistMat'
:
(
iou
,
[[
4
,
4
]]),
}
self
.
attrs
=
{
'rpn_batch_size_per_im'
:
128
,
'rpn_positive_overlap'
:
0.5
,
'rpn_negative_overlap'
:
0.
5
,
'fg_fraction'
:
0.5
,
'rpn_batch_size_per_im'
:
25600
,
'rpn_positive_overlap'
:
0.
9
5
,
'rpn_negative_overlap'
:
0.
03
,
'fg_fraction'
:
0.
2
5
,
'fix_seed'
:
True
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
128
,
0.5
,
0.5
,
0.5
)
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
tgt_lbl
,
'LocationIndex'
:
loc_index
.
astype
(
'int32'
),
'ScoreIndex'
:
score_index
.
astype
(
'int32'
),
'TargetBBox'
:
tgt_bbox
.
astype
(
'float32'
),
'TargetLabel'
:
tgt_lbl
.
astype
(
'int64'
),
}
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录