Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
db5e3dd7
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
db5e3dd7
编写于
9月 04, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_CudnnHolder_bug
上级
7b577b92
90b5be85
变更
38
显示空白变更内容
内联
并排
Showing
38 changed file
with
558 addition
and
445 deletion
+558
-445
Dockerfile
Dockerfile
+1
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+7
-9
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
...s/beginners_guide/basics/machine_translation/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
.../beginners_guide/basics/understand_sentiment/README.cn.md
+2
-0
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
...uid/new_docs/beginners_guide/basics/word2vec/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
...beginners_guide/quick_start/recognize_digits/README.cn.md
+1
-1
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+23
-8
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+4
-8
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+30
-88
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+5
-9
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+14
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+6
-0
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
+3
-2
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
+11
-12
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+7
-11
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+3
-2
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+4
-16
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+15
-10
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+1
-7
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+1
-4
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-4
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+1
-0
paddle/fluid/inference/paddle_fluid.map
paddle/fluid/inference/paddle_fluid.map
+1
-0
paddle/fluid/operators/detection/bbox_util.h
paddle/fluid/operators/detection/bbox_util.h
+66
-0
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+8
-31
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+2
-3
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+179
-112
paddle/fluid/operators/gru_unit_op.h
paddle/fluid/operators/gru_unit_op.h
+8
-8
paddle/fluid/operators/roi_pool_op.cu
paddle/fluid/operators/roi_pool_op.cu
+6
-6
paddle/fluid/operators/roi_pool_op.h
paddle/fluid/operators/roi_pool_op.h
+2
-2
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+18
-21
python/paddle/fluid/tests/test_detection.py
python/paddle/fluid/tests/test_detection.py
+8
-10
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
+11
-9
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
+3
-1
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
...le/fluid/tests/unittests/test_generate_proposal_labels.py
+2
-2
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
...paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
+95
-36
未找到文件。
Dockerfile
浏览文件 @
db5e3dd7
...
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
...
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
RUN
wget
-qO-
http://paddlepaddledeps.
bj
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
RUN
wget
-qO-
http://paddlepaddledeps.
cdn
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
tar
-xz
-C
/usr/local
&&
\
tar
-xz
-C
/usr/local
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
cp
-rf
/usr/local/TensorRT/lib /usr
cp
-rf
/usr/local/TensorRT/lib /usr
...
...
cmake/inference_lib.cmake
浏览文件 @
db5e3dd7
...
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
...
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/paddle/fluid"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/paddle/fluid"
)
set
(
module
"framework"
)
set
(
module
"framework"
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
copy
(
framework_lib DEPS framework_py_proto
set
(
framework_lib_deps framework_py_proto
)
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
endif
(
NOT WIN32
)
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
copy
(
framework_lib DEPS
${
framework_lib_deps
}
)
else
()
copy
(
framework_lib
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
src_dir
}
/
${
module
}
/ir/*.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/ir
)
)
endif
(
NOT WIN32
)
set
(
module
"memory"
)
set
(
module
"memory"
)
copy
(
memory_lib
copy
(
memory_lib
...
@@ -161,7 +158,8 @@ set(module "inference")
...
@@ -161,7 +158,8 @@ set(module "inference")
copy
(
inference_lib DEPS
${
inference_deps
}
copy
(
inference_lib DEPS
${
inference_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
src_dir
}
/
${
module
}
/api/paddle_inference_api.h
${
src_dir
}
/
${
module
}
/api/demo_ci
${
src_dir
}
/
${
module
}
/api/paddle_inference_api.h
${
src_dir
}
/
${
module
}
/api/demo_ci
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
)
)
set
(
module
"platform"
)
set
(
module
"platform"
)
...
...
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
浏览文件 @
db5e3dd7
...
@@ -60,6 +60,7 @@
...
@@ -60,6 +60,7 @@
图3. 编码器-解码器框架
图3. 编码器-解码器框架
</div>
</div>
<a
name=
"编码器"
></a>
#### 编码器
#### 编码器
编码阶段分为三步:
编码阶段分为三步:
...
@@ -81,7 +82,7 @@
...
@@ -81,7 +82,7 @@
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
1.
每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)
`$c$`
、真实目标语言序列的第
`$i$`
个词
`$u_i$`
和
`$i$`
时刻RNN的隐层状态
`$z_i$`
,计算出下一个隐层状态
`$z_{i+1}$`
。计算公式如下:
1.
每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)
`$c$`
、真实目标语言序列的第
`$i$`
个词
`$u_i$`
和
`$i$`
时刻RNN的隐层状态
`$z_i$`
,计算出下一个隐层状态
`$z_{i+1}$`
。计算公式如下:
$$z_{i+1}=
\p
hi_{
\t
heta '}
\l
eft ( c,u_i,z_i
\r
ight )$$
$$z_{i+1}=
\p
hi_{
\t
heta '}
\l
eft ( c,u_i,z_i
\r
ight )$$
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
[
注意力机制
](
#注意力机制
)
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
注意力机制
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
2.
将
`$z_{i+1}$`
通过
`softmax`
归一化,得到目标语言序列的第
`$i+1$`
个单词的概率分布
`$p_{i+1}$`
。概率分布公式如下:
2.
将
`$z_{i+1}$`
通过
`softmax`
归一化,得到目标语言序列的第
`$i+1$`
个单词的概率分布
`$p_{i+1}$`
。概率分布公式如下:
$$p
\l
eft ( u_{i+1}|u_{
<
i+1},
\m
athbf{x}
\r
ight )=softmax(W_sz_{i+1}+b_z)$$
$$p
\l
eft ( u_{i+1}|u_{
<
i+1},
\m
athbf{x}
\r
ight )=softmax(W_sz_{i+1}+b_z)$$
...
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
...
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见
[
柱搜索算法
](
#柱搜索算法
)
。
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见
[
柱搜索算法
](
#柱搜索算法
)
。
<a
name=
"柱搜索算法"
></a>
### 柱搜索算法
### 柱搜索算法
柱搜索(
[
beam search
](
http://en.wikipedia.org/wiki/Beam_search
)
)是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“
`<s>你好<e>`
”,就算目标语言字典中只有3个词(
`<s>`
,
`<e>`
,
`hello`
),也可能生成无限句话(
`hello`
循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
柱搜索(
[
beam search
](
http://en.wikipedia.org/wiki/Beam_search
)
)是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“
`<s>你好<e>`
”,就算目标语言字典中只有3个词(
`<s>`
,
`<e>`
,
`hello`
),也可能生成无限句话(
`hello`
循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
...
...
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
浏览文件 @
db5e3dd7
...
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
...
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
网络的输入
`input_dim`
表示的是词典的大小,
`class_dim`
表示类别数。这里,我们使用
[
`sequence_conv_pool`
](
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py
)
API实现了卷积和池化操作。
网络的输入
`input_dim`
表示的是词典的大小,
`class_dim`
表示类别数。这里,我们使用
[
`sequence_conv_pool`
](
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py
)
API实现了卷积和池化操作。
<a
name=
"栈值双向LSTM"
></a>
### 栈式双向LSTM
### 栈式双向LSTM
栈式双向神经网络
`stacked_lstm_net`
的代码片段如下:
栈式双向神经网络
`stacked_lstm_net`
的代码片段如下:
...
...
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
浏览文件 @
db5e3dd7
...
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
...
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
```
```
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
应用模型
](
#应用模型
)
中详细描述用法。
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
模型应用
](
#模型应用
)
中详细描述用法。
## 模型概览
## 模型概览
...
@@ -189,6 +189,7 @@ dream that one day <e>
...
@@ -189,6 +189,7 @@ dream that one day <e>
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
<a
name=
"训练模型"
></a>
## 编程实现
## 编程实现
本配置的模型结构如下图所示:
本配置的模型结构如下图所示:
...
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
...
```
```
<a
name=
"模型应用"
></a>
## 模型应用
## 模型应用
在模型训练后,我们可以用它做一些预测。
在模型训练后,我们可以用它做一些预测。
...
...
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
浏览文件 @
db5e3dd7
...
@@ -102,7 +102,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
...
@@ -102,7 +102,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图6所示。
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图6所示。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md
)
教程。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/tree/develop/03.image_classification
)
教程。
### 常见激活函数介绍
### 常见激活函数介绍
-
sigmoid激活函数: $ f(x) = sigmoid(x) =
\f
rac{1}{1+e^{-x}} $
-
sigmoid激活函数: $ f(x) = sigmoid(x) =
\f
rac{1}{1+e^{-x}} $
...
...
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
浏览文件 @
db5e3dd7
...
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
...
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
pip install --upgrade dist/visualdl-
*
.whl
pip install --upgrade dist/visualdl-
*
.whl
```
```
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
how_to_dev_frontend_e
n.md)
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
develop/how_to_dev_frontend_c
n.md)
## SDK
## SDK
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
db5e3dd7
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
function
(
pass_library TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
graph SRCS graph.cc DEPS node
)
cc_library
(
graph SRCS graph.cc DEPS node
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
cc_library
(
attention_lstm_fuse_pass SRCS attention_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
graph_to_program_pass
)
cc_library
(
infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass
)
pass_library
(
graph_viz_pass
)
cc_library
(
fc_lstm_fuse_pass SRCS fc_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
fc_fuse_pass
)
cc_library
(
seq_concat_fc_fuse_pass SRCS seq_concat_fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
attention_lstm_fuse_pass
)
pass_library
(
infer_clean_graph_pass
)
pass_library
(
fc_lstm_fuse_pass
)
pass_library
(
seq_concat_fc_fuse_pass
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass
graph_pattern_detector graph pass graph_traits
framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
db5e3dd7
...
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
...
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
#define LINK_TO(node0, node1) \
node0->outputs.push_back(node1); \
node1->inputs.push_back(node0);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
PrepareParameters
(
graph
,
param
);
PrepareParameters
(
graph
,
param
);
LINK_TO
(
X
,
lstm_op
);
IR_NODE_
LINK_TO
(
X
,
lstm_op
);
LINK_TO
(
cell_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
cell_init
,
lstm_op
);
LINK_TO
(
hidden_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
hidden_init
,
lstm_op
);
LINK_TO
(
lstm_op
,
LSTMOUT
);
IR_NODE_
LINK_TO
(
lstm_op
,
LSTMOUT
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
}
...
...
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
db5e3dd7
...
@@ -21,74 +21,26 @@ namespace paddle {
...
@@ -21,74 +21,26 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
bool
VarOutLinksToOp
(
Node
*
node
,
const
std
::
string
&
op_type
)
{
for
(
auto
*
out
:
node
->
outputs
)
{
if
(
out
->
IsOp
()
&&
out
->
Op
()
->
Type
()
==
op_type
)
{
return
true
;
}
}
return
false
;
}
void
BuildFCPattern
(
PDPattern
*
pattern
)
{
// Create Operators
auto
*
mul_op
=
pattern
->
NewNode
(
"mul"
)
->
assert_is_op
(
"mul"
);
auto
*
elementwise_add_op
=
pattern
->
NewNode
(
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
// Create variables
// w
auto
*
mul_weight_var
=
pattern
->
NewNode
(
"mul_weight"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"Y"
,
0
);
// x
auto
*
mul_tmp_var
=
pattern
->
NewNode
(
"mul_tmp_var"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"X"
,
0
);
// intermediate variable, will be removed in the IR after fuse.
auto
*
mul_out_var
=
pattern
->
NewNode
(
"mul_out"
)
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"mul"
)
->
assert_is_op_input
(
"elementwise_add"
);
// bias
auto
*
elementwise_add_tmp_var
=
pattern
->
NewNode
(
"elementwise_add_tmpvar"
)
->
assert_is_op_input
(
"elementwise_add"
)
->
AsInput
();
// output
auto
*
elementwise_add_out_var
=
pattern
->
NewNode
(
"elementwise_add_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"elementwise_add"
);
mul_op
->
LinksFrom
({
mul_weight_var
,
mul_tmp_var
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
elementwise_add_tmp_var
})
.
LinksTo
({
elementwise_add_out_var
});
}
// Replace the node `from` in the links to `to`
bool
LinksReplace
(
std
::
vector
<
Node
*>*
links
,
Node
*
from
,
Node
*
to
)
{
for
(
auto
*&
n
:
*
links
)
{
if
(
n
==
from
)
{
n
=
to
;
return
true
;
}
}
return
false
;
}
std
::
unique_ptr
<
ir
::
Graph
>
FCFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
FCFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
"fc"
,
graph
.
get
());
FusePassBase
::
Init
(
"fc
_fuse
"
,
graph
.
get
());
std
::
unordered_set
<
Node
*>
nodes2delete
;
std
::
unordered_set
<
Node
*>
nodes2delete
;
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
BuildFCPattern
(
gpd
.
mutable_pattern
());
// BuildFCPattern(gpd.mutable_pattern());
auto
*
x
=
gpd
.
mutable_pattern
()
->
NewNode
(
"fc_fuse/x"
)
->
AsInput
()
->
assert_is_op_input
(
"mul"
,
"X"
);
patterns
::
FC
(
gpd
.
mutable_pattern
(),
"fc_fuse"
,
x
,
true
/*with bias*/
);
#define GET_NODE(id) \
#define GET_NODE(id) \
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(#id)), \
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(
"fc_fuse/"
#id)), \
"pattern has no Node called %s", #id); \
"pattern has no Node called %s", #id); \
auto* id = subgraph.at(gpd.pattern().RetrieveNode(#id)); \
auto* id = subgraph.at(gpd.pattern().RetrieveNode(
"fc_fuse/"
#id)); \
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s",
"fc_fuse/"
#id);
int
found_fc_count
=
0
;
int
found_fc_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
...
@@ -98,10 +50,10 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
...
@@ -98,10 +50,10 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
// scenerio.
// scenerio.
// FC's fusion is simple, just op fuse, no need to process the
// FC's fusion is simple, just op fuse, no need to process the
// parameters.
// parameters.
GET_NODE
(
mul_tmp_var
);
// x
GET_NODE
(
x
);
// x
GET_NODE
(
mul_weight
);
// Y
GET_NODE
(
w
);
// Y
GET_NODE
(
elementwise_add_tmpvar
);
// bias
GET_NODE
(
fc_bias
);
// bias
GET_NODE
(
elementwise_add_out
);
// Out
GET_NODE
(
fc_out
);
// Out
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
mul_out
);
// tmp
GET_NODE
(
mul_out
);
// tmp
...
@@ -109,32 +61,22 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
...
@@ -109,32 +61,22 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
// Create an FC Node.
// Create an FC Node.
OpDesc
desc
;
OpDesc
desc
;
std
::
string
fc_x_in
=
mul_tmp_var
->
Name
();
std
::
string
fc_x_in
=
x
->
Name
();
std
::
string
fc_Y_in
=
mul_weight
->
Name
();
std
::
string
fc_Y_in
=
w
->
Name
();
std
::
string
fc_bias_in
=
elementwise_add_tmpvar
->
Name
();
std
::
string
fc_bias_in
=
fc_bias
->
Name
();
std
::
string
fc_out
=
elementwise_add
_out
->
Name
();
std
::
string
fc_out
_out
=
fc
_out
->
Name
();
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
fc_x_in
}));
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
fc_x_in
}));
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
_out
}));
desc
.
SetType
(
"fc"
);
desc
.
SetType
(
"fc"
);
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
fc_node
->
inputs
=
GraphSafeRemoveNodes
(
graph
.
get
(),
{
mul
,
elementwise_add
,
mul_out
});
std
::
vector
<
Node
*>
({
mul_tmp_var
,
mul_weight
,
elementwise_add_tmpvar
});
fc_node
->
outputs
.
push_back
(
elementwise_add_out
);
// Update link relatons
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_tmp_var
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_weight
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_tmpvar
->
outputs
,
elementwise_add
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_out
->
inputs
,
elementwise_add
,
fc_node
));
// Drop old nodes
IR_NODE_LINK_TO
(
x
,
fc_node
);
graph
->
RemoveNode
(
mul
);
IR_NODE_LINK_TO
(
w
,
fc_node
);
graph
->
RemoveNode
(
elementwise_add
);
IR_NODE_LINK_TO
(
fc_bias
,
fc_node
);
graph
->
RemoveNode
(
mul_out
);
// tmp variable
IR_NODE_LINK_TO
(
fc_node
,
fc_out
);
found_fc_count
++
;
found_fc_count
++
;
};
};
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
db5e3dd7
...
@@ -121,15 +121,11 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
...
@@ -121,15 +121,11 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
#undef TMP_NEW
#undef TMP_NEW
#undef TMP_NAME
#undef TMP_NAME
#define LINK_TO(a, b) \
IR_NODE_LINK_TO
(
input_n
,
op
);
a->outputs.push_back(b); \
IR_NODE_LINK_TO
(
weight_x_n
,
op
);
b->inputs.push_back(a);
IR_NODE_LINK_TO
(
weight_h_n
,
op
);
LINK_TO
(
input_n
,
op
);
IR_NODE_LINK_TO
(
bias_n
,
op
);
LINK_TO
(
weight_x_n
,
op
);
IR_NODE_LINK_TO
(
op
,
hidden_n
);
LINK_TO
(
weight_h_n
,
op
);
LINK_TO
(
bias_n
,
op
);
LINK_TO
(
op
,
hidden_n
);
#undef LINK_TO
return
op
;
return
op
;
};
};
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
db5e3dd7
...
@@ -111,6 +111,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
...
@@ -111,6 +111,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
return
false
;
return
false
;
}
}
}
}
for
(
auto
&
item
:
pdnodes2nodes_
)
{
for
(
auto
&
n
:
item
.
second
)
{
GetMarkedNodes
(
const_cast
<
Graph
*>
(
&
graph
)).
insert
(
n
);
}
}
VLOG
(
3
)
<<
pdnodes2nodes_
.
size
()
<<
" nodes marked"
;
VLOG
(
3
)
<<
pdnodes2nodes_
.
size
()
<<
" nodes marked"
;
return
!
pdnodes2nodes_
.
empty
();
return
!
pdnodes2nodes_
.
empty
();
...
@@ -278,7 +283,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
...
@@ -278,7 +283,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
bool
valid
=
true
;
bool
valid
=
true
;
for
(
auto
&
item
:
subgraph
)
{
for
(
auto
&
item
:
subgraph
)
{
if
(
node_set
.
count
(
item
.
second
))
{
if
(
item
.
first
->
IsIntermediate
()
&&
node_set
.
count
(
item
.
second
))
{
valid
=
false
;
valid
=
false
;
break
;
break
;
}
}
...
@@ -334,22 +339,22 @@ PDNode& PDNode::LinksFrom(const std::vector<PDNode*>& others) {
...
@@ -334,22 +339,22 @@ PDNode& PDNode::LinksFrom(const std::vector<PDNode*>& others) {
}
}
PDNode
*
PDNode
::
assert_is_op
()
{
PDNode
*
PDNode
::
assert_is_op
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_op
(
const
std
::
string
&
op_type
)
{
PDNode
*
PDNode
::
assert_is_op
(
const
std
::
string
&
op_type
)
{
asserts_
.
emplace_back
([
this
,
op_type
](
Node
*
x
)
{
asserts_
.
emplace_back
([
op_type
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
()
&&
x
->
Op
()
->
Type
()
==
op_type
;
return
x
&&
x
->
IsOp
()
&&
x
->
Op
()
->
Type
()
==
op_type
;
});
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_var
()
{
PDNode
*
PDNode
::
assert_is_var
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_var_not_persistable
()
{
PDNode
*
PDNode
::
assert_var_not_persistable
()
{
assert_is_var
();
assert_is_var
();
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_persistable_var
()
{
PDNode
*
PDNode
::
assert_is_persistable_var
()
{
...
@@ -491,16 +496,18 @@ void GraphSafeRemoveNodes(Graph* graph,
...
@@ -491,16 +496,18 @@ void GraphSafeRemoveNodes(Graph* graph,
for
(
auto
it
=
node
->
inputs
.
begin
();
it
!=
node
->
inputs
.
end
();)
{
for
(
auto
it
=
node
->
inputs
.
begin
();
it
!=
node
->
inputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
inputs
.
erase
(
it
);
it
=
const_cast
<
Node
*>
(
node
)
->
inputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
it
++
;
}
}
}
for
(
auto
it
=
node
->
outputs
.
begin
();
it
!=
node
->
outputs
.
end
();)
{
for
(
auto
it
=
node
->
outputs
.
begin
();
it
!=
node
->
outputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
outputs
.
erase
(
it
);
it
=
const_cast
<
Node
*>
(
node
)
->
outputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
it
++
;
}
}
}
}
}
}
}
bool
VarLinksFromOp
(
Node
*
node
,
const
std
::
string
&
op_type
)
{
bool
VarLinksFromOp
(
Node
*
node
,
const
std
::
string
&
op_type
)
{
for
(
auto
*
out
:
node
->
inputs
)
{
for
(
auto
*
out
:
node
->
inputs
)
{
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
db5e3dd7
...
@@ -245,6 +245,8 @@ class GraphPatternDetector {
...
@@ -245,6 +245,8 @@ class GraphPatternDetector {
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
// The intermediate PDNodes will be removed, so can't shared by multiple
// patterns.
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Validate whether the intermediate nodes are linked by external nodes.
// Validate whether the intermediate nodes are linked by external nodes.
...
@@ -295,6 +297,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
...
@@ -295,6 +297,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
}
// namespace patterns
}
// namespace patterns
#define IR_NODE_LINK_TO(a, b) \
a->outputs.push_back(b); \
b->inputs.push_back(a);
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
浏览文件 @
db5e3dd7
...
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
...
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
},
},
"OP0"
);
"OP0"
);
auto
*
any_var
=
x
.
mutable_pattern
()
->
NewNode
(
auto
*
any_var
=
x
.
mutable_pattern
()
[](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
);
->
NewNode
([](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
)
->
AsIntermediate
();
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
...
...
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
浏览文件 @
db5e3dd7
...
@@ -13,42 +13,41 @@
...
@@ -13,42 +13,41 @@
// limitations under the License.
// limitations under the License.
#include <algorithm>
#include <algorithm>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
graph_pattern_detector
.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
class
InferCleanGraphPass
:
public
Pass
{
class
InferCleanGraphPass
:
public
FusePassBase
{
public:
public:
virtual
~
InferCleanGraphPass
()
{}
virtual
~
InferCleanGraphPass
()
{}
protected:
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
"original_graph"
,
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
auto
is_valid_node
=
[](
Node
*
x
)
{
auto
is_valid_node
=
[](
Node
*
x
)
{
return
x
&&
IsControlDepVar
(
*
x
)
&&
x
->
IsVar
()
&&
!
x
->
Var
();
return
x
&&
IsControlDepVar
(
*
x
)
&&
x
->
IsVar
()
&&
!
x
->
Var
();
};
};
std
::
unordered_set
<
Node
*>
invalid_nodes
;
std
::
unordered_set
<
const
Node
*>
invalid_nodes
;
int
valid_op
=
0
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
is_valid_node
(
node
))
{
if
(
is_valid_node
(
node
))
{
invalid_nodes
.
insert
(
node
);
invalid_nodes
.
insert
(
node
);
}
else
if
(
node
->
IsOp
())
{
// Collect all the operators to help tracking number of operators.
++
valid_op
;
}
}
}
}
// remove nodes from the graph.
GraphSafeRemoveNodes
(
graph
.
get
(),
invalid_nodes
);
for
(
auto
*
node
:
invalid_nodes
)
{
graph
->
RemoveNode
(
node
);
}
// clean edges.
AddStatis
(
valid_op
);
for
(
auto
*
node
:
graph
->
Nodes
())
{
CleanEdges
(
&
node
->
inputs
,
invalid_nodes
);
CleanEdges
(
&
node
->
outputs
,
invalid_nodes
);
}
return
graph
;
return
graph
;
}
}
...
...
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
浏览文件 @
db5e3dd7
...
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
...
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
op_desc
.
SetAttr
(
"fc_activation"
,
act
->
Op
()
->
Type
());
op_desc
.
SetAttr
(
"fc_activation"
,
act
->
Op
()
->
Type
());
auto
*
op_node
=
graph
->
CreateOpNode
(
&
op_desc
);
auto
*
op_node
=
graph
->
CreateOpNode
(
&
op_desc
);
// Add links
// Add links
#define NODE_LINKS(a, b) \
IR_NODE_LINK_TO
(
fc_w
,
op_node
);
a->outputs.push_back(b); \
IR_NODE_LINK_TO
(
fc_bias
,
op_node
);
b->inputs.push_back(a);
IR_NODE_LINK_TO
(
concat_in0
,
op_node
);
NODE_LINKS
(
fc_w
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand0_in
,
op_node
);
NODE_LINKS
(
fc_bias
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand1_in
,
op_node
);
NODE_LINKS
(
concat_in0
,
op_node
);
IR_NODE_LINK_TO
(
op_node
,
fc_out
);
NODE_LINKS
(
sequence_expand0_in
,
op_node
);
NODE_LINKS
(
sequence_expand1_in
,
op_node
);
NODE_LINKS
(
op_node
,
fc_out
);
// Clean nodes.
// Clean nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
...
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
...
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
marked_nodes
.
erase
(
sequence_expand0_in
);
marked_nodes
.
erase
(
sequence_expand0_in
);
marked_nodes
.
erase
(
sequence_expand1_in
);
marked_nodes
.
erase
(
sequence_expand1_in
);
marked_nodes
.
erase
(
fc_out
);
marked_nodes
.
erase
(
fc_out
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
});
});
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
db5e3dd7
...
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
...
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library
(
paddle_fluid_api
cc_library
(
paddle_fluid_api
SRCS io.cc
SRCS io.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
graph_to_program_pass
)
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
get_property
(
fluid_modules GLOBAL PROPERTY FLUID_MODULES
)
get_property
(
fluid_modules GLOBAL PROPERTY FLUID_MODULES
)
...
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
...
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
#endif()
#endif()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
analysis_predictor
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
...
@@ -32,6 +32,7 @@ endif()
...
@@ -32,6 +32,7 @@ endif()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
DEPS
${
fluid_modules
}
paddle_fluid_api
)
DEPS
${
fluid_modules
}
paddle_fluid_api
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
db5e3dd7
...
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
...
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
endif
()
endif
()
cc_test
(
${
TARGET
}
cc_test
(
${
TARGET
}
SRCS
"
${
analysis_test_SRCS
}
"
SRCS
"
${
analysis_test_SRCS
}
"
DEPS analysis
graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detector pass
${
analysis_test_EXTRA_DEPS
}
DEPS analysis
pass
${
GLOB_PASS_LIB
}
${
analysis_test_EXTRA_DEPS
}
ARGS --inference_model_dir=
${
PYTHON_TESTS_DIR
}
/book/word2vec.inference.model
${
mem_opt
}
${
analysis_test_ARGS
}
)
ARGS --inference_model_dir=
${
PYTHON_TESTS_DIR
}
/book/word2vec.inference.model
${
mem_opt
}
${
analysis_test_ARGS
}
)
set_tests_properties
(
${
TARGET
}
PROPERTIES DEPENDS test_word2vec
)
set_tests_properties
(
${
TARGET
}
PROPERTIES DEPENDS test_word2vec
)
endif
(
WITH_TESTING
)
endif
(
WITH_TESTING
)
...
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
...
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
endif
()
endif
()
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
analysis_predictor
# ir
fc_fuse_pass
fc_lstm_fuse_pass
seq_concat_fc_fuse_pass
graph_viz_pass
infer_clean_graph_pass
graph_pattern_detector
infer_clean_graph_pass
attention_lstm_fuse_pass
paddle_inference_api
pass
ARGS --infer_ditu_rnn_model=
${
DITU_INSTALL_DIR
}
/model
ARGS --infer_ditu_rnn_model=
${
DITU_INSTALL_DIR
}
/model
--infer_ditu_rnn_data=
${
DITU_INSTALL_DIR
}
/data.txt
)
--infer_ditu_rnn_data=
${
DITU_INSTALL_DIR
}
/data.txt
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
EXTRA_DEPS paddle_inference_api
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
EXTRA_DEPS paddle_fluid
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
db5e3dd7
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -327,9 +328,20 @@ void TestDituRNNPrediction(const std::string &model_path,
...
@@ -327,9 +328,20 @@ void TestDituRNNPrediction(const std::string &model_path,
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
}
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc"
));
int
num_ops
=
0
;
EXPECT_EQ
(
fuse_statis
.
at
(
"fc"
),
1
);
for
(
auto
&
node
:
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
1
);
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exists.
}
}
}
}
...
@@ -357,10 +369,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
...
@@ -357,10 +369,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
浏览文件 @
db5e3dd7
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
...
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_fuse_pass
);
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
db5e3dd7
...
@@ -18,10 +18,7 @@ if(APPLE)
...
@@ -18,10 +18,7 @@ if(APPLE)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
${
GLOB_PASS_LIB
}
)
graph_viz_pass fc_fuse_pass
infer_clean_graph_pass
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
db5e3dd7
...
@@ -18,6 +18,7 @@
...
@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -133,7 +134,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
...
@@ -133,7 +134,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
}
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
paddle/fluid/inference/api/helper.h
浏览文件 @
db5e3dd7
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <sys/time.h>
#include <sys/time.h>
#include <algorithm>
#include <algorithm>
#include <numeric>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
...
paddle/fluid/inference/paddle_fluid.map
浏览文件 @
db5e3dd7
{
{
global:
global:
*paddle*;
*paddle*;
*Pass*;
local:
local:
*;
*;
};
};
paddle/fluid/operators/detection/bbox_util.h
0 → 100644
浏览文件 @
db5e3dd7
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
namespace
paddle
{
namespace
operators
{
/*
* transform that computes target bounding-box regression deltas
* given proposal boxes and ground-truth boxes.
*/
template
<
typename
T
>
inline
void
BoxToDelta
(
const
int
box_num
,
const
framework
::
Tensor
&
ex_boxes
,
const
framework
::
Tensor
&
gt_boxes
,
const
T
*
weights
,
const
bool
normalized
,
framework
::
Tensor
*
box_delta
)
{
auto
ex_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
ex_boxes
);
auto
gt_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
gt_boxes
);
auto
trg
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
*
box_delta
);
T
ex_w
,
ex_h
,
ex_ctr_x
,
ex_ctr_y
,
gt_w
,
gt_h
,
gt_ctr_x
,
gt_ctr_y
;
for
(
int64_t
i
=
0
;
i
<
box_num
;
++
i
)
{
ex_w
=
ex_boxes_et
(
i
,
2
)
-
ex_boxes_et
(
i
,
0
)
+
(
normalized
==
false
);
ex_h
=
ex_boxes_et
(
i
,
3
)
-
ex_boxes_et
(
i
,
1
)
+
(
normalized
==
false
);
ex_ctr_x
=
ex_boxes_et
(
i
,
0
)
+
0.5
*
ex_w
;
ex_ctr_y
=
ex_boxes_et
(
i
,
1
)
+
0.5
*
ex_h
;
gt_w
=
gt_boxes_et
(
i
,
2
)
-
gt_boxes_et
(
i
,
0
)
+
(
normalized
==
false
);
gt_h
=
gt_boxes_et
(
i
,
3
)
-
gt_boxes_et
(
i
,
1
)
+
(
normalized
==
false
);
gt_ctr_x
=
gt_boxes_et
(
i
,
0
)
+
0.5
*
gt_w
;
gt_ctr_y
=
gt_boxes_et
(
i
,
1
)
+
0.5
*
gt_h
;
trg
(
i
,
0
)
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
;
trg
(
i
,
1
)
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
;
trg
(
i
,
2
)
=
std
::
log
(
gt_w
/
ex_w
);
trg
(
i
,
3
)
=
std
::
log
(
gt_h
/
ex_h
);
if
(
weights
)
{
trg
(
i
,
0
)
=
trg
(
i
,
0
)
/
weights
[
0
];
trg
(
i
,
1
)
=
trg
(
i
,
1
)
/
weights
[
1
];
trg
(
i
,
2
)
=
trg
(
i
,
2
)
/
weights
[
2
];
trg
(
i
,
3
)
=
trg
(
i
,
3
)
/
weights
[
3
];
}
}
}
template
<
typename
T
>
void
Gather
(
const
T
*
in
,
const
int
in_stride
,
const
int
*
index
,
const
int
num
,
T
*
out
)
{
const
int
stride_bytes
=
in_stride
*
sizeof
(
T
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
id
=
index
[
i
];
memcpy
(
out
+
i
*
in_stride
,
in
+
id
*
in_stride
,
stride_bytes
);
}
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
db5e3dd7
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
...
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
...
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
}
}
}
}
template
<
typename
T
>
void
BoxToDelta
(
int
box_num
,
const
Tensor
&
ex_boxes
,
const
Tensor
&
gt_boxes
,
const
std
::
vector
<
float
>&
weights
,
Tensor
*
box_delta
)
{
auto
ex_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
ex_boxes
);
auto
gt_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
gt_boxes
);
auto
box_delta_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
*
box_delta
);
T
ex_w
,
ex_h
,
ex_ctr_x
,
ex_ctr_y
,
gt_w
,
gt_h
,
gt_ctr_x
,
gt_ctr_y
;
for
(
int64_t
i
=
0
;
i
<
box_num
;
++
i
)
{
ex_w
=
ex_boxes_et
(
i
,
2
)
-
ex_boxes_et
(
i
,
0
)
+
1
;
ex_h
=
ex_boxes_et
(
i
,
3
)
-
ex_boxes_et
(
i
,
1
)
+
1
;
ex_ctr_x
=
ex_boxes_et
(
i
,
0
)
+
0.5
*
ex_w
;
ex_ctr_y
=
ex_boxes_et
(
i
,
1
)
+
0.5
*
ex_h
;
gt_w
=
gt_boxes_et
(
i
,
2
)
-
gt_boxes_et
(
i
,
0
)
+
1
;
gt_h
=
gt_boxes_et
(
i
,
3
)
-
gt_boxes_et
(
i
,
1
)
+
1
;
gt_ctr_x
=
gt_boxes_et
(
i
,
0
)
+
0.5
*
gt_w
;
gt_ctr_y
=
gt_boxes_et
(
i
,
1
)
+
0.5
*
gt_h
;
box_delta_et
(
i
,
0
)
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
];
box_delta_et
(
i
,
1
)
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
];
box_delta_et
(
i
,
2
)
=
log
(
gt_w
/
ex_w
)
/
ex_w
/
weights
[
2
];
box_delta_et
(
i
,
3
)
=
log
(
gt_h
/
ex_h
)
/
ex_h
/
weights
[
3
];
}
}
template
<
typename
T
>
template
<
typename
T
>
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
platform
::
CPUDeviceContext
&
context
,
Tensor
*
iou
,
const
platform
::
CPUDeviceContext
&
context
,
Tensor
*
iou
,
...
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
...
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
Tensor
*
sampled_labels
,
Tensor
*
sampled_gts
)
{
Tensor
*
sampled_labels
,
Tensor
*
sampled_gts
)
{
int
fg_num
=
fg_inds
.
size
();
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
gt_num
=
fg_num
+
bg_num
;
Tensor
fg_inds_t
,
bg_inds_t
,
gt_box_inds_t
,
gt_label_inds_t
;
Tensor
fg_inds_t
,
bg_inds_t
,
gt_box_inds_t
,
gt_label_inds_t
;
int
*
fg_inds_data
=
fg_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
int
*
fg_inds_data
=
fg_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
int
*
bg_inds_data
=
bg_inds_t
.
mutable_data
<
int
>
({
bg_num
},
context
.
GetPlace
());
int
*
bg_inds_data
=
bg_inds_t
.
mutable_data
<
int
>
({
bg_num
},
context
.
GetPlace
());
int
*
gt_box_inds_data
=
int
*
gt_box_inds_data
=
gt_box_inds_t
.
mutable_data
<
int
>
({
gt
_num
},
context
.
GetPlace
());
gt_box_inds_t
.
mutable_data
<
int
>
({
fg
_num
},
context
.
GetPlace
());
int
*
gt_label_inds_data
=
int
*
gt_label_inds_data
=
gt_label_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
gt_label_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
std
::
copy
(
fg_inds
.
begin
(),
fg_inds
.
end
(),
fg_inds_data
);
std
::
copy
(
fg_inds
.
begin
(),
fg_inds
.
end
(),
fg_inds_data
);
...
@@ -303,18 +278,20 @@ std::vector<Tensor> SampleRoisForOneImage(
...
@@ -303,18 +278,20 @@ std::vector<Tensor> SampleRoisForOneImage(
// Gather boxes and labels
// Gather boxes and labels
Tensor
sampled_boxes
,
sampled_labels
,
sampled_gts
;
Tensor
sampled_boxes
,
sampled_labels
,
sampled_gts
;
int
boxes_num
=
fg_inds
.
size
()
+
bg_inds
.
size
();
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
boxes_num
=
fg_num
+
bg_num
;
framework
::
DDim
bbox_dim
({
boxes_num
,
kBoxDim
});
framework
::
DDim
bbox_dim
({
boxes_num
,
kBoxDim
});
sampled_boxes
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_boxes
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_labels
.
mutable_data
<
int
>
({
boxes_num
},
context
.
GetPlace
());
sampled_labels
.
mutable_data
<
int
>
({
boxes_num
},
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
{
fg_num
,
kBoxDim
}
,
context
.
GetPlace
());
GatherBoxesLabels
<
T
>
(
context
,
boxes
,
*
gt_boxes
,
*
gt_classes
,
fg_inds
,
bg_inds
,
GatherBoxesLabels
<
T
>
(
context
,
boxes
,
*
gt_boxes
,
*
gt_classes
,
fg_inds
,
bg_inds
,
gt_inds
,
&
sampled_boxes
,
&
sampled_labels
,
&
sampled_gts
);
gt_inds
,
&
sampled_boxes
,
&
sampled_labels
,
&
sampled_gts
);
// Compute targets
// Compute targets
Tensor
bbox_targets_single
;
Tensor
bbox_targets_single
;
bbox_targets_single
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
bbox_targets_single
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
BoxToDelta
<
T
>
(
boxes_num
,
sampled_boxes
,
sampled_gts
,
bbox_reg_weights
,
BoxToDelta
<
T
>
(
fg_num
,
sampled_boxes
,
sampled_gts
,
nullptr
,
false
,
&
bbox_targets_single
);
&
bbox_targets_single
);
// Scale rois
// Scale rois
...
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
...
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
auto
rpn_rois_lod
=
rpn_rois
->
lod
().
back
();
auto
rpn_rois_lod
=
rpn_rois
->
lod
().
back
();
auto
gt_classes_lod
=
gt_classes
->
lod
().
back
();
auto
gt_classes_lod
=
gt_classes
->
lod
().
back
();
auto
gt_boxes_lod
=
gt_boxes
->
lod
().
back
();
auto
gt_boxes_lod
=
gt_boxes
->
lod
().
back
();
for
(
size_
t
i
=
0
;
i
<
n
;
++
i
)
{
for
(
in
t
i
=
0
;
i
<
n
;
++
i
)
{
Tensor
rpn_rois_slice
=
Tensor
rpn_rois_slice
=
rpn_rois
->
Slice
(
rpn_rois_lod
[
i
],
rpn_rois_lod
[
i
+
1
]);
rpn_rois
->
Slice
(
rpn_rois_lod
[
i
],
rpn_rois_lod
[
i
+
1
]);
Tensor
gt_classes_slice
=
Tensor
gt_classes_slice
=
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
db5e3dd7
...
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
context
.
GetPlace
());
context
.
GetPlace
());
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
()
/
4
,
1
},
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
(),
1
},
context
.
GetPlace
());
context
.
GetPlace
());
Tensor
bbox_deltas_swap
,
scores_swap
;
Tensor
bbox_deltas_swap
,
scores_swap
;
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
...
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
CPUGather
<
T
>
(
ctx
,
proposals
,
keep
,
&
bbox_sel
);
CPUGather
<
T
>
(
ctx
,
proposals
,
keep
,
&
bbox_sel
);
CPUGather
<
T
>
(
ctx
,
scores_sel
,
keep
,
&
scores_filter
);
CPUGather
<
T
>
(
ctx
,
scores_sel
,
keep
,
&
scores_filter
);
if
(
nms_thresh
<=
0
)
{
if
(
nms_thresh
<=
0
)
{
return
std
::
make_pair
(
bbox_sel
,
scores_
sel
);
return
std
::
make_pair
(
bbox_sel
,
scores_
filter
);
}
}
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
db5e3dd7
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <random>
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
...
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
auto
in_dims
=
ctx
->
GetInputDim
(
"DistMat"
);
auto
in_dims
=
ctx
->
GetInputDim
(
"DistMat"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
"The rank of Input(DistMat) must be 2."
);
"The rank of Input(DistMat) must be 2."
);
ctx
->
SetOutputDim
(
"LocationIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"ScoreIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"TargetLabel"
,
{
-
1
,
1
});
ctx
->
SetOutputDim
(
"TargetBBox"
,
{
-
1
,
4
});
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"DistMat"
)
->
type
()),
platform
::
CPUPlace
());
}
}
};
};
template
<
typename
T
>
template
<
typename
T
>
class
RpnTargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
class
RpnTargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor_t
=
context
.
Input
<
Tensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_bbox_t
=
context
.
Input
<
Tensor
>
(
"GtBox"
);
auto
*
dist_t
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
auto
*
loc_index_t
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
auto
*
score_index_t
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox_t
=
context
.
Output
<
Tensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl_t
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
lod
=
dist_t
->
lod
().
back
();
int64_t
batch_num
=
static_cast
<
int64_t
>
(
lod
.
size
()
-
1
);
int64_t
anchor_num
=
dist_t
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
anchor_num
,
anchor_t
->
dims
()[
0
]);
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
int
fg_num_per_batch
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
max_num
=
batch_num
*
anchor_num
;
auto
place
=
context
.
GetPlace
();
tgt_bbox_t
->
mutable_data
<
T
>
({
max_num
,
4
},
place
);
auto
*
loc_index
=
loc_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
auto
*
score_index
=
score_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
Tensor
tmp_tgt_lbl
;
auto
*
tmp_lbl_data
=
tmp_tgt_lbl
.
mutable_data
<
int64_t
>
({
max_num
},
place
);
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
&
tmp_tgt_lbl
,
static_cast
<
int64_t
>
(
-
1
));
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
int
fg_num
=
0
;
int
bg_num
=
0
;
for
(
int
i
=
0
;
i
<
batch_num
;
++
i
)
{
Tensor
dist
=
dist_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
Tensor
gt_bbox
=
gt_bbox_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
auto
fg_bg_gt
=
SampleFgBgGt
(
dev_ctx
,
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num_per_batch
,
engine
,
tmp_lbl_data
+
i
*
anchor_num
);
int
cur_fg_num
=
fg_bg_gt
[
0
].
size
();
int
cur_bg_num
=
fg_bg_gt
[
1
].
size
();
std
::
transform
(
fg_bg_gt
[
0
].
begin
(),
fg_bg_gt
[
0
].
end
(),
loc_index
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
memcpy
(
score_index
,
loc_index
,
cur_fg_num
*
sizeof
(
int
));
std
::
transform
(
fg_bg_gt
[
1
].
begin
(),
fg_bg_gt
[
1
].
end
(),
score_index
+
cur_fg_num
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
// get target bbox deltas
if
(
cur_fg_num
)
{
Tensor
fg_gt
;
T
*
gt_data
=
fg_gt
.
mutable_data
<
T
>
({
cur_fg_num
,
4
},
place
);
Tensor
tgt_bbox
=
tgt_bbox_t
->
Slice
(
fg_num
,
fg_num
+
cur_fg_num
);
T
*
tgt_data
=
tgt_bbox
.
data
<
T
>
();
Gather
<
T
>
(
anchor_t
->
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
0
][
0
]),
cur_fg_num
,
tgt_data
);
Gather
<
T
>
(
gt_bbox
.
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
2
][
0
]),
cur_fg_num
,
gt_data
);
BoxToDelta
<
T
>
(
cur_fg_num
,
tgt_bbox
,
fg_gt
,
nullptr
,
false
,
&
tgt_bbox
);
}
loc_index
+=
cur_fg_num
;
score_index
+=
cur_fg_num
+
cur_bg_num
;
fg_num
+=
cur_fg_num
;
bg_num
+=
cur_bg_num
;
}
int
lbl_num
=
fg_num
+
bg_num
;
PADDLE_ENFORCE_LE
(
fg_num
,
max_num
);
PADDLE_ENFORCE_LE
(
lbl_num
,
max_num
);
tgt_bbox_t
->
Resize
({
fg_num
,
4
});
loc_index_t
->
Resize
({
fg_num
});
score_index_t
->
Resize
({
lbl_num
});
auto
*
lbl_data
=
tgt_lbl_t
->
mutable_data
<
int64_t
>
({
lbl_num
,
1
},
place
);
Gather
<
int64_t
>
(
tmp_lbl_data
,
1
,
score_index_t
->
data
<
int
>
(),
lbl_num
,
lbl_data
);
}
private:
void
ScoreAssign
(
const
T
*
dist_data
,
const
Tensor
&
anchor_to_gt_max
,
void
ScoreAssign
(
const
T
*
dist_data
,
const
Tensor
&
anchor_to_gt_max
,
const
int
row
,
const
int
col
,
const
float
pos_threshold
,
const
int
row
,
const
int
col
,
const
float
pos_threshold
,
const
float
neg_threshold
,
int64_t
*
target_label
_data
,
const
float
neg_threshold
,
int64_t
*
target_label
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
)
const
{
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
)
const
{
int
fg_offset
=
fg_inds
->
size
();
float
epsilon
=
0.0001
;
int
bg_offset
=
bg_inds
->
size
();
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
const
T
*
v
=
dist_data
+
i
*
col
;
const
T
*
v
=
dist_data
+
i
*
col
;
T
max
_dist
=
*
std
::
max_element
(
v
,
v
+
col
);
T
max
=
*
std
::
max_element
(
v
,
v
+
col
);
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
T
val
=
dist_data
[
i
*
col
+
j
];
if
(
std
::
abs
(
max
-
v
[
j
])
<
epsilon
)
{
if
(
val
==
max_dist
)
target_label_data
[
j
]
=
1
;
target_label
[
j
]
=
1
;
}
}
}
}
}
// Pick the fg/bg and count the number
// Pick the fg/bg
const
T
*
anchor_to_gt_max_data
=
anchor_to_gt_max
.
data
<
T
>
();
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
if
(
anchor_to_gt_max
.
data
<
T
>
()[
j
]
>
pos_threshold
)
{
if
(
anchor_to_gt_max
_data
[
j
]
>=
pos_threshold
)
{
target_label
_data
[
j
]
=
1
;
target_label
[
j
]
=
1
;
}
else
if
(
anchor_to_gt_max
.
data
<
T
>
()
[
j
]
<
neg_threshold
)
{
}
else
if
(
anchor_to_gt_max
_data
[
j
]
<
neg_threshold
)
{
target_label
_data
[
j
]
=
0
;
target_label
[
j
]
=
0
;
}
}
if
(
target_label
_data
[
j
]
==
1
)
{
if
(
target_label
[
j
]
==
1
)
{
fg_inds
->
push_back
(
fg_offset
+
j
);
fg_inds
->
push_back
(
j
);
}
else
if
(
target_label
_data
[
j
]
==
0
)
{
}
else
if
(
target_label
[
j
]
==
0
)
{
bg_inds
->
push_back
(
bg_offset
+
j
);
bg_inds
->
push_back
(
j
);
}
}
}
}
}
}
void
ReservoirSampling
(
const
int
num
,
const
int
offset
,
void
ReservoirSampling
(
const
int
num
,
std
::
minstd_rand
engine
,
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
inds
)
const
{
std
::
vector
<
int
>*
inds
)
const
{
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
const
int64_t
size
=
static_cast
<
int64_t
>
(
inds
->
size
()
-
offset
);
size_t
len
=
inds
->
size
(
);
if
(
size
>
num
)
{
if
(
len
>
static_cast
<
size_t
>
(
num
)
)
{
for
(
int64_t
i
=
num
;
i
<
size
;
++
i
)
{
for
(
size_t
i
=
num
;
i
<
len
;
++
i
)
{
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
if
(
rng_ind
<
num
)
if
(
rng_ind
<
num
)
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
,
inds
->
begin
()
+
i
);
inds
->
begin
()
+
i
+
offset
);
}
}
inds
->
resize
(
num
);
}
}
}
}
void
RpnTargetAssign
(
const
framework
::
ExecutionContext
&
ctx
,
// std::vector<std::vector<int>> RpnTargetAssign(
const
Tensor
&
dist
,
const
float
pos_threshold
,
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
float
neg_threshold
,
const
int
rpn_batch_size
,
const
platform
::
CPUDeviceContext
&
ctx
,
const
Tensor
&
dist
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
const
float
pos_threshold
,
const
float
neg_threshold
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
,
const
int
rpn_batch_size
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
int64_t
*
target_label_data
)
const
{
int64_t
*
target_label
)
const
{
auto
*
dist_data
=
dist
.
data
<
T
>
();
auto
*
dist_data
=
dist
.
data
<
T
>
();
int64_t
row
=
dist
.
dims
()[
0
];
int
row
=
dist
.
dims
()[
0
];
int64_t
col
=
dist
.
dims
()[
1
];
int
col
=
dist
.
dims
()[
1
];
int
fg_offset
=
fg_inds
->
size
();
int
bg_offset
=
bg_inds
->
size
();
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
vector
<
int
>
gt_inds
;
// Calculate the max IoU between anchors and gt boxes
// Calculate the max IoU between anchors and gt boxes
Tensor
anchor_to_gt_max
;
// Map from anchor to gt box that has highest overlap
anchor_to_gt_max
.
mutable_data
<
T
>
(
auto
place
=
ctx
.
GetPlace
();
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
col
),
1
}),
Tensor
anchor_to_gt_max
,
anchor_to_gt_argmax
;
platform
::
CPUPlace
());
anchor_to_gt_max
.
mutable_data
<
T
>
({
col
},
place
);
auto
&
place
=
*
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>()
int
*
argmax
=
anchor_to_gt_argmax
.
mutable_data
<
int
>
({
col
},
place
);
.
eigen_device
();
auto
x
=
EigenMatrix
<
T
>::
From
(
dist
);
auto
x
=
framework
::
EigenMatrix
<
T
>::
From
(
dist
);
auto
x_col_max
=
EigenMatrix
<
T
>::
From
(
anchor_to_gt_max
);
auto
x_col_max
=
framework
::
EigenVector
<
T
>::
Flatten
(
anchor_to_gt_max
);
x_col_max
.
device
(
place
)
=
auto
x_col_argmax
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
))
framework
::
EigenVector
<
int
>::
Flatten
(
anchor_to_gt_argmax
);
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
static_cast
<
int64_t
>
(
col
),
1
));
x_col_max
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
x_col_argmax
=
x
.
argmax
(
0
).
template
cast
<
int
>();
// Follow the Faster RCNN's implementation
// Follow the Faster RCNN's implementation
ScoreAssign
(
dist_data
,
anchor_to_gt_max
,
row
,
col
,
pos_threshold
,
ScoreAssign
(
dist_data
,
anchor_to_gt_max
,
row
,
col
,
pos_threshold
,
neg_threshold
,
target_label
_data
,
fg_inds
,
bg_inds
);
neg_threshold
,
target_label
,
&
fg_inds
,
&
bg_inds
);
// Reservoir Sampling
// Reservoir Sampling
ReservoirSampling
(
fg_num
,
fg_offset
,
engine
,
fg_inds
);
ReservoirSampling
(
fg_num
,
engine
,
&
fg_inds
);
int
bg_num
=
rpn_batch_size
-
(
fg_inds
->
size
()
-
fg_offset
);
int
fg_num2
=
static_cast
<
int
>
(
fg_inds
.
size
()
);
ReservoirSampling
(
bg_num
,
bg_offset
,
engine
,
bg_inds
)
;
int
bg_num
=
rpn_batch_size
-
fg_num2
;
}
ReservoirSampling
(
bg_num
,
engine
,
&
bg_inds
);
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
gt_inds
.
reserve
(
fg_num2
);
auto
*
dist
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
for
(
int
i
=
0
;
i
<
fg_num2
;
++
i
)
{
auto
*
loc_index
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
gt_inds
.
emplace_back
(
argmax
[
fg_inds
[
i
]]);
auto
*
score_index
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_lbl
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
col
=
dist
->
dims
()[
1
];
int64_t
n
=
dist
->
lod
().
size
()
==
0UL
?
1
:
static_cast
<
int64_t
>
(
dist
->
lod
().
back
().
size
()
-
1
);
if
(
dist
->
lod
().
size
())
{
PADDLE_ENFORCE_EQ
(
dist
->
lod
().
size
(),
1UL
,
"Only support 1 level of LoD."
);
}
}
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
std
::
vector
<
std
::
vector
<
int
>>
fg_bg_gt
;
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
fg_bg_gt
.
emplace_back
(
fg_inds
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
fg_bg_gt
.
emplace_back
(
bg_inds
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
fg_bg_gt
.
emplace_back
(
gt_inds
);
int
fg_num
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
*
target_label_data
=
return
fg_bg_gt
;
tgt_lbl
->
mutable_data
<
int64_t
>
({
n
*
col
,
1
},
context
.
GetPlace
());
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
tgt_lbl
,
static_cast
<
int
>
(
-
1
));
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
if
(
n
==
1
)
{
RpnTargetAssign
(
context
,
*
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
);
}
else
{
auto
lod
=
dist
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
Tensor
one_ins
=
dist
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
RpnTargetAssign
(
context
,
one_ins
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
+
i
*
col
);
}
}
int
*
loc_index_data
=
loc_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
())},
context
.
GetPlace
());
int
*
score_index_data
=
score_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
()
+
bg_inds
.
size
())},
context
.
GetPlace
());
memcpy
(
loc_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
+
fg_inds
.
size
(),
reinterpret_cast
<
int
*>
(
&
bg_inds
[
0
]),
bg_inds
.
size
()
*
sizeof
(
int
));
}
}
};
};
class
RpnTargetAssignOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
RpnTargetAssignOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBox"
,
"(LoDTensor) input groud-truth bbox with shape [K, 4]."
);
AddInput
(
AddInput
(
"DistMat"
,
"DistMat"
,
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
...
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex"
,
"ScoreIndex"
,
"(Tensor), The indexes of foreground and background anchors in all "
"(Tensor), The indexes of foreground and background anchors in all "
"RPN anchors(The rest anchors are ignored). The shape of the "
"RPN anchors(The rest anchors are ignored). The shape of the "
"ScoreIndex is [F + B], F and B depend on the value of input "
"ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
"tensor and attributes."
);
" number."
);
AddOutput
(
"TargetLabel"
,
AddOutput
(
"TargetBBox"
,
"(Tensor<int64_t>), The target bbox deltas with shape "
"[F, 4], F is the sampled foreground number."
);
AddOutput
(
"TargetLabel"
,
"(Tensor<int64_t>), The target labels of each anchor with shape "
"(Tensor<int64_t>), The target labels of each anchor with shape "
"[K * M, 1], "
"[F + B, 1], F and B are sampled foreground and backgroud number."
);
"K and M is the same as they are in DistMat."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator can be, for given the IoU between the ground truth bboxes and the
This operator can be, for given the IoU between the ground truth bboxes and the
anchors, to assign classification and regression targets to each prediction.
anchors, to assign classification and regression targets to each prediction.
...
...
paddle/fluid/operators/gru_unit_op.h
浏览文件 @
db5e3dd7
...
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
...
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
gate_data
,
frame_size
*
3
);
gate_data
,
frame_size
*
3
);
// calculate activited gate
// calculate activited gate
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
));
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
));
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
));
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
));
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
...
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
...
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
1
,
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
1
,
gate_data
+
frame_size
*
2
,
frame_size
*
3
);
gate_data
+
frame_size
*
2
,
frame_size
*
3
);
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
));
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
));
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
...
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
...
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
int
batch_size
=
input
->
dims
()[
0
];
int
batch_size
=
input
->
dims
()[
0
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// backward for unactivated update gate
// backward for unactivated update gate
...
...
paddle/fluid/operators/roi_pool_op.cu
浏览文件 @
db5e3dd7
...
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
...
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUROIPoolForward
(
__global__
void
GPUROIPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
*
roi_batch_id_data
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
...
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
...
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
int
c
=
(
i
/
pooled_width
/
pooled_height
)
%
channels
;
int
c
=
(
i
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
i
/
pooled_width
/
pooled_height
/
channels
;
int
n
=
i
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
offset_input_rois
[
0
]
*
spatial_scale
);
int
roi_start_w
=
round
(
offset_input_rois
[
0
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
...
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
...
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUROIPoolBackward
(
__global__
void
GPUROIPoolBackward
(
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
output_grad
,
const
int64_t
*
argmax_data
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int64_t
*
argmax_data
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
...
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
...
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
GPUROIPoolForward
<
GPUROIPoolForward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
int64_t
>
(),
spatial_scale
,
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
height
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
roi_batch_id_list_gpu
.
data
<
int
>
(),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
}
}
...
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
if
(
output_grad_size
>
0
)
{
if
(
output_grad_size
>
0
)
{
GPUROIPoolBackward
<
GPUROIPoolBackward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
int64_t
>
(),
out_grad
->
data
<
T
>
(),
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
argmax
->
data
<
int64_t
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
argmax
->
data
<
int64_t
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
roi_batch_id_list_gpu
.
data
<
int
>
(),
...
...
paddle/fluid/operators/roi_pool_op.h
浏览文件 @
db5e3dd7
...
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
...
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
*
argmax_data
=
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
int64_t
*
argmax_data
=
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
rois_data
[
0
]
*
spatial_scale
);
int
roi_start_w
=
round
(
rois_data
[
0
]
*
spatial_scale
);
...
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
}
}
}
}
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
int64_t
*
argmax_data
=
argmax
->
data
<
int64_t
>
();
const
int64_t
*
argmax_data
=
argmax
->
data
<
int64_t
>
();
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
db5e3dd7
...
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
...
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
"""
"""
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
# 1. Compute the regression target bboxes
# Compute overlaps between the prior boxes and the gt boxes overlaps
target_bbox
=
box_coder
(
prior_box
=
anchor_box
,
prior_box_var
=
anchor_var
,
target_box
=
gt_box
,
code_type
=
'encode_center_size'
,
box_normalized
=
False
)
# 2. Compute overlaps between the prior boxes and the gt boxes overlaps
iou
=
iou_similarity
(
x
=
gt_box
,
y
=
anchor_box
)
iou
=
iou_similarity
(
x
=
gt_box
,
y
=
anchor_box
)
# 3. Assign target label to anchors
# Assign target label to anchors
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
'int64'
)
target_bbox
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"rpn_target_assign"
,
type
=
"rpn_target_assign"
,
inputs
=
{
'DistMat'
:
iou
},
inputs
=
{
'Anchor'
:
anchor_box
,
'GtBox'
:
gt_box
,
'DistMat'
:
iou
},
outputs
=
{
outputs
=
{
'LocationIndex'
:
loc_index
,
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
target_label
'TargetLabel'
:
target_label
,
'TargetBBox'
:
target_bbox
,
},
},
attrs
=
{
attrs
=
{
'rpn_batch_size_per_im'
:
rpn_batch_size_per_im
,
'rpn_batch_size_per_im'
:
rpn_batch_size_per_im
,
...
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
...
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
'fg_fraction'
:
fg_fraction
'fg_fraction'
:
fg_fraction
})
})
# 4. Reshape and gather the target entry
loc_index
.
stop_gradient
=
True
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
2
))
score_index
.
stop_gradient
=
True
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
target_label
.
stop_gradient
=
True
target_label
=
nn
.
reshape
(
x
=
target_label
,
shape
=
(
-
1
,
1
))
target_bbox
.
stop_gradient
=
True
target_bbox
=
nn
.
reshape
(
x
=
target_bbox
,
shape
=
(
-
1
,
4
))
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
1
))
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
predicted_scores
=
nn
.
gather
(
scores
,
score_index
)
predicted_scores
=
nn
.
gather
(
scores
,
score_index
)
predicted_location
=
nn
.
gather
(
loc
,
loc_index
)
predicted_location
=
nn
.
gather
(
loc
,
loc_index
)
target_label
=
nn
.
gather
(
target_label
,
score_index
)
target_bbox
=
nn
.
gather
(
target_bbox
,
loc_index
)
return
predicted_scores
,
predicted_location
,
target_label
,
target_bbox
return
predicted_scores
,
predicted_location
,
target_label
,
target_bbox
...
...
python/paddle/fluid/tests/test_detection.py
浏览文件 @
db5e3dd7
...
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
...
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
gt_box
=
layers
.
data
(
gt_box
=
layers
.
data
(
name
=
'gt_box'
,
shape
=
[
4
],
lod_level
=
1
,
dtype
=
'float32'
)
name
=
'gt_box'
,
shape
=
[
4
],
lod_level
=
1
,
dtype
=
'float32'
)
pred
icted_scores
,
predicted_location
,
target_label
,
targe
t_bbox
=
layers
.
rpn_target_assign
(
pred
_scores
,
pred_loc
,
tgt_lbl
,
tg
t_bbox
=
layers
.
rpn_target_assign
(
loc
=
loc
,
loc
=
loc
,
scores
=
scores
,
scores
=
scores
,
anchor_box
=
anchor_box
,
anchor_box
=
anchor_box
,
...
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
...
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
rpn_positive_overlap
=
0.7
,
rpn_positive_overlap
=
0.7
,
rpn_negative_overlap
=
0.3
)
rpn_negative_overlap
=
0.3
)
self
.
assertIsNotNone
(
predicted_scores
)
self
.
assertIsNotNone
(
pred_scores
)
self
.
assertIsNotNone
(
predicted_location
)
self
.
assertIsNotNone
(
pred_loc
)
self
.
assertIsNotNone
(
target_label
)
self
.
assertIsNotNone
(
tgt_lbl
)
self
.
assertIsNotNone
(
target_bbox
)
self
.
assertIsNotNone
(
tgt_bbox
)
assert
predicted_scores
.
shape
[
1
]
==
2
assert
pred_scores
.
shape
[
1
]
==
1
assert
predicted_location
.
shape
[
1
]
==
4
assert
pred_loc
.
shape
[
1
]
==
4
assert
predicted_location
.
shape
[
1
]
==
target_bbox
.
shape
[
1
]
assert
pred_loc
.
shape
[
1
]
==
tgt_bbox
.
shape
[
1
]
print
(
str
(
program
))
class
TestGenerateProposals
(
unittest
.
TestCase
):
class
TestGenerateProposals
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_fusion_gru_op.py
浏览文件 @
db5e3dd7
...
@@ -37,7 +37,7 @@ def fusion_gru(
...
@@ -37,7 +37,7 @@ def fusion_gru(
h0
,
h0
,
wh
,
wh
,
np
.
zeros
(
np
.
zeros
(
(
1
,
wh
.
shape
[
1
]),
dtype
=
'float
64
'
),
(
1
,
wh
.
shape
[
1
]),
dtype
=
'float
32
'
),
is_reverse
,
is_reverse
,
act_state
,
act_state
,
act_gate
)
act_gate
)
...
@@ -62,15 +62,15 @@ class TestFusionGRUOp(OpTest):
...
@@ -62,15 +62,15 @@ class TestFusionGRUOp(OpTest):
T
=
sum
(
self
.
lod
[
0
])
T
=
sum
(
self
.
lod
[
0
])
N
=
len
(
self
.
lod
[
0
])
N
=
len
(
self
.
lod
[
0
])
x
=
np
.
random
.
rand
(
T
,
self
.
M
).
astype
(
'float
64
'
)
x
=
np
.
random
.
rand
(
T
,
self
.
M
).
astype
(
'float
32
'
)
wx
=
np
.
random
.
rand
(
self
.
M
,
3
*
self
.
D
).
astype
(
'float
64
'
)
wx
=
np
.
random
.
rand
(
self
.
M
,
3
*
self
.
D
).
astype
(
'float
32
'
)
wh
=
np
.
random
.
rand
(
self
.
D
,
3
*
self
.
D
).
astype
(
'float
64
'
)
wh
=
np
.
random
.
rand
(
self
.
D
,
3
*
self
.
D
).
astype
(
'float
32
'
)
bias
=
np
.
random
.
rand
(
bias
=
np
.
random
.
rand
(
1
,
3
*
self
.
D
).
astype
(
'float
64
'
)
if
self
.
with_bias
else
np
.
zeros
(
1
,
3
*
self
.
D
).
astype
(
'float
32
'
)
if
self
.
with_bias
else
np
.
zeros
(
(
1
,
3
*
self
.
D
),
dtype
=
'float
64
'
)
(
1
,
3
*
self
.
D
),
dtype
=
'float
32
'
)
h0
=
np
.
random
.
rand
(
h0
=
np
.
random
.
rand
(
N
,
self
.
D
).
astype
(
'float
64
'
)
if
self
.
with_h0
else
np
.
zeros
(
N
,
self
.
D
).
astype
(
'float
32
'
)
if
self
.
with_h0
else
np
.
zeros
(
(
N
,
self
.
D
),
dtype
=
'float
64
'
)
(
N
,
self
.
D
),
dtype
=
'float
32
'
)
_
,
_
,
_
,
hidden
=
fusion_gru
(
_
,
_
,
_
,
hidden
=
fusion_gru
(
x
,
self
.
lod
,
h0
,
wx
,
wh
,
bias
,
self
.
is_reverse
,
x
,
self
.
lod
,
h0
,
wx
,
wh
,
bias
,
self
.
is_reverse
,
...
@@ -93,7 +93,9 @@ class TestFusionGRUOp(OpTest):
...
@@ -93,7 +93,9 @@ class TestFusionGRUOp(OpTest):
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-8
)
for
use_seq
in
{
True
,
False
}:
self
.
attrs
[
'use_seq'
]
=
use_seq
self
.
check_output
()
class
TestFusionGRUOpNoInitial
(
TestFusionGRUOp
):
class
TestFusionGRUOpNoInitial
(
TestFusionGRUOp
):
...
...
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
浏览文件 @
db5e3dd7
...
@@ -114,6 +114,8 @@ class TestFusionLSTMOp(OpTest):
...
@@ -114,6 +114,8 @@ class TestFusionLSTMOp(OpTest):
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
for
use_seq
in
{
True
,
False
}:
self
.
attrs
[
'use_seq'
]
=
use_seq
self
.
check_output
()
self
.
check_output
()
...
...
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
浏览文件 @
db5e3dd7
...
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
...
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
dx
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
]
dx
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
]
dy
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
]
dy
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
ex_w
/
weights
[
2
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
weights
[
2
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
ex_h
/
weights
[
3
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
weights
[
3
]
targets
=
np
.
vstack
([
dx
,
dy
,
dw
,
dh
]).
transpose
()
targets
=
np
.
vstack
([
dx
,
dy
,
dw
,
dh
]).
transpose
()
return
targets
return
targets
...
...
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
浏览文件 @
db5e3dd7
...
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
...
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
for
i
in
range
(
self
.
rois_num
):
for
i
in
range
(
self
.
rois_num
):
roi
=
self
.
rois
[
i
]
roi
=
self
.
rois
[
i
]
roi_batch_id
=
roi
[
0
]
roi_batch_id
=
int
(
roi
[
0
])
roi_start_w
=
int
(
cpt
.
round
(
roi
[
1
]
*
self
.
spatial_scale
))
roi_start_w
=
int
(
cpt
.
round
(
roi
[
1
]
*
self
.
spatial_scale
))
roi_start_h
=
int
(
cpt
.
round
(
roi
[
2
]
*
self
.
spatial_scale
))
roi_start_h
=
int
(
cpt
.
round
(
roi
[
2
]
*
self
.
spatial_scale
))
roi_end_w
=
int
(
cpt
.
round
(
roi
[
3
]
*
self
.
spatial_scale
))
roi_end_w
=
int
(
cpt
.
round
(
roi
[
3
]
*
self
.
spatial_scale
))
...
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
...
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
roi
=
[
bno
,
x1
,
y1
,
x2
,
y2
]
roi
=
[
bno
,
x1
,
y1
,
x2
,
y2
]
rois
.
append
(
roi
)
rois
.
append
(
roi
)
self
.
rois_num
=
len
(
rois
)
self
.
rois_num
=
len
(
rois
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
int64
"
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
float32
"
)
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"roi_pool"
self
.
op_type
=
"roi_pool"
...
...
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
浏览文件 @
db5e3dd7
...
@@ -18,12 +18,17 @@ import unittest
...
@@ -18,12 +18,17 @@ import unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
from
test_anchor_generator_op
import
anchor_generator_in_python
from
test_generate_proposal_labels
import
_generate_groundtruth
from
test_generate_proposal_labels
import
_bbox_overlaps
,
_box_to_delta
def
rpn_target_assign
(
iou
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
def
rpn_target_assign
(
gt_anchor_iou
,
rpn_batch_size_per_im
,
rpn_negative_overlap
,
fg_fraction
):
rpn_
positive_overlap
,
rpn_
negative_overlap
,
fg_fraction
):
iou
=
np
.
transpose
(
iou
)
iou
=
np
.
transpose
(
gt_anchor_
iou
)
anchor_to_gt_max
=
iou
.
max
(
axis
=
1
)
anchor_to_gt_max
=
iou
.
max
(
axis
=
1
)
anchor_to_gt_argmax
=
iou
.
argmax
(
axis
=
1
)
gt_to_anchor_argmax
=
iou
.
argmax
(
axis
=
0
)
gt_to_anchor_argmax
=
iou
.
argmax
(
axis
=
0
)
gt_to_anchor_max
=
iou
[
gt_to_anchor_argmax
,
np
.
arange
(
iou
.
shape
[
1
])]
gt_to_anchor_max
=
iou
[
gt_to_anchor_argmax
,
np
.
arange
(
iou
.
shape
[
1
])]
anchors_with_max_overlap
=
np
.
where
(
iou
==
gt_to_anchor_max
)[
0
]
anchors_with_max_overlap
=
np
.
where
(
iou
==
gt_to_anchor_max
)[
0
]
...
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
...
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
num_bg
=
rpn_batch_size_per_im
-
np
.
sum
(
tgt_lbl
==
1
)
num_bg
=
rpn_batch_size_per_im
-
np
.
sum
(
tgt_lbl
==
1
)
bg_inds
=
np
.
where
(
anchor_to_gt_max
<
rpn_negative_overlap
)[
0
]
bg_inds
=
np
.
where
(
anchor_to_gt_max
<
rpn_negative_overlap
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
if
len
(
bg_inds
)
>
num_bg
:
if
len
(
bg_inds
)
>
num_bg
:
enable_inds
=
bg_inds
[
np
.
random
.
randint
(
len
(
bg_inds
),
size
=
num_bg
)]
enable_inds
=
bg_inds
[
np
.
random
.
randint
(
len
(
bg_inds
),
size
=
num_bg
)]
tgt_lbl
[
enable_inds
]
=
0
tgt_lbl
[
enable_inds
]
=
0
bg_inds
=
np
.
where
(
tgt_lbl
==
0
)[
0
]
bg_inds
=
np
.
where
(
tgt_lbl
==
0
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
loc_index
=
fg_inds
loc_index
=
fg_inds
score_index
=
np
.
hstack
((
fg_inds
,
bg_inds
))
score_index
=
np
.
hstack
((
fg_inds
,
bg_inds
))
tgt_lbl
=
np
.
expand_dims
(
tgt_lbl
,
axis
=
1
)
tgt_lbl
=
np
.
expand_dims
(
tgt_lbl
,
axis
=
1
)
return
loc_index
,
score_index
,
tgt_lbl
gt_inds
=
anchor_to_gt_argmax
[
fg_inds
]
return
loc_index
,
score_index
,
tgt_lbl
,
gt_inds
def
get_anchor
(
n
,
c
,
h
,
w
):
input_feat
=
np
.
random
.
random
((
n
,
c
,
h
,
w
)).
astype
(
'float32'
)
anchors
,
_
=
anchor_generator_in_python
(
input_feat
=
input_feat
,
anchor_sizes
=
[
32.
,
64.
],
aspect_ratios
=
[
0.5
,
1.0
],
variances
=
[
1.0
,
1.0
,
1.0
,
1.0
],
stride
=
[
16.0
,
16.0
],
offset
=
0.5
)
return
anchors
def
rpn_blob
(
anchor
,
gt_boxes
,
iou
,
lod
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
):
loc_indexes
=
[]
score_indexes
=
[]
tmp_tgt_labels
=
[]
tgt_bboxes
=
[]
anchor_num
=
anchor
.
shape
[
0
]
batch_size
=
len
(
lod
)
-
1
for
i
in
range
(
batch_size
):
b
,
e
=
lod
[
i
],
lod
[
i
+
1
]
iou_slice
=
iou
[
b
:
e
,
:]
bboxes_slice
=
gt_boxes
[
b
:
e
,
:]
loc_idx
,
score_idx
,
tgt_lbl
,
gt_inds
=
rpn_target_assign
(
iou_slice
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
)
fg_bboxes
=
bboxes_slice
[
gt_inds
]
fg_anchors
=
anchor
[
loc_idx
]
box_deltas
=
_box_to_delta
(
fg_anchors
,
fg_bboxes
,
[
1.
,
1.
,
1.
,
1.
])
if
i
==
0
:
loc_indexes
=
loc_idx
score_indexes
=
score_idx
tmp_tgt_labels
=
tgt_lbl
tgt_bboxes
=
box_deltas
else
:
loc_indexes
=
np
.
concatenate
(
[
loc_indexes
,
loc_idx
+
i
*
anchor_num
])
score_indexes
=
np
.
concatenate
(
[
score_indexes
,
score_idx
+
i
*
anchor_num
])
tmp_tgt_labels
=
np
.
concatenate
([
tmp_tgt_labels
,
tgt_lbl
])
tgt_bboxes
=
np
.
vstack
([
tgt_bboxes
,
box_deltas
])
tgt_labels
=
tmp_tgt_labels
[
score_indexes
]
return
loc_indexes
,
score_indexes
,
tgt_bboxes
,
tgt_labels
class
TestRpnTargetAssignOp
(
OpTest
):
class
TestRpnTargetAssignOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
8
)).
astype
(
"float32"
)
n
,
c
,
h
,
w
=
2
,
4
,
14
,
14
self
.
op_type
=
"rpn_target_assign"
anchor
=
get_anchor
(
n
,
c
,
h
,
w
)
self
.
inputs
=
{
'DistMat'
:
iou
}
gt_num
=
10
self
.
attrs
=
{
anchor
=
anchor
.
reshape
(
-
1
,
4
)
'rpn_batch_size_per_im'
:
256
,
anchor_num
=
anchor
.
shape
[
0
]
'rpn_positive_overlap'
:
0.95
,
'rpn_negative_overlap'
:
0.3
,
'fg_fraction'
:
0.25
,
'fix_seed'
:
True
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
256
,
0.95
,
0.3
,
0.25
)
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
tgt_lbl
,
}
def
test_check_output
(
self
):
im_shapes
=
[[
64
,
64
],
[
64
,
64
]]
self
.
check_output
()
gt_box
,
lod
=
_generate_groundtruth
(
im_shapes
,
3
,
4
)
bbox
=
np
.
vstack
([
v
[
'boxes'
]
for
v
in
gt_box
])
iou
=
_bbox_overlaps
(
bbox
,
anchor
)
anchor
=
anchor
.
astype
(
'float32'
)
bbox
=
bbox
.
astype
(
'float32'
)
iou
=
iou
.
astype
(
'float32'
)
loc_index
,
score_index
,
tgt_bbox
,
tgt_lbl
=
rpn_blob
(
anchor
,
bbox
,
iou
,
[
0
,
4
,
8
],
25600
,
0.95
,
0.03
,
0.25
)
class
TestRpnTargetAssignOp2
(
OpTest
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
20
)).
astype
(
"float32"
)
self
.
op_type
=
"rpn_target_assign"
self
.
op_type
=
"rpn_target_assign"
self
.
inputs
=
{
'DistMat'
:
iou
}
self
.
inputs
=
{
'Anchor'
:
anchor
,
'GtBox'
:
(
bbox
,
[[
4
,
4
]]),
'DistMat'
:
(
iou
,
[[
4
,
4
]]),
}
self
.
attrs
=
{
self
.
attrs
=
{
'rpn_batch_size_per_im'
:
128
,
'rpn_batch_size_per_im'
:
25600
,
'rpn_positive_overlap'
:
0.5
,
'rpn_positive_overlap'
:
0.
9
5
,
'rpn_negative_overlap'
:
0.
5
,
'rpn_negative_overlap'
:
0.
03
,
'fg_fraction'
:
0.5
,
'fg_fraction'
:
0.
2
5
,
'fix_seed'
:
True
'fix_seed'
:
True
}
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
128
,
0.5
,
0.5
,
0.5
)
self
.
outputs
=
{
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'LocationIndex'
:
loc_index
.
astype
(
'int32'
),
'ScoreIndex'
:
score_index
,
'ScoreIndex'
:
score_index
.
astype
(
'int32'
),
'TargetLabel'
:
tgt_lbl
,
'TargetBBox'
:
tgt_bbox
.
astype
(
'float32'
),
'TargetLabel'
:
tgt_lbl
.
astype
(
'int64'
),
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录