Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
12b483c0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
12b483c0
编写于
9月 05, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine/ut/lac
上级
d83187db
6e03f790
变更
65
隐藏空白更改
内联
并排
Showing
65 changed file
with
1474 addition
and
917 deletion
+1474
-917
Dockerfile
Dockerfile
+1
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+7
-9
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
...s/beginners_guide/basics/machine_translation/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
.../beginners_guide/basics/understand_sentiment/README.cn.md
+2
-0
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
...uid/new_docs/beginners_guide/basics/word2vec/README.cn.md
+3
-1
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
...beginners_guide/quick_start/recognize_digits/README.cn.md
+1
-1
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
+1
-1
doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
...uid/new_docs/user_guides/howto/inference/native_infer.rst
+3
-5
paddle/fluid/API.spec
paddle/fluid/API.spec
+2
-2
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+1
-27
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+0
-7
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+23
-8
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+4
-8
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+30
-88
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
+27
-20
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
+2
-0
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+14
-8
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+9
-0
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
+3
-2
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
+11
-12
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+7
-11
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+3
-2
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+19
-19
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+18
-19
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+20
-7
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+22
-12
paddle/fluid/inference/analysis/flags.h
paddle/fluid/inference/analysis/flags.h
+22
-0
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
+6
-3
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+1
-7
paddle/fluid/inference/analysis/test_text_classification.cc
paddle/fluid/inference/analysis/test_text_classification.cc
+109
-0
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+14
-5
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+18
-15
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+4
-2
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+2
-1
paddle/fluid/inference/api/demo_ci/run.sh
paddle/fluid/inference/api/demo_ci/run.sh
+1
-1
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+1
-0
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+15
-0
paddle/fluid/inference/paddle_fluid.map
paddle/fluid/inference/paddle_fluid.map
+1
-0
paddle/fluid/operators/auc_op.cc
paddle/fluid/operators/auc_op.cc
+12
-17
paddle/fluid/operators/auc_op.h
paddle/fluid/operators/auc_op.h
+66
-87
paddle/fluid/operators/detection/bbox_util.h
paddle/fluid/operators/detection/bbox_util.h
+66
-0
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+8
-31
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+2
-3
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+179
-112
paddle/fluid/operators/distributed/request_handler_impl.cc
paddle/fluid/operators/distributed/request_handler_impl.cc
+25
-24
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+192
-73
paddle/fluid/operators/gru_unit_op.h
paddle/fluid/operators/gru_unit_op.h
+8
-8
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+1
-1
paddle/fluid/operators/rmsprop_op.cc
paddle/fluid/operators/rmsprop_op.cc
+24
-1
paddle/fluid/operators/rmsprop_op.h
paddle/fluid/operators/rmsprop_op.h
+17
-4
paddle/fluid/operators/roi_pool_op.cu
paddle/fluid/operators/roi_pool_op.cu
+6
-6
paddle/fluid/operators/roi_pool_op.h
paddle/fluid/operators/roi_pool_op.h
+2
-2
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+18
-21
python/paddle/fluid/layers/metric_op.py
python/paddle/fluid/layers/metric_op.py
+14
-19
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+0
-5
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+32
-44
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+29
-3
python/paddle/fluid/tests/test_detection.py
python/paddle/fluid/tests/test_detection.py
+8
-10
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+5
-5
python/paddle/fluid/tests/unittests/test_auc_op.py
python/paddle/fluid/tests/unittests/test_auc_op.py
+9
-13
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
+65
-0
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
...le/fluid/tests/unittests/test_generate_proposal_labels.py
+2
-2
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+157
-83
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
...paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
+95
-36
未找到文件。
Dockerfile
浏览文件 @
12b483c0
...
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
...
@@ -53,7 +53,7 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
RUN
wget
-qO-
http://paddlepaddledeps.
bj
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
RUN
wget
-qO-
http://paddlepaddledeps.
cdn
.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz |
\
tar
-xz
-C
/usr/local
&&
\
tar
-xz
-C
/usr/local
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
cp
-rf
/usr/local/TensorRT/include /usr
&&
\
cp
-rf
/usr/local/TensorRT/lib /usr
cp
-rf
/usr/local/TensorRT/lib /usr
...
...
cmake/inference_lib.cmake
浏览文件 @
12b483c0
...
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
...
@@ -128,16 +128,13 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/paddle/fluid"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/paddle/fluid"
)
set
(
module
"framework"
)
set
(
module
"framework"
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
copy
(
framework_lib DEPS framework_py_proto
set
(
framework_lib_deps framework_py_proto
)
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
endif
(
NOT WIN32
)
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
copy
(
framework_lib DEPS
${
framework_lib_deps
}
)
else
()
copy
(
framework_lib
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
src_dir
}
/
${
module
}
/details/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/framework/framework.pb.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
src_dir
}
/
${
module
}
/ir/*.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/details
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
/ir
)
)
endif
(
NOT WIN32
)
set
(
module
"memory"
)
set
(
module
"memory"
)
copy
(
memory_lib
copy
(
memory_lib
...
@@ -161,7 +158,8 @@ set(module "inference")
...
@@ -161,7 +158,8 @@ set(module "inference")
copy
(
inference_lib DEPS
${
inference_deps
}
copy
(
inference_lib DEPS
${
inference_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
src_dir
}
/
${
module
}
/api/paddle_inference_api.h
${
src_dir
}
/
${
module
}
/api/demo_ci
${
src_dir
}
/
${
module
}
/api/paddle_inference_api.h
${
src_dir
}
/
${
module
}
/api/demo_ci
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
)
)
set
(
module
"platform"
)
set
(
module
"platform"
)
...
...
doc/fluid/new_docs/beginners_guide/basics/machine_translation/README.cn.md
浏览文件 @
12b483c0
...
@@ -60,6 +60,7 @@
...
@@ -60,6 +60,7 @@
图3. 编码器-解码器框架
图3. 编码器-解码器框架
</div>
</div>
<a
name=
"编码器"
></a>
#### 编码器
#### 编码器
编码阶段分为三步:
编码阶段分为三步:
...
@@ -81,7 +82,7 @@
...
@@ -81,7 +82,7 @@
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
机器翻译任务的训练过程中,解码阶段的目标是最大化下一个正确的目标语言词的概率。思路是:
1.
每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)
`$c$`
、真实目标语言序列的第
`$i$`
个词
`$u_i$`
和
`$i$`
时刻RNN的隐层状态
`$z_i$`
,计算出下一个隐层状态
`$z_{i+1}$`
。计算公式如下:
1.
每一个时刻,根据源语言句子的编码信息(又叫上下文向量,context vector)
`$c$`
、真实目标语言序列的第
`$i$`
个词
`$u_i$`
和
`$i$`
时刻RNN的隐层状态
`$z_i$`
,计算出下一个隐层状态
`$z_{i+1}$`
。计算公式如下:
$$z_{i+1}=
\p
hi_{
\t
heta '}
\l
eft ( c,u_i,z_i
\r
ight )$$
$$z_{i+1}=
\p
hi_{
\t
heta '}
\l
eft ( c,u_i,z_i
\r
ight )$$
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
[
注意力机制
](
#注意力机制
)
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
其中
`$\phi _{\theta '}$`
是一个非线性激活函数;
`$c=q\mathbf{h}$`
是源语言句子的上下文向量,在不使用
注意力机制
时,如果
[
编码器
](
#编码器
)
的输出是源语言句子编码后的最后一个元素,则可以定义
`$c=h_T$`
;
`$u_i$`
是目标语言序列的第
`$i$`
个单词,
`$u_0$`
是目标语言序列的开始标记
`<s>`
,表示解码开始;
`$z_i$`
是
`$i$`
时刻解码RNN的隐层状态,
`$z_0$`
是一个全零的向量。
2.
将
`$z_{i+1}$`
通过
`softmax`
归一化,得到目标语言序列的第
`$i+1$`
个单词的概率分布
`$p_{i+1}$`
。概率分布公式如下:
2.
将
`$z_{i+1}$`
通过
`softmax`
归一化,得到目标语言序列的第
`$i+1$`
个单词的概率分布
`$p_{i+1}$`
。概率分布公式如下:
$$p
\l
eft ( u_{i+1}|u_{
<
i+1},
\m
athbf{x}
\r
ight )=softmax(W_sz_{i+1}+b_z)$$
$$p
\l
eft ( u_{i+1}|u_{
<
i+1},
\m
athbf{x}
\r
ight )=softmax(W_sz_{i+1}+b_z)$$
...
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
...
@@ -93,6 +94,7 @@ $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见
[
柱搜索算法
](
#柱搜索算法
)
。
机器翻译任务的生成过程,通俗来讲就是根据预先训练的模型来翻译源语言句子。生成过程中的解码阶段和上述训练过程的有所差异,具体介绍请见
[
柱搜索算法
](
#柱搜索算法
)
。
<a
name=
"柱搜索算法"
></a>
### 柱搜索算法
### 柱搜索算法
柱搜索(
[
beam search
](
http://en.wikipedia.org/wiki/Beam_search
)
)是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“
`<s>你好<e>`
”,就算目标语言字典中只有3个词(
`<s>`
,
`<e>`
,
`hello`
),也可能生成无限句话(
`hello`
循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
柱搜索(
[
beam search
](
http://en.wikipedia.org/wiki/Beam_search
)
)是一种启发式图搜索算法,用于在图或树中搜索有限集合中的最优扩展节点,通常用在解空间非常大的系统(如机器翻译、语音识别)中,原因是内存无法装下图或树中所有展开的解。如在机器翻译任务中希望翻译“
`<s>你好<e>`
”,就算目标语言字典中只有3个词(
`<s>`
,
`<e>`
,
`hello`
),也可能生成无限句话(
`hello`
循环出现的次数不定),为了找到其中较好的翻译结果,我们可采用柱搜索算法。
...
...
doc/fluid/new_docs/beginners_guide/basics/understand_sentiment/README.cn.md
浏览文件 @
12b483c0
...
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
...
@@ -149,6 +149,8 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
网络的输入
`input_dim`
表示的是词典的大小,
`class_dim`
表示类别数。这里,我们使用
[
`sequence_conv_pool`
](
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py
)
API实现了卷积和池化操作。
网络的输入
`input_dim`
表示的是词典的大小,
`class_dim`
表示类别数。这里,我们使用
[
`sequence_conv_pool`
](
https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/trainer_config_helpers/networks.py
)
API实现了卷积和池化操作。
<a
name=
"栈值双向LSTM"
></a>
### 栈式双向LSTM
### 栈式双向LSTM
栈式双向神经网络
`stacked_lstm_net`
的代码片段如下:
栈式双向神经网络
`stacked_lstm_net`
的代码片段如下:
...
...
doc/fluid/new_docs/beginners_guide/basics/word2vec/README.cn.md
浏览文件 @
12b483c0
...
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
...
@@ -50,7 +50,7 @@ similarity: -0.0997506977351
```
```
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
应用模型
](
#应用模型
)
中详细描述用法。
以上结果可以通过运行
`calculate_dis.py`
, 加载字典里的单词和对应训练特征结果得到,我们将在
[
模型应用
](
#模型应用
)
中详细描述用法。
## 模型概览
## 模型概览
...
@@ -189,6 +189,7 @@ dream that one day <e>
...
@@ -189,6 +189,7 @@ dream that one day <e>
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
最后,每个输入会按其单词次在字典里的位置,转化成整数的索引序列,作为PaddlePaddle的输入。
<a
name=
"训练模型"
></a>
## 编程实现
## 编程实现
本配置的模型结构如下图所示:
本配置的模型结构如下图所示:
...
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
@@ -349,6 +350,7 @@ Step 20: Average Cost 5.766995
...
...
```
```
<a
name=
"模型应用"
></a>
## 模型应用
## 模型应用
在模型训练后,我们可以用它做一些预测。
在模型训练后,我们可以用它做一些预测。
...
...
doc/fluid/new_docs/beginners_guide/quick_start/recognize_digits/README.cn.md
浏览文件 @
12b483c0
...
@@ -102,7 +102,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
...
@@ -102,7 +102,7 @@ Softmax回归模型采用了最简单的两层神经网络,即只有输入层
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图6所示。
池化是非线性下采样的一种形式,主要作用是通过减少网络的参数来减小计算量,并且能够在一定程度上控制过拟合。通常在卷积层的后面会加上一个池化层。池化包括最大池化、平均池化等。其中最大池化是用不重叠的矩形框将输入层分成不同的区域,对于每个矩形框的数取最大值作为输出层,如图6所示。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/blob/develop/image_classification/README.md
)
教程。
更详细的关于卷积神经网络的具体知识可以参考
[
斯坦福大学公开课
](
http://cs231n.github.io/convolutional-networks/
)
和
[
图像分类
](
https://github.com/PaddlePaddle/book/tree/develop/03.image_classification
)
教程。
### 常见激活函数介绍
### 常见激活函数介绍
-
sigmoid激活函数: $ f(x) = sigmoid(x) =
\f
rac{1}{1+e^{-x}} $
-
sigmoid激活函数: $ f(x) = sigmoid(x) =
\f
rac{1}{1+e^{-x}} $
...
...
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md
浏览文件 @
12b483c0
...
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
...
@@ -149,7 +149,7 @@ python setup.py bdist_wheel
pip install --upgrade dist/visualdl-
*
.whl
pip install --upgrade dist/visualdl-
*
.whl
```
```
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
how_to_dev_frontend_e
n.md)
如果打包和安装遇到其他问题,不安装只想运行Visual DL可以看[这里](https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/
develop/how_to_dev_frontend_c
n.md)
## SDK
## SDK
...
...
doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst
浏览文件 @
12b483c0
...
@@ -4,13 +4,12 @@ Paddle 预测 API
...
@@ -4,13 +4,12 @@ Paddle 预测 API
为了更简单方便的预测部署,Fluid 提供了一套高层 API
为了更简单方便的预测部署,Fluid 提供了一套高层 API
用来隐藏底层不同的优化实现。
用来隐藏底层不同的优化实现。
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
contrib/inference>`_
_
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
fluid/inference/api>`
_
包括
包括
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
- 库文件 ``libpaddle_inference_api.so`` 或
``libpaddle_inference_api.a``
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
...
@@ -97,8 +96,7 @@ engine
...
@@ -97,8 +96,7 @@ engine
CHECK(predictor->Run(slots, &outputs));
CHECK(predictor->Run(slots, &outputs));
// 获取 outputs ...
// 获取 outputs ...
编译时,联编 ``libpaddle_fluid.a/.so`` 和
编译时,联编 ``libpaddle_fluid.a/.so`` 便可。
``libpaddle_inference_api.a/.so`` 便可。
详细代码参考
详细代码参考
------------
------------
...
...
paddle/fluid/API.spec
浏览文件 @
12b483c0
...
@@ -312,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
...
@@ -312,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC',
200
, 1))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC',
4095
, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
...
@@ -376,7 +376,7 @@ paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'l
...
@@ -376,7 +376,7 @@ paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'l
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'
], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0
))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum'
, 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False
))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
12b483c0
...
@@ -326,7 +326,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -326,7 +326,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
ir
::
Graph
&
result
=
*
graph
;
ir
::
Graph
&
result
=
*
graph
;
for
(
auto
&
node
:
nodes
)
{
for
(
auto
&
node
:
nodes
)
{
if
(
node
->
NodeType
()
==
ir
::
Node
::
Type
::
kVariable
&&
node
->
Var
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
all_vars_
.
emplace
(
node
->
Name
(),
node
->
Var
());
all_vars_
.
emplace
(
node
->
Name
(),
node
->
Var
());
}
}
}
}
...
@@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
...
@@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
}
}
}
}
bool
MultiDevSSAGraphBuilder
::
IsParameterGradientOnce
(
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
{
bool
is_pg_once
=
grad_names_
.
count
(
og
)
!=
0
&&
og_has_been_broadcast
->
count
(
og
)
==
0
;
if
(
is_pg_once
)
{
// Insert NCCL AllReduce Op
og_has_been_broadcast
->
insert
(
og
);
}
return
is_pg_once
;
}
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
int
MultiDevSSAGraphBuilder
::
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
)
const
{
ir
::
Node
*
node
)
const
{
if
(
strategy_
.
reduce_
!=
BuildStrategy
::
ReduceStrategy
::
kReduce
)
{
if
(
strategy_
.
reduce_
!=
BuildStrategy
::
ReduceStrategy
::
kReduce
)
{
...
@@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
...
@@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
return
var
;
return
var
;
}
}
// Find the first occurence of `prev_op_name` and make current `op` depend
// on it.
void
MultiDevSSAGraphBuilder
::
ConnectOp
(
ir
::
Graph
*
result
,
OpHandleBase
*
op
,
const
std
::
string
&
prev_op_name
)
const
{
for
(
auto
&
prev_op
:
result
->
Get
<
GraphOps
>
(
kGraphOps
))
{
if
(
prev_op
->
Name
()
==
prev_op_name
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
result
->
CreateControlDepVar
());
prev_op
->
AddOutput
(
dep_var
);
result
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
op
->
AddInput
(
dep_var
);
}
}
}
void
MultiDevSSAGraphBuilder
::
CreateDistTrainOp
(
ir
::
Graph
*
result
,
void
MultiDevSSAGraphBuilder
::
CreateDistTrainOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
)
const
{
ir
::
Node
*
node
)
const
{
int
op_dev_id
=
-
1
;
int
op_dev_id
=
-
1
;
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
12b483c0
...
@@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
void
ConnectOp
(
ir
::
Graph
*
result
,
OpHandleBase
*
op
,
const
std
::
string
&
prev_op_name
)
const
;
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
size_t
num_places
)
const
;
size_t
num_places
)
const
;
...
@@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateComputationalOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
void
CreateComputationalOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
int
dev_id
)
const
;
int
dev_id
)
const
;
bool
IsParameterGradientOnce
(
const
std
::
string
&
og
,
std
::
unordered_set
<
std
::
string
>
*
og_has_been_broadcast
)
const
;
int
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
)
const
;
int
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
12b483c0
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
function
(
pass_library TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
graph SRCS graph.cc DEPS node
)
cc_library
(
graph SRCS graph.cc DEPS node
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
cc_library
(
attention_lstm_fuse_pass SRCS attention_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
graph_to_program_pass
)
cc_library
(
infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass
)
pass_library
(
graph_viz_pass
)
cc_library
(
fc_lstm_fuse_pass SRCS fc_lstm_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
fc_fuse_pass
)
cc_library
(
seq_concat_fc_fuse_pass SRCS seq_concat_fc_fuse_pass.cc DEPS graph graph_pattern_detector
)
pass_library
(
attention_lstm_fuse_pass
)
pass_library
(
infer_clean_graph_pass
)
pass_library
(
fc_lstm_fuse_pass
)
pass_library
(
seq_concat_fc_fuse_pass
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass
graph_pattern_detector graph pass graph_traits
framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
12b483c0
...
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
...
@@ -99,17 +99,13 @@ void FindWhileOp(Graph* graph) {
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
#define LINK_TO(node0, node1) \
node0->outputs.push_back(node1); \
node1->inputs.push_back(node0);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
PrepareParameters
(
graph
,
param
);
PrepareParameters
(
graph
,
param
);
LINK_TO
(
X
,
lstm_op
);
IR_NODE_
LINK_TO
(
X
,
lstm_op
);
LINK_TO
(
cell_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
cell_init
,
lstm_op
);
LINK_TO
(
hidden_init
,
lstm_op
);
IR_NODE_
LINK_TO
(
hidden_init
,
lstm_op
);
LINK_TO
(
lstm_op
,
LSTMOUT
);
IR_NODE_
LINK_TO
(
lstm_op
,
LSTMOUT
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
}
...
...
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
12b483c0
...
@@ -21,74 +21,26 @@ namespace paddle {
...
@@ -21,74 +21,26 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
bool
VarOutLinksToOp
(
Node
*
node
,
const
std
::
string
&
op_type
)
{
for
(
auto
*
out
:
node
->
outputs
)
{
if
(
out
->
IsOp
()
&&
out
->
Op
()
->
Type
()
==
op_type
)
{
return
true
;
}
}
return
false
;
}
void
BuildFCPattern
(
PDPattern
*
pattern
)
{
// Create Operators
auto
*
mul_op
=
pattern
->
NewNode
(
"mul"
)
->
assert_is_op
(
"mul"
);
auto
*
elementwise_add_op
=
pattern
->
NewNode
(
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
);
// Create variables
// w
auto
*
mul_weight_var
=
pattern
->
NewNode
(
"mul_weight"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"Y"
,
0
);
// x
auto
*
mul_tmp_var
=
pattern
->
NewNode
(
"mul_tmp_var"
)
->
AsInput
()
->
assert_is_op_nth_input
(
"mul"
,
"X"
,
0
);
// intermediate variable, will be removed in the IR after fuse.
auto
*
mul_out_var
=
pattern
->
NewNode
(
"mul_out"
)
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"mul"
)
->
assert_is_op_input
(
"elementwise_add"
);
// bias
auto
*
elementwise_add_tmp_var
=
pattern
->
NewNode
(
"elementwise_add_tmpvar"
)
->
assert_is_op_input
(
"elementwise_add"
)
->
AsInput
();
// output
auto
*
elementwise_add_out_var
=
pattern
->
NewNode
(
"elementwise_add_out"
)
->
AsOutput
()
->
assert_is_op_output
(
"elementwise_add"
);
mul_op
->
LinksFrom
({
mul_weight_var
,
mul_tmp_var
}).
LinksTo
({
mul_out_var
});
elementwise_add_op
->
LinksFrom
({
mul_out_var
,
elementwise_add_tmp_var
})
.
LinksTo
({
elementwise_add_out_var
});
}
// Replace the node `from` in the links to `to`
bool
LinksReplace
(
std
::
vector
<
Node
*>*
links
,
Node
*
from
,
Node
*
to
)
{
for
(
auto
*&
n
:
*
links
)
{
if
(
n
==
from
)
{
n
=
to
;
return
true
;
}
}
return
false
;
}
std
::
unique_ptr
<
ir
::
Graph
>
FCFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
FCFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
"fc"
,
graph
.
get
());
FusePassBase
::
Init
(
"fc
_fuse
"
,
graph
.
get
());
std
::
unordered_set
<
Node
*>
nodes2delete
;
std
::
unordered_set
<
Node
*>
nodes2delete
;
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
BuildFCPattern
(
gpd
.
mutable_pattern
());
// BuildFCPattern(gpd.mutable_pattern());
auto
*
x
=
gpd
.
mutable_pattern
()
#define GET_NODE(id) \
->
NewNode
(
"fc_fuse/x"
)
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode(#id)), \
->
AsInput
()
"pattern has no Node called %s", #id); \
->
assert_is_op_input
(
"mul"
,
"X"
);
auto* id = subgraph.at(gpd.pattern().RetrieveNode(#id)); \
patterns
::
FC
(
gpd
.
mutable_pattern
(),
"fc_fuse"
,
x
,
true
/*with bias*/
);
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);
#define GET_NODE(id) \
PADDLE_ENFORCE(subgraph.count(gpd.pattern().RetrieveNode("fc_fuse/" #id)), \
"pattern has no Node called %s", #id); \
auto* id = subgraph.at(gpd.pattern().RetrieveNode("fc_fuse/" #id)); \
PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", "fc_fuse/" #id);
int
found_fc_count
=
0
;
int
found_fc_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
...
@@ -98,43 +50,33 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
...
@@ -98,43 +50,33 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
// scenerio.
// scenerio.
// FC's fusion is simple, just op fuse, no need to process the
// FC's fusion is simple, just op fuse, no need to process the
// parameters.
// parameters.
GET_NODE
(
mul_tmp_var
);
// x
GET_NODE
(
x
);
// x
GET_NODE
(
mul_weight
);
// Y
GET_NODE
(
w
);
// Y
GET_NODE
(
elementwise_add_tmpvar
);
// bias
GET_NODE
(
fc_bias
);
// bias
GET_NODE
(
elementwise_add_out
);
// Out
GET_NODE
(
fc_out
);
// Out
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
mul
);
// MUL op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
elementwise_add
);
// ELEMENT_ADD op
GET_NODE
(
mul_out
);
// tmp
GET_NODE
(
mul_out
);
// tmp
#undef GET_NODE
#undef GET_NODE
// Create an FC Node.
// Create an FC Node.
OpDesc
desc
;
OpDesc
desc
;
std
::
string
fc_x_in
=
mul_tmp_var
->
Name
();
std
::
string
fc_x_in
=
x
->
Name
();
std
::
string
fc_Y_in
=
mul_weight
->
Name
();
std
::
string
fc_Y_in
=
w
->
Name
();
std
::
string
fc_bias_in
=
elementwise_add_tmpvar
->
Name
();
std
::
string
fc_bias_in
=
fc_bias
->
Name
();
std
::
string
fc_out
=
elementwise_add
_out
->
Name
();
std
::
string
fc_out
_out
=
fc
_out
->
Name
();
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
fc_x_in
}));
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
fc_x_in
}));
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out
_out
}));
desc
.
SetType
(
"fc"
);
desc
.
SetType
(
"fc"
);
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
fc_node
->
inputs
=
GraphSafeRemoveNodes
(
graph
.
get
(),
{
mul
,
elementwise_add
,
mul_out
});
std
::
vector
<
Node
*>
({
mul_tmp_var
,
mul_weight
,
elementwise_add_tmpvar
});
fc_node
->
outputs
.
push_back
(
elementwise_add_out
);
// Update link relatons
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_tmp_var
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
mul_weight
->
outputs
,
mul
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_tmpvar
->
outputs
,
elementwise_add
,
fc_node
));
PADDLE_ENFORCE
(
LinksReplace
(
&
elementwise_add_out
->
inputs
,
elementwise_add
,
fc_node
));
// Drop old nodes
IR_NODE_LINK_TO
(
x
,
fc_node
);
graph
->
RemoveNode
(
mul
);
IR_NODE_LINK_TO
(
w
,
fc_node
);
graph
->
RemoveNode
(
elementwise_add
);
IR_NODE_LINK_TO
(
fc_bias
,
fc_node
);
graph
->
RemoveNode
(
mul_out
);
// tmp variable
IR_NODE_LINK_TO
(
fc_node
,
fc_out
);
found_fc_count
++
;
found_fc_count
++
;
};
};
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc
浏览文件 @
12b483c0
...
@@ -11,7 +11,6 @@
...
@@ -11,7 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
#include <string>
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
@@ -87,15 +86,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
...
@@ -87,15 +86,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
}
}
op_desc
.
SetInput
(
"Bias"
,
{
new_bias_var
});
op_desc
.
SetInput
(
"Bias"
,
{
new_bias_var
});
}
}
#undef GET_NODE
#undef GET_NODE
// Create temp variables.
scope
->
Var
(
name_scope
+
"/BatchedInput.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
scope
->
Var
(
name_scope
+
"/BatchCellPreAct.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
scope
->
Var
(
name_scope
+
"/BatchedGate.new"
)
->
GetMutable
<
framework
::
LoDTensor
>
();
op_desc
.
SetInput
(
"H0"
,
{});
op_desc
.
SetInput
(
"H0"
,
{});
op_desc
.
SetInput
(
"C0"
,
{});
op_desc
.
SetInput
(
"C0"
,
{});
op_desc
.
SetOutput
(
"Hidden"
,
{
hidden_n
->
Name
()});
op_desc
.
SetOutput
(
"Hidden"
,
{
hidden_n
->
Name
()});
op_desc
.
SetOutput
(
"Cell"
,
{
cell_n
->
Name
()});
op_desc
.
SetOutput
(
"Cell"
,
{
cell_n
->
Name
()});
op_desc
.
SetOutput
(
"XX"
,
{
xx_n
->
Name
()});
op_desc
.
SetOutput
(
"XX"
,
{
xx_n
->
Name
()});
op_desc
.
SetOutput
(
"BatchedInput"
,
{
"blstm_0.tmp_2"
});
op_desc
.
SetOutput
(
"BatchedGate"
,
{
name_scope
+
"/BatchedGate.new"
});
op_desc
.
SetOutput
(
"BatchCellPreAct"
,
{
name_scope
+
"/BatchCellPreAct.new"
});
op_desc
.
SetOutput
(
"BatchedInput"
,
{
name_scope
+
"/BatchedInput.new"
});
op_desc
.
SetAttr
(
"is_reverse"
,
lstm_n
->
Op
()
->
GetAttr
(
"is_reverse"
));
op_desc
.
SetAttr
(
"is_reverse"
,
lstm_n
->
Op
()
->
GetAttr
(
"is_reverse"
));
op_desc
.
SetAttr
(
"use_peepholes"
,
lstm_n
->
Op
()
->
GetAttr
(
"use_peepholes"
));
op_desc
.
SetAttr
(
"use_peepholes"
,
lstm_n
->
Op
()
->
GetAttr
(
"use_peepholes"
));
// TODO(TJ): get from attr
// TODO(TJ): get from attr
...
@@ -121,22 +129,18 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
...
@@ -121,22 +129,18 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
#undef TMP_NEW
#undef TMP_NEW
#undef TMP_NAME
#undef TMP_NAME
#define LINK_TO(a, b) \
IR_NODE_LINK_TO
(
input_n
,
op
);
a->outputs.push_back(b); \
IR_NODE_LINK_TO
(
weight_x_n
,
op
);
b->inputs.push_back(a);
IR_NODE_LINK_TO
(
weight_h_n
,
op
);
LINK_TO
(
input_n
,
op
);
IR_NODE_LINK_TO
(
bias_n
,
op
);
LINK_TO
(
weight_x_n
,
op
);
IR_NODE_LINK_TO
(
op
,
hidden_n
);
LINK_TO
(
weight_h_n
,
op
);
LINK_TO
(
bias_n
,
op
);
LINK_TO
(
op
,
hidden_n
);
#undef LINK_TO
return
op
;
return
op
;
};
};
int
fusion_count
{
0
};
int
fusion_count
{
0
};
auto
fc_no_bias_handler
=
[
&
](
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
Graph
*
g
)
{
#define GET_NODE(name__) \
#define GET_NODE(name__) \
std::string name__##key = name_scope + "/" + #name__; \
std::string name__##key = name_scope + "/" + #name__; \
auto* name__##n = pattern->RetrieveNode(name__##key); \
auto* name__##n = pattern->RetrieveNode(name__##key); \
...
@@ -157,21 +161,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
...
@@ -157,21 +161,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
if
(
with_fc_bias
)
{
if
(
with_fc_bias
)
{
GET_NODE
(
fc_bias
);
GET_NODE
(
fc_bias
);
GET_NODE
(
elementwise_add
);
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
fc_bias
);
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
fc_bias
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
(
{
mul_n
,
lstm_n
,
elementwise_add_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
else
{
}
else
{
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
-
1
);
lstm_creator
(
lstm
,
x
,
w
,
Weight
,
Bias
,
Hidden
,
Cell
,
fc_out
,
-
1
);
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
({
mul_n
,
lstm_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
}
}
#undef GET_NODE
#undef GET_NODE
// Remove unneeded nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
({
mul_n
,
lstm_n
});
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
++
fusion_count
;
++
fusion_count
;
};
};
gpd
(
graph
,
fc_no_bias_
handler
);
gpd
(
graph
,
handler
);
return
fusion_count
;
return
fusion_count
;
}
}
...
...
paddle/fluid/framework/ir/fc_lstm_fuse_pass.h
浏览文件 @
12b483c0
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
12b483c0
...
@@ -73,7 +73,6 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) {
...
@@ -73,7 +73,6 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) {
void
GraphPatternDetector
::
operator
()(
Graph
*
graph
,
void
GraphPatternDetector
::
operator
()(
Graph
*
graph
,
GraphPatternDetector
::
handle_t
handler
)
{
GraphPatternDetector
::
handle_t
handler
)
{
if
(
!
MarkPDNodesInGraph
(
*
graph
))
{
if
(
!
MarkPDNodesInGraph
(
*
graph
))
{
LOG
(
INFO
)
<<
"Mark failed"
;
return
;
return
;
}
}
...
@@ -111,6 +110,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
...
@@ -111,6 +110,11 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph& graph) {
return
false
;
return
false
;
}
}
}
}
for
(
auto
&
item
:
pdnodes2nodes_
)
{
for
(
auto
&
n
:
item
.
second
)
{
GetMarkedNodes
(
const_cast
<
Graph
*>
(
&
graph
)).
insert
(
n
);
}
}
VLOG
(
3
)
<<
pdnodes2nodes_
.
size
()
<<
" nodes marked"
;
VLOG
(
3
)
<<
pdnodes2nodes_
.
size
()
<<
" nodes marked"
;
return
!
pdnodes2nodes_
.
empty
();
return
!
pdnodes2nodes_
.
empty
();
...
@@ -278,7 +282,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
...
@@ -278,7 +282,7 @@ void GraphPatternDetector::RemoveOverlappedMatch(
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
bool
valid
=
true
;
bool
valid
=
true
;
for
(
auto
&
item
:
subgraph
)
{
for
(
auto
&
item
:
subgraph
)
{
if
(
node_set
.
count
(
item
.
second
))
{
if
(
item
.
first
->
IsIntermediate
()
&&
node_set
.
count
(
item
.
second
))
{
valid
=
false
;
valid
=
false
;
break
;
break
;
}
}
...
@@ -334,22 +338,22 @@ PDNode& PDNode::LinksFrom(const std::vector<PDNode*>& others) {
...
@@ -334,22 +338,22 @@ PDNode& PDNode::LinksFrom(const std::vector<PDNode*>& others) {
}
}
PDNode
*
PDNode
::
assert_is_op
()
{
PDNode
*
PDNode
::
assert_is_op
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsOp
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_op
(
const
std
::
string
&
op_type
)
{
PDNode
*
PDNode
::
assert_is_op
(
const
std
::
string
&
op_type
)
{
asserts_
.
emplace_back
([
this
,
op_type
](
Node
*
x
)
{
asserts_
.
emplace_back
([
op_type
](
Node
*
x
)
{
return
x
&&
x
->
IsOp
()
&&
x
->
Op
()
->
Type
()
==
op_type
;
return
x
&&
x
->
IsOp
()
&&
x
->
Op
()
->
Type
()
==
op_type
;
});
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_var
()
{
PDNode
*
PDNode
::
assert_is_var
()
{
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
x
&&
x
->
IsVar
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_var_not_persistable
()
{
PDNode
*
PDNode
::
assert_var_not_persistable
()
{
assert_is_var
();
assert_is_var
();
asserts_
.
emplace_back
([
this
](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
asserts_
.
emplace_back
([](
Node
*
x
)
{
return
!
x
->
Var
()
->
Persistable
();
});
return
this
;
return
this
;
}
}
PDNode
*
PDNode
::
assert_is_persistable_var
()
{
PDNode
*
PDNode
::
assert_is_persistable_var
()
{
...
@@ -491,14 +495,16 @@ void GraphSafeRemoveNodes(Graph* graph,
...
@@ -491,14 +495,16 @@ void GraphSafeRemoveNodes(Graph* graph,
for
(
auto
it
=
node
->
inputs
.
begin
();
it
!=
node
->
inputs
.
end
();)
{
for
(
auto
it
=
node
->
inputs
.
begin
();
it
!=
node
->
inputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
inputs
.
erase
(
it
);
it
=
const_cast
<
Node
*>
(
node
)
->
inputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
it
++
;
}
}
}
for
(
auto
it
=
node
->
outputs
.
begin
();
it
!=
node
->
outputs
.
end
();)
{
for
(
auto
it
=
node
->
outputs
.
begin
();
it
!=
node
->
outputs
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
if
(
nodes
.
count
(
*
it
))
{
it
=
const_cast
<
Node
*>
(
node
)
->
outputs
.
erase
(
it
);
it
=
const_cast
<
Node
*>
(
node
)
->
outputs
.
erase
(
it
);
}
else
}
else
{
it
++
;
it
++
;
}
}
}
}
}
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
12b483c0
...
@@ -19,6 +19,9 @@
...
@@ -19,6 +19,9 @@
#endif
#endif
#include <numeric>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/dot.h"
...
@@ -245,6 +248,8 @@ class GraphPatternDetector {
...
@@ -245,6 +248,8 @@ class GraphPatternDetector {
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
// The intermediate PDNodes will be removed, so can't shared by multiple
// patterns.
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Validate whether the intermediate nodes are linked by external nodes.
// Validate whether the intermediate nodes are linked by external nodes.
...
@@ -295,6 +300,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
...
@@ -295,6 +300,10 @@ PDNode* LSTM(PDPattern* pattern, const std::string& name_scope, PDNode* x);
}
// namespace patterns
}
// namespace patterns
#define IR_NODE_LINK_TO(a, b) \
a->outputs.push_back(b); \
b->inputs.push_back(a);
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector_tester.cc
浏览文件 @
12b483c0
...
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
...
@@ -140,8 +140,9 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
},
},
"OP0"
);
"OP0"
);
auto
*
any_var
=
x
.
mutable_pattern
()
->
NewNode
(
auto
*
any_var
=
x
.
mutable_pattern
()
[](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
);
->
NewNode
([](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
)
->
AsIntermediate
();
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
...
...
paddle/fluid/framework/ir/infer_clean_graph_pass.cc
浏览文件 @
12b483c0
...
@@ -13,42 +13,41 @@
...
@@ -13,42 +13,41 @@
// limitations under the License.
// limitations under the License.
#include <algorithm>
#include <algorithm>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/
pass
.h"
#include "paddle/fluid/framework/ir/
graph_pattern_detector
.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
class
InferCleanGraphPass
:
public
Pass
{
class
InferCleanGraphPass
:
public
FusePassBase
{
public:
public:
virtual
~
InferCleanGraphPass
()
{}
virtual
~
InferCleanGraphPass
()
{}
protected:
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
FusePassBase
::
Init
(
"original_graph"
,
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
PADDLE_ENFORCE
(
graph
.
get
());
auto
is_valid_node
=
[](
Node
*
x
)
{
auto
is_valid_node
=
[](
Node
*
x
)
{
return
x
&&
IsControlDepVar
(
*
x
)
&&
x
->
IsVar
()
&&
!
x
->
Var
();
return
x
&&
IsControlDepVar
(
*
x
)
&&
x
->
IsVar
()
&&
!
x
->
Var
();
};
};
std
::
unordered_set
<
Node
*>
invalid_nodes
;
std
::
unordered_set
<
const
Node
*>
invalid_nodes
;
int
valid_op
=
0
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
is_valid_node
(
node
))
{
if
(
is_valid_node
(
node
))
{
invalid_nodes
.
insert
(
node
);
invalid_nodes
.
insert
(
node
);
}
else
if
(
node
->
IsOp
())
{
// Collect all the operators to help tracking number of operators.
++
valid_op
;
}
}
}
}
// remove nodes from the graph.
GraphSafeRemoveNodes
(
graph
.
get
(),
invalid_nodes
);
for
(
auto
*
node
:
invalid_nodes
)
{
graph
->
RemoveNode
(
node
);
}
// clean edges.
AddStatis
(
valid_op
);
for
(
auto
*
node
:
graph
->
Nodes
())
{
CleanEdges
(
&
node
->
inputs
,
invalid_nodes
);
CleanEdges
(
&
node
->
outputs
,
invalid_nodes
);
}
return
graph
;
return
graph
;
}
}
...
...
paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
浏览文件 @
12b483c0
...
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
...
@@ -219,16 +219,13 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
op_desc
.
SetAttr
(
"fc_activation"
,
act
->
Op
()
->
Type
());
op_desc
.
SetAttr
(
"fc_activation"
,
act
->
Op
()
->
Type
());
auto
*
op_node
=
graph
->
CreateOpNode
(
&
op_desc
);
auto
*
op_node
=
graph
->
CreateOpNode
(
&
op_desc
);
// Add links
// Add links
#define NODE_LINKS(a, b) \
IR_NODE_LINK_TO
(
fc_w
,
op_node
);
a->outputs.push_back(b); \
IR_NODE_LINK_TO
(
fc_bias
,
op_node
);
b->inputs.push_back(a);
IR_NODE_LINK_TO
(
concat_in0
,
op_node
);
NODE_LINKS
(
fc_w
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand0_in
,
op_node
);
NODE_LINKS
(
fc_bias
,
op_node
);
IR_NODE_LINK_TO
(
sequence_expand1_in
,
op_node
);
NODE_LINKS
(
concat_in0
,
op_node
);
IR_NODE_LINK_TO
(
op_node
,
fc_out
);
NODE_LINKS
(
sequence_expand0_in
,
op_node
);
NODE_LINKS
(
sequence_expand1_in
,
op_node
);
NODE_LINKS
(
op_node
,
fc_out
);
// Clean nodes.
// Clean nodes.
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
std
::
unordered_set
<
const
Node
*>
marked_nodes
;
...
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
...
@@ -241,7 +238,6 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
marked_nodes
.
erase
(
sequence_expand0_in
);
marked_nodes
.
erase
(
sequence_expand0_in
);
marked_nodes
.
erase
(
sequence_expand1_in
);
marked_nodes
.
erase
(
sequence_expand1_in
);
marked_nodes
.
erase
(
fc_out
);
marked_nodes
.
erase
(
fc_out
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
GraphSafeRemoveNodes
(
graph
,
marked_nodes
);
});
});
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
12b483c0
...
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
...
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library
(
paddle_fluid_api
cc_library
(
paddle_fluid_api
SRCS io.cc
SRCS io.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
graph_to_program_pass
)
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
get_property
(
fluid_modules GLOBAL PROPERTY FLUID_MODULES
)
get_property
(
fluid_modules GLOBAL PROPERTY FLUID_MODULES
)
...
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
...
@@ -22,7 +22,7 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
#endif()
#endif()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
analysis_predictor
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
...
@@ -32,6 +32,7 @@ endif()
...
@@ -32,6 +32,7 @@ endif()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
DEPS
${
fluid_modules
}
paddle_fluid_api
)
DEPS
${
fluid_modules
}
paddle_fluid_api
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
12b483c0
...
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
...
@@ -33,7 +33,7 @@ function (inference_analysis_test TARGET)
endif
()
endif
()
cc_test
(
${
TARGET
}
cc_test
(
${
TARGET
}
SRCS
"
${
analysis_test_SRCS
}
"
SRCS
"
${
analysis_test_SRCS
}
"
DEPS analysis
graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detector pass
${
analysis_test_EXTRA_DEPS
}
DEPS analysis
pass
${
GLOB_PASS_LIB
}
${
analysis_test_EXTRA_DEPS
}
ARGS --inference_model_dir=
${
PYTHON_TESTS_DIR
}
/book/word2vec.inference.model
${
mem_opt
}
${
analysis_test_ARGS
}
)
ARGS --inference_model_dir=
${
PYTHON_TESTS_DIR
}
/book/word2vec.inference.model
${
mem_opt
}
${
analysis_test_ARGS
}
)
set_tests_properties
(
${
TARGET
}
PROPERTIES DEPENDS test_word2vec
)
set_tests_properties
(
${
TARGET
}
PROPERTIES DEPENDS test_word2vec
)
endif
(
WITH_TESTING
)
endif
(
WITH_TESTING
)
...
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
...
@@ -56,25 +56,13 @@ if (NOT EXISTS ${DITU_INSTALL_DIR} AND WITH_TESTING)
endif
()
endif
()
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
analysis_predictor
# ir
fc_fuse_pass
fc_lstm_fuse_pass
seq_concat_fc_fuse_pass
graph_viz_pass
infer_clean_graph_pass
graph_pattern_detector
infer_clean_graph_pass
attention_lstm_fuse_pass
paddle_inference_api
pass
ARGS --infer_ditu_rnn_model=
${
DITU_INSTALL_DIR
}
/model
ARGS --infer_ditu_rnn_model=
${
DITU_INSTALL_DIR
}
/model
--infer_ditu_rnn_data=
${
DITU_INSTALL_DIR
}
/data.txt
)
--infer_ditu_rnn_data=
${
DITU_INSTALL_DIR
}
/data.txt
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
EXTRA_DEPS paddle_inference_api
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
EXTRA_DEPS paddle_fluid
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
...
@@ -86,7 +74,7 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
...
@@ -86,7 +74,7 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
set
(
CHINESE_NER_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz"
)
set
(
CHINESE_NER_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz"
)
set
(
CHINESE_NER_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz"
)
set
(
CHINESE_NER_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz"
)
set
(
CHINESE_NER_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/chinese_ner"
CACHE PATH
"Chinese ner model and data root."
FORCE
)
set
(
CHINESE_NER_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/chinese_ner"
CACHE PATH
"Chinese ner model and data root."
FORCE
)
if
(
NOT EXISTS
${
CHINESE_NER_INSTALL_DIR
}
AND WITH_TESTING
)
if
(
NOT EXISTS
${
CHINESE_NER_INSTALL_DIR
}
AND WITH_TESTING
AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_MODEL_URL
}
"chinese_ner_model.tar.gz"
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_MODEL_URL
}
"chinese_ner_model.tar.gz"
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_DATA_URL
}
"chinese_ner-data.txt.tar.gz"
)
inference_download_and_uncompress
(
${
CHINESE_NER_INSTALL_DIR
}
${
CHINESE_NER_DATA_URL
}
"chinese_ner-data.txt.tar.gz"
)
endif
()
endif
()
...
@@ -99,7 +87,7 @@ inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
...
@@ -99,7 +87,7 @@ inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
set
(
LAC_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz"
)
set
(
LAC_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz"
)
set
(
LAC_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz"
)
set
(
LAC_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz"
)
set
(
LAC_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/lac"
CACHE PATH
"LAC model and data root."
FORCE
)
set
(
LAC_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/lac"
CACHE PATH
"LAC model and data root."
FORCE
)
if
(
NOT EXISTS
${
LAC_INSTALL_DIR
}
AND WITH_TESTING
)
if
(
NOT EXISTS
${
LAC_INSTALL_DIR
}
AND WITH_TESTING
AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_MODEL_URL
}
"lac_model.tar.gz"
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_MODEL_URL
}
"lac_model.tar.gz"
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_DATA_URL
}
"lac_data.txt.tar.gz"
)
inference_download_and_uncompress
(
${
LAC_INSTALL_DIR
}
${
LAC_DATA_URL
}
"lac_data.txt.tar.gz"
)
endif
()
endif
()
...
@@ -120,3 +108,15 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
...
@@ -120,3 +108,15 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
pass
pass
ARGS --infer_model=
${
LAC_INSTALL_DIR
}
/model
ARGS --infer_model=
${
LAC_INSTALL_DIR
}
/model
--infer_data=
${
LAC_INSTALL_DIR
}
/data.txt
)
--infer_data=
${
LAC_INSTALL_DIR
}
/data.txt
)
set
(
TEXT_CLASSIFICATION_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz"
)
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/text_classification"
CACHE PATH
"Text Classification model and data root."
FORCE
)
if
(
NOT EXISTS
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
AND WITH_TESTING AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_MODEL_URL
}
"text-classification-Senta.tar.gz"
)
endif
()
inference_analysis_test
(
test_text_classification SRCS test_text_classification.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
)
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
12b483c0
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <string>
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
...
@@ -41,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
...
@@ -41,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
public:
public:
DfgPassManagerImpl
()
{
DfgPassManagerImpl
()
{
// TODO(Superjomn) set the key with pass reprs.
// TODO(Superjomn) set the key with pass reprs.
LOG
(
INFO
)
if
(
!
FLAGS_IA_enable_ir
)
{
<<
"-----------------------------------------------------------------"
;
if
(
FLAGS_IA_enable_ir
)
{
AddPass
(
"fluid-to-ir-pass"
,
new
FluidToIrPass
);
}
else
{
AddPass
(
"fluid-to-data-flow-graph"
,
new
FluidToDataFlowGraphPass
);
AddPass
(
"fluid-to-data-flow-graph"
,
new
FluidToDataFlowGraphPass
);
}
else
{
AddPass
(
"fluid-to-ir-pass"
,
new
FluidToIrPass
);
}
}
TryAddTensorRtPass
();
TryAddTensorRtPass
();
AddPass
(
"data-flow-graph-to-fluid"
,
new
DataFlowGraphToFluidPass
);
AddPass
(
"data-flow-graph-to-fluid"
,
new
DataFlowGraphToFluidPass
);
if
(
!
FLAGS_IA_output_storage_path
.
empty
())
{
if
(
!
FLAGS_IA_output_storage_path
.
empty
())
{
AddPass
(
"model-store-pass"
,
new
ModelStorePass
);
AddPass
(
"model-store-pass"
,
new
ModelStorePass
);
}
}
LOG
(
INFO
)
<<
"-----------------------------------------------------------------"
;
}
}
std
::
string
repr
()
const
override
{
return
"dfg-pass-manager"
;
}
std
::
string
repr
()
const
override
{
return
"dfg-pass-manager"
;
}
...
@@ -101,19 +98,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
...
@@ -101,19 +98,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
Analyzer
::
Analyzer
()
{
Register
(
"manager1"
,
new
DfgPassManagerImpl
);
}
Analyzer
::
Analyzer
()
{
Register
(
"manager1"
,
new
DfgPassManagerImpl
);
}
void
Analyzer
::
Run
(
Argument
*
argument
)
{
void
Analyzer
::
Run
(
Argument
*
argument
)
{
std
::
vector
<
std
::
string
>
passes
;
for
(
auto
&
pass
:
all_ir_passes_
)
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
passes
.
push_back
(
pass
);
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
}
}
passes
.
push_back
(
"graph_viz_pass"
);
// Ugly support fluid-to-ir-pass
// Ugly support fluid-to-ir-pass
argument
->
Set
(
kFluidToIrPassesAttr
,
argument
->
Set
(
kFluidToIrPassesAttr
,
new
std
::
vector
<
std
::
string
>
(
passes
));
new
std
::
vector
<
std
::
string
>
({
// Manual update the passes here.
"graph_viz_pass"
,
//
"infer_clean_graph_pass"
,
"graph_viz_pass"
,
//
"attention_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"fc_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"mul_lstm_fuse_pass"
,
"graph_viz_pass"
,
//
"seq_concat_fc_fuse_pass"
,
"graph_viz_pass"
,
//
"fc_fuse_pass"
,
"graph_viz_pass"
//
}));
for
(
auto
&
x
:
data_
)
{
for
(
auto
&
x
:
data_
)
{
PADDLE_ENFORCE
(
x
->
Initialize
(
argument
));
PADDLE_ENFORCE
(
x
->
Initialize
(
argument
));
...
@@ -122,6 +116,11 @@ void Analyzer::Run(Argument* argument) {
...
@@ -122,6 +116,11 @@ void Analyzer::Run(Argument* argument) {
}
}
}
}
Analyzer
&
Analyzer
::
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
disabled_ir_passes_
.
insert
(
passes
.
begin
(),
passes
.
end
());
return
*
this
;
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
12b483c0
...
@@ -36,16 +36,10 @@ limitations under the License. */
...
@@ -36,16 +36,10 @@ limitations under the License. */
*/
*/
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// flag if not available.
DECLARE_bool
(
IA_enable_tensorrt_subgraph_engine
);
DECLARE_string
(
IA_graphviz_log_root
);
DECLARE_string
(
IA_output_storage_path
);
DECLARE_bool
(
IA_enable_ir
);
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
...
@@ -57,7 +51,26 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -57,7 +51,26 @@ class Analyzer : public OrderedRegistry<PassManager> {
void
Run
(
Argument
*
argument
);
void
Run
(
Argument
*
argument
);
Analyzer
&
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
private:
// All available IR passes.
// The bigger fuse comes first, so that the small operators prefer to be
// merged in a larger fuse op. The small fusion will not break the pattern of
// larger fusion.
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
// Manual update the passes here.
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
}};
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
};
};
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
12b483c0
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -270,17 +271,22 @@ void TestDituRNNPrediction(const std::string &model_path,
...
@@ -270,17 +271,22 @@ void TestDituRNNPrediction(const std::string &model_path,
const
std
::
string
&
data_path
,
int
batch_size
,
const
std
::
string
&
data_path
,
int
batch_size
,
bool
use_analysis
,
bool
activate_ir
,
bool
use_analysis
,
bool
activate_ir
,
int
num_times
=
1
)
{
int
num_times
=
1
)
{
Native
Config
config
;
Analysis
Config
config
;
config
.
prog_file
=
FLAGS_infer_ditu_rnn_model
+
"/__model__"
;
config
.
prog_file
=
FLAGS_infer_ditu_rnn_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_ditu_rnn_model
+
"/param"
;
config
.
param_file
=
FLAGS_infer_ditu_rnn_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
config
.
specify_input_name
=
true
;
config
.
enable_ir_optim
=
activate_ir
;
PADDLE_ENFORCE
(
config
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
);
// default
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
auto
base_predictor
=
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
auto
predictor
=
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
data_path
,
batch_size
);
DataRecord
data
(
data_path
,
batch_size
);
// Prepare inputs.
// Prepare inputs.
...
@@ -327,9 +333,20 @@ void TestDituRNNPrediction(const std::string &model_path,
...
@@ -327,9 +333,20 @@ void TestDituRNNPrediction(const std::string &model_path,
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
}
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc"
));
int
num_ops
=
0
;
EXPECT_EQ
(
fuse_statis
.
at
(
"fc"
),
1
);
for
(
auto
&
node
:
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
1
);
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exist.
}
}
}
}
...
@@ -357,10 +374,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
...
@@ -357,10 +374,3 @@ TEST(Analyzer, DituRNN_with_analysis_with_IR) {
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
paddle/fluid/inference/analysis/flags.h
0 → 100644
浏览文件 @
12b483c0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
// flag if not available.
DECLARE_bool
(
IA_enable_tensorrt_subgraph_engine
);
DECLARE_string
(
IA_graphviz_log_root
);
DECLARE_string
(
IA_output_storage_path
);
DECLARE_bool
(
IA_enable_ir
);
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
浏览文件 @
12b483c0
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#pragma once
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/pass.h"
...
@@ -85,9 +86,11 @@ class FluidToIrPass final : public DataFlowGraphPass {
...
@@ -85,9 +86,11 @@ class FluidToIrPass final : public DataFlowGraphPass {
new
Scope
*
(
&
argument_
->
Get
<
Scope
>
(
ir
::
kParamScopeAttr
)));
new
Scope
*
(
&
argument_
->
Get
<
Scope
>
(
ir
::
kParamScopeAttr
)));
}
}
const
auto
&
ir_passes_to_apply
=
if
(
FLAGS_IA_enable_ir
)
{
argument_
->
Get
<
std
::
vector
<
std
::
string
>>
(
kFluidToIrPassesAttr
);
const
auto
&
ir_passes_to_apply
=
ir_passes
.
Apply
(
ir_passes_to_apply
);
argument_
->
Get
<
std
::
vector
<
std
::
string
>>
(
kFluidToIrPassesAttr
);
ir_passes
.
Apply
(
ir_passes_to_apply
);
}
PADDLE_ENFORCE
(
argument_
->
main_dfg
.
get
());
PADDLE_ENFORCE
(
argument_
->
main_dfg
.
get
());
argument_
->
main_dfg
->
Build
(
ir_passes
.
graph
());
argument_
->
main_dfg
->
Build
(
ir_passes
.
graph
());
...
...
paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
浏览文件 @
12b483c0
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
...
@@ -33,10 +34,3 @@ TEST(FluidToIrPass, Test) {
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_fuse_pass
);
paddle/fluid/inference/analysis/test_text_classification.cc
0 → 100644
浏览文件 @
12b483c0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
DEFINE_string
(
infer_model
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
namespace
paddle
{
// Renders the elements of `vec` as one string, writing a single space after
// every element (so a non-empty result ends with a trailing space and an empty
// vector yields an empty string).
template <typename T>
std::string to_string(const std::vector<T>& vec) {
  std::stringstream buffer;
  for (auto it = vec.begin(); it != vec.end(); ++it) {
    buffer << *it << " ";
  }
  return buffer.str();
}
// Logs a short profiling summary: the batch size, the repeat count and the
// average latency, computed as `latency` divided by `repeat`.
//
//   latency  accumulated latency over all runs (printed with an "ms" suffix).
//   bs       batch size to report.
//   repeat   number of runs `latency` was accumulated over.
void PrintTime(const double latency, const int bs, const int repeat) {
  const double avg_latency = latency / repeat;
  LOG(INFO) << "===========profile result===========";
  LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat
            << ", avg latency: " << avg_latency << "ms";
  LOG(INFO) << "=====================================";
}
// Runs the model found at FLAGS_infer_model through the analysis predictor
// FLAGS_repeat times on one hard-coded sample, then logs the average latency
// and a short summary of every output tensor.
//
// `batch_size` is only reported in the profile log; the input fed to the
// predictor is always the single three-element sequence built below.
void Main(int batch_size) {
  // One sequence input.
  std::vector<PaddleTensor> input_slots(1);
  // one batch starts
  // data --
  int64_t data0[] = {0, 1, 2};
  for (auto& input : input_slots) {
    input.data.Reset(data0, sizeof(data0));
    // shape --
    input.shape = std::vector<int>({3, 1});
    // dtype --
    input.dtype = PaddleDType::INT64;
    // LoD --
    input.lod = std::vector<std::vector<size_t>>({{0, 3}});
  }

  // Create Predictor --
  AnalysisConfig config;
  config.model_dir = FLAGS_infer_model;
  config.use_gpu = false;
  config.enable_ir_optim = true;
  // NOTE(review): with the default kExclude ir_mode the entries of `ir_passes`
  // are the passes to disable, so this presumably turns fc_lstm_fuse_pass
  // off rather than on -- confirm that is the intent.
  config.ir_passes.push_back("fc_lstm_fuse_pass");
  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

  inference::Timer timer;
  double sum = 0;
  std::vector<PaddleTensor> output_slots;
  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    CHECK(predictor->Run(input_slots, &output_slots));
    sum += timer.toc();
  }
  PrintTime(sum, batch_size, FLAGS_repeat);

  // Get output
  LOG(INFO) << "get outputs " << output_slots.size();
  for (auto& output : output_slots) {
    LOG(INFO) << "output.shape: " << to_string(output.shape);
    // no lod ?
    CHECK_EQ(output.lod.size(), 0UL);
    LOG(INFO) << "output.dtype: " << output.dtype;
    // Summarize at most the first five values. The count is clamped to the
    // buffer size so a small output is never read out of bounds.
    // NOTE(review): the buffer is interpreted as float without checking
    // output.dtype -- confirm the model always emits float outputs.
    const size_t num_values = output.data.length() / sizeof(float);
    const size_t num_to_print = num_values < 5 ? num_values : 5;
    const float* values = static_cast<float*>(output.data.data());
    std::stringstream ss;
    for (size_t i = 0; i < num_to_print; i++) {
      ss << values[i] << " ";
    }
    LOG(INFO) << "output.data summary: " << ss.str();
    // one batch ends
  }
}
// Smoke test: runs Main once, passing the value of the --batch_size flag.
TEST
(
text_classification
,
basic
)
{
Main
(
FLAGS_batch_size
);
}
}
// namespace paddle
// IR passes referenced by this test binary.
// NOTE(review): presumably USE_PASS forces each pass's registration to be
// linked in -- confirm against the macro definition. mul_lstm_fuse_pass is not
// listed here; verify whether that is intentional.
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
seq_concat_fc_fuse_pass
);
USE_PASS
(
fc_lstm_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
USE_PASS
(
attention_lstm_fuse_pass
);
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
12b483c0
...
@@ -18,10 +18,7 @@ if(APPLE)
...
@@ -18,10 +18,7 @@ if(APPLE)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
${
GLOB_PASS_LIB
}
)
graph_viz_pass fc_fuse_pass
infer_clean_graph_pass
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
...
@@ -47,7 +44,19 @@ function(inference_api_test TARGET_NAME)
...
@@ -47,7 +44,19 @@ function(inference_api_test TARGET_NAME)
endfunction
(
inference_api_test
)
endfunction
(
inference_api_test
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api
analysis
ir_pass_manager
pass
fc_fuse_pass
fc_lstm_fuse_pass
seq_concat_fc_fuse_pass
graph_viz_pass
infer_clean_graph_pass
graph_pattern_detector
infer_clean_graph_pass
attention_lstm_fuse_pass
)
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
SRCS api_tester.cc
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
12b483c0
...
@@ -14,10 +14,13 @@
...
@@ -14,10 +14,13 @@
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <memory>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -27,6 +30,8 @@ bool AnalysisPredictor::Init(
...
@@ -27,6 +30,8 @@ bool AnalysisPredictor::Init(
VLOG
(
3
)
<<
"Predictor::init()"
;
VLOG
(
3
)
<<
"Predictor::init()"
;
if
(
config_
.
use_gpu
)
{
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
config_
.
enable_ir_optim
=
false
;
}
else
{
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
}
}
...
@@ -72,7 +77,7 @@ bool AnalysisPredictor::Init(
...
@@ -72,7 +77,7 @@ bool AnalysisPredictor::Init(
void
AnalysisPredictor
::
OptimizeInferenceProgram
()
{
void
AnalysisPredictor
::
OptimizeInferenceProgram
()
{
LOG
(
INFO
)
<<
"optimize begin"
;
LOG
(
INFO
)
<<
"optimize begin"
;
FLAGS_IA_enable_ir
=
true
;
FLAGS_IA_enable_ir
=
config_
.
enable_ir_optim
;
FLAGS_IA_enable_tensorrt_subgraph_engine
=
false
;
FLAGS_IA_enable_tensorrt_subgraph_engine
=
false
;
FLAGS_IA_output_storage_path
=
""
;
// Don't output the model.
FLAGS_IA_output_storage_path
=
""
;
// Don't output the model.
// Analyze inference_program
// Analyze inference_program
...
@@ -89,24 +94,26 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -89,24 +94,26 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
}
argument_
.
origin_program_desc
.
reset
(
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
Analyzer
().
Run
(
&
argument_
);
PADDLE_ENFORCE
(
config_
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
,
"Only kExclude is supported yet."
);
Analyzer
().
DisableIrPasses
(
config_
.
ir_passes
).
Run
(
&
argument_
);
CHECK
(
argument_
.
transformed_program_desc
);
CHECK
(
argument_
.
transformed_program_desc
);
VLOG
(
5
)
<<
"to prepare executor"
;
VLOG
(
5
)
<<
"to prepare executor"
;
// LOG(INFO) << "transformed_parogram_desc " <<
// argument.transformed_program_desc->DebugString();
inference_program_
.
reset
(
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
*
argument_
.
transformed_program_desc
));
new
framework
::
ProgramDesc
(
*
argument_
.
transformed_program_desc
));
PADDLE_ENFORCE
(
argument_
.
Has
(
framework
::
ir
::
kParamScopeAttr
));
if
(
argument_
.
Has
(
framework
::
ir
::
kParamScopeAttr
))
{
// Update scope.
// Update scope.
scope_
.
reset
(
scope_
.
reset
(
argument_
.
Release
<
framework
::
Scope
>
(
framework
::
ir
::
kParamScopeAttr
));
argument_
.
Release
<
framework
::
Scope
>
(
framework
::
ir
::
kParamScopeAttr
));
LOG
(
INFO
)
<<
"optimize end =="
;
}
LOG
(
INFO
)
<<
"== optimize end =="
;
}
}
template
<
>
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
Native
Config
&
config
)
{
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
Analysis
Config
&
config
)
{
VLOG
(
3
)
<<
"create
NativePredictor
"
;
VLOG
(
3
)
<<
"create
AnalysisConfig
"
;
if
(
config
.
use_gpu
)
{
if
(
config
.
use_gpu
)
{
// 1. GPU memory
// 1. GPU memory
PADDLE_ENFORCE_GT
(
PADDLE_ENFORCE_GT
(
...
@@ -133,7 +140,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
...
@@ -133,7 +140,3 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
}
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fc_fuse_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
infer_clean_graph_pass
);
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
12b483c0
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
...
@@ -28,7 +30,7 @@ using framework::proto::ProgramDesc;
...
@@ -28,7 +30,7 @@ using framework::proto::ProgramDesc;
*/
*/
class
AnalysisPredictor
:
public
NativePaddlePredictor
{
class
AnalysisPredictor
:
public
NativePaddlePredictor
{
public:
public:
explicit
AnalysisPredictor
(
const
Native
Config
&
config
)
explicit
AnalysisPredictor
(
const
Analysis
Config
&
config
)
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
);
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
);
...
@@ -44,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
...
@@ -44,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
Argument
&
analysis_argument
()
{
return
argument_
;
}
Argument
&
analysis_argument
()
{
return
argument_
;
}
private:
private:
Native
Config
config_
;
Analysis
Config
config_
;
Argument
argument_
;
Argument
argument_
;
};
};
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
12b483c0
...
@@ -176,7 +176,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
...
@@ -176,7 +176,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework
::
Scope
*
scope
)
{
framework
::
Scope
*
scope
)
{
VLOG
(
3
)
<<
"Predictor::set_feed"
;
VLOG
(
3
)
<<
"Predictor::set_feed"
;
if
(
inputs
.
size
()
!=
feeds_
.
size
())
{
if
(
inputs
.
size
()
!=
feeds_
.
size
())
{
LOG
(
ERROR
)
<<
"wrong feed input size."
;
LOG
(
ERROR
)
<<
"wrong feed input size, need "
<<
feeds_
.
size
()
<<
" but get "
<<
inputs
.
size
();
return
false
;
return
false
;
}
}
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
...
...
paddle/fluid/inference/api/demo_ci/run.sh
浏览文件 @
12b483c0
...
@@ -14,7 +14,7 @@ else
...
@@ -14,7 +14,7 @@ else
fi
fi
PREFIX
=
inference-vis-demos%2F
PREFIX
=
inference-vis-demos%2F
URL_ROOT
=
http://paddlemodels.
bj
.bcebos.com/
${
PREFIX
}
URL_ROOT
=
http://paddlemodels.
cdn
.bcebos.com/
${
PREFIX
}
# download vis_demo data
# download vis_demo data
function
download
()
{
function
download
()
{
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
12b483c0
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <sys/time.h>
#include <sys/time.h>
#include <algorithm>
#include <algorithm>
#include <numeric>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
12b483c0
...
@@ -150,6 +150,21 @@ struct TensorRTConfig : public NativeConfig {
...
@@ -150,6 +150,21 @@ struct TensorRTConfig : public NativeConfig {
int
workspace_size
{
1
<<
30
};
int
workspace_size
{
1
<<
30
};
};
};
// NOTE WIP, not stable yet.
struct
AnalysisConfig
:
public
NativeConfig
{
//
enum
class
IrPassMode
{
kSystem
,
// Use system default passes, not customize.
kInclude
,
// Specify the passes in `ir_passes`.
kExclude
// Specify the disabled passes in `ir_passes`.
};
bool
enable_ir_optim
=
true
;
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
// attention lstm fuse works only on some specific models, so it is disabled by default.
std
::
vector
<
std
::
string
>
ir_passes
{
"attention_lstm_fuse_pass"
};
};
// A factory to help create different predictors.
// A factory to help create different predictors.
//
//
// FOR EXTENSION DEVELOPER:
// FOR EXTENSION DEVELOPER:
...
...
paddle/fluid/inference/paddle_fluid.map
浏览文件 @
12b483c0
{
{
global:
global:
*paddle*;
*paddle*;
*Pass*;
local:
local:
*;
*;
};
};
paddle/fluid/operators/auc_op.cc
浏览文件 @
12b483c0
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/auc_op.h"
#include "paddle/fluid/operators/auc_op.h"
#include <string>
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -36,15 +35,12 @@ class AucOp : public framework::OperatorWithKernel {
...
@@ -36,15 +35,12 @@ class AucOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
predict_height
,
label_height
,
PADDLE_ENFORCE_EQ
(
predict_height
,
label_height
,
"Out and Label should have same height."
);
"Out and Label should have same height."
);
int
num_
thres
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_thresholds"
)
;
int
num_
pred_buckets
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_thresholds"
)
+
1
;
ctx
->
SetOutputDim
(
"AUC"
,
{
1
});
ctx
->
SetOutputDim
(
"AUC"
,
{
1
});
ctx
->
SetOutputDim
(
"TPOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"BatchAUC"
,
{
1
});
ctx
->
SetOutputDim
(
"TNOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"StatPosOut"
,
{
num_pred_buckets
});
ctx
->
SetOutputDim
(
"FPOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"StatNegOut"
,
{
num_pred_buckets
});
ctx
->
SetOutputDim
(
"FNOut"
,
{
num_thres
});
ctx
->
ShareLoD
(
"Predict"
,
/*->*/
"AUC"
);
}
}
protected:
protected:
...
@@ -66,25 +62,24 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -66,25 +62,24 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Label"
,
AddInput
(
"Label"
,
"A 2D int tensor indicating the label of the training data. "
"A 2D int tensor indicating the label of the training data. "
"shape: [batch_size, 1]"
);
"shape: [batch_size, 1]"
);
AddInput
(
"TP"
,
"True-Positive value."
);
AddInput
(
"FP"
,
"False-Positive value."
);
AddInput
(
"TN"
,
"True-Negative value."
);
AddInput
(
"FN"
,
"False-Negative value."
);
// TODO(typhoonzero): support weight input
// TODO(typhoonzero): support weight input
AddInput
(
"StatPos"
,
"Statistic value when label = 1"
);
AddInput
(
"StatNeg"
,
"Statistic value when label = 0"
);
AddOutput
(
"AUC"
,
AddOutput
(
"AUC"
,
"A scalar representing the "
"A scalar representing the "
"current area-under-the-curve."
);
"current area-under-the-curve."
);
AddOutput
(
"TPOut"
,
"True-Positive value."
);
AddOutput
(
"BatchAUC"
,
"The AUC for current batch"
);
AddOutput
(
"FPOut"
,
"False-Positive value."
);
AddOutput
(
"StatPosOut"
,
"Statistic value when label = 1"
);
AddOutput
(
"TNOut"
,
"True-Negative value."
);
AddOutput
(
"StatNegOut"
,
"Statistic value when label = 0"
);
AddOutput
(
"FNOut"
,
"False-Negative value."
);
AddAttr
<
std
::
string
>
(
"curve"
,
"Curve type, can be 'ROC' or 'PR'."
)
AddAttr
<
std
::
string
>
(
"curve"
,
"Curve type, can be 'ROC' or 'PR'."
)
.
SetDefault
(
"ROC"
);
.
SetDefault
(
"ROC"
);
AddAttr
<
int
>
(
"num_thresholds"
,
AddAttr
<
int
>
(
"num_thresholds"
,
"The number of thresholds to use when discretizing the"
"The number of thresholds to use when discretizing the"
" roc curve."
)
" roc curve."
)
.
SetDefault
(
200
);
.
SetDefault
(
(
2
<<
12
)
-
1
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Area Under The Curve (AUC) Operator.
Area Under The Curve (AUC) Operator.
...
...
paddle/fluid/operators/auc_op.h
浏览文件 @
12b483c0
...
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
...
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -23,106 +23,85 @@ namespace operators {
...
@@ -23,106 +23,85 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
AucKernel
:
public
framework
::
OpKernel
<
T
>
{
class
AucKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predict"
);
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predict"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
auc
=
ctx
.
Output
<
Tensor
>
(
"AUC"
);
std
::
string
curve
=
ctx
.
Attr
<
std
::
string
>
(
"curve"
);
int
num_thresholds
=
ctx
.
Attr
<
int
>
(
"num_thresholds"
);
int
num_pred_buckets
=
num_thresholds
+
1
;
// Only use output var for now, make sure it's persistable and
// Only use output var for now, make sure it's persistable and
// not cleaned up for each batch.
// not cleaned up for each batch.
auto
*
true_positive
=
ctx
.
Output
<
Tensor
>
(
"TPOut"
);
auto
*
auc
=
ctx
.
Output
<
Tensor
>
(
"AUC"
);
auto
*
false_positive
=
ctx
.
Output
<
Tensor
>
(
"FPOut"
);
auto
*
stat_pos
=
ctx
.
Output
<
Tensor
>
(
"StatPosOut"
);
auto
*
true_negative
=
ctx
.
Output
<
Tensor
>
(
"TNOut"
);
auto
*
stat_neg
=
ctx
.
Output
<
Tensor
>
(
"StatNegOut"
);
auto
*
false_negative
=
ctx
.
Output
<
Tensor
>
(
"FNOut"
);
auto
*
auc_data
=
auc
->
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
stat_pos_data
=
stat_pos
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
stat_neg_data
=
stat_neg
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
calcAuc
(
ctx
,
label
,
predict
,
stat_pos_data
,
stat_neg_data
,
num_thresholds
,
auc
);
std
::
string
curve
=
ctx
.
Attr
<
std
::
string
>
(
"curve"
);
auto
*
batch_auc
=
ctx
.
Output
<
Tensor
>
(
"BatchAUC"
);
int
num_thresholds
=
ctx
.
Attr
<
int
>
(
"num_thresholds"
);
std
::
vector
<
int64_t
>
stat_pos_batch
(
num_pred_buckets
,
0
);
std
::
vector
<
double
>
thresholds_list
;
std
::
vector
<
int64_t
>
stat_neg_batch
(
num_pred_buckets
,
0
);
thresholds_list
.
reserve
(
num_thresholds
);
calcAuc
(
ctx
,
label
,
predict
,
stat_pos_batch
.
data
(),
stat_neg_batch
.
data
(),
for
(
int
i
=
1
;
i
<
num_thresholds
-
1
;
i
++
)
{
num_thresholds
,
batch_auc
);
thresholds_list
[
i
]
=
static_cast
<
double
>
(
i
)
/
(
num_thresholds
-
1
);
}
}
const
double
kEpsilon
=
1e-7
;
thresholds_list
[
0
]
=
0.0
f
-
kEpsilon
;
thresholds_list
[
num_thresholds
-
1
]
=
1.0
f
+
kEpsilon
;
private:
inline
static
double
trapezoidArea
(
double
X1
,
double
X2
,
double
Y1
,
double
Y2
)
{
return
(
X1
>
X2
?
(
X1
-
X2
)
:
(
X2
-
X1
))
*
(
Y1
+
Y2
)
/
2.0
;
}
inline
static
void
calcAuc
(
const
framework
::
ExecutionContext
&
ctx
,
const
framework
::
Tensor
*
label
,
const
framework
::
Tensor
*
predict
,
int64_t
*
stat_pos
,
int64_t
*
stat_neg
,
int
num_thresholds
,
framework
::
Tensor
*
auc_tensor
)
{
size_t
batch_size
=
predict
->
dims
()[
0
];
size_t
batch_size
=
predict
->
dims
()[
0
];
size_t
inference_width
=
predict
->
dims
()[
1
];
size_t
inference_width
=
predict
->
dims
()[
1
];
const
T
*
inference_data
=
predict
->
data
<
T
>
();
const
auto
*
label_data
=
label
->
data
<
int64_t
>
();
auto
*
auc
=
auc_tensor
->
mutable_data
<
double
>
(
ctx
.
GetPlace
());
const
T
*
inference_data
=
predict
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
auto
*
label_data
=
label
->
data
<
int64_t
>
();
uint32_t
binIdx
=
static_cast
<
uint32_t
>
(
inference_data
[
i
*
inference_width
+
1
]
*
num_thresholds
);
auto
*
tp_data
=
true_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
if
(
label_data
[
i
])
{
auto
*
fn_data
=
false_negative
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
stat_pos
[
binIdx
]
+=
1.0
;
auto
*
tn_data
=
true_negative
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
}
else
{
auto
*
fp_data
=
false_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
stat_neg
[
binIdx
]
+=
1.0
;
for
(
int
idx_thresh
=
0
;
idx_thresh
<
num_thresholds
;
idx_thresh
++
)
{
// calculate TP, FN, TN, FP for current thresh
int64_t
tp
=
0
,
fn
=
0
,
tn
=
0
,
fp
=
0
;
for
(
size_t
i
=
0
;
i
<
batch_size
;
i
++
)
{
// NOTE: label_data used as bool, labels > 0 will be treated as true.
if
(
label_data
[
i
])
{
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
tp
++
;
}
else
{
fn
++
;
}
}
else
{
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
fp
++
;
}
else
{
tn
++
;
}
}
}
}
// store rates
tp_data
[
idx_thresh
]
+=
tp
;
fn_data
[
idx_thresh
]
+=
fn
;
tn_data
[
idx_thresh
]
+=
tn
;
fp_data
[
idx_thresh
]
+=
fp
;
}
}
// epsilon to avoid divide by zero.
double
epsilon
=
1e-6
;
*
auc
=
0.0
f
;
// Riemann sum to caculate auc.
Tensor
tp_rate
,
fp_rate
,
rec_rate
;
double
totPos
=
0.0
;
tp_rate
.
Resize
({
num_thresholds
});
double
totNeg
=
0.0
;
fp_rate
.
Resize
({
num_thresholds
});
double
totPosPrev
=
0.0
;
rec_rate
.
Resize
({
num_thresholds
});
double
totNegPrev
=
0.0
;
auto
*
tp_rate_data
=
tp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
fp_rate_data
=
fp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
int
idx
=
num_thresholds
;
auto
*
rec_rate_data
=
rec_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
for
(
int
i
=
0
;
i
<
num_thresholds
;
i
++
)
{
while
(
idx
>=
0
)
{
tp_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
totPosPrev
=
totPos
;
(
tp_data
[
i
]
+
fn_data
[
i
]
+
epsilon
);
totNegPrev
=
totNeg
;
fp_rate_data
[
i
]
=
totPos
+=
stat_pos
[
idx
];
static_cast
<
double
>
(
fp_data
[
i
])
/
(
fp_data
[
i
]
+
tn_data
[
i
]
+
epsilon
);
totNeg
+=
stat_neg
[
idx
];
rec_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
*
auc
+=
trapezoidArea
(
totNeg
,
totNegPrev
,
totPos
,
totPosPrev
);
(
tp_data
[
i
]
+
fp_data
[
i
]
+
epsilon
);
--
idx
;
}
}
*
auc_data
=
0.0
f
;
if
(
curve
==
"ROC"
)
{
if
(
totPos
>
0.0
&&
totNeg
>
0.0
)
{
for
(
int
i
=
0
;
i
<
num_thresholds
-
1
;
i
++
)
{
*
auc
=
*
auc
/
totPos
/
totNeg
;
auto
dx
=
fp_rate_data
[
i
]
-
fp_rate_data
[
i
+
1
];
auto
y
=
(
tp_rate_data
[
i
]
+
tp_rate_data
[
i
+
1
])
/
2.0
f
;
*
auc_data
=
*
auc_data
+
dx
*
y
;
}
}
else
if
(
curve
==
"PR"
)
{
for
(
int
i
=
1
;
i
<
num_thresholds
;
i
++
)
{
auto
dx
=
tp_rate_data
[
i
]
-
tp_rate_data
[
i
-
1
];
auto
y
=
(
rec_rate_data
[
i
]
+
rec_rate_data
[
i
-
1
])
/
2.0
f
;
*
auc_data
=
*
auc_data
+
dx
*
y
;
}
}
}
}
}
};
};
...
...
paddle/fluid/operators/detection/bbox_util.h
0 → 100644
浏览文件 @
12b483c0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstddef>
#include <cstring>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
namespace
paddle
{
namespace
operators
{
/*
 * Encodes ground-truth boxes as regression deltas relative to proposal boxes.
 *
 * For each row i in [0, box_num), columns 0/2 of a box are treated as its x
 * extent and columns 1/3 as its y extent. The four values written to row i of
 * box_delta are:
 *   col 0: (gt_center_x - ex_center_x) / ex_width
 *   col 1: (gt_center_y - ex_center_y) / ex_height
 *   col 2: log(gt_width  / ex_width)
 *   col 3: log(gt_height / ex_height)
 *
 * box_num    number of rows to process.
 * ex_boxes   proposal boxes, viewed as a rank-2 tensor of T.
 * gt_boxes   ground-truth boxes, viewed as a rank-2 tensor of T.
 * weights    optional array of 4 divisors, one per output column; may be
 *            null, in which case the deltas are left unscaled.
 * normalized when false, 1 is added to every width and height.
 * box_delta  output tensor, viewed as a rank-2 tensor of T. It is not
 *            allocated or resized here.
 */
template <typename T>
inline void BoxToDelta(const int box_num, const framework::Tensor& ex_boxes,
                       const framework::Tensor& gt_boxes, const T* weights,
                       const bool normalized, framework::Tensor* box_delta) {
  auto proposals = framework::EigenTensor<T, 2>::From(ex_boxes);
  auto truths = framework::EigenTensor<T, 2>::From(gt_boxes);
  auto deltas = framework::EigenTensor<T, 2>::From(*box_delta);

  // Extra extent added to every width/height for un-normalized boxes.
  const T extent_pad = normalized ? 0 : 1;

  for (int64_t row = 0; row < box_num; ++row) {
    // Read the whole row before writing anything: a row of box_delta may be
    // the same storage as the matching row of ex_boxes.
    const T prop_w = proposals(row, 2) - proposals(row, 0) + extent_pad;
    const T prop_h = proposals(row, 3) - proposals(row, 1) + extent_pad;
    const T prop_cx = proposals(row, 0) + 0.5 * prop_w;
    const T prop_cy = proposals(row, 1) + 0.5 * prop_h;

    const T truth_w = truths(row, 2) - truths(row, 0) + extent_pad;
    const T truth_h = truths(row, 3) - truths(row, 1) + extent_pad;
    const T truth_cx = truths(row, 0) + 0.5 * truth_w;
    const T truth_cy = truths(row, 1) + 0.5 * truth_h;

    T dx = (truth_cx - prop_cx) / prop_w;
    T dy = (truth_cy - prop_cy) / prop_h;
    T dw = std::log(truth_w / prop_w);
    T dh = std::log(truth_h / prop_h);

    // Optional per-column scaling.
    if (weights) {
      dx = dx / weights[0];
      dy = dy / weights[1];
      dw = dw / weights[2];
      dh = dh / weights[3];
    }

    deltas(row, 0) = dx;
    deltas(row, 1) = dy;
    deltas(row, 2) = dw;
    deltas(row, 3) = dh;
  }
}
/*
 * Copies selected rows from `in` to `out`.
 *
 * Row i of `out` receives row index[i] of `in`, where a row is `in_stride`
 * consecutive elements of T. Rows are copied with memcpy, so T is expected to
 * be trivially copyable.
 *
 * in         source buffer, addressed as rows of `in_stride` elements.
 * in_stride  number of T elements per row.
 * index      `num` row indices into `in`; indices are not range-checked.
 * num        number of rows to copy.
 * out        destination buffer with room for num * in_stride elements.
 *
 * A non-positive `num` or `in_stride` copies nothing.
 */
template <typename T>
void Gather(const T* in, const int in_stride, const int* index, const int num,
            T* out) {
  // Nothing to copy. This also stops a negative stride from being converted
  // into a huge unsigned byte count for memcpy.
  if (num <= 0 || in_stride <= 0) {
    return;
  }

  // Offsets are computed in pointer-sized integers so that
  // row * stride cannot overflow int on large gathers.
  const std::ptrdiff_t stride = static_cast<std::ptrdiff_t>(in_stride);
  const std::size_t stride_bytes =
      static_cast<std::size_t>(in_stride) * sizeof(T);

  for (int i = 0; i < num; ++i) {
    const std::ptrdiff_t src_row = static_cast<std::ptrdiff_t>(index[i]);
    const std::ptrdiff_t dst_row = static_cast<std::ptrdiff_t>(i);
    std::memcpy(out + dst_row * stride, in + src_row * stride, stride_bytes);
  }
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
12b483c0
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
...
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
...
@@ -133,31 +134,6 @@ void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
}
}
}
}
template
<
typename
T
>
void
BoxToDelta
(
int
box_num
,
const
Tensor
&
ex_boxes
,
const
Tensor
&
gt_boxes
,
const
std
::
vector
<
float
>&
weights
,
Tensor
*
box_delta
)
{
auto
ex_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
ex_boxes
);
auto
gt_boxes_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
gt_boxes
);
auto
box_delta_et
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
*
box_delta
);
T
ex_w
,
ex_h
,
ex_ctr_x
,
ex_ctr_y
,
gt_w
,
gt_h
,
gt_ctr_x
,
gt_ctr_y
;
for
(
int64_t
i
=
0
;
i
<
box_num
;
++
i
)
{
ex_w
=
ex_boxes_et
(
i
,
2
)
-
ex_boxes_et
(
i
,
0
)
+
1
;
ex_h
=
ex_boxes_et
(
i
,
3
)
-
ex_boxes_et
(
i
,
1
)
+
1
;
ex_ctr_x
=
ex_boxes_et
(
i
,
0
)
+
0.5
*
ex_w
;
ex_ctr_y
=
ex_boxes_et
(
i
,
1
)
+
0.5
*
ex_h
;
gt_w
=
gt_boxes_et
(
i
,
2
)
-
gt_boxes_et
(
i
,
0
)
+
1
;
gt_h
=
gt_boxes_et
(
i
,
3
)
-
gt_boxes_et
(
i
,
1
)
+
1
;
gt_ctr_x
=
gt_boxes_et
(
i
,
0
)
+
0.5
*
gt_w
;
gt_ctr_y
=
gt_boxes_et
(
i
,
1
)
+
0.5
*
gt_h
;
box_delta_et
(
i
,
0
)
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
];
box_delta_et
(
i
,
1
)
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
];
box_delta_et
(
i
,
2
)
=
log
(
gt_w
/
ex_w
)
/
ex_w
/
weights
[
2
];
box_delta_et
(
i
,
3
)
=
log
(
gt_h
/
ex_h
)
/
ex_h
/
weights
[
3
];
}
}
template
<
typename
T
>
template
<
typename
T
>
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
platform
::
CPUDeviceContext
&
context
,
Tensor
*
iou
,
const
platform
::
CPUDeviceContext
&
context
,
Tensor
*
iou
,
...
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
...
@@ -243,12 +219,11 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
Tensor
*
sampled_labels
,
Tensor
*
sampled_gts
)
{
Tensor
*
sampled_labels
,
Tensor
*
sampled_gts
)
{
int
fg_num
=
fg_inds
.
size
();
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
gt_num
=
fg_num
+
bg_num
;
Tensor
fg_inds_t
,
bg_inds_t
,
gt_box_inds_t
,
gt_label_inds_t
;
Tensor
fg_inds_t
,
bg_inds_t
,
gt_box_inds_t
,
gt_label_inds_t
;
int
*
fg_inds_data
=
fg_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
int
*
fg_inds_data
=
fg_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
int
*
bg_inds_data
=
bg_inds_t
.
mutable_data
<
int
>
({
bg_num
},
context
.
GetPlace
());
int
*
bg_inds_data
=
bg_inds_t
.
mutable_data
<
int
>
({
bg_num
},
context
.
GetPlace
());
int
*
gt_box_inds_data
=
int
*
gt_box_inds_data
=
gt_box_inds_t
.
mutable_data
<
int
>
({
gt
_num
},
context
.
GetPlace
());
gt_box_inds_t
.
mutable_data
<
int
>
({
fg
_num
},
context
.
GetPlace
());
int
*
gt_label_inds_data
=
int
*
gt_label_inds_data
=
gt_label_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
gt_label_inds_t
.
mutable_data
<
int
>
({
fg_num
},
context
.
GetPlace
());
std
::
copy
(
fg_inds
.
begin
(),
fg_inds
.
end
(),
fg_inds_data
);
std
::
copy
(
fg_inds
.
begin
(),
fg_inds
.
end
(),
fg_inds_data
);
...
@@ -303,18 +278,20 @@ std::vector<Tensor> SampleRoisForOneImage(
...
@@ -303,18 +278,20 @@ std::vector<Tensor> SampleRoisForOneImage(
// Gather boxes and labels
// Gather boxes and labels
Tensor
sampled_boxes
,
sampled_labels
,
sampled_gts
;
Tensor
sampled_boxes
,
sampled_labels
,
sampled_gts
;
int
boxes_num
=
fg_inds
.
size
()
+
bg_inds
.
size
();
int
fg_num
=
fg_inds
.
size
();
int
bg_num
=
bg_inds
.
size
();
int
boxes_num
=
fg_num
+
bg_num
;
framework
::
DDim
bbox_dim
({
boxes_num
,
kBoxDim
});
framework
::
DDim
bbox_dim
({
boxes_num
,
kBoxDim
});
sampled_boxes
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_boxes
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_labels
.
mutable_data
<
int
>
({
boxes_num
},
context
.
GetPlace
());
sampled_labels
.
mutable_data
<
int
>
({
boxes_num
},
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
sampled_gts
.
mutable_data
<
T
>
(
{
fg_num
,
kBoxDim
}
,
context
.
GetPlace
());
GatherBoxesLabels
<
T
>
(
context
,
boxes
,
*
gt_boxes
,
*
gt_classes
,
fg_inds
,
bg_inds
,
GatherBoxesLabels
<
T
>
(
context
,
boxes
,
*
gt_boxes
,
*
gt_classes
,
fg_inds
,
bg_inds
,
gt_inds
,
&
sampled_boxes
,
&
sampled_labels
,
&
sampled_gts
);
gt_inds
,
&
sampled_boxes
,
&
sampled_labels
,
&
sampled_gts
);
// Compute targets
// Compute targets
Tensor
bbox_targets_single
;
Tensor
bbox_targets_single
;
bbox_targets_single
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
bbox_targets_single
.
mutable_data
<
T
>
(
bbox_dim
,
context
.
GetPlace
());
BoxToDelta
<
T
>
(
boxes_num
,
sampled_boxes
,
sampled_gts
,
bbox_reg_weights
,
BoxToDelta
<
T
>
(
fg_num
,
sampled_boxes
,
sampled_gts
,
nullptr
,
false
,
&
bbox_targets_single
);
&
bbox_targets_single
);
// Scale rois
// Scale rois
...
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
...
@@ -427,7 +404,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
auto
rpn_rois_lod
=
rpn_rois
->
lod
().
back
();
auto
rpn_rois_lod
=
rpn_rois
->
lod
().
back
();
auto
gt_classes_lod
=
gt_classes
->
lod
().
back
();
auto
gt_classes_lod
=
gt_classes
->
lod
().
back
();
auto
gt_boxes_lod
=
gt_boxes
->
lod
().
back
();
auto
gt_boxes_lod
=
gt_boxes
->
lod
().
back
();
for
(
size_
t
i
=
0
;
i
<
n
;
++
i
)
{
for
(
in
t
i
=
0
;
i
<
n
;
++
i
)
{
Tensor
rpn_rois_slice
=
Tensor
rpn_rois_slice
=
rpn_rois
->
Slice
(
rpn_rois_lod
[
i
],
rpn_rois_lod
[
i
+
1
]);
rpn_rois
->
Slice
(
rpn_rois_lod
[
i
],
rpn_rois_lod
[
i
+
1
]);
Tensor
gt_classes_slice
=
Tensor
gt_classes_slice
=
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
12b483c0
...
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -311,8 +311,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
context
.
GetPlace
());
context
.
GetPlace
());
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
()
/
4
,
1
},
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
(),
1
},
context
.
GetPlace
());
context
.
GetPlace
());
Tensor
bbox_deltas_swap
,
scores_swap
;
Tensor
bbox_deltas_swap
,
scores_swap
;
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
...
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -421,7 +420,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
CPUGather
<
T
>
(
ctx
,
proposals
,
keep
,
&
bbox_sel
);
CPUGather
<
T
>
(
ctx
,
proposals
,
keep
,
&
bbox_sel
);
CPUGather
<
T
>
(
ctx
,
scores_sel
,
keep
,
&
scores_filter
);
CPUGather
<
T
>
(
ctx
,
scores_sel
,
keep
,
&
scores_filter
);
if
(
nms_thresh
<=
0
)
{
if
(
nms_thresh
<=
0
)
{
return
std
::
make_pair
(
bbox_sel
,
scores_
sel
);
return
std
::
make_pair
(
bbox_sel
,
scores_
filter
);
}
}
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
12b483c0
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include <random>
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
...
@@ -46,156 +47,219 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
auto
in_dims
=
ctx
->
GetInputDim
(
"DistMat"
);
auto
in_dims
=
ctx
->
GetInputDim
(
"DistMat"
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
2
,
"The rank of Input(DistMat) must be 2."
);
"The rank of Input(DistMat) must be 2."
);
ctx
->
SetOutputDim
(
"LocationIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"ScoreIndex"
,
{
-
1
});
ctx
->
SetOutputDim
(
"TargetLabel"
,
{
-
1
,
1
});
ctx
->
SetOutputDim
(
"TargetBBox"
,
{
-
1
,
4
});
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"DistMat"
)
->
type
()),
platform
::
CPUPlace
());
}
}
};
};
template
<
typename
T
>
template
<
typename
T
>
class
RpnTargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
class
RpnTargetAssignKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor_t
=
context
.
Input
<
Tensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_bbox_t
=
context
.
Input
<
Tensor
>
(
"GtBox"
);
auto
*
dist_t
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
auto
*
loc_index_t
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
auto
*
score_index_t
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox_t
=
context
.
Output
<
Tensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl_t
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
lod
=
dist_t
->
lod
().
back
();
int64_t
batch_num
=
static_cast
<
int64_t
>
(
lod
.
size
()
-
1
);
int64_t
anchor_num
=
dist_t
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
anchor_num
,
anchor_t
->
dims
()[
0
]);
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
int
fg_num_per_batch
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
max_num
=
batch_num
*
anchor_num
;
auto
place
=
context
.
GetPlace
();
tgt_bbox_t
->
mutable_data
<
T
>
({
max_num
,
4
},
place
);
auto
*
loc_index
=
loc_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
auto
*
score_index
=
score_index_t
->
mutable_data
<
int
>
({
max_num
},
place
);
Tensor
tmp_tgt_lbl
;
auto
*
tmp_lbl_data
=
tmp_tgt_lbl
.
mutable_data
<
int64_t
>
({
max_num
},
place
);
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
&
tmp_tgt_lbl
,
static_cast
<
int64_t
>
(
-
1
));
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
int
fg_num
=
0
;
int
bg_num
=
0
;
for
(
int
i
=
0
;
i
<
batch_num
;
++
i
)
{
Tensor
dist
=
dist_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
Tensor
gt_bbox
=
gt_bbox_t
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
auto
fg_bg_gt
=
SampleFgBgGt
(
dev_ctx
,
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num_per_batch
,
engine
,
tmp_lbl_data
+
i
*
anchor_num
);
int
cur_fg_num
=
fg_bg_gt
[
0
].
size
();
int
cur_bg_num
=
fg_bg_gt
[
1
].
size
();
std
::
transform
(
fg_bg_gt
[
0
].
begin
(),
fg_bg_gt
[
0
].
end
(),
loc_index
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
memcpy
(
score_index
,
loc_index
,
cur_fg_num
*
sizeof
(
int
));
std
::
transform
(
fg_bg_gt
[
1
].
begin
(),
fg_bg_gt
[
1
].
end
(),
score_index
+
cur_fg_num
,
[
i
,
anchor_num
](
int
d
)
{
return
d
+
i
*
anchor_num
;
});
// get target bbox deltas
if
(
cur_fg_num
)
{
Tensor
fg_gt
;
T
*
gt_data
=
fg_gt
.
mutable_data
<
T
>
({
cur_fg_num
,
4
},
place
);
Tensor
tgt_bbox
=
tgt_bbox_t
->
Slice
(
fg_num
,
fg_num
+
cur_fg_num
);
T
*
tgt_data
=
tgt_bbox
.
data
<
T
>
();
Gather
<
T
>
(
anchor_t
->
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
0
][
0
]),
cur_fg_num
,
tgt_data
);
Gather
<
T
>
(
gt_bbox
.
data
<
T
>
(),
4
,
reinterpret_cast
<
int
*>
(
&
fg_bg_gt
[
2
][
0
]),
cur_fg_num
,
gt_data
);
BoxToDelta
<
T
>
(
cur_fg_num
,
tgt_bbox
,
fg_gt
,
nullptr
,
false
,
&
tgt_bbox
);
}
loc_index
+=
cur_fg_num
;
score_index
+=
cur_fg_num
+
cur_bg_num
;
fg_num
+=
cur_fg_num
;
bg_num
+=
cur_bg_num
;
}
int
lbl_num
=
fg_num
+
bg_num
;
PADDLE_ENFORCE_LE
(
fg_num
,
max_num
);
PADDLE_ENFORCE_LE
(
lbl_num
,
max_num
);
tgt_bbox_t
->
Resize
({
fg_num
,
4
});
loc_index_t
->
Resize
({
fg_num
});
score_index_t
->
Resize
({
lbl_num
});
auto
*
lbl_data
=
tgt_lbl_t
->
mutable_data
<
int64_t
>
({
lbl_num
,
1
},
place
);
Gather
<
int64_t
>
(
tmp_lbl_data
,
1
,
score_index_t
->
data
<
int
>
(),
lbl_num
,
lbl_data
);
}
private:
void
ScoreAssign
(
const
T
*
dist_data
,
const
Tensor
&
anchor_to_gt_max
,
void
ScoreAssign
(
const
T
*
dist_data
,
const
Tensor
&
anchor_to_gt_max
,
const
int
row
,
const
int
col
,
const
float
pos_threshold
,
const
int
row
,
const
int
col
,
const
float
pos_threshold
,
const
float
neg_threshold
,
int64_t
*
target_label
_data
,
const
float
neg_threshold
,
int64_t
*
target_label
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
)
const
{
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
)
const
{
int
fg_offset
=
fg_inds
->
size
();
float
epsilon
=
0.0001
;
int
bg_offset
=
bg_inds
->
size
();
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
const
T
*
v
=
dist_data
+
i
*
col
;
const
T
*
v
=
dist_data
+
i
*
col
;
T
max
_dist
=
*
std
::
max_element
(
v
,
v
+
col
);
T
max
=
*
std
::
max_element
(
v
,
v
+
col
);
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
T
val
=
dist_data
[
i
*
col
+
j
];
if
(
std
::
abs
(
max
-
v
[
j
])
<
epsilon
)
{
if
(
val
==
max_dist
)
target_label_data
[
j
]
=
1
;
target_label
[
j
]
=
1
;
}
}
}
}
}
// Pick the fg/bg and count the number
// Pick the fg/bg
const
T
*
anchor_to_gt_max_data
=
anchor_to_gt_max
.
data
<
T
>
();
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
col
;
++
j
)
{
if
(
anchor_to_gt_max
.
data
<
T
>
()[
j
]
>
pos_threshold
)
{
if
(
anchor_to_gt_max
_data
[
j
]
>=
pos_threshold
)
{
target_label
_data
[
j
]
=
1
;
target_label
[
j
]
=
1
;
}
else
if
(
anchor_to_gt_max
.
data
<
T
>
()
[
j
]
<
neg_threshold
)
{
}
else
if
(
anchor_to_gt_max
_data
[
j
]
<
neg_threshold
)
{
target_label
_data
[
j
]
=
0
;
target_label
[
j
]
=
0
;
}
}
if
(
target_label
_data
[
j
]
==
1
)
{
if
(
target_label
[
j
]
==
1
)
{
fg_inds
->
push_back
(
fg_offset
+
j
);
fg_inds
->
push_back
(
j
);
}
else
if
(
target_label
_data
[
j
]
==
0
)
{
}
else
if
(
target_label
[
j
]
==
0
)
{
bg_inds
->
push_back
(
bg_offset
+
j
);
bg_inds
->
push_back
(
j
);
}
}
}
}
}
}
void
ReservoirSampling
(
const
int
num
,
const
int
offset
,
void
ReservoirSampling
(
const
int
num
,
std
::
minstd_rand
engine
,
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
inds
)
const
{
std
::
vector
<
int
>*
inds
)
const
{
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
const
int64_t
size
=
static_cast
<
int64_t
>
(
inds
->
size
()
-
offset
);
size_t
len
=
inds
->
size
(
);
if
(
size
>
num
)
{
if
(
len
>
static_cast
<
size_t
>
(
num
)
)
{
for
(
int64_t
i
=
num
;
i
<
size
;
++
i
)
{
for
(
size_t
i
=
num
;
i
<
len
;
++
i
)
{
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
if
(
rng_ind
<
num
)
if
(
rng_ind
<
num
)
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
,
inds
->
begin
()
+
i
);
inds
->
begin
()
+
i
+
offset
);
}
}
inds
->
resize
(
num
);
}
}
}
}
void
RpnTargetAssign
(
const
framework
::
ExecutionContext
&
ctx
,
// std::vector<std::vector<int>> RpnTargetAssign(
const
Tensor
&
dist
,
const
float
pos_threshold
,
std
::
vector
<
std
::
vector
<
int
>>
SampleFgBgGt
(
const
float
neg_threshold
,
const
int
rpn_batch_size
,
const
platform
::
CPUDeviceContext
&
ctx
,
const
Tensor
&
dist
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
const
float
pos_threshold
,
const
float
neg_threshold
,
std
::
vector
<
int
>*
fg_inds
,
std
::
vector
<
int
>*
bg_inds
,
const
int
rpn_batch_size
,
const
int
fg_num
,
std
::
minstd_rand
engine
,
int64_t
*
target_label_data
)
const
{
int64_t
*
target_label
)
const
{
auto
*
dist_data
=
dist
.
data
<
T
>
();
auto
*
dist_data
=
dist
.
data
<
T
>
();
int64_t
row
=
dist
.
dims
()[
0
];
int
row
=
dist
.
dims
()[
0
];
int64_t
col
=
dist
.
dims
()[
1
];
int
col
=
dist
.
dims
()[
1
];
int
fg_offset
=
fg_inds
->
size
();
int
bg_offset
=
bg_inds
->
size
();
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
vector
<
int
>
gt_inds
;
// Calculate the max IoU between anchors and gt boxes
// Calculate the max IoU between anchors and gt boxes
Tensor
anchor_to_gt_max
;
// Map from anchor to gt box that has highest overlap
anchor_to_gt_max
.
mutable_data
<
T
>
(
auto
place
=
ctx
.
GetPlace
();
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
col
),
1
}),
Tensor
anchor_to_gt_max
,
anchor_to_gt_argmax
;
platform
::
CPUPlace
());
anchor_to_gt_max
.
mutable_data
<
T
>
({
col
},
place
);
auto
&
place
=
*
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>()
int
*
argmax
=
anchor_to_gt_argmax
.
mutable_data
<
int
>
({
col
},
place
);
.
eigen_device
();
auto
x
=
EigenMatrix
<
T
>::
From
(
dist
);
auto
x
=
framework
::
EigenMatrix
<
T
>::
From
(
dist
);
auto
x_col_max
=
EigenMatrix
<
T
>::
From
(
anchor_to_gt_max
);
auto
x_col_max
=
framework
::
EigenVector
<
T
>::
Flatten
(
anchor_to_gt_max
);
x_col_max
.
device
(
place
)
=
auto
x_col_argmax
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
))
framework
::
EigenVector
<
int
>::
Flatten
(
anchor_to_gt_argmax
);
.
reshape
(
Eigen
::
DSizes
<
int
,
2
>
(
static_cast
<
int64_t
>
(
col
),
1
));
x_col_max
=
x
.
maximum
(
Eigen
::
DSizes
<
int
,
1
>
(
0
));
x_col_argmax
=
x
.
argmax
(
0
).
template
cast
<
int
>();
// Follow the Faster RCNN's implementation
// Follow the Faster RCNN's implementation
ScoreAssign
(
dist_data
,
anchor_to_gt_max
,
row
,
col
,
pos_threshold
,
ScoreAssign
(
dist_data
,
anchor_to_gt_max
,
row
,
col
,
pos_threshold
,
neg_threshold
,
target_label
_data
,
fg_inds
,
bg_inds
);
neg_threshold
,
target_label
,
&
fg_inds
,
&
bg_inds
);
// Reservoir Sampling
// Reservoir Sampling
ReservoirSampling
(
fg_num
,
fg_offset
,
engine
,
fg_inds
);
ReservoirSampling
(
fg_num
,
engine
,
&
fg_inds
);
int
bg_num
=
rpn_batch_size
-
(
fg_inds
->
size
()
-
fg_offset
);
int
fg_num2
=
static_cast
<
int
>
(
fg_inds
.
size
()
);
ReservoirSampling
(
bg_num
,
bg_offset
,
engine
,
bg_inds
)
;
int
bg_num
=
rpn_batch_size
-
fg_num2
;
}
ReservoirSampling
(
bg_num
,
engine
,
&
bg_inds
);
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
gt_inds
.
reserve
(
fg_num2
);
auto
*
dist
=
context
.
Input
<
LoDTensor
>
(
"DistMat"
);
for
(
int
i
=
0
;
i
<
fg_num2
;
++
i
)
{
auto
*
loc_index
=
context
.
Output
<
Tensor
>
(
"LocationIndex"
);
gt_inds
.
emplace_back
(
argmax
[
fg_inds
[
i
]]);
auto
*
score_index
=
context
.
Output
<
Tensor
>
(
"ScoreIndex"
);
auto
*
tgt_lbl
=
context
.
Output
<
Tensor
>
(
"TargetLabel"
);
auto
col
=
dist
->
dims
()[
1
];
int64_t
n
=
dist
->
lod
().
size
()
==
0UL
?
1
:
static_cast
<
int64_t
>
(
dist
->
lod
().
back
().
size
()
-
1
);
if
(
dist
->
lod
().
size
())
{
PADDLE_ENFORCE_EQ
(
dist
->
lod
().
size
(),
1UL
,
"Only support 1 level of LoD."
);
}
}
int
rpn_batch_size
=
context
.
Attr
<
int
>
(
"rpn_batch_size_per_im"
);
std
::
vector
<
std
::
vector
<
int
>>
fg_bg_gt
;
float
pos_threshold
=
context
.
Attr
<
float
>
(
"rpn_positive_overlap"
);
fg_bg_gt
.
emplace_back
(
fg_inds
);
float
neg_threshold
=
context
.
Attr
<
float
>
(
"rpn_negative_overlap"
);
fg_bg_gt
.
emplace_back
(
bg_inds
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
fg_bg_gt
.
emplace_back
(
gt_inds
);
int
fg_num
=
static_cast
<
int
>
(
rpn_batch_size
*
fg_fraction
);
int64_t
*
target_label_data
=
tgt_lbl
->
mutable_data
<
int64_t
>
({
n
*
col
,
1
},
context
.
GetPlace
());
auto
&
dev_ctx
=
context
.
device_context
<
platform
::
CPUDeviceContext
>
();
return
fg_bg_gt
;
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>
iset
;
iset
(
dev_ctx
,
tgt_lbl
,
static_cast
<
int
>
(
-
1
));
std
::
vector
<
int
>
fg_inds
;
std
::
vector
<
int
>
bg_inds
;
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
context
.
Attr
<
bool
>
(
"fix_seed"
)
?
context
.
Attr
<
int
>
(
"seed"
)
:
rnd
();
engine
.
seed
(
seed
);
if
(
n
==
1
)
{
RpnTargetAssign
(
context
,
*
dist
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
);
}
else
{
auto
lod
=
dist
->
lod
().
back
();
for
(
size_t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
Tensor
one_ins
=
dist
->
Slice
(
lod
[
i
],
lod
[
i
+
1
]);
RpnTargetAssign
(
context
,
one_ins
,
pos_threshold
,
neg_threshold
,
rpn_batch_size
,
fg_num
,
engine
,
&
fg_inds
,
&
bg_inds
,
target_label_data
+
i
*
col
);
}
}
int
*
loc_index_data
=
loc_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
())},
context
.
GetPlace
());
int
*
score_index_data
=
score_index
->
mutable_data
<
int
>
(
{
static_cast
<
int
>
(
fg_inds
.
size
()
+
bg_inds
.
size
())},
context
.
GetPlace
());
memcpy
(
loc_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
,
reinterpret_cast
<
int
*>
(
&
fg_inds
[
0
]),
fg_inds
.
size
()
*
sizeof
(
int
));
memcpy
(
score_index_data
+
fg_inds
.
size
(),
reinterpret_cast
<
int
*>
(
&
bg_inds
[
0
]),
bg_inds
.
size
()
*
sizeof
(
int
));
}
}
};
};
class
RpnTargetAssignOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
RpnTargetAssignOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBox"
,
"(LoDTensor) input groud-truth bbox with shape [K, 4]."
);
AddInput
(
AddInput
(
"DistMat"
,
"DistMat"
,
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
...
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -241,12 +305,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex"
,
"ScoreIndex"
,
"(Tensor), The indexes of foreground and background anchors in all "
"(Tensor), The indexes of foreground and background anchors in all "
"RPN anchors(The rest anchors are ignored). The shape of the "
"RPN anchors(The rest anchors are ignored). The shape of the "
"ScoreIndex is [F + B], F and B depend on the value of input "
"ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
"tensor and attributes."
);
" number."
);
AddOutput
(
"TargetLabel"
,
AddOutput
(
"TargetBBox"
,
"(Tensor<int64_t>), The target labels of each anchor with shape "
"(Tensor<int64_t>), The target bbox deltas with shape "
"[K * M, 1], "
"[F, 4], F is the sampled foreground number."
);
"K and M is the same as they are in DistMat."
);
AddOutput
(
"TargetLabel"
,
"(Tensor<int64_t>), The target labels of each anchor with shape "
"[F + B, 1], F and B are sampled foreground and backgroud number."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator can be, for given the IoU between the ground truth bboxes and the
This operator can be, for given the IoU between the ground truth bboxes and the
anchors, to assign classification and regression targets to each prediction.
anchors, to assign classification and regression targets to each prediction.
...
...
paddle/fluid/operators/distributed/request_handler_impl.cc
浏览文件 @
12b483c0
...
@@ -39,19 +39,6 @@ bool RequestSendHandler::Handle(const std::string& varname,
...
@@ -39,19 +39,6 @@ bool RequestSendHandler::Handle(const std::string& varname,
const
std
::
string
&
out_var_name
)
{
const
std
::
string
&
out_var_name
)
{
VLOG
(
4
)
<<
"RequestSendHandler:"
<<
varname
;
VLOG
(
4
)
<<
"RequestSendHandler:"
<<
varname
;
// Async
if
(
!
sync_mode_
)
{
rpc_server_
->
Profiler
().
OneStep
();
try
{
executor_
->
RunPreparedContext
((
*
grad_to_prepared_ctx_
)[
varname
].
get
(),
scope
);
}
catch
(
std
::
exception
&
e
)
{
LOG
(
ERROR
)
<<
"async: run sub program error "
<<
e
.
what
();
return
false
;
}
return
true
;
}
// Sync
// Sync
if
(
varname
==
BATCH_BARRIER_MESSAGE
)
{
if
(
varname
==
BATCH_BARRIER_MESSAGE
)
{
VLOG
(
3
)
<<
"sync: recv BATCH_BARRIER_MESSAGE"
;
VLOG
(
3
)
<<
"sync: recv BATCH_BARRIER_MESSAGE"
;
...
@@ -60,17 +47,31 @@ bool RequestSendHandler::Handle(const std::string& varname,
...
@@ -60,17 +47,31 @@ bool RequestSendHandler::Handle(const std::string& varname,
VLOG
(
3
)
<<
"sync: recv complete message"
;
VLOG
(
3
)
<<
"sync: recv complete message"
;
rpc_server_
->
Complete
();
rpc_server_
->
Complete
();
}
else
{
}
else
{
VLOG
(
3
)
<<
"sync: received var_name: "
<<
varname
;
// Async
rpc_server_
->
WaitCond
(
kRequestSend
);
if
(
!
sync_mode_
)
{
VLOG
(
3
)
<<
"sync: processing received var: "
<<
varname
;
VLOG
(
3
)
<<
"async process var: "
<<
varname
;
rpc_server_
->
Profiler
().
OneStep
();
if
(
invar
==
nullptr
)
{
try
{
LOG
(
FATAL
)
<<
"sync: Can not find server side var: "
<<
varname
;
executor_
->
RunPreparedContext
((
*
grad_to_prepared_ctx_
)[
varname
].
get
(),
return
false
;
scope
);
}
}
catch
(
std
::
exception
&
e
)
{
if
(
invar
->
IsType
<
framework
::
SelectedRows
>
())
{
LOG
(
ERROR
)
<<
"async: run sub program error "
<<
e
.
what
();
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_sparse_vars_
);
return
false
;
sparse_vars_
.
push_back
(
invar
);
}
return
true
;
}
else
{
// sync
rpc_server_
->
WaitCond
(
kRequestSend
);
VLOG
(
3
)
<<
"sync: processing received var: "
<<
varname
;
if
(
invar
==
nullptr
)
{
LOG
(
FATAL
)
<<
"sync: Can not find server side var: "
<<
varname
;
return
false
;
}
if
(
invar
->
IsType
<
framework
::
SelectedRows
>
())
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_sparse_vars_
);
sparse_vars_
.
push_back
(
invar
);
}
}
}
}
}
return
true
;
return
true
;
...
...
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
12b483c0
...
@@ -89,12 +89,12 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
...
@@ -89,12 +89,12 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
"The first dimension of Input(Bias) should be 1."
);
"The first dimension of Input(Bias) should be 1."
);
PADDLE_ENFORCE
(
!
ctx
->
Attrs
().
Get
<
bool
>
(
"use_peepholes"
),
auto
use_peepholes
=
ctx
->
Attrs
().
Get
<
bool
>
(
"use_peepholes"
);
"Do not support peephole yet."
);
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
(
use_peepholes
?
7
:
4
)
*
frame_size
,
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
4
*
frame_size
,
"The second dimension of Input(Bias) should be "
"The second dimension of Input(Bias) should be "
"4 * %d if disable peepholes connection"
,
"7 * %d if enable peepholes connection or"
frame_size
);
"4 * %d if disable peepholes"
,
frame_size
,
frame_size
);
framework
::
DDim
out_dims
({
x_dims
[
0
],
frame_size
});
framework
::
DDim
out_dims
({
x_dims
[
0
],
frame_size
});
ctx
->
SetOutputDim
(
"Hidden"
,
out_dims
);
ctx
->
SetOutputDim
(
"Hidden"
,
out_dims
);
...
@@ -232,16 +232,17 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -232,16 +232,17 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
act_cand = act_functor(act_cand_str); \
act_cand = act_functor(act_cand_str); \
}
}
#define INIT_BASE_INPUT_OUTPUT \
#define INIT_BASE_INPUT_OUTPUT \
auto* x = ctx.Input<LoDTensor>("X"); \
auto* x = ctx.Input<LoDTensor>("X"); \
auto* h0 = ctx.Input<Tensor>("H0"); \
auto* h0 = ctx.Input<Tensor>("H0"); \
auto* c0 = ctx.Input<Tensor>("C0"); \
auto* c0 = ctx.Input<Tensor>("C0"); \
auto* wx = ctx.Input<Tensor>("WeightX"); \
auto* wx = ctx.Input<Tensor>("WeightX"); \
auto* wh = ctx.Input<Tensor>("WeightH"); \
auto* wh = ctx.Input<Tensor>("WeightH"); \
auto* bias = ctx.Input<Tensor>("Bias"); \
auto* bias = ctx.Input<Tensor>("Bias"); \
auto* xx = ctx.Output<LoDTensor>("XX"); \
auto* xx = ctx.Output<LoDTensor>("XX"); \
auto* hidden_out = ctx.Output<LoDTensor>("Hidden"); \
auto* hidden_out = ctx.Output<LoDTensor>("Hidden"); \
auto* cell_out = ctx.Output<LoDTensor>("Cell"); \
auto* cell_out = ctx.Output<LoDTensor>("Cell"); \
bool use_peepholes = ctx.Attr<bool>("use_peepholes"); \
bool is_reverse = ctx.Attr<bool>("is_reverse");
bool is_reverse = ctx.Attr<bool>("is_reverse");
#define INIT_BASE_SIZES \
#define INIT_BASE_SIZES \
...
@@ -266,12 +267,21 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -266,12 +267,21 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
h0_data
=
h0
?
h0
->
data
<
T
>
()
:
nullptr
;
const
T
*
h0_data
=
h0
?
h0
->
data
<
T
>
()
:
nullptr
;
const
T
*
c0_data
=
c0
?
c0
->
data
<
T
>
()
:
nullptr
;
const
T
*
c0_data
=
c0
?
c0
->
data
<
T
>
()
:
nullptr
;
const
T
*
bias_data
=
bias
->
data
<
T
>
();
const
T
*
wc_data
=
bias_data
+
D4
;
// w_ic, w_fc, w_oc
const
T
*
wx_data
=
wx
->
data
<
T
>
();
const
T
*
wx_data
=
wx
->
data
<
T
>
();
const
T
*
wh_data
=
wh
->
data
<
T
>
();
const
T
*
wh_data
=
wh
->
data
<
T
>
();
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
hidden_out_data
=
hidden_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
hidden_out_data
=
hidden_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
cell_out_data
=
cell_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
cell_out_data
=
cell_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// use local variable
framework
::
DDim
check_dims
({
3
,
D
});
Tensor
checked_cell
;
// w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct
auto
checked_cell_data
=
checked_cell
.
mutable_data
<
T
>
(
check_dims
,
ctx
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
ctx
);
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
ctx
);
math
::
FCCompute
<
DeviceContext
,
T
>
(
blas
,
total_T
,
D4
,
M
,
x_data
,
wx_data
,
math
::
FCCompute
<
DeviceContext
,
T
>
(
blas
,
total_T
,
D4
,
M
,
x_data
,
wx_data
,
xx_data
,
bias
->
data
<
T
>
());
xx_data
,
bias
->
data
<
T
>
());
...
@@ -297,46 +307,86 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -297,46 +307,86 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
int
seq_len
=
x_lod
[
0
][
bid
+
1
]
-
x_lod
[
0
][
bid
];
int
seq_len
=
x_lod
[
0
][
bid
+
1
]
-
x_lod
[
0
][
bid
];
const
T
*
prev_c_data
=
nullptr
;
const
T
*
prev_c_data
=
nullptr
;
const
T
*
prev_h_data
=
nullptr
;
const
T
*
prev_h_data
=
nullptr
;
int
tstart
=
0
;
int
tstart
=
0
;
if
(
h0_data
)
{
if
(
h0_data
)
{
prev_h_data
=
h0_data
+
bid
*
D
;
prev_h_data
=
h0_data
+
bid
*
D
;
prev_c_data
=
c0_data
+
bid
*
D
;
prev_c_data
=
c0_data
+
bid
*
D
;
}
else
{
}
else
{
// W_ch, W_ih, W_fh, W_oh
// If step == 0 and there is no initialized hidden state, that is to say
act_gate
(
D3
,
xx_data
+
D
,
xx_data
+
D
);
// the H0 is zeros. Then W_h * H_t-1 can be skipped
// ~C_t
act_cand
(
D
,
xx_data
,
xx_data
);
act_cand
(
D
,
xx_data
,
xx_data
);
// cell out= input*tilde
if
(
use_peepholes
)
{
// I_t, F_t
act_gate
(
D2
,
xx_data
+
D
,
xx_data
+
D
);
}
else
{
// I_t, F_t, O_t
act_gate
(
D3
,
xx_data
+
D
,
xx_data
+
D
);
}
// C_t = I_t * ~C_t
blas
.
VMUL
(
D
,
xx_data
,
xx_data
+
D
,
cell_out_data
);
blas
.
VMUL
(
D
,
xx_data
,
xx_data
+
D
,
cell_out_data
);
if
(
use_peepholes
)
{
// + W_oc * C_t for peephole connection
blas
.
VMUL
(
D
,
wc_data
+
D2
,
cell_out_data
,
checked_cell_data
+
D2
);
blas
.
VADD
(
D
,
xx_data
+
D3
,
checked_cell_data
+
D2
,
xx_data
+
D3
);
// O_t
act_gate
(
D
,
xx_data
+
D3
,
xx_data
+
D3
);
}
// hidden out= act_state(cellout) * outgate
// hidden out= act_state(cellout) * outgate
act_cell
(
D
,
cell_out_data
,
xx_data
+
D2
);
act_cell
(
D
,
cell_out_data
,
xx_data
+
D2
);
// H_t = O_t * act_state(C_t)
blas
.
VMUL
(
D
,
xx_data
+
D2
,
xx_data
+
D3
,
hidden_out_data
);
blas
.
VMUL
(
D
,
xx_data
+
D2
,
xx_data
+
D3
,
hidden_out_data
);
// prev
// prev
prev_h_data
=
hidden_out_data
;
prev_h_data
=
hidden_out_data
;
prev_c_data
=
cell_out_data
;
prev_c_data
=
cell_out_data
;
tstart
=
1
;
tstart
=
1
;
move_step
();
move_step
();
}
}
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
// + W_h * H_t-1
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
1
,
D4
,
D
,
static_cast
<
T
>
(
1
),
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
1
,
D4
,
D
,
static_cast
<
T
>
(
1
),
prev_h_data
,
D
,
wh_data
,
D4
,
static_cast
<
T
>
(
1
),
xx_data
,
D4
);
prev_h_data
,
D
,
wh_data
,
D4
,
static_cast
<
T
>
(
1
),
xx_data
,
D4
);
// W_ch, W_ih, W_fh, W_oh
// ~C_t
act_gate
(
D3
,
xx_data
+
D
,
xx_data
+
D
);
act_cand
(
D
,
xx_data
,
xx_data
);
act_cand
(
D
,
xx_data
,
xx_data
);
// a = forget * prev_cell
if
(
use_peepholes
)
{
// + W_ic|W_fc * C_t-1 for peephole connection
blas
.
VMUL
(
D
,
wc_data
,
prev_c_data
,
checked_cell_data
);
blas
.
VMUL
(
D
,
wc_data
+
D
,
prev_c_data
,
checked_cell_data
+
D
);
blas
.
VADD
(
D2
,
xx_data
+
D
,
checked_cell_data
,
xx_data
+
D
);
// I_t, F_t
act_gate
(
D2
,
xx_data
+
D
,
xx_data
+
D
);
}
else
{
// I_t, F_t, O_t
act_gate
(
D3
,
xx_data
+
D
,
xx_data
+
D
);
}
// F_t * C_t-1
blas
.
VMUL
(
D
,
xx_data
+
D2
,
prev_c_data
,
xx_data
+
D2
);
blas
.
VMUL
(
D
,
xx_data
+
D2
,
prev_c_data
,
xx_data
+
D2
);
// I_t * ~C_t
// b = input * tilde
blas
.
VMUL
(
D
,
xx_data
,
xx_data
+
D
,
xx_data
+
D
);
blas
.
VMUL
(
D
,
xx_data
,
xx_data
+
D
,
xx_data
+
D
);
// C_t = F_t * C_t-1 + I_t * ~C_t
// cell out= a+b
blas
.
VADD
(
D
,
xx_data
+
D
,
xx_data
+
D2
,
cell_out_data
);
blas
.
VADD
(
D
,
xx_data
+
D
,
xx_data
+
D2
,
cell_out_data
);
if
(
use_peepholes
)
{
// + W_oc * C_t for peephole connection
blas
.
VMUL
(
D
,
wc_data
+
D2
,
cell_out_data
,
checked_cell_data
+
D2
);
blas
.
VADD
(
D
,
xx_data
+
D3
,
checked_cell_data
+
D2
,
xx_data
+
D3
);
// O_t
act_gate
(
D
,
xx_data
+
D3
,
xx_data
+
D3
);
}
// hidden out= act_state(cellout) * outgate
// hidden out= act_state(cellout) * outgate
act_cell
(
D
,
cell_out_data
,
xx_data
+
D2
);
act_cell
(
D
,
cell_out_data
,
xx_data
+
D2
);
// H_t = O_t * act_state(C_t)
blas
.
VMUL
(
D
,
xx_data
+
D2
,
xx_data
+
D3
,
hidden_out_data
);
blas
.
VMUL
(
D
,
xx_data
+
D2
,
xx_data
+
D3
,
hidden_out_data
);
// prev
// prev
...
@@ -344,14 +394,14 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -344,14 +394,14 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
prev_c_data
=
cell_out_data
;
prev_c_data
=
cell_out_data
;
move_step
();
move_step
();
}
}
// for each step in batch
}
}
// for each batch
}
}
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
platform
::
CPUDeviceContext
;
using
DeviceContext
=
platform
::
CPUDeviceContext
;
INIT_BASE_INPUT_OUTPUT
INIT_BASE_INPUT_OUTPUT
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
// batch size == 1
SeqCompute
(
ctx
);
SeqCompute
(
ctx
);
return
;
return
;
}
}
...
@@ -367,6 +417,8 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -367,6 +417,8 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
wx_data
=
wx
->
data
<
T
>
();
const
T
*
wx_data
=
wx
->
data
<
T
>
();
const
T
*
wh_data
=
wh
->
data
<
T
>
();
const
T
*
wh_data
=
wh
->
data
<
T
>
();
const
T
*
bias_data
=
bias
->
data
<
T
>
();
const
T
*
wc_data
=
bias_data
+
D4
;
// w_ic, w_fc, w_oc
auto
place
=
ctx
.
GetPlace
();
auto
place
=
ctx
.
GetPlace
();
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
...
@@ -375,6 +427,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -375,6 +427,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
hidden_out
->
mutable_data
<
T
>
(
place
);
hidden_out
->
mutable_data
<
T
>
(
place
);
cell_out
->
mutable_data
<
T
>
(
place
);
cell_out
->
mutable_data
<
T
>
(
place
);
// use local variable
framework
::
DDim
check_dims
({
3
,
D
});
Tensor
checked_cell
;
// w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct
auto
checked_cell_data
=
checked_cell
.
mutable_data
<
T
>
(
check_dims
,
ctx
.
GetPlace
());
math
::
LoDTensor2BatchFunctor
<
DeviceContext
,
T
>
to_batch
;
math
::
LoDTensor2BatchFunctor
<
DeviceContext
,
T
>
to_batch
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
dev_ctx
);
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
dev_ctx
);
...
@@ -396,17 +454,27 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -396,17 +454,27 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
reordered_h0
->
Resize
({
max_bs
,
D
});
reordered_h0
->
Resize
({
max_bs
,
D
});
reordered_c0
->
Resize
({
max_bs
,
D
});
reordered_c0
->
Resize
({
max_bs
,
D
});
T
*
prev_batch_h_data
=
nullptr
;
T
*
prev_batch_c_data
=
nullptr
;
T
*
cur_batch_in_data
=
batched_input_data
;
T
*
cur_batch_h_out_data
=
batched_h_out_data
;
T
*
cur_batch_c_out_data
=
batched_c_out_data
;
auto
move_step
=
[
&
](
int
bs
)
{
cur_batch_in_data
+=
bs
*
D4
;
cur_batch_c_out_data
+=
bs
*
D
;
cur_batch_h_out_data
+=
bs
*
D
;
};
int
tstart
=
0
;
int
tstart
=
0
;
T
*
prev_h_data
=
nullptr
;
T
*
prev_c_data
=
nullptr
;
if
(
h0
)
{
if
(
h0
)
{
// reorder h0, c0
// reorder h0, c0
T
*
reordered_h0_data
=
reordered_h0
->
mutable_data
<
T
>
(
place
);
T
*
reordered_h0_data
=
reordered_h0
->
mutable_data
<
T
>
(
place
);
T
*
reordered_c0_data
=
reordered_c0
->
mutable_data
<
T
>
(
place
);
T
*
reordered_c0_data
=
reordered_c0
->
mutable_data
<
T
>
(
place
);
const
T
*
h0_data
=
h0
->
data
<
T
>
();
const
T
*
h0_data
=
h0
->
data
<
T
>
();
const
T
*
c0_data
=
c0
->
data
<
T
>
();
const
T
*
c0_data
=
c0
->
data
<
T
>
();
prev_h_data
=
reordered_h0_data
;
prev_
batch_
h_data
=
reordered_h0_data
;
prev_c_data
=
reordered_c0_data
;
prev_
batch_
c_data
=
reordered_c0_data
;
size_t
sz
=
sizeof
(
T
)
*
D
;
size_t
sz
=
sizeof
(
T
)
*
D
;
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
std
::
memcpy
(
reordered_h0_data
,
h0_data
+
seq_order
[
i
]
*
D
,
sz
);
std
::
memcpy
(
reordered_h0_data
,
h0_data
+
seq_order
[
i
]
*
D
,
sz
);
...
@@ -415,71 +483,122 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -415,71 +483,122 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
reordered_c0_data
+=
D
;
reordered_c0_data
+=
D
;
}
}
}
else
{
}
else
{
// compute without h0, c0
// Compute with no H0/C0
T
*
cur_in_data
=
batched_input_data
;
T
*
cur_in_data
=
cur_batch_in_data
;
T
*
cur_h_out_data
=
batched_h_out_data
;
T
*
cur_c_out_data
=
cur_batch_c_out_data
;
T
*
cur_c_out_data
=
batched_c_out_data
;
T
*
cur_h_out_data
=
cur_batch_h_out_data
;
// W_ch, W_ih, W_fh, W_oh
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
// If step == 0 and there is no initialized hidden state, that is to say
act_gate
(
D3
,
cur_in_data
+
D
,
cur_in_data
+
D
);
// the H0 is zeros. Then W_h * H_t-1 can be skiped
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
// iterate each data in 1st batch
// ~C_t
act_cand
(
D
,
cur_in_data
,
cur_in_data
);
act_cand
(
D
,
cur_in_data
,
cur_in_data
);
// cell out= input*tilde
if
(
use_peepholes
)
{
// I_t, F_t
act_gate
(
D2
,
cur_in_data
+
D
,
cur_in_data
+
D
);
}
else
{
// I_t, F_t, O_t
act_gate
(
D3
,
cur_in_data
+
D
,
cur_in_data
+
D
);
}
// C_t = I_t * ~C_t
blas
.
VMUL
(
D
,
cur_in_data
,
cur_in_data
+
D
,
cur_c_out_data
);
blas
.
VMUL
(
D
,
cur_in_data
,
cur_in_data
+
D
,
cur_c_out_data
);
if
(
use_peepholes
)
{
// + W_oc * C_t for peephole connection
blas
.
VMUL
(
D
,
wc_data
+
D2
,
cur_c_out_data
,
checked_cell_data
+
D2
);
blas
.
VADD
(
D
,
cur_in_data
+
D3
,
checked_cell_data
+
D2
,
cur_in_data
+
D3
);
// O_t
act_gate
(
D
,
cur_in_data
+
D3
,
cur_in_data
+
D3
);
}
// hidden out= act_state(cellout) * outgate
// hidden out= act_state(cellout) * outgate
act_cell
(
D
,
cur_c_out_data
,
cur_in_data
+
D2
);
act_cell
(
D
,
cur_c_out_data
,
cur_in_data
+
D2
);
// H_t = O_t * act_state(C_t)
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
cur_in_data
+
D3
,
cur_h_out_data
);
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
cur_in_data
+
D3
,
cur_h_out_data
);
//
add offset
//
move to next data in the same batch
cur_in_data
+=
D4
;
cur_in_data
+=
D4
;
cur_c_out_data
+=
D
;
cur_c_out_data
+=
D
;
cur_h_out_data
+=
D
;
cur_h_out_data
+=
D
;
}
}
// move to data for next timestep
prev_batch_h_data
=
cur_batch_h_out_data
;
prev_batch_c_data
=
cur_batch_c_out_data
;
move_step
(
max_bs
);
tstart
=
1
;
tstart
=
1
;
prev_h_data
=
batched_h_out_data
;
prev_c_data
=
batched_c_out_data
;
}
}
// Then start from next
const
auto
&
batch_starts
=
batched_lod
[
0
];
const
auto
&
batch_starts
=
batched_lod
[
0
];
const
int
max_seq_len
=
batch_starts
.
size
()
-
1
;
const
int
max_seq_len
=
batch_starts
.
size
()
-
1
;
const
int
offset
=
tstart
*
max_bs
*
D
;
batched_input_data
=
batched_input_data
+
offset
*
4
;
batched_h_out_data
=
batched_h_out_data
+
offset
;
batched_c_out_data
=
batched_c_out_data
+
offset
;
for
(
int
step
=
tstart
;
step
<
max_seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
max_seq_len
;
++
step
)
{
const
int
cur_bs
=
batch_starts
[
step
+
1
]
-
batch_starts
[
step
];
const
int
cur_bs
=
batch_starts
[
step
+
1
]
-
batch_starts
[
step
];
// + W_h * H_t-1
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
cur_bs
,
D4
,
D
,
static_cast
<
T
>
(
1
),
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
cur_bs
,
D4
,
D
,
static_cast
<
T
>
(
1
),
prev_h_data
,
D
,
wh_data
,
D4
,
static_cast
<
T
>
(
1
),
prev_batch_h_data
,
D
,
wh_data
,
D4
,
static_cast
<
T
>
(
1
),
batched_input_data
,
D4
);
cur_batch_in_data
,
D4
);
T
*
cur_in_data
=
batched_input_data
;
T
*
cur_in_data
=
cur_batch_in_data
;
T
*
cur_prev_c_data
=
prev_c_data
;
T
*
cur_c_out_data
=
cur_batch_c_out_data
;
T
*
cur_c_out_data
=
batched_c_out_data
;
T
*
cur_h_out_data
=
cur_batch_h_out_data
;
T
*
cur_h_out_data
=
batched_h_out_data
;
T
*
prev_c_data
=
prev_batch_c_data
;
// NULL if no C0 in step0
for
(
int
i
=
0
;
i
<
cur_bs
;
++
i
)
{
T
*
prev_h_data
=
prev_batch_h_data
;
// NULL if no H0 in step0
// W_ch, W_ih, W_fh, W_oh
auto
next_data_in_batch
=
[
&
]()
{
act_gate
(
D3
,
cur_in_data
+
D
,
cur_in_data
+
D
);
cur_in_data
+=
D4
;
cur_c_out_data
+=
D
;
cur_h_out_data
+=
D
;
prev_c_data
=
prev_c_data
?
prev_c_data
+
D
:
nullptr
;
prev_h_data
=
prev_h_data
?
prev_h_data
+
D
:
nullptr
;
};
for
(
int
i
=
0
;
i
<
cur_bs
;
++
i
)
{
// iterate each data in same batch
// ~C_t
act_cand
(
D
,
cur_in_data
,
cur_in_data
);
act_cand
(
D
,
cur_in_data
,
cur_in_data
);
// a = forget * prev_cell
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
cur_prev_c_data
,
cur_in_data
+
D2
);
if
(
use_peepholes
)
{
// b = input * tilde
// + W_ic|W_fc * C_t-1 for peephole connection
blas
.
VMUL
(
D
,
wc_data
,
prev_c_data
,
checked_cell_data
);
blas
.
VMUL
(
D
,
wc_data
+
D
,
prev_c_data
,
checked_cell_data
+
D
);
blas
.
VADD
(
D2
,
cur_in_data
+
D
,
checked_cell_data
,
cur_in_data
+
D
);
// I_t, F_t
act_gate
(
D2
,
cur_in_data
+
D
,
cur_in_data
+
D
);
}
else
{
// I_t, F_t, O_t
act_gate
(
D3
,
cur_in_data
+
D
,
cur_in_data
+
D
);
}
// F_t * C_t-1
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
prev_c_data
,
cur_in_data
+
D2
);
// I_t * ~C_t
blas
.
VMUL
(
D
,
cur_in_data
,
cur_in_data
+
D
,
cur_in_data
+
D
);
blas
.
VMUL
(
D
,
cur_in_data
,
cur_in_data
+
D
,
cur_in_data
+
D
);
//
cell out= a+b
//
C_t = F_t * C_t-1 + I_t * ~C_t
blas
.
VADD
(
D
,
cur_in_data
+
D
,
cur_in_data
+
D2
,
cur_c_out_data
);
blas
.
VADD
(
D
,
cur_in_data
+
D
,
cur_in_data
+
D2
,
cur_c_out_data
);
if
(
use_peepholes
)
{
// + W_oc * C_t for peephole connection
blas
.
VMUL
(
D
,
wc_data
+
D2
,
cur_c_out_data
,
checked_cell_data
+
D2
);
blas
.
VADD
(
D
,
cur_in_data
+
D3
,
checked_cell_data
+
D2
,
cur_in_data
+
D3
);
// O_t
act_gate
(
D
,
cur_in_data
+
D3
,
cur_in_data
+
D3
);
}
// hidden out= act_state(cellout) * outgate
// hidden out= act_state(cellout) * outgate
act_cell
(
D
,
cur_c_out_data
,
cur_in_data
+
D2
);
act_cell
(
D
,
cur_c_out_data
,
cur_in_data
+
D2
);
// H_t = O_t * act_state(C_t)
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
cur_in_data
+
D3
,
cur_h_out_data
);
blas
.
VMUL
(
D
,
cur_in_data
+
D2
,
cur_in_data
+
D3
,
cur_h_out_data
);
cur_in_data
+=
D4
;
// move to next data in same batch
cur_prev_c_data
+=
D
;
next_data_in_batch
();
cur_c_out_data
+=
D
;
cur_h_out_data
+=
D
;
}
}
// move to data for next timestep
prev_c_data
=
batched_c_out_data
;
prev_batch_h_data
=
cur_batch_h_out_data
;
prev_h_data
=
batched_h_out_data
;
prev_batch_c_data
=
cur_batch_c_out_data
;
batched_c_out_data
=
cur_c_out_data
;
move_step
(
cur_bs
);
batched_h_out_data
=
cur_h_out_data
;
batched_input_data
=
cur_in_data
;
}
}
math
::
Batch2LoDTensorFunctor
<
DeviceContext
,
T
>
to_seq
;
math
::
Batch2LoDTensorFunctor
<
DeviceContext
,
T
>
to_seq
;
...
...
paddle/fluid/operators/gru_unit_op.h
浏览文件 @
12b483c0
...
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
...
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
gate_data
,
frame_size
*
3
);
gate_data
,
frame_size
*
3
);
// calculate activited gate
// calculate activited gate
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
));
g
.
slice
(
u_offsets
,
extents
),
g
.
slice
(
u_offsets
,
extents
));
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
));
g
.
slice
(
r_offsets
,
extents
),
g
.
slice
(
r_offsets
,
extents
));
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
...
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
...
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
1
,
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
1
,
gate_data
+
frame_size
*
2
,
frame_size
*
3
);
gate_data
+
frame_size
*
2
,
frame_size
*
3
);
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
ActCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
));
g
.
slice
(
c_offsets
,
extents
),
g
.
slice
(
c_offsets
,
extents
));
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
...
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
...
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
int
batch_size
=
input
->
dims
()[
0
];
int
batch_size
=
input
->
dims
()[
0
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
Eigen
::
array
<
int
,
2
>
extents
=
{
batch_size
,
frame_size
};
Eigen
::
array
<
int
,
2
>
extents
{{
batch_size
,
frame_size
}
};
Eigen
::
array
<
int
,
2
>
u_offsets
=
{
0
,
0
};
Eigen
::
array
<
int
,
2
>
u_offsets
{{
0
,
0
}
};
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
Eigen
::
array
<
int
,
2
>
r_offsets
=
{
0
,
frame_size
};
Eigen
::
array
<
int
,
2
>
r_offsets
{{
0
,
frame_size
}
};
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
auto
r
=
g
.
slice
(
r_offsets
,
extents
);
// reset gate
Eigen
::
array
<
int
,
2
>
c_offsets
=
{
0
,
frame_size
*
2
};
Eigen
::
array
<
int
,
2
>
c_offsets
{{
0
,
frame_size
*
2
}
};
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// backward for unactivated update gate
// backward for unactivated update gate
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
12b483c0
...
@@ -57,7 +57,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
...
@@ -57,7 +57,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
}
else
{
}
else
{
PADDLE_ENFORCE_LT
(
ids
[
i
],
row_number
);
PADDLE_ENFORCE_LT
(
ids
[
i
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
,
"ids %d"
,
i
);
memcpy
(
output
+
i
*
row_width
,
table
+
ids
[
i
]
*
row_width
,
memcpy
(
output
+
i
*
row_width
,
table
+
ids
[
i
]
*
row_width
,
row_width
*
sizeof
(
T
));
row_width
*
sizeof
(
T
));
}
}
...
...
paddle/fluid/operators/rmsprop_op.cc
浏览文件 @
12b483c0
...
@@ -36,9 +36,13 @@ class RmspropOp : public framework::OperatorWithKernel {
...
@@ -36,9 +36,13 @@ class RmspropOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(param_out) of RmspropOp should not be null."
);
"Output(param_out) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MomentOut"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MomentOut"
),
"Output(Moment
um_o
ut) of RmspropOp should not be null."
);
"Output(Moment
O
ut) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanSquareOut"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanSquareOut"
),
"Output(MeanSquareOut) of RmspropOp should not be null."
);
"Output(MeanSquareOut) of RmspropOp should not be null."
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanGradOut"
),
"Output(MeanGradOut) of RmspropOp should not be null."
);
}
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
...
@@ -58,6 +62,9 @@ class RmspropOp : public framework::OperatorWithKernel {
...
@@ -58,6 +62,9 @@ class RmspropOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MeanSquareOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MeanSquareOut"
,
param_dim
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
ctx
->
SetOutputDim
(
"MeanGradOut"
,
param_dim
);
}
}
}
};
};
...
@@ -70,6 +77,10 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -70,6 +77,10 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"MeanSquare"
,
AddInput
(
"MeanSquare"
,
"(Tensor, default Tensor<float>)"
"(Tensor, default Tensor<float>)"
" The mean square value that gets updated."
);
" The mean square value that gets updated."
);
AddInput
(
"MeanGrad"
,
"(Tensor, default Tensor<float>)"
" The moving average of gradient"
)
.
AsDispensable
();
AddInput
(
"LearningRate"
,
AddInput
(
"LearningRate"
,
"(Tensor, default Tensor<float>) "
"(Tensor, default Tensor<float>) "
"The learning rate should be a tensor of size 1."
);
"The learning rate should be a tensor of size 1."
);
...
@@ -82,6 +93,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -82,6 +93,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"ParamOut"
,
"(Tensor) Output updated parameter value."
);
AddOutput
(
"ParamOut"
,
"(Tensor) Output updated parameter value."
);
AddOutput
(
"MomentOut"
,
"(Tensor) Output updated moment."
);
AddOutput
(
"MomentOut"
,
"(Tensor) Output updated moment."
);
AddOutput
(
"MeanSquareOut"
,
"(Tensor) Output Mean squared updated value."
);
AddOutput
(
"MeanSquareOut"
,
"(Tensor) Output Mean squared updated value."
);
AddOutput
(
"MeanGradOut"
,
"(Tensor) Output moving average of gradient updated value."
);
AddAttr
<
float
>
(
"epsilon"
,
AddAttr
<
float
>
(
"epsilon"
,
"(float, default 1e-10) Constant "
"(float, default 1e-10) Constant "
...
@@ -93,6 +106,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -93,6 +106,8 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
0.9
f
);
.
SetDefault
(
0.9
f
);
AddAttr
<
float
>
(
"momentum"
,
"(float, default 0.0) Constant value."
)
AddAttr
<
float
>
(
"momentum"
,
"(float, default 0.0) Constant value."
)
.
SetDefault
(
0.0
f
);
.
SetDefault
(
0.0
f
);
AddAttr
<
bool
>
(
"centered"
,
"(bool, default false) use centered rmsprop."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Rmsprop Optimizer.
Rmsprop Optimizer.
...
@@ -103,6 +118,14 @@ MomentOut = momentum * Moment +
...
@@ -103,6 +118,14 @@ MomentOut = momentum * Moment +
ParamOut = Param - MomentOut
ParamOut = Param - MomentOut
$$
$$
if centered is true:
mean_grad = decay * mean_square{t-1} + (1-decay) * gradient
mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2
mom = momentum * mom{t-1} + learning_rate * g_t /
sqrt(mean_square - mean_grad**2 + epsilon)
param -= mom
The original slides that proposed Rmsprop: Slide 29 of
The original slides that proposed Rmsprop: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
...
...
paddle/fluid/operators/rmsprop_op.h
浏览文件 @
12b483c0
...
@@ -41,6 +41,7 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -41,6 +41,7 @@ class RmspropOpKernel : public framework::OpKernel<T> {
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
float
epsilon
=
ctx
.
Attr
<
float
>
(
"epsilon"
);
float
rho
=
ctx
.
Attr
<
float
>
(
"decay"
);
float
rho
=
ctx
.
Attr
<
float
>
(
"decay"
);
float
momentum
=
ctx
.
Attr
<
float
>
(
"momentum"
);
float
momentum
=
ctx
.
Attr
<
float
>
(
"momentum"
);
bool
centered
=
ctx
.
Attr
<
bool
>
(
"centered"
);
auto
p
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"Param"
));
auto
p
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"Param"
));
auto
ms
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"MeanSquare"
));
auto
ms
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"MeanSquare"
));
...
@@ -53,12 +54,24 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -53,12 +54,24 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto
ms_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_square_out
);
auto
ms_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_square_out
);
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
&
place
=
*
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
Eigen
::
DSizes
<
int
,
1
>
grad_dsize
(
grad
->
numel
(
));
Eigen
::
DSizes
<
int
,
1
>
grad_dsize
(
static_cast
<
int
>
(
grad
->
numel
()
));
ms_out
.
device
(
place
)
=
rho
*
ms
+
(
1
-
rho
)
*
g
*
g
;
ms_out
.
device
(
place
)
=
rho
*
ms
+
(
1
-
rho
)
*
g
*
g
;
mom_out
.
device
(
place
)
=
if
(
centered
)
{
momentum
*
mom
+
auto
mg
=
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
Tensor
>
(
"MeanGrad"
));
lr
.
broadcast
(
grad_dsize
)
*
g
/
(
ms_out
+
epsilon
).
sqrt
();
auto
*
mean_grad_out
=
ctx
.
Output
<
Tensor
>
(
"MeanGradOut"
);
mean_grad_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
mg_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_grad_out
);
mg_out
.
device
(
place
)
=
rho
*
mg
+
(
1
-
rho
)
*
g
;
mom_out
.
device
(
place
)
=
momentum
*
mom
+
lr
.
broadcast
(
grad_dsize
)
*
g
/
(
ms_out
-
mg_out
.
square
()
+
epsilon
).
sqrt
();
}
else
{
mom_out
.
device
(
place
)
=
momentum
*
mom
+
lr
.
broadcast
(
grad_dsize
)
*
g
/
(
ms_out
+
epsilon
).
sqrt
();
}
p_out
.
device
(
place
)
=
p
-
mom_out
;
p_out
.
device
(
place
)
=
p
-
mom_out
;
}
}
};
};
...
...
paddle/fluid/operators/roi_pool_op.cu
浏览文件 @
12b483c0
...
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
...
@@ -31,7 +31,7 @@ static inline int NumBlocks(const int N) {
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUROIPoolForward
(
__global__
void
GPUROIPoolForward
(
const
int
nthreads
,
const
T
*
input_data
,
const
int64_t
*
input_rois
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
int
*
roi_batch_id_data
,
T
*
output_data
,
int64_t
*
argmax_data
)
{
...
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
...
@@ -43,7 +43,7 @@ __global__ void GPUROIPoolForward(
int
c
=
(
i
/
pooled_width
/
pooled_height
)
%
channels
;
int
c
=
(
i
/
pooled_width
/
pooled_height
)
%
channels
;
int
n
=
i
/
pooled_width
/
pooled_height
/
channels
;
int
n
=
i
/
pooled_width
/
pooled_height
/
channels
;
const
int64_t
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
offset_input_rois
[
0
]
*
spatial_scale
);
int
roi_start_w
=
round
(
offset_input_rois
[
0
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
int
roi_start_h
=
round
(
offset_input_rois
[
1
]
*
spatial_scale
);
...
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
...
@@ -93,7 +93,7 @@ __global__ void GPUROIPoolForward(
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUROIPoolBackward
(
__global__
void
GPUROIPoolBackward
(
const
int
nthreads
,
const
int64_t
*
input_rois
,
const
T
*
output_grad
,
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
output_grad
,
const
int64_t
*
argmax_data
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int64_t
*
argmax_data
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
const
int
pooled_height
,
const
int
pooled_width
,
int
*
roi_batch_id_data
,
...
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
...
@@ -174,8 +174,8 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
GPUROIPoolForward
<
GPUROIPoolForward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
int64_t
>
(),
spatial_scale
,
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
height
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
roi_batch_id_list_gpu
.
data
<
int
>
(),
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
()));
}
}
...
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -228,7 +228,7 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
if
(
output_grad_size
>
0
)
{
if
(
output_grad_size
>
0
)
{
GPUROIPoolBackward
<
GPUROIPoolBackward
<
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
T
><<<
blocks
,
threads
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
int64_t
>
(),
out_grad
->
data
<
T
>
(),
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
argmax
->
data
<
int64_t
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
argmax
->
data
<
int64_t
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
width
,
pooled_height
,
pooled_width
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
roi_batch_id_list_gpu
.
data
<
int
>
(),
...
...
paddle/fluid/operators/roi_pool_op.h
浏览文件 @
12b483c0
...
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
...
@@ -72,7 +72,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
*
argmax_data
=
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
int64_t
*
argmax_data
=
argmax
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_start_w
=
round
(
rois_data
[
0
]
*
spatial_scale
);
int
roi_start_w
=
round
(
rois_data
[
0
]
*
spatial_scale
);
...
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
...
@@ -171,7 +171,7 @@ class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
}
}
}
}
const
int64_t
*
rois_data
=
rois
->
data
<
int64_t
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
int64_t
*
argmax_data
=
argmax
->
data
<
int64_t
>
();
const
int64_t
*
argmax_data
=
argmax
->
data
<
int64_t
>
();
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
12b483c0
...
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
...
@@ -145,26 +145,23 @@ def rpn_target_assign(loc,
"""
"""
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
# 1. Compute the regression target bboxes
# Compute overlaps between the prior boxes and the gt boxes overlaps
target_bbox
=
box_coder
(
prior_box
=
anchor_box
,
prior_box_var
=
anchor_var
,
target_box
=
gt_box
,
code_type
=
'encode_center_size'
,
box_normalized
=
False
)
# 2. Compute overlaps between the prior boxes and the gt boxes overlaps
iou
=
iou_similarity
(
x
=
gt_box
,
y
=
anchor_box
)
iou
=
iou_similarity
(
x
=
gt_box
,
y
=
anchor_box
)
# 3. Assign target label to anchors
# Assign target label to anchors
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
'int64'
)
target_bbox
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"rpn_target_assign"
,
type
=
"rpn_target_assign"
,
inputs
=
{
'DistMat'
:
iou
},
inputs
=
{
'Anchor'
:
anchor_box
,
'GtBox'
:
gt_box
,
'DistMat'
:
iou
},
outputs
=
{
outputs
=
{
'LocationIndex'
:
loc_index
,
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
target_label
'TargetLabel'
:
target_label
,
'TargetBBox'
:
target_bbox
,
},
},
attrs
=
{
attrs
=
{
'rpn_batch_size_per_im'
:
rpn_batch_size_per_im
,
'rpn_batch_size_per_im'
:
rpn_batch_size_per_im
,
...
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
...
@@ -173,16 +170,16 @@ def rpn_target_assign(loc,
'fg_fraction'
:
fg_fraction
'fg_fraction'
:
fg_fraction
})
})
# 4. Reshape and gather the target entry
loc_index
.
stop_gradient
=
True
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
2
))
score_index
.
stop_gradient
=
True
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
target_label
.
stop_gradient
=
True
target_label
=
nn
.
reshape
(
x
=
target_label
,
shape
=
(
-
1
,
1
))
target_bbox
.
stop_gradient
=
True
target_bbox
=
nn
.
reshape
(
x
=
target_bbox
,
shape
=
(
-
1
,
4
))
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
1
))
loc
=
nn
.
reshape
(
x
=
loc
,
shape
=
(
-
1
,
4
))
predicted_scores
=
nn
.
gather
(
scores
,
score_index
)
predicted_scores
=
nn
.
gather
(
scores
,
score_index
)
predicted_location
=
nn
.
gather
(
loc
,
loc_index
)
predicted_location
=
nn
.
gather
(
loc
,
loc_index
)
target_label
=
nn
.
gather
(
target_label
,
score_index
)
target_bbox
=
nn
.
gather
(
target_bbox
,
loc_index
)
return
predicted_scores
,
predicted_location
,
target_label
,
target_bbox
return
predicted_scores
,
predicted_location
,
target_label
,
target_bbox
...
...
python/paddle/fluid/layers/metric_op.py
浏览文件 @
12b483c0
...
@@ -78,7 +78,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
...
@@ -78,7 +78,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
return
acc_out
return
acc_out
def
auc
(
input
,
label
,
curve
=
'ROC'
,
num_thresholds
=
2
00
,
topk
=
1
):
def
auc
(
input
,
label
,
curve
=
'ROC'
,
num_thresholds
=
2
**
12
-
1
,
topk
=
1
):
"""
"""
**Area Under the Curve (AUC) Layer**
**Area Under the Curve (AUC) Layer**
...
@@ -118,16 +118,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
...
@@ -118,16 +118,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
"""
"""
helper
=
LayerHelper
(
"auc"
,
**
locals
())
helper
=
LayerHelper
(
"auc"
,
**
locals
())
auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
batch_auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
tp
=
helper
.
create_global_variable
(
stat_pos
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
+
1
])
tn
=
helper
.
create_global_variable
(
stat_neg
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
+
1
])
fp
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
for
var
in
[
stat_pos
,
stat_neg
]:
fn
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
,
shape
=
[
num_thresholds
])
for
var
in
[
tp
,
tn
,
fp
,
fn
]:
helper
.
set_variable_initializer
(
helper
.
set_variable_initializer
(
var
,
Constant
(
var
,
Constant
(
value
=
0.0
,
force_cpu
=
True
))
value
=
0.0
,
force_cpu
=
True
))
...
@@ -137,18 +135,15 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
...
@@ -137,18 +135,15 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
inputs
=
{
inputs
=
{
"Predict"
:
[
input
],
"Predict"
:
[
input
],
"Label"
:
[
label
],
"Label"
:
[
label
],
"TP"
:
[
tp
],
"StatPos"
:
[
stat_pos
],
"TN"
:
[
tn
],
"StatNeg"
:
[
stat_neg
]
"FP"
:
[
fp
],
"FN"
:
[
fn
]
},
},
attrs
=
{
"curve"
:
curve
,
attrs
=
{
"curve"
:
curve
,
"num_thresholds"
:
num_thresholds
},
"num_thresholds"
:
num_thresholds
},
outputs
=
{
outputs
=
{
"AUC"
:
[
auc_out
],
"AUC"
:
[
auc_out
],
"TPOut"
:
[
tp
],
"BatchAUC"
:
[
batch_auc_out
],
"TNOut"
:
[
tn
],
"StatPosOut"
:
[
stat_pos
],
"FPOut"
:
[
fp
],
"StatNegOut"
:
[
stat_neg
]
"FNOut"
:
[
fn
]
})
})
return
auc_out
,
[
tp
,
tn
,
fp
,
fn
]
return
auc_out
,
batch_auc_out
,
[
stat_pos
,
stat_neg
]
python/paddle/fluid/layers/nn.py
浏览文件 @
12b483c0
...
@@ -3546,11 +3546,6 @@ def topk(input, k, name=None):
...
@@ -3546,11 +3546,6 @@ def topk(input, k, name=None):
top5_values, top5_indices = layers.topk(input, k=5)
top5_values, top5_indices = layers.topk(input, k=5)
"""
"""
shape
=
input
.
shape
if
k
<
1
or
k
>=
shape
[
-
1
]:
raise
ValueError
(
"k must be greater than 0 and less than %d."
%
(
shape
[
-
1
]))
helper
=
LayerHelper
(
"top_k"
,
**
locals
())
helper
=
LayerHelper
(
"top_k"
,
**
locals
())
values
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
values
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
indices
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
indices
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
...
...
python/paddle/fluid/metrics.py
浏览文件 @
12b483c0
...
@@ -558,8 +558,6 @@ class Auc(MetricBase):
...
@@ -558,8 +558,6 @@ class Auc(MetricBase):
name: metric name
name: metric name
curve: Specifies the name of the curve to be computed, 'ROC' [default] or
curve: Specifies the name of the curve to be computed, 'ROC' [default] or
'PR' for the Precision-Recall-curve.
'PR' for the Precision-Recall-curve.
num_thresholds: The number of thresholds to use when discretizing the roc
curve.
"NOTE: only implement the ROC curve type via Python now."
"NOTE: only implement the ROC curve type via Python now."
...
@@ -574,15 +572,14 @@ class Auc(MetricBase):
...
@@ -574,15 +572,14 @@ class Auc(MetricBase):
numpy_auc = metric.eval()
numpy_auc = metric.eval()
"""
"""
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
200
):
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
4095
):
super
(
Auc
,
self
).
__init__
(
name
=
name
)
super
(
Auc
,
self
).
__init__
(
name
=
name
)
self
.
_curve
=
curve
self
.
_curve
=
curve
self
.
_num_thresholds
=
num_thresholds
self
.
_num_thresholds
=
num_thresholds
self
.
_epsilon
=
1e-6
self
.
tp_list
=
np
.
zeros
((
num_thresholds
,
))
_num_pred_buckets
=
num_thresholds
+
1
self
.
fn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
_stat_pos
=
[
0
]
*
_num_pred_buckets
self
.
tn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
_stat_neg
=
[
0
]
*
_num_pred_buckets
self
.
fp_list
=
np
.
zeros
((
num_thresholds
,
))
def
update
(
self
,
preds
,
labels
):
def
update
(
self
,
preds
,
labels
):
if
not
_is_numpy_
(
labels
):
if
not
_is_numpy_
(
labels
):
...
@@ -590,41 +587,32 @@ class Auc(MetricBase):
...
@@ -590,41 +587,32 @@ class Auc(MetricBase):
if
not
_is_numpy_
(
preds
):
if
not
_is_numpy_
(
preds
):
raise
ValueError
(
"The 'predictions' must be a numpy ndarray."
)
raise
ValueError
(
"The 'predictions' must be a numpy ndarray."
)
kepsilon
=
1e-7
# to account for floating point imprecisions
for
i
,
lbl
in
enumerate
(
labels
):
thresholds
=
[(
i
+
1
)
*
1.0
/
(
self
.
_num_thresholds
-
1
)
value
=
preds
[
i
,
1
]
for
i
in
range
(
self
.
_num_thresholds
-
2
)]
bin_idx
=
int
(
value
*
self
.
_num_thresholds
)
thresholds
=
[
0.0
-
kepsilon
]
+
thresholds
+
[
1.0
+
kepsilon
]
assert
bin_idx
<=
self
.
_num_thresholds
if
lbl
:
# calculate TP, FN, TN, FP count
self
.
_stat_pos
[
bin_idx
]
+=
1.0
for
idx_thresh
,
thresh
in
enumerate
(
thresholds
):
else
:
tp
,
fn
,
tn
,
fp
=
0
,
0
,
0
,
0
self
.
_stat_neg
[
bin_idx
]
+=
1.0
for
i
,
lbl
in
enumerate
(
labels
):
if
lbl
:
@
staticmethod
if
preds
[
i
,
1
]
>=
thresh
:
def
trapezoid_area
(
x1
,
x2
,
y1
,
y2
):
tp
+=
1
return
abs
(
x1
-
x2
)
*
(
y1
+
y2
)
/
2.0
else
:
fn
+=
1
else
:
if
preds
[
i
,
1
]
>=
thresh
:
fp
+=
1
else
:
tn
+=
1
self
.
tp_list
[
idx_thresh
]
+=
tp
self
.
fn_list
[
idx_thresh
]
+=
fn
self
.
tn_list
[
idx_thresh
]
+=
tn
self
.
fp_list
[
idx_thresh
]
+=
fp
def
eval
(
self
):
def
eval
(
self
):
epsilon
=
self
.
_epsilon
tot_pos
=
0.0
num_thresholds
=
self
.
_num_thresholds
tot_neg
=
0.0
tpr
=
(
self
.
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
auc
=
0.0
self
.
tp_list
+
self
.
fn_list
+
epsilon
)
fpr
=
self
.
fp_list
.
astype
(
"float32"
)
/
(
idx
=
self
.
_num_thresholds
self
.
fp_list
+
self
.
tn_list
+
epsilon
)
while
idx
>=
0
:
rec
=
(
self
.
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
tot_pos_prev
=
tot_pos
self
.
tp_list
+
self
.
fp_list
+
epsilon
)
tot_neg_prev
=
tot_neg
tot_pos
+=
self
.
_stat_pos
[
idx
]
x
=
fpr
[:
num_thresholds
-
1
]
-
fpr
[
1
:]
tot_neg
+=
self
.
_stat_neg
[
idx
]
y
=
(
tpr
[:
num_thresholds
-
1
]
+
tpr
[
1
:])
/
2.0
auc
+=
self
.
trapezoid_area
(
tot_neg
,
tot_neg_prev
,
tot_pos
,
auc_value
=
np
.
sum
(
x
*
y
)
tot_pos_prev
)
return
auc_value
idx
-=
1
return
auc
/
tot_pos
/
tot_neg
if
tot_pos
>
0.0
and
tot_neg
>
0.0
else
0.0
python/paddle/fluid/optimizer.py
浏览文件 @
12b483c0
...
@@ -897,7 +897,20 @@ class RMSPropOptimizer(Optimizer):
...
@@ -897,7 +897,20 @@ class RMSPropOptimizer(Optimizer):
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{v(w,t) +
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) +
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w - v(w, t)
if centered is True:
.. math::
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
g(w, t) & =
\\
rho g(w, t-1) + (1 -
\\
rho)
\\
nabla Q_{i}(w)
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) - (g(w, t))^2 +
\\
epsilon}}
\\
nabla Q_{i}(w)
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w - v(w, t)
w & = w - v(w, t)
...
@@ -915,6 +928,10 @@ class RMSPropOptimizer(Optimizer):
...
@@ -915,6 +928,10 @@ class RMSPropOptimizer(Optimizer):
avoid division by zero, set 1e-6 by default.
avoid division by zero, set 1e-6 by default.
momentum(float): :math:`
\\
beta` in equation is the momentum term,
momentum(float): :math:`
\\
beta` in equation is the momentum term,
set 0.0 by default.
set 0.0 by default.
centered(bool): If True, gradients are normalized by the estimated variance of
the gradient; if False, by the uncentered second moment. Setting this to
True may help with training, but is slightly more expensive in terms of
computation and memory. Defaults to False.
Raises:
Raises:
ValueError: If learning_rate, rho, epsilon, momentum are None.
ValueError: If learning_rate, rho, epsilon, momentum are None.
...
@@ -928,12 +945,14 @@ class RMSPropOptimizer(Optimizer):
...
@@ -928,12 +945,14 @@ class RMSPropOptimizer(Optimizer):
_momentum_acc_str
=
"momentum"
_momentum_acc_str
=
"momentum"
_mean_square_acc_str
=
"mean_square"
_mean_square_acc_str
=
"mean_square"
_mean_grad_acc_str
=
"mean_grad"
def
__init__
(
self
,
def
__init__
(
self
,
learning_rate
,
learning_rate
,
rho
=
0.95
,
rho
=
0.95
,
epsilon
=
1.0e-6
,
epsilon
=
1.0e-6
,
momentum
=
0.0
,
momentum
=
0.0
,
centered
=
False
,
**
kwargs
):
**
kwargs
):
super
(
RMSPropOptimizer
,
self
).
__init__
(
super
(
RMSPropOptimizer
,
self
).
__init__
(
learning_rate
=
learning_rate
,
**
kwargs
)
learning_rate
=
learning_rate
,
**
kwargs
)
...
@@ -950,6 +969,7 @@ class RMSPropOptimizer(Optimizer):
...
@@ -950,6 +969,7 @@ class RMSPropOptimizer(Optimizer):
self
.
_rho
=
rho
self
.
_rho
=
rho
self
.
_epsilon
=
epsilon
self
.
_epsilon
=
epsilon
self
.
_momentum
=
momentum
self
.
_momentum
=
momentum
self
.
_centered
=
centered
def
_create_accumulators
(
self
,
block
,
parameters
):
def
_create_accumulators
(
self
,
block
,
parameters
):
if
not
isinstance
(
block
,
framework
.
Block
):
if
not
isinstance
(
block
,
framework
.
Block
):
...
@@ -958,6 +978,7 @@ class RMSPropOptimizer(Optimizer):
...
@@ -958,6 +978,7 @@ class RMSPropOptimizer(Optimizer):
for
p
in
parameters
:
for
p
in
parameters
:
self
.
_add_accumulator
(
self
.
_momentum_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_momentum_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_mean_square_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_mean_square_acc_str
,
p
)
self
.
_add_accumulator
(
self
.
_mean_grad_acc_str
,
p
)
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
if
not
isinstance
(
block
,
framework
.
Block
):
if
not
isinstance
(
block
,
framework
.
Block
):
...
@@ -967,6 +988,8 @@ class RMSPropOptimizer(Optimizer):
...
@@ -967,6 +988,8 @@ class RMSPropOptimizer(Optimizer):
param_and_grad
[
0
])
param_and_grad
[
0
])
mean_square_acc
=
self
.
_get_accumulator
(
self
.
_mean_square_acc_str
,
mean_square_acc
=
self
.
_get_accumulator
(
self
.
_mean_square_acc_str
,
param_and_grad
[
0
])
param_and_grad
[
0
])
mean_grad_acc
=
self
.
_get_accumulator
(
self
.
_mean_grad_acc_str
,
param_and_grad
[
0
])
rmsprop_op
=
block
.
append_op
(
rmsprop_op
=
block
.
append_op
(
type
=
self
.
type
,
type
=
self
.
type
,
inputs
=
{
inputs
=
{
...
@@ -974,17 +997,20 @@ class RMSPropOptimizer(Optimizer):
...
@@ -974,17 +997,20 @@ class RMSPropOptimizer(Optimizer):
"Grad"
:
param_and_grad
[
1
],
"Grad"
:
param_and_grad
[
1
],
"Moment"
:
momentum_acc
,
"Moment"
:
momentum_acc
,
"MeanSquare"
:
mean_square_acc
,
"MeanSquare"
:
mean_square_acc
,
"MeanGrad"
:
mean_grad_acc
,
"LearningRate"
:
self
.
_create_param_lr
(
param_and_grad
),
"LearningRate"
:
self
.
_create_param_lr
(
param_and_grad
),
},
},
outputs
=
{
outputs
=
{
"ParamOut"
:
param_and_grad
[
0
],
"ParamOut"
:
param_and_grad
[
0
],
"MomentOut"
:
momentum_acc
,
"MomentOut"
:
momentum_acc
,
"MeanSquareOut"
:
mean_square_acc
"MeanSquareOut"
:
mean_square_acc
,
"MeanGradOut"
:
mean_grad_acc
},
},
attrs
=
{
attrs
=
{
"epsilon"
:
self
.
_epsilon
,
"epsilon"
:
self
.
_epsilon
,
"decay"
:
self
.
_rho
,
"decay"
:
self
.
_rho
,
"momentum"
:
self
.
_momentum
"momentum"
:
self
.
_momentum
,
"centered"
:
self
.
_centered
})
})
return
rmsprop_op
return
rmsprop_op
...
...
python/paddle/fluid/tests/test_detection.py
浏览文件 @
12b483c0
...
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
...
@@ -281,7 +281,7 @@ class TestRpnTargetAssign(unittest.TestCase):
gt_box
=
layers
.
data
(
gt_box
=
layers
.
data
(
name
=
'gt_box'
,
shape
=
[
4
],
lod_level
=
1
,
dtype
=
'float32'
)
name
=
'gt_box'
,
shape
=
[
4
],
lod_level
=
1
,
dtype
=
'float32'
)
pred
icted_scores
,
predicted_location
,
target_label
,
targe
t_bbox
=
layers
.
rpn_target_assign
(
pred
_scores
,
pred_loc
,
tgt_lbl
,
tg
t_bbox
=
layers
.
rpn_target_assign
(
loc
=
loc
,
loc
=
loc
,
scores
=
scores
,
scores
=
scores
,
anchor_box
=
anchor_box
,
anchor_box
=
anchor_box
,
...
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
...
@@ -292,15 +292,13 @@ class TestRpnTargetAssign(unittest.TestCase):
rpn_positive_overlap
=
0.7
,
rpn_positive_overlap
=
0.7
,
rpn_negative_overlap
=
0.3
)
rpn_negative_overlap
=
0.3
)
self
.
assertIsNotNone
(
predicted_scores
)
self
.
assertIsNotNone
(
pred_scores
)
self
.
assertIsNotNone
(
predicted_location
)
self
.
assertIsNotNone
(
pred_loc
)
self
.
assertIsNotNone
(
target_label
)
self
.
assertIsNotNone
(
tgt_lbl
)
self
.
assertIsNotNone
(
target_bbox
)
self
.
assertIsNotNone
(
tgt_bbox
)
assert
predicted_scores
.
shape
[
1
]
==
2
assert
pred_scores
.
shape
[
1
]
==
1
assert
predicted_location
.
shape
[
1
]
==
4
assert
pred_loc
.
shape
[
1
]
==
4
assert
predicted_location
.
shape
[
1
]
==
target_bbox
.
shape
[
1
]
assert
pred_loc
.
shape
[
1
]
==
tgt_bbox
.
shape
[
1
]
print
(
str
(
program
))
class
TestGenerateProposals
(
unittest
.
TestCase
):
class
TestGenerateProposals
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
12b483c0
...
@@ -291,7 +291,7 @@ class OpTest(unittest.TestCase):
...
@@ -291,7 +291,7 @@ class OpTest(unittest.TestCase):
return_numpy
=
False
)
return_numpy
=
False
)
return
outs
,
fetch_list
return
outs
,
fetch_list
def
check_output_with_place
(
self
,
place
,
atol
):
def
check_output_with_place
(
self
,
place
,
atol
,
equal_nan
=
False
):
outs
,
fetch_list
=
self
.
_calc_output
(
place
)
outs
,
fetch_list
=
self
.
_calc_output
(
place
)
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
if
out_name
not
in
self
.
outputs
:
if
out_name
not
in
self
.
outputs
:
...
@@ -321,7 +321,7 @@ class OpTest(unittest.TestCase):
...
@@ -321,7 +321,7 @@ class OpTest(unittest.TestCase):
if
isinstance
(
expect
,
tuple
)
else
expect
if
isinstance
(
expect
,
tuple
)
else
expect
self
.
assertTrue
(
self
.
assertTrue
(
np
.
allclose
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
sub_out_name
+
") has diff at "
+
"Output ("
+
sub_out_name
+
") has diff at "
+
str
(
place
))
str
(
place
))
if
isinstance
(
expect
,
tuple
):
if
isinstance
(
expect
,
tuple
):
...
@@ -337,7 +337,7 @@ class OpTest(unittest.TestCase):
...
@@ -337,7 +337,7 @@ class OpTest(unittest.TestCase):
expect_t
=
expect
[
0
]
if
isinstance
(
expect
,
tuple
)
else
expect
expect_t
=
expect
[
0
]
if
isinstance
(
expect
,
tuple
)
else
expect
self
.
assertTrue
(
self
.
assertTrue
(
np
.
allclose
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
str
(
actual_t
))
...
@@ -360,10 +360,10 @@ class OpTest(unittest.TestCase):
...
@@ -360,10 +360,10 @@ class OpTest(unittest.TestCase):
places
.
append
(
core
.
CUDAPlace
(
0
))
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
return
places
def
check_output
(
self
,
atol
=
1e-5
):
def
check_output
(
self
,
atol
=
1e-5
,
equal_nan
=
False
):
places
=
self
.
_get_places
()
places
=
self
.
_get_places
()
for
place
in
places
:
for
place
in
places
:
self
.
check_output_with_place
(
place
,
atol
)
self
.
check_output_with_place
(
place
,
atol
,
equal_nan
)
def
check_output_customized
(
self
,
checker
):
def
check_output_customized
(
self
,
checker
):
places
=
self
.
_get_places
()
places
=
self
.
_get_places
()
...
...
python/paddle/fluid/tests/unittests/test_auc_op.py
浏览文件 @
12b483c0
...
@@ -26,18 +26,15 @@ class TestAucOp(OpTest):
...
@@ -26,18 +26,15 @@ class TestAucOp(OpTest):
pred
=
np
.
random
.
random
((
128
,
2
)).
astype
(
"float32"
)
pred
=
np
.
random
.
random
((
128
,
2
)).
astype
(
"float32"
)
labels
=
np
.
random
.
randint
(
0
,
2
,
(
128
,
1
))
labels
=
np
.
random
.
randint
(
0
,
2
,
(
128
,
1
))
num_thresholds
=
200
num_thresholds
=
200
tp
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
tn
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
stat_pos
=
np
.
zeros
((
num_thresholds
+
1
,
)).
astype
(
"int64"
)
fp
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
stat_neg
=
np
.
zeros
((
num_thresholds
+
1
,
)).
astype
(
"int64"
)
fn
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
self
.
inputs
=
{
self
.
inputs
=
{
'Predict'
:
pred
,
'Predict'
:
pred
,
'Label'
:
labels
,
'Label'
:
labels
,
'TP'
:
tp
,
"StatPos"
:
stat_pos
,
'TN'
:
tn
,
"StatNeg"
:
stat_neg
'FP'
:
fp
,
'FN'
:
fn
}
}
self
.
attrs
=
{
'curve'
:
'ROC'
,
'num_thresholds'
:
num_thresholds
}
self
.
attrs
=
{
'curve'
:
'ROC'
,
'num_thresholds'
:
num_thresholds
}
...
@@ -47,11 +44,10 @@ class TestAucOp(OpTest):
...
@@ -47,11 +44,10 @@ class TestAucOp(OpTest):
python_auc
.
update
(
pred
,
labels
)
python_auc
.
update
(
pred
,
labels
)
self
.
outputs
=
{
self
.
outputs
=
{
'AUC'
:
python_auc
.
eval
(),
'AUC'
:
np
.
array
(
python_auc
.
eval
()),
'TPOut'
:
python_auc
.
tp_list
,
'BatchAUC'
:
np
.
array
(
python_auc
.
eval
()),
'FNOut'
:
python_auc
.
fn_list
,
'StatPosOut'
:
np
.
array
(
python_auc
.
_stat_pos
),
'TNOut'
:
python_auc
.
tn_list
,
'StatNegOut'
:
np
.
array
(
python_auc
.
_stat_neg
)
'FPOut'
:
python_auc
.
fp_list
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
浏览文件 @
12b483c0
...
@@ -58,6 +58,7 @@ class TestFusionLSTMOp(OpTest):
...
@@ -58,6 +58,7 @@ class TestFusionLSTMOp(OpTest):
self
.
act_cell
=
'tanh'
self
.
act_cell
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
act_cand
=
'tanh'
self
.
use_peepholes
=
False
self
.
use_peepholes
=
False
self
.
use_seq
=
False
self
.
set_conf
()
self
.
set_conf
()
T
=
sum
(
self
.
lod
[
0
])
T
=
sum
(
self
.
lod
[
0
])
...
@@ -107,6 +108,7 @@ class TestFusionLSTMOp(OpTest):
...
@@ -107,6 +108,7 @@ class TestFusionLSTMOp(OpTest):
}
}
self
.
attrs
=
{
self
.
attrs
=
{
'use_peepholes'
:
self
.
use_peepholes
,
'use_peepholes'
:
self
.
use_peepholes
,
'use_seq'
:
self
.
use_seq
,
'is_reverse'
:
self
.
is_reverse
,
'is_reverse'
:
self
.
is_reverse
,
'gate_activation'
:
self
.
act_gate
,
'gate_activation'
:
self
.
act_gate
,
'cell_activation'
:
self
.
act_cell
,
'cell_activation'
:
self
.
act_cell
,
...
@@ -159,5 +161,68 @@ class TestFusionLSTMOpBS1(TestFusionLSTMOp):
...
@@ -159,5 +161,68 @@ class TestFusionLSTMOpBS1(TestFusionLSTMOp):
self
.
D
=
16
self
.
D
=
16
class
TestFusionLSTMOpPeepholes
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_peepholes
=
True
class
TestFusionLSTMOpPeepholesInit
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_peepholes
=
True
self
.
has_initial_state
=
True
class
TestFusionLSTMOpPeepholesReverse
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_peepholes
=
True
self
.
is_reverse
=
True
class
TestFusionLSTMOpPoopholesBS1
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_peepholes
=
True
self
.
lod
=
[[
3
]]
self
.
D
=
16
class
TestFusionLSTMOpSeqInit
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
has_initial_state
=
True
class
TestFusionLSTMOpSeqReverse
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
is_reverse
=
True
class
TestFusionLSTMOpSeqInitReverse
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
has_initial_state
=
True
self
.
is_reverse
=
True
class
TestFusionLSTMOpSeqPeepholes
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
use_peepholes
=
True
class
TestFusionLSTMOpSeqPeepholesInit
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
use_peepholes
=
True
self
.
has_initial_state
=
True
class
TestFusionLSTMOpSeqPeepholesReverse
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
self
.
use_seq
=
True
self
.
use_peepholes
=
True
self
.
is_reverse
=
True
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py
浏览文件 @
12b483c0
...
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
...
@@ -177,8 +177,8 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
dx
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
]
dx
=
(
gt_ctr_x
-
ex_ctr_x
)
/
ex_w
/
weights
[
0
]
dy
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
]
dy
=
(
gt_ctr_y
-
ex_ctr_y
)
/
ex_h
/
weights
[
1
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
ex_w
/
weights
[
2
]
dw
=
(
np
.
log
(
gt_w
/
ex_w
))
/
weights
[
2
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
ex_h
/
weights
[
3
]
dh
=
(
np
.
log
(
gt_h
/
ex_h
))
/
weights
[
3
]
targets
=
np
.
vstack
([
dx
,
dy
,
dw
,
dh
]).
transpose
()
targets
=
np
.
vstack
([
dx
,
dy
,
dw
,
dh
]).
transpose
()
return
targets
return
targets
...
...
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
浏览文件 @
12b483c0
...
@@ -15,90 +15,164 @@
...
@@ -15,90 +15,164 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
class
TestRmspropOp1
(
OpTest
):
''' Test RMSProp with explicit inputs
class
TestBase
(
unittest
.
TestCase
):
'''
def
setup
(
self
,
centered
,
epsilon
=
1e-6
):
np
.
random
.
seed
(
5
)
# fix seed
def
setUp
(
self
):
self
.
op_type
=
"rmsprop"
self
.
param_name
=
"param"
self
.
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
mean_square_name
=
"mean_square"
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
self
.
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
self
.
mean_grad_name
=
"mean_grad"
self
.
mean_grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
epsilon
=
1e-6
decay
=
0.9
self
.
lr_name
=
"lr"
momentum
=
0.0
self
.
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
self
.
inputs
=
{
self
.
grad_name
=
"grad"
'Param'
:
param
,
self
.
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
'MeanSquare'
:
mean_square
,
'LearningRate'
:
learning_rate
,
self
.
moment_name
=
"moment"
'Grad'
:
grad
,
self
.
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
'Moment'
:
moment
,
}
self
.
epsilon
=
epsilon
self
.
decay
=
0.9
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'decay'
:
decay
,
'momentum'
:
momentum
}
self
.
momentum
=
0.0
self
.
centered
=
centered
ms_out
=
decay
*
mean_square
+
(
1
-
decay
)
*
grad
*
grad
moment_out
=
momentum
*
moment
+
\
self
.
ms_out
=
self
.
decay
*
self
.
mean_square
+
(
1
-
self
.
decay
learning_rate
*
grad
/
np
.
sqrt
(
ms_out
+
epsilon
)
)
*
self
.
grad
*
self
.
grad
param_out
=
param
-
moment_out
if
centered
:
self
.
mg_out
=
self
.
decay
*
self
.
mean_grad
+
(
1
-
self
.
decay
self
.
outputs
=
{
)
*
self
.
grad
'ParamOut'
:
param_out
,
self
.
moment_out
=
self
.
momentum
*
self
.
moment
+
\
'MomentOut'
:
moment_out
,
self
.
learning_rate
*
self
.
grad
/
np
.
sqrt
(
self
.
ms_out
-
np
.
square
(
self
.
mg_out
)
+
self
.
epsilon
)
'MeanSquareOut'
:
ms_out
else
:
}
self
.
moment_out
=
self
.
momentum
*
self
.
moment
+
\
self
.
learning_rate
*
self
.
grad
/
np
.
sqrt
(
self
.
ms_out
+
self
.
epsilon
)
def
test_check_output
(
self
):
self
.
check_output
()
self
.
param_out
=
self
.
param
-
self
.
moment_out
def
check
(
self
,
class
TestRmspropOp2
(
OpTest
):
actual_t
,
'''Test RMSProp with default values for attributes
expect_t
,
'''
place
,
out_name
,
def
setUp
(
self
):
atol
=
1e-5
,
self
.
op_type
=
"rmsprop"
equal_nan
=
False
):
self
.
assertTrue
(
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
np
.
allclose
(
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
epsilon
=
1.0e-10
class
TestRmspropOp
(
TestBase
):
decay
=
0.9
def
check_with_place
(
self
,
place
,
centered
,
epsilon
):
momentum
=
0.0
self
.
setup
(
centered
,
epsilon
)
scope
=
core
.
Scope
()
self
.
inputs
=
{
'Param'
:
param
,
# create and initialize Param Variable
'MeanSquare'
:
mean_square
,
param
=
scope
.
var
(
self
.
param_name
).
get_tensor
()
'LearningRate'
:
learning_rate
,
param
.
set
(
self
.
param
,
place
)
'Grad'
:
grad
,
'Moment'
:
moment
,
mean_square
=
scope
.
var
(
self
.
mean_square_name
).
get_tensor
()
}
mean_square
.
set
(
self
.
mean_square
,
place
)
ms_out
=
decay
*
mean_square
+
(
1
-
decay
)
*
grad
*
grad
lr
=
scope
.
var
(
self
.
lr_name
).
get_tensor
()
moment_out
=
momentum
*
moment
+
\
lr
.
set
(
self
.
learning_rate
,
place
)
learning_rate
*
grad
/
np
.
sqrt
(
ms_out
+
epsilon
)
param_out
=
param
-
moment_out
grad
=
scope
.
var
(
self
.
grad_name
).
get_tensor
()
grad
.
set
(
self
.
grad
,
place
)
self
.
outputs
=
{
'ParamOut'
:
param_out
,
moment
=
scope
.
var
(
self
.
moment_name
).
get_tensor
()
'MomentOut'
:
moment_out
,
moment
.
set
(
self
.
moment
,
place
)
'MeanSquareOut'
:
ms_out
}
# create and run sgd operator
def
test_check_output
(
self
):
if
self
.
centered
:
self
.
check_output
()
mean_grad
=
scope
.
var
(
self
.
mean_grad_name
).
get_tensor
()
mean_grad
.
set
(
self
.
mean_grad
,
place
)
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
MeanGrad
=
self
.
mean_grad_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
MeanGradOut
=
self
.
mean_grad_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
True
)
else
:
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
False
)
rmsprop_op
.
run
(
scope
,
place
)
atol
=
1e-5
equal_nan
=
False
if
self
.
centered
:
atol
=
1e-3
equal_nan
=
True
self
.
check
(
np
.
array
(
mean_square
),
self
.
ms_out
,
place
,
self
.
mean_square_name
)
self
.
check
(
np
.
array
(
moment
),
self
.
moment_out
,
place
,
self
.
moment_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
self
.
check
(
np
.
array
(
param
),
self
.
param_out
,
place
,
self
.
param_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
if
self
.
centered
:
self
.
check
(
np
.
array
(
mean_grad
),
self
.
mg_out
,
place
,
self
.
mean_grad_name
)
def
test_rmsprop
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
self
.
check_with_place
(
place
,
False
,
1e-6
)
self
.
check_with_place
(
place
,
False
,
1e-10
)
self
.
check_with_place
(
place
,
True
,
1e-6
)
self
.
check_with_place
(
place
,
True
,
1e-10
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_roi_pool_op.py
浏览文件 @
12b483c0
...
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
...
@@ -61,7 +61,7 @@ class TestROIPoolOp(OpTest):
for
i
in
range
(
self
.
rois_num
):
for
i
in
range
(
self
.
rois_num
):
roi
=
self
.
rois
[
i
]
roi
=
self
.
rois
[
i
]
roi_batch_id
=
roi
[
0
]
roi_batch_id
=
int
(
roi
[
0
])
roi_start_w
=
int
(
cpt
.
round
(
roi
[
1
]
*
self
.
spatial_scale
))
roi_start_w
=
int
(
cpt
.
round
(
roi
[
1
]
*
self
.
spatial_scale
))
roi_start_h
=
int
(
cpt
.
round
(
roi
[
2
]
*
self
.
spatial_scale
))
roi_start_h
=
int
(
cpt
.
round
(
roi
[
2
]
*
self
.
spatial_scale
))
roi_end_w
=
int
(
cpt
.
round
(
roi
[
3
]
*
self
.
spatial_scale
))
roi_end_w
=
int
(
cpt
.
round
(
roi
[
3
]
*
self
.
spatial_scale
))
...
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
...
@@ -125,7 +125,7 @@ class TestROIPoolOp(OpTest):
roi
=
[
bno
,
x1
,
y1
,
x2
,
y2
]
roi
=
[
bno
,
x1
,
y1
,
x2
,
y2
]
rois
.
append
(
roi
)
rois
.
append
(
roi
)
self
.
rois_num
=
len
(
rois
)
self
.
rois_num
=
len
(
rois
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
int64
"
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"
float32
"
)
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"roi_pool"
self
.
op_type
=
"roi_pool"
...
...
python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
浏览文件 @
12b483c0
...
@@ -18,12 +18,17 @@ import unittest
...
@@ -18,12 +18,17 @@ import unittest
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
from
test_anchor_generator_op
import
anchor_generator_in_python
from
test_generate_proposal_labels
import
_generate_groundtruth
from
test_generate_proposal_labels
import
_bbox_overlaps
,
_box_to_delta
def
rpn_target_assign
(
iou
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
def
rpn_target_assign
(
gt_anchor_iou
,
rpn_batch_size_per_im
,
rpn_negative_overlap
,
fg_fraction
):
rpn_
positive_overlap
,
rpn_
negative_overlap
,
fg_fraction
):
iou
=
np
.
transpose
(
iou
)
iou
=
np
.
transpose
(
gt_anchor_
iou
)
anchor_to_gt_max
=
iou
.
max
(
axis
=
1
)
anchor_to_gt_max
=
iou
.
max
(
axis
=
1
)
anchor_to_gt_argmax
=
iou
.
argmax
(
axis
=
1
)
gt_to_anchor_argmax
=
iou
.
argmax
(
axis
=
0
)
gt_to_anchor_argmax
=
iou
.
argmax
(
axis
=
0
)
gt_to_anchor_max
=
iou
[
gt_to_anchor_argmax
,
np
.
arange
(
iou
.
shape
[
1
])]
gt_to_anchor_max
=
iou
[
gt_to_anchor_argmax
,
np
.
arange
(
iou
.
shape
[
1
])]
anchors_with_max_overlap
=
np
.
where
(
iou
==
gt_to_anchor_max
)[
0
]
anchors_with_max_overlap
=
np
.
where
(
iou
==
gt_to_anchor_max
)[
0
]
...
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
...
@@ -42,59 +47,113 @@ def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
num_bg
=
rpn_batch_size_per_im
-
np
.
sum
(
tgt_lbl
==
1
)
num_bg
=
rpn_batch_size_per_im
-
np
.
sum
(
tgt_lbl
==
1
)
bg_inds
=
np
.
where
(
anchor_to_gt_max
<
rpn_negative_overlap
)[
0
]
bg_inds
=
np
.
where
(
anchor_to_gt_max
<
rpn_negative_overlap
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
if
len
(
bg_inds
)
>
num_bg
:
if
len
(
bg_inds
)
>
num_bg
:
enable_inds
=
bg_inds
[
np
.
random
.
randint
(
len
(
bg_inds
),
size
=
num_bg
)]
enable_inds
=
bg_inds
[
np
.
random
.
randint
(
len
(
bg_inds
),
size
=
num_bg
)]
tgt_lbl
[
enable_inds
]
=
0
tgt_lbl
[
enable_inds
]
=
0
bg_inds
=
np
.
where
(
tgt_lbl
==
0
)[
0
]
bg_inds
=
np
.
where
(
tgt_lbl
==
0
)[
0
]
tgt_lbl
[
bg_inds
]
=
0
loc_index
=
fg_inds
loc_index
=
fg_inds
score_index
=
np
.
hstack
((
fg_inds
,
bg_inds
))
score_index
=
np
.
hstack
((
fg_inds
,
bg_inds
))
tgt_lbl
=
np
.
expand_dims
(
tgt_lbl
,
axis
=
1
)
tgt_lbl
=
np
.
expand_dims
(
tgt_lbl
,
axis
=
1
)
return
loc_index
,
score_index
,
tgt_lbl
gt_inds
=
anchor_to_gt_argmax
[
fg_inds
]
return
loc_index
,
score_index
,
tgt_lbl
,
gt_inds
def
get_anchor
(
n
,
c
,
h
,
w
):
input_feat
=
np
.
random
.
random
((
n
,
c
,
h
,
w
)).
astype
(
'float32'
)
anchors
,
_
=
anchor_generator_in_python
(
input_feat
=
input_feat
,
anchor_sizes
=
[
32.
,
64.
],
aspect_ratios
=
[
0.5
,
1.0
],
variances
=
[
1.0
,
1.0
,
1.0
,
1.0
],
stride
=
[
16.0
,
16.0
],
offset
=
0.5
)
return
anchors
def
rpn_blob
(
anchor
,
gt_boxes
,
iou
,
lod
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
):
loc_indexes
=
[]
score_indexes
=
[]
tmp_tgt_labels
=
[]
tgt_bboxes
=
[]
anchor_num
=
anchor
.
shape
[
0
]
batch_size
=
len
(
lod
)
-
1
for
i
in
range
(
batch_size
):
b
,
e
=
lod
[
i
],
lod
[
i
+
1
]
iou_slice
=
iou
[
b
:
e
,
:]
bboxes_slice
=
gt_boxes
[
b
:
e
,
:]
loc_idx
,
score_idx
,
tgt_lbl
,
gt_inds
=
rpn_target_assign
(
iou_slice
,
rpn_batch_size_per_im
,
rpn_positive_overlap
,
rpn_negative_overlap
,
fg_fraction
)
fg_bboxes
=
bboxes_slice
[
gt_inds
]
fg_anchors
=
anchor
[
loc_idx
]
box_deltas
=
_box_to_delta
(
fg_anchors
,
fg_bboxes
,
[
1.
,
1.
,
1.
,
1.
])
if
i
==
0
:
loc_indexes
=
loc_idx
score_indexes
=
score_idx
tmp_tgt_labels
=
tgt_lbl
tgt_bboxes
=
box_deltas
else
:
loc_indexes
=
np
.
concatenate
(
[
loc_indexes
,
loc_idx
+
i
*
anchor_num
])
score_indexes
=
np
.
concatenate
(
[
score_indexes
,
score_idx
+
i
*
anchor_num
])
tmp_tgt_labels
=
np
.
concatenate
([
tmp_tgt_labels
,
tgt_lbl
])
tgt_bboxes
=
np
.
vstack
([
tgt_bboxes
,
box_deltas
])
tgt_labels
=
tmp_tgt_labels
[
score_indexes
]
return
loc_indexes
,
score_indexes
,
tgt_bboxes
,
tgt_labels
class
TestRpnTargetAssignOp
(
OpTest
):
class
TestRpnTargetAssignOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
8
)).
astype
(
"float32"
)
n
,
c
,
h
,
w
=
2
,
4
,
14
,
14
self
.
op_type
=
"rpn_target_assign"
anchor
=
get_anchor
(
n
,
c
,
h
,
w
)
self
.
inputs
=
{
'DistMat'
:
iou
}
gt_num
=
10
self
.
attrs
=
{
anchor
=
anchor
.
reshape
(
-
1
,
4
)
'rpn_batch_size_per_im'
:
256
,
anchor_num
=
anchor
.
shape
[
0
]
'rpn_positive_overlap'
:
0.95
,
'rpn_negative_overlap'
:
0.3
,
'fg_fraction'
:
0.25
,
'fix_seed'
:
True
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
256
,
0.95
,
0.3
,
0.25
)
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'ScoreIndex'
:
score_index
,
'TargetLabel'
:
tgt_lbl
,
}
def
test_check_output
(
self
):
im_shapes
=
[[
64
,
64
],
[
64
,
64
]]
self
.
check_output
()
gt_box
,
lod
=
_generate_groundtruth
(
im_shapes
,
3
,
4
)
bbox
=
np
.
vstack
([
v
[
'boxes'
]
for
v
in
gt_box
])
iou
=
_bbox_overlaps
(
bbox
,
anchor
)
anchor
=
anchor
.
astype
(
'float32'
)
bbox
=
bbox
.
astype
(
'float32'
)
iou
=
iou
.
astype
(
'float32'
)
loc_index
,
score_index
,
tgt_bbox
,
tgt_lbl
=
rpn_blob
(
anchor
,
bbox
,
iou
,
[
0
,
4
,
8
],
25600
,
0.95
,
0.03
,
0.25
)
class
TestRpnTargetAssignOp2
(
OpTest
):
def
setUp
(
self
):
iou
=
np
.
random
.
random
((
10
,
20
)).
astype
(
"float32"
)
self
.
op_type
=
"rpn_target_assign"
self
.
op_type
=
"rpn_target_assign"
self
.
inputs
=
{
'DistMat'
:
iou
}
self
.
inputs
=
{
'Anchor'
:
anchor
,
'GtBox'
:
(
bbox
,
[[
4
,
4
]]),
'DistMat'
:
(
iou
,
[[
4
,
4
]]),
}
self
.
attrs
=
{
self
.
attrs
=
{
'rpn_batch_size_per_im'
:
128
,
'rpn_batch_size_per_im'
:
25600
,
'rpn_positive_overlap'
:
0.5
,
'rpn_positive_overlap'
:
0.
9
5
,
'rpn_negative_overlap'
:
0.
5
,
'rpn_negative_overlap'
:
0.
03
,
'fg_fraction'
:
0.5
,
'fg_fraction'
:
0.
2
5
,
'fix_seed'
:
True
'fix_seed'
:
True
}
}
loc_index
,
score_index
,
tgt_lbl
=
rpn_target_assign
(
iou
,
128
,
0.5
,
0.5
,
0.5
)
self
.
outputs
=
{
self
.
outputs
=
{
'LocationIndex'
:
loc_index
,
'LocationIndex'
:
loc_index
.
astype
(
'int32'
),
'ScoreIndex'
:
score_index
,
'ScoreIndex'
:
score_index
.
astype
(
'int32'
),
'TargetLabel'
:
tgt_lbl
,
'TargetBBox'
:
tgt_bbox
.
astype
(
'float32'
),
'TargetLabel'
:
tgt_lbl
.
astype
(
'int64'
),
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录