Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
049c9c7d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
049c9c7d
编写于
10月 23, 2018
作者:
P
phlrain
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_dropout_att_new
上级
ffb24a73
6b4056bb
变更
100
显示空白变更内容
内联
并排
Showing
100 changed file
with
6743 addition
and
882 deletion
+6743
-882
README.md
README.md
+5
-5
cmake/generic.cmake
cmake/generic.cmake
+7
-0
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
paddle/fluid/framework/details/var_handle.h
paddle/fluid/framework/details/var_handle.h
+2
-0
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+10
-4
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
+137
-0
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h
+36
-0
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+63
-23
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
...uid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
+154
-0
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h
...luid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h
+38
-0
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
...mework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
+247
-0
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+6
-0
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
...e/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
+33
-14
paddle/fluid/framework/ir/fuse_pass_base.cc
paddle/fluid/framework/ir/fuse_pass_base.cc
+62
-0
paddle/fluid/framework/ir/fuse_pass_base.h
paddle/fluid/framework/ir/fuse_pass_base.h
+12
-20
paddle/fluid/framework/ir/graph_helper_test.cc
paddle/fluid/framework/ir/graph_helper_test.cc
+3
-3
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+118
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+93
-0
paddle/fluid/framework/ir/graph_test.cc
paddle/fluid/framework/ir/graph_test.cc
+1
-1
paddle/fluid/framework/ir/mkldnn_placement_pass.cc
paddle/fluid/framework/ir/mkldnn_placement_pass.cc
+37
-0
paddle/fluid/framework/ir/mkldnn_placement_pass.h
paddle/fluid/framework/ir/mkldnn_placement_pass.h
+31
-0
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
+101
-0
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
+38
-0
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+5
-11
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+6
-0
paddle/fluid/framework/program_desc_test.cc
paddle/fluid/framework/program_desc_test.cc
+1
-1
paddle/fluid/framework/reader_test.cc
paddle/fluid/framework/reader_test.cc
+1
-1
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+22
-1
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+21
-12
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+18
-8
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+7
-0
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+3
-3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+7
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+1
-1
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+7
-7
paddle/fluid/operators/array_to_lod_tensor_op.cc
paddle/fluid/operators/array_to_lod_tensor_op.cc
+1
-1
paddle/fluid/operators/concat_op.h
paddle/fluid/operators/concat_op.h
+8
-20
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+36
-15
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+8
-3
paddle/fluid/operators/detection/CMakeLists.txt
paddle/fluid/operators/detection/CMakeLists.txt
+1
-1
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+1
-1
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+131
-119
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+90
-76
paddle/fluid/operators/detection/gpc.cc
paddle/fluid/operators/detection/gpc.cc
+2201
-0
paddle/fluid/operators/detection/gpc.h
paddle/fluid/operators/detection/gpc.h
+246
-0
paddle/fluid/operators/detection/multiclass_nms_op.cc
paddle/fluid/operators/detection/multiclass_nms_op.cc
+60
-21
paddle/fluid/operators/detection/poly_util.cc
paddle/fluid/operators/detection/poly_util.cc
+132
-0
paddle/fluid/operators/detection/poly_util.h
paddle/fluid/operators/detection/poly_util.h
+73
-0
paddle/fluid/operators/detection/polygon_box_transform_op.cc
paddle/fluid/operators/detection/polygon_box_transform_op.cc
+2
-2
paddle/fluid/operators/detection/polygon_box_transform_op.cu
paddle/fluid/operators/detection/polygon_box_transform_op.cu
+2
-2
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+7
-7
paddle/fluid/operators/distributed/grpc_serde.cc
paddle/fluid/operators/distributed/grpc_serde.cc
+2
-2
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.cc
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.cc
+229
-0
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.h
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.h
+42
-0
paddle/fluid/operators/gather.h
paddle/fluid/operators/gather.h
+2
-4
paddle/fluid/operators/lod_tensor_to_array_op.cc
paddle/fluid/operators/lod_tensor_to_array_op.cc
+2
-2
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+7
-7
paddle/fluid/operators/math/concat_and_split.cc
paddle/fluid/operators/math/concat_and_split.cc
+3
-3
paddle/fluid/operators/math/concat_and_split.cu
paddle/fluid/operators/math/concat_and_split.cu
+15
-15
paddle/fluid/operators/math/concat_and_split.h
paddle/fluid/operators/math/concat_and_split.h
+1
-1
paddle/fluid/operators/math/concat_test.cc
paddle/fluid/operators/math/concat_test.cc
+1
-1
paddle/fluid/operators/math/fc_compute.h
paddle/fluid/operators/math/fc_compute.h
+15
-9
paddle/fluid/operators/math/jit_kernel.h
paddle/fluid/operators/math/jit_kernel.h
+6
-0
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+88
-0
paddle/fluid/operators/math/jit_kernel_exp.cc
paddle/fluid/operators/math/jit_kernel_exp.cc
+201
-60
paddle/fluid/operators/math/jit_kernel_lstm.cc
paddle/fluid/operators/math/jit_kernel_lstm.cc
+122
-70
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+57
-0
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
+1
-1
paddle/fluid/operators/roi_align_op.cc
paddle/fluid/operators/roi_align_op.cc
+166
-0
paddle/fluid/operators/roi_align_op.cu
paddle/fluid/operators/roi_align_op.cu
+353
-0
paddle/fluid/operators/roi_align_op.h
paddle/fluid/operators/roi_align_op.h
+332
-0
paddle/fluid/operators/roi_pool_op.cc
paddle/fluid/operators/roi_pool_op.cc
+1
-1
paddle/fluid/operators/roi_pool_op.cu
paddle/fluid/operators/roi_pool_op.cu
+1
-1
paddle/fluid/operators/sequence_concat_op.h
paddle/fluid/operators/sequence_concat_op.h
+2
-2
paddle/fluid/operators/split_op.cc
paddle/fluid/operators/split_op.cc
+5
-6
paddle/fluid/operators/split_op.h
paddle/fluid/operators/split_op.h
+15
-10
paddle/fluid/operators/strided_memcpy.h
paddle/fluid/operators/strided_memcpy.h
+23
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+10
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+3
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+9
-0
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+10
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-0
python/paddle/fluid/layer_helper.py
python/paddle/fluid/layer_helper.py
+12
-3
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+17
-16
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+38
-27
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+1
-1
python/paddle/fluid/layers/layer_function_generator.py
python/paddle/fluid/layers/layer_function_generator.py
+5
-3
python/paddle/fluid/layers/metric_op.py
python/paddle/fluid/layers/metric_op.py
+5
-5
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+234
-167
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+17
-14
python/paddle/fluid/regularizer.py
python/paddle/fluid/regularizer.py
+2
-2
python/paddle/fluid/tests/CMakeLists.txt
python/paddle/fluid/tests/CMakeLists.txt
+1
-5
python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py
...uid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py
+94
-0
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+10
-0
python/paddle/fluid/tests/unittests/test_polygon_box_transform.py
...addle/fluid/tests/unittests/test_polygon_box_transform.py
+1
-1
python/paddle/fluid/tests/unittests/test_roi_align_op.py
python/paddle/fluid/tests/unittests/test_roi_align_op.py
+170
-0
python/paddle/fluid/tests/unittests/test_seq_conv.py
python/paddle/fluid/tests/unittests/test_seq_conv.py
+49
-50
python/paddle/fluid/tests/unittests/test_slice_var.py
python/paddle/fluid/tests/unittests/test_slice_var.py
+0
-1
python/paddle/fluid/transpiler/inference_transpiler.py
python/paddle/fluid/transpiler/inference_transpiler.py
+28
-6
未找到文件。
README.md
浏览文件 @
049c9c7d
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
[

](https://travis-ci.org/PaddlePaddle/Paddle)
[

](https://travis-ci.org/PaddlePaddle/Paddle)
[

](http://
www.paddlepaddle.org/docs/develop/documentation/en
/getstarted/index_en.html)
[

](http://
paddlepaddle.org/documentation/docs/en/1.0
/getstarted/index_en.html)
[

](http://
www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn
.html)
[

](http://
paddlepaddle.org/documentation/docs/zh/1.0/beginners_guide/index
.html)
[

](https://github.com/PaddlePaddle/Paddle/releases)
[

](https://github.com/PaddlePaddle/Paddle/releases)
[

](LICENSE)
[

](LICENSE)
...
@@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
...
@@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our
[
release announcement
](
https://github.com/PaddlePaddle/Paddle/releases
)
to track the latest feature of PaddlePaddle.
Please refer to our
[
release announcement
](
https://github.com/PaddlePaddle/Paddle/releases
)
to track the latest feature of PaddlePaddle.
### Latest PaddlePaddle Release: [Fluid 1.0.
0
](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
### Latest PaddlePaddle Release: [Fluid 1.0.
1
](https://github.com/PaddlePaddle/Paddle/tree/release/1.0.0)
### Install Latest Stable Release:
### Install Latest Stable Release:
```
```
# Linux CPU
# Linux CPU
...
@@ -27,9 +27,9 @@ pip install paddlepaddle
...
@@ -27,9 +27,9 @@ pip install paddlepaddle
# Linux GPU cuda9cudnn7
# Linux GPU cuda9cudnn7
pip install paddlepaddle-gpu
pip install paddlepaddle-gpu
# Linux GPU cuda8cudnn7
# Linux GPU cuda8cudnn7
pip install paddlepaddle-gpu==
0.15.0
.post87
pip install paddlepaddle-gpu==
1.0.1
.post87
# Linux GPU cuda8cudnn5
# Linux GPU cuda8cudnn5
pip install paddlepaddle-gpu==
0.15.0
.post85
pip install paddlepaddle-gpu==
1.0.1
.post85
# For installation on other platform, refer to http://paddlepaddle.org/
# For installation on other platform, refer to http://paddlepaddle.org/
```
```
...
...
cmake/generic.cmake
浏览文件 @
049c9c7d
...
@@ -261,6 +261,13 @@ function(cc_library TARGET_NAME)
...
@@ -261,6 +261,13 @@ function(cc_library TARGET_NAME)
add_dependencies
(
${
TARGET_NAME
}
mklml
)
add_dependencies
(
${
TARGET_NAME
}
mklml
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKLML_LIB_DIR
}
-liomp5 -Wl,--as-needed"
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKLML_LIB_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
()
endif
()
# remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
if
(
"
${
cc_library_DEPS
}
;"
MATCHES
"python;"
)
list
(
REMOVE_ITEM cc_library_DEPS python
)
add_dependencies
(
${
TARGET_NAME
}
python
)
target_link_libraries
(
${
TARGET_NAME
}
"-Wl,-undefined,dynamic_lookup"
)
endif
()
target_link_libraries
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_library_DEPS
}
)
endif
()
endif
()
...
...
paddle/fluid/API.spec
浏览文件 @
049c9c7d
...
@@ -116,6 +116,7 @@ paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], var
...
@@ -116,6 +116,7 @@ paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], var
paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR'))
paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR'))
paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
...
...
paddle/fluid/framework/details/var_handle.h
浏览文件 @
049c9c7d
...
@@ -49,6 +49,8 @@ struct VarHandleBase {
...
@@ -49,6 +49,8 @@ struct VarHandleBase {
void
AddOutput
(
OpHandleBase
*
out
,
ir
::
Node
*
node
)
{
void
AddOutput
(
OpHandleBase
*
out
,
ir
::
Node
*
node
)
{
if
(
pending_ops_
.
find
(
out
)
==
pending_ops_
.
end
())
{
if
(
pending_ops_
.
find
(
out
)
==
pending_ops_
.
end
())
{
PADDLE_ENFORCE
(
out
!=
nullptr
,
"The output of %s should not be nullptr"
,
this
->
Node
()
->
Name
());
pending_ops_
.
insert
(
out
);
pending_ops_
.
insert
(
out
);
node_
->
outputs
.
push_back
(
node
);
node_
->
outputs
.
push_back
(
node
);
}
}
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
049c9c7d
...
@@ -10,7 +10,7 @@ function(pass_library TARGET DEST)
...
@@ -10,7 +10,7 @@ function(pass_library TARGET DEST)
set
(
oneValueArgs
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
${
op_library_DEPS
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
fuse_pass_base
${
op_library_DEPS
}
)
# add more DEST here, such as train, dist and collect USE_PASS into a file automatically.
# add more DEST here, such as train, dist and collect USE_PASS into a file automatically.
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
...
@@ -25,13 +25,11 @@ cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
...
@@ -25,13 +25,11 @@ cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
cc_library
(
fuse_pass_base SRCS fuse_pass_base.cc DEPS pass
)
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
fc_fuse_pass inference
)
pass_library
(
fc_fuse_pass inference
)
if
(
WITH_MKLDNN
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
endif
()
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
...
@@ -39,6 +37,13 @@ pass_library(embedding_fc_lstm_fuse_pass inference)
...
@@ -39,6 +37,13 @@ pass_library(embedding_fc_lstm_fuse_pass inference)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
pass_library
(
conv_bn_fuse_pass inference
)
pass_library
(
conv_bn_fuse_pass inference
)
pass_library
(
seqconv_eltadd_relu_fuse_pass inference
)
if
(
WITH_MKLDNN
)
pass_library
(
mkldnn_placement_pass base
)
pass_library
(
conv_bias_mkldnn_fuse_pass inference
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
pass_library
(
conv_elementwise_add_mkldnn_fuse_pass inference
)
endif
()
cc_library
(
fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector
)
cc_library
(
fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector
)
...
@@ -54,4 +59,5 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
...
@@ -54,4 +59,5 @@ cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS g
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
cc_test
(
test_conv_elementwise_add_mkldnn_fuse_pass SRCS conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass
)
endif
()
endif
()
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h"
#include <functional>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Applies the binary functor `f` element by element to two float tensors of
// identical dims and returns the result as a new tensor.
//
// vec_a, vec_b: input tensors; their dims must be equal (enforced below).
//               Both are read through data<float>().
// f:            callable invoked as f(a[i], b[i]) for every element index i.
//
// Returns a LoDTensor resized to the dims of vec_a whose buffer is obtained
// with mutable_data<float>(platform::CPUPlace()).
template <typename BinaryOperation>
LoDTensor tensor_apply_eltwise(const LoDTensor& vec_a, const LoDTensor& vec_b,
                               BinaryOperation f) {
  PADDLE_ENFORCE_EQ(vec_a.dims(), vec_b.dims());

  LoDTensor vec_y;
  vec_y.Resize(vec_a.dims());

  const float* a = vec_a.data<float>();
  const float* b = vec_b.data<float>();
  float* y = vec_y.mutable_data<float>(platform::CPUPlace());

  // Query the element count once and index with the same type numel()
  // returns, so the comparison never mixes a narrower `int` counter with a
  // wider element count.
  auto numel = vec_a.numel();
  for (decltype(numel) i = 0; i < numel; ++i) {
    y[i] = f(a[i], b[i]);
  }
  return vec_y;
}
// Rewrites every conv2d -> elementwise_add subgraph matched by
// patterns::ConvBias into a single conv2d op that carries the added tensor as
// its "Bias" input.
//
// graph: the graph to transform; must be non-null. Ownership is taken and the
//        same graph is returned after the rewrite.
//
// For each match the fuse is skipped when FindFuseOption() reports
// DO_NOT_FUSE or FUSE_NATIVE. Otherwise:
//   * if the conv op already has a non-empty "Bias" input, the elementwise
//     bias is added into that existing bias tensor in place and the conv op
//     is re-wired to write directly to the elementwise_add output;
//   * else a new conv2d op is created with the elementwise bias as "Bias",
//     copying all attributes of the original conv op, and the old conv op is
//     removed.
// The number of fused subgraphs is reported through AddStatis().
//
// Raises (via PADDLE_ENFORCE) when the graph or the parameter scope is
// missing, when a bias variable cannot be found in the scope, or when the
// existing conv bias and the elementwise bias have different dims.
std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());

  auto* scope = param_scope();
  PADDLE_ENFORCE(scope);

  GraphPatternDetector gpd;
  auto* conv_input =
      gpd.mutable_pattern()
          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
          ->AsInput()
          ->assert_is_op_input("conv2d", "Input");
  patterns::ConvBias conv_bias_pattern(gpd.mutable_pattern(), name_scope_);
  conv_bias_pattern(conv_input);

  int found_conv_bias_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvBias fuse";
    GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                              conv_bias_pattern);                      // Filter
    GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern);  // tmp
    GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_bias_pattern);  // CONV op
    // bias
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_bias, eltwise_bias, conv_bias_pattern);
    // output
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_out, eltwise_out, conv_bias_pattern);
    // elementwise_add op
    GET_IR_NODE_FROM_SUBGRAPH(eltwise, eltwise, conv_bias_pattern);

    PADDLE_ENFORCE(subgraph.count(conv_input));

    // check if fuse can be done and if MKL-DNN should be used
    FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
    if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
      VLOG(3) << "do not perform conv+bias fuse";
      return;
    }

    // The lookup result is dereferenced right away, so fail with a readable
    // message instead of crashing when the variable is not in the scope.
    auto* eltwise_bias_var = scope->FindVar(eltwise_bias->Name());
    PADDLE_ENFORCE(eltwise_bias_var != nullptr,
                   "The elementwise bias variable %s should be in the scope",
                   eltwise_bias->Name());
    auto* eltwise_bias_tensor = eltwise_bias_var->GetMutable<LoDTensor>();

    auto input_names = conv->Op()->InputNames();
    bool has_bias = std::find(input_names.begin(), input_names.end(), "Bias") !=
                    input_names.end();
    if (has_bias && conv->Op()->Input("Bias").size() > 0) {
      auto conv_bias_names = conv->Op()->Input("Bias");
      // add eltwise bias to existing conv bias
      PADDLE_ENFORCE_EQ(conv_bias_names.size(), 1);
      auto* conv_bias_var = scope->FindVar(conv_bias_names[0]);
      PADDLE_ENFORCE(conv_bias_var != nullptr,
                     "The conv bias variable %s should be in the scope",
                     conv_bias_names[0]);
      auto* conv_bias_tensor = conv_bias_var->GetMutable<LoDTensor>();
      PADDLE_ENFORCE_EQ(conv_bias_tensor->dims(), eltwise_bias_tensor->dims());
      *conv_bias_tensor = tensor_apply_eltwise(
          *conv_bias_tensor, *eltwise_bias_tensor, std::plus<float>());

      // The conv op now produces what elementwise_add used to produce.
      conv->Op()->SetOutput("Output",
                            std::vector<std::string>({eltwise_out->Name()}));

      GraphSafeRemoveNodes(graph.get(), {eltwise, conv_out});

      IR_NODE_LINK_TO(conv, eltwise_out);
    } else {
      // take eltwise bias as conv bias
      OpDesc desc;

      desc.SetInput(
          "Input", std::vector<std::string>({subgraph.at(conv_input)->Name()}));
      desc.SetInput("Filter", std::vector<std::string>({conv_weight->Name()}));
      desc.SetInput("Bias", std::vector<std::string>({eltwise_bias->Name()}));
      desc.SetOutput("Output",
                     std::vector<std::string>({eltwise_out->Name()}));
      desc.SetType("conv2d");

      // Carry over every attribute of the conv op being replaced.
      for (auto& attr : conv->Op()->GetAttrMap()) {
        desc.SetAttr(attr.first, attr.second);
      }
      auto conv_bias_node = g->CreateOpNode(&desc);

      IR_NODE_LINK_TO(subgraph.at(conv_input), conv_bias_node);
      IR_NODE_LINK_TO(conv_weight, conv_bias_node);
      IR_NODE_LINK_TO(eltwise_bias, conv_bias_node);
      IR_NODE_LINK_TO(conv_bias_node, eltwise_out);

      GraphSafeRemoveNodes(graph.get(), {conv, eltwise, conv_out});
    }

    found_conv_bias_count++;
  };
  gpd(graph.get(), handler);
  AddStatis(found_conv_bias_count);
  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
conv_bias_mkldnn_fuse_pass
,
paddle
::
framework
::
ir
::
ConvBiasFusePass
);
paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
/*
* Graph pass that fuses a conv2d op followed by an elementwise_add op into a
* single conv2d op that takes the added tensor as its "Bias" input.
* The rewrite itself is implemented in ApplyImpl
* (conv_bias_mkldnn_fuse_pass.cc); it is skipped for subgraphs where
* FindFuseOption() returns DO_NOT_FUSE or FUSE_NATIVE.
*/
class
ConvBiasFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvBiasFusePass
()
{}
protected:
// Takes ownership of `graph`, applies the conv+bias fuse to it and returns
// the same graph.
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
// Name scope handed to FusePassBase::Init() and used as the prefix when
// naming the pattern nodes of this pass.
const
std
::
string
name_scope_
{
"conv_bias_mkldnn_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
浏览文件 @
049c9c7d
...
@@ -126,12 +126,21 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
...
@@ -126,12 +126,21 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
// conv, batch_norm,
// conv, batch_norm,
// conv_weight, conv_out,
// conv_weight, conv_out,
// bn_scale, bn_bias, bn_mean, bn_variance,
// bn_scale, bn_bias, bn_mean, bn_variance,
// bn_out, bn_mean_out, bn_variance_out, bn_saved_mean, bn_saved_variance
// bn_out, bn_mean_out, bn_variance_out, bn_saved_mean,
// bn_saved_variance
GET_CONV_BN_NODES
(
conv_bn_pattern
);
GET_CONV_BN_NODES
(
conv_bn_pattern
);
// check if fuse can be done and if MKL-DNN should be used
FuseOptions
fuse_option
=
FindFuseOption
(
*
conv
,
*
batch_norm
);
if
(
fuse_option
==
DO_NOT_FUSE
)
{
VLOG
(
3
)
<<
"do not perform conv+bn fuse"
;
return
;
}
// Create eltwise_y (conv bias) variable
// Create eltwise_y (conv bias) variable
VarDesc
eltwise_y_in_desc
(
VarDesc
eltwise_y_in_desc
(
patterns
::
PDNodeName
(
name_scope_
,
"eltwise_y_in"
));
patterns
::
PDNodeName
(
name_scope_
,
"eltwise_y_in"
));
eltwise_y_in_desc
.
SetPersistable
(
true
);
auto
*
eltwise_y_in_node
=
g
->
CreateVarNode
(
&
eltwise_y_in_desc
);
auto
*
eltwise_y_in_node
=
g
->
CreateVarNode
(
&
eltwise_y_in_desc
);
auto
*
eltwise_y_in_tensor
=
auto
*
eltwise_y_in_tensor
=
scope
->
Var
(
eltwise_y_in_node
->
Name
())
->
GetMutable
<
LoDTensor
>
();
scope
->
Var
(
eltwise_y_in_node
->
Name
())
->
GetMutable
<
LoDTensor
>
();
...
@@ -151,27 +160,59 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
...
@@ -151,27 +160,59 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
*
bn_mean
,
*
bn_variance
,
eltwise_y_in_tensor
,
*
bn_mean
,
*
bn_variance
,
eltwise_y_in_tensor
,
epsilon
);
epsilon
);
// Create an elementwise add node
// with MKL-DNN fuse conv+bn into conv with bias
// without MKL-DNN fuse conv+bn into conv+elementwise_add
if
(
fuse_option
==
FUSE_MKLDNN
)
{
auto
input_names
=
conv
->
Op
()
->
InputNames
();
bool
has_bias
=
std
::
find
(
input_names
.
begin
(),
input_names
.
end
(),
"Bias"
)
!=
input_names
.
end
();
if
(
has_bias
&&
conv
->
Op
()
->
Input
(
"Bias"
).
size
()
>
0
)
{
// reuse existing conv bias node
auto
conv_bias_names
=
conv
->
Op
()
->
Input
(
"Bias"
);
PADDLE_ENFORCE_EQ
(
conv_bias_names
.
size
(),
1
);
auto
*
conv_bias_var
=
scope
->
FindVar
(
conv_bias_names
[
0
]);
auto
*
conv_bias_tensor
=
conv_bias_var
->
GetMutable
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
conv_bias_tensor
->
dims
(),
eltwise_y_in_tensor
->
dims
());
auto
eigen_conv_bias
=
EigenVector
<
float
>::
From
(
*
conv_bias_tensor
);
eigen_conv_bias
+=
EigenVector
<
float
>::
From
(
*
eltwise_y_in_tensor
);
}
else
{
// add new conv_bias node
conv
->
Op
()
->
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
eltwise_y_in_node
->
Name
()}));
IR_NODE_LINK_TO
(
eltwise_y_in_node
,
conv
);
}
conv
->
Op
()
->
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
bn_out
->
Name
()}));
GraphSafeRemoveNodes
(
graph
.
get
(),
{
conv_out
,
bn_scale
,
bn_bias
,
bn_mean
,
bn_variance
,
batch_norm
,
bn_mean_out
,
bn_variance_out
,
bn_saved_mean
,
bn_saved_variance
});
IR_NODE_LINK_TO
(
conv
,
bn_out
);
found_conv_bn_count
++
;
}
else
{
// fuse_option == FUSE_NATIVE
// create an elementwise add node.
OpDesc
desc
;
OpDesc
desc
;
desc
.
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
conv_out
->
Name
()}));
desc
.
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
conv_out
->
Name
()}));
desc
.
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
eltwise_y_in_node
->
Name
()}));
desc
.
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
eltwise_y_in_node
->
Name
()}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
bn_out
->
Name
()}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
bn_out
->
Name
()}));
desc
.
SetType
(
"elementwise_add"
);
desc
.
SetType
(
"elementwise_add"
);
desc
.
SetAttr
(
"axis"
,
1
);
desc
.
SetAttr
(
"axis"
,
1
);
bool
a
=
boost
::
get
<
bool
>
(
conv
->
Op
()
->
GetAttr
(
"use_mkldnn"
));
desc
.
SetAttr
(
"use_mkldnn"
,
a
);
auto
eltwise_op
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
auto
eltwise_op
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
bn_scale
,
bn_bias
,
bn_mean
,
bn_variance
,
GraphSafeRemoveNodes
(
batch_norm
,
bn_mean_out
,
bn_variance_out
,
graph
.
get
(),
bn_saved_mean
,
bn_saved_variance
});
{
bn_scale
,
bn_bias
,
bn_mean
,
bn_variance
,
batch_norm
,
bn_mean_out
,
bn_variance_out
,
bn_saved_mean
,
bn_saved_variance
});
PADDLE_ENFORCE
(
subgraph
.
count
(
conv_input
));
IR_NODE_LINK_TO
(
conv_out
,
eltwise_op
);
IR_NODE_LINK_TO
(
conv_out
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_y_in_node
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_y_in_node
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_op
,
bn_out
);
IR_NODE_LINK_TO
(
eltwise_op
,
bn_out
);
found_conv_bn_count
++
;
found_conv_bn_count
++
;
}
};
};
gpd
(
graph
.
get
(),
handler
);
gpd
(
graph
.
get
(),
handler
);
...
@@ -237,7 +278,6 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
...
@@ -237,7 +278,6 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
{
bn_scale
,
bn_bias
,
bn_mean
,
bn_variance
,
batch_norm
,
bn_mean_out
,
{
bn_scale
,
bn_bias
,
bn_mean
,
bn_variance
,
batch_norm
,
bn_mean_out
,
bn_variance_out
,
bn_saved_mean
,
bn_saved_variance
,
eltwise_out
});
bn_variance_out
,
bn_saved_mean
,
bn_saved_variance
,
eltwise_out
});
PADDLE_ENFORCE
(
subgraph
.
count
(
conv_input
));
IR_NODE_LINK_TO
(
eltwise
,
bn_out
);
IR_NODE_LINK_TO
(
eltwise
,
bn_out
);
found_conv_bn_count
++
;
found_conv_bn_count
++
;
...
...
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
#include <functional>
#include <utility>
#include "paddle/fluid/framework/ir/graph_traits.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
namespace
{
// Keeps the graph consistent when the variable node `from` is superseded by
// the variable node `to`.
//
// Every node that lists `from` among its input nodes gets an additional edge
// from `to`, and every occurrence of `from`'s name in that node's operator
// inputs is renamed to `to`'s name. The edge from `from` itself is left in
// place; callers are expected to remove `from` from the graph afterwards.
//
// graph: graph whose nodes are visited (via GraphTraits::DFS).
// from:  node being replaced.
// to:    node that takes over as the input.
void CorrectGraphEdges(Graph* graph, Node* from, Node* to) {
  for (auto& node : GraphTraits::DFS(*graph)) {
    auto from_in_inputs =
        std::find(std::begin(node.inputs), std::end(node.inputs), from);
    if (from_in_inputs == std::end(node.inputs)) {
      continue;
    }

    IR_NODE_LINK_TO(to, (&node));

    // Iterate over a copy: SetInput() below mutates the operator's input map.
    auto inputs = node.Op()->Inputs();
    for (const auto& input : inputs) {
      auto param_names = input.second;
      auto pi = std::find(std::begin(param_names), std::end(param_names),
                          from->Name());
      if (pi == std::end(param_names)) {
        continue;
      }

      // Rename only the matching entries so that any other variables bound
      // to the same input slot are preserved instead of being dropped.
      std::replace(pi, std::end(param_names), from->Name(), to->Name());
      node.Op()->SetInput(input.first, param_names);
    }
  }
}
}
// namespace
using
graph_ptr
=
std
::
unique_ptr
<
ir
::
Graph
>
;
// Fuses every matched `conv2d -> elementwise_add` pair into a single conv2d
// operator that receives the other elementwise_add operand as "ResidualData"
// and has the attribute "fuse_residual_connection" set to true.
//
// The fusion is applied only when FindFuseOption() reports FUSE_MKLDNN for
// the two operators. For each match a new conv2d node is created that writes
// to the original convolution output variable; consumers of the
// elementwise_add output are redirected to that variable, and the old conv2d
// op, the elementwise_add op and the elementwise_add output are removed.
//
// graph: graph to transform; ownership is passed in and handed back.
// Returns the transformed graph.
graph_ptr ConvElementwiseAddMKLDNNFusePass::ApplyImpl(graph_ptr graph) const {
  FusePassBase::Init(name_scope_, graph.get());

  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();

  patterns::Conv conv_pattern{pattern, name_scope_};
  auto conv_output = conv_pattern();

  patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
  elementwise_add_pattern(conv_output);

  conv_output->AsIntermediate();

  // Returns {true, bias node} when the convolution has a non-empty "Bias"
  // input, and {false, nullptr} otherwise.
  auto conv_op_has_bias = [](const Node& conv_op) -> std::pair<bool, Node*> {
    auto bias_input_names = conv_op.Op()->Inputs();
    auto bias_it = bias_input_names.find("Bias");

    if (bias_it != std::end(bias_input_names)) {
      bool has_bias = !bias_it->second.empty();

      if (has_bias) {
        auto conv_bias_names = bias_it->second;
        auto conv_bias_names_it =
            std::find_if(std::begin(conv_op.inputs), std::end(conv_op.inputs),
                         [&conv_bias_names](Node* n) -> bool {
                           return n->Name() == conv_bias_names[0];
                         });
        // Dereferencing end() would be undefined behaviour; fail loudly if
        // the op description and the graph edges disagree.
        PADDLE_ENFORCE(conv_bias_names_it != std::end(conv_op.inputs),
                       "conv2d Bias input has no matching input node");
        return std::make_pair(has_bias, *conv_bias_names_it);
      }
    }

    return std::make_pair(false, nullptr);
  };

  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
                              elementwise_add_pattern);
    // NOTE: despite its name, the pattern binds elementwise_add_x to the "Y"
    // input of elementwise_add (the operand that is not the conv output).
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
                              elementwise_add_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
                              elementwise_add_pattern);

    if (FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return;

    // Describe the fused convolution.
    OpDesc op_desc;
    op_desc.SetType("conv2d");

    op_desc.SetInput("Input", {conv_input->Name()});
    op_desc.SetInput("Filter", {conv_filter->Name()});
    op_desc.SetInput("ResidualData", {elementwise_add_x->Name()});
    op_desc.SetOutput("Output", {conv_output->Name()});

    bool has_bias;
    Node* conv_bias;

    std::tie(has_bias, conv_bias) = conv_op_has_bias(*conv_op);

    if (has_bias) {
      op_desc.SetInput("Bias", {conv_bias->Name()});
    }

    // Carry over every attribute of the original convolution.
    for (const auto& attr : conv_op->Op()->GetAttrMap()) {
      op_desc.SetAttr(attr.first, attr.second);
    }

    op_desc.SetAttr("fuse_residual_connection", true);

    auto fused_conv_op = g->CreateOpNode(&op_desc);

    IR_NODE_LINK_TO(conv_input, fused_conv_op);
    IR_NODE_LINK_TO(conv_filter, fused_conv_op);
    IR_NODE_LINK_TO(elementwise_add_x, fused_conv_op);
    IR_NODE_LINK_TO(fused_conv_op, conv_output);

    if (has_bias) {
      IR_NODE_LINK_TO(conv_bias, fused_conv_op);
    }

    // Consumers of the elementwise_add output now read the conv output.
    CorrectGraphEdges(g, elementwise_add_out, conv_output);
    GraphSafeRemoveNodes(g, {elementwise_add_out, conv_op, elementwise_add_op});
  };

  gpd(graph.get(), handler);

  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
conv_elementwise_add_mkldnn_fuse_pass
,
paddle
::
framework
::
ir
::
ConvElementwiseAddMKLDNNFusePass
);
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
ConvElementwiseAddMKLDNNFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvElementwiseAddMKLDNNFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"residual_connections_fuse_pass"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
namespace
{
// Nodes the pass deletes per fused match: the original conv2d op, the
// elementwise_add op and the elementwise_add output variable.
constexpr int nodes_removed = 3;
// Nodes the pass creates per fused match: the fused conv2d op.
constexpr int nodes_added = 1;
// Appends an operator of type `type` to block 0 of `prog`, with the attribute
// "use_mkldnn" set to true.
//
// inputs: (parameter name, variable name) pairs; each becomes a
//         single-variable input.
// output: (parameter name, variable name) pair for the single output.
void SetOp(ProgramDesc* prog, const std::string& type,
           const std::vector<std::pair<std::string, std::string>>& inputs,
           const std::pair<std::string, std::string>& output) {
  auto new_op = prog->MutableBlock(0)->AppendOp();

  new_op->SetType(type);
  new_op->SetAttr("use_mkldnn", true);

  for (auto it = inputs.begin(); it != inputs.end(); ++it) {
    new_op->SetInput(it->first, {it->second});
  }

  new_op->SetOutput(output.first, {output.second});
}
struct
IsReachable
{
using
func
=
std
::
function
<
bool
(
const
std
::
string
&
,
const
std
::
string
&
)
>
;
auto
operator
()(
const
std
::
unique_ptr
<
ir
::
Graph
>&
graph
)
->
func
{
auto
find_node
=
[](
const
std
::
unique_ptr
<
ir
::
Graph
>&
graph
,
const
std
::
string
&
name
)
->
Node
*
{
for
(
auto
&
node
:
GraphTraits
::
DFS
(
*
graph
))
{
if
(
name
==
node
.
Name
())
{
return
&
node
;
}
}
return
nullptr
;
};
return
[
&
](
std
::
string
from
,
const
std
::
string
to
)
->
bool
{
if
(
from
==
to
)
return
true
;
std
::
map
<
std
::
string
,
bool
>
visited
;
for
(
auto
&
node
:
GraphTraits
::
DFS
(
*
graph
))
{
visited
[
node
.
Name
()]
=
false
;
}
visited
[
from
]
=
true
;
std
::
list
<
std
::
string
>
queue
;
queue
.
push_back
(
from
);
while
(
!
queue
.
empty
())
{
auto
cur
=
find_node
(
graph
,
queue
.
front
());
queue
.
pop_front
();
if
(
cur
==
nullptr
)
return
false
;
for
(
auto
n
:
cur
->
outputs
)
{
if
(
n
->
Name
()
==
to
)
return
true
;
if
(
!
visited
[
n
->
Name
()])
{
visited
[
n
->
Name
()]
=
true
;
queue
.
push_back
(
n
->
Name
());
}
}
}
return
false
;
};
}
};
// Expects the graph to contain exactly one conv2d operator and no
// elementwise_add operator, i.e. the state after a successful fusion.
void AssertOpsCount(const std::unique_ptr<ir::Graph>& graph) {
  int conv_count = 0;
  int elementwise_add_count = 0;

  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) {
      continue;
    }

    const std::string op_type = node->Op()->Type();
    if (op_type == "conv2d") {
      ++conv_count;
    }
    if (op_type == "elementwise_add") {
      ++elementwise_add_count;
    }
  }

  EXPECT_EQ(conv_count, 1);
  EXPECT_EQ(elementwise_add_count, 0);
}
// Creates a program whose block 0 declares every listed variable as a
// LOD_TENSOR; variables in `persistent_vars` are additionally marked
// persistable. No operators are added.
ProgramDesc BuildProgramDesc(const std::vector<std::string>& transient_vars,
                             const std::vector<std::string>& persistent_vars) {
  ProgramDesc prog;

  auto declare_lod_tensor = [&prog](const std::string& var_name) -> VarDesc* {
    auto desc = prog.MutableBlock(0)->Var(var_name);
    desc->SetType(proto::VarType::LOD_TENSOR);
    return desc;
  };

  for (const auto& name : transient_vars) {
    declare_lod_tensor(name);
  }

  for (const auto& name : persistent_vars) {
    declare_lod_tensor(name)->SetPersistable(true);
  }

  return prog;
}
}
// namespace
// conv2d (with bias) -> elementwise_add -> relu: the pass must fuse the first
// two operators while keeping relu reachable from the input "a".
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionWithElementwiseAddRelu) {
  auto prog =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&prog, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  // Three nodes are dropped and one fused operator is added.
  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
// Same chain as the biased case, but the convolution has no "Bias" input.
TEST(ConvElementwiseAddMKLDNNFusePass,
     ConvolutionWithElementwiseAddReluNoBias) {
  auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});

  SetOp(&prog, "conv2d", {{"Input", "a"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  // Three nodes are dropped and one fused operator is added.
  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
// conv2d (with bias) -> elementwise_add with no consumer afterwards. After
// the pass the elementwise_add output "d" is expected to be gone, so it is no
// longer reachable from "a".
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionElementwiseAdd) {
  auto prog = BuildProgramDesc({"a", "b", "c", "d"}, {"bias", "weights"});

  SetOp(&prog, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "d"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_FALSE(is_reachable(graph)("a", "d"));

  // Three nodes are dropped and one fused operator is added.
  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
// sigmoid -> conv2d (with bias) -> elementwise_add -> relu: the fusion must
// also work when the convolution input is produced by another operator.
TEST(ConvElementwiseAddMKLDNNFusePass, SigmoidConvolutionAddElementwiseRelu) {
  auto prog =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
  SetOp(&prog, "conv2d",
        {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "c"});
  SetOp(&prog, "elementwise_add", {{"X", "c"}, {"Y", "d"}}, {"Out", "e"});
  SetOp(&prog, "relu", {{"X", "e"}}, {"Out", "f"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");
  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  // Three nodes are dropped and one fused operator is added.
  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
USE_PASS
(
conv_elementwise_add_mkldnn_fuse_pass
);
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
浏览文件 @
049c9c7d
...
@@ -46,6 +46,12 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
...
@@ -46,6 +46,12 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
GET_IR_NODE_FROM_SUBGRAPH
(
relu_out
,
relu_out
,
conv_relu_pattern
);
// Out
GET_IR_NODE_FROM_SUBGRAPH
(
relu_out
,
relu_out
,
conv_relu_pattern
);
// Out
GET_IR_NODE_FROM_SUBGRAPH
(
relu
,
relu
,
conv_relu_pattern
);
// ReLU op
GET_IR_NODE_FROM_SUBGRAPH
(
relu
,
relu
,
conv_relu_pattern
);
// ReLU op
FuseOptions
fuse_option
=
FindFuseOption
(
*
conv
,
*
relu
);
if
(
fuse_option
==
DO_NOT_FUSE
)
{
VLOG
(
3
)
<<
"do not perform conv+relu fuse"
;
return
;
}
// Transform Conv node into ConvReLU node.
// Transform Conv node into ConvReLU node.
OpDesc
*
desc
=
conv
->
Op
();
OpDesc
*
desc
=
conv
->
Op
();
desc
->
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
relu_out
->
Name
()}));
desc
->
SetOutput
(
"Output"
,
std
::
vector
<
std
::
string
>
({
relu_out
->
Name
()}));
...
...
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc
浏览文件 @
049c9c7d
...
@@ -20,17 +20,19 @@ namespace paddle {
...
@@ -20,17 +20,19 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
void
SetOp
(
ProgramDesc
*
prog
,
const
std
::
string
&
type
,
void
SetOp
(
ProgramDesc
*
prog
,
const
std
::
string
&
type
,
const
std
::
string
&
name
,
const
std
::
vector
<
std
::
string
>&
inputs
,
const
std
::
vector
<
std
::
string
>&
inputs
,
const
std
::
vector
<
std
::
string
>&
outputs
)
{
const
std
::
vector
<
std
::
string
>&
outputs
,
bool
use_mkldnn
=
false
)
{
auto
*
op
=
prog
->
MutableBlock
(
0
)
->
AppendOp
();
auto
*
op
=
prog
->
MutableBlock
(
0
)
->
AppendOp
();
op
->
SetType
(
type
);
op
->
SetType
(
type
);
if
(
type
==
"conv2d"
)
{
if
(
type
==
"conv2d"
)
{
op
->
SetAttr
(
"use_mkldnn"
,
true
);
op
->
SetAttr
(
"use_mkldnn"
,
use_mkldnn
);
op
->
SetAttr
(
"name"
,
name
);
op
->
SetInput
(
"Input"
,
{
inputs
[
0
]});
op
->
SetInput
(
"Input"
,
{
inputs
[
0
]});
op
->
SetInput
(
"Filter"
,
{
inputs
[
1
]});
op
->
SetInput
(
"Filter"
,
{
inputs
[
1
]});
op
->
SetInput
(
"Bias"
,
{
inputs
[
2
]});
op
->
SetInput
(
"Bias"
,
{
inputs
[
2
]});
}
else
if
(
type
==
"relu"
)
{
}
else
if
(
type
==
"relu"
)
{
op
->
SetAttr
(
"use_mkldnn"
,
use_mkldnn
);
op
->
SetInput
(
"X"
,
inputs
);
op
->
SetInput
(
"X"
,
inputs
);
}
}
op
->
SetOutput
(
"Out"
,
outputs
);
op
->
SetOutput
(
"Out"
,
outputs
);
...
@@ -43,7 +45,8 @@ void SetOp(ProgramDesc* prog, const std::string& type,
...
@@ -43,7 +45,8 @@ void SetOp(ProgramDesc* prog, const std::string& type,
ProgramDesc
BuildProgramDesc
()
{
ProgramDesc
BuildProgramDesc
()
{
ProgramDesc
prog
;
ProgramDesc
prog
;
for
(
auto
&
v
:
for
(
auto
&
v
:
std
::
vector
<
std
::
string
>
({
"a"
,
"b"
,
"c"
,
"weights"
,
"bias"
,
"f"
,
"g"
}))
{
std
::
vector
<
std
::
string
>
({
"a"
,
"b"
,
"c"
,
"weights"
,
"bias"
,
"f"
,
"g"
,
"h"
,
"weights2"
,
"bias2"
,
"k"
,
"l"
}))
{
auto
*
var
=
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
auto
*
var
=
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
var
->
SetType
(
proto
::
VarType
::
SELECTED_ROWS
);
var
->
SetType
(
proto
::
VarType
::
SELECTED_ROWS
);
if
(
v
==
"weights"
||
v
==
"bias"
)
{
if
(
v
==
"weights"
||
v
==
"bias"
)
{
...
@@ -51,14 +54,24 @@ ProgramDesc BuildProgramDesc() {
...
@@ -51,14 +54,24 @@ ProgramDesc BuildProgramDesc() {
}
}
}
}
SetOp
(
&
prog
,
"OP0"
,
std
::
vector
<
std
::
string
>
({
"a"
}),
SetOp
(
&
prog
,
"OP0"
,
"op0"
,
std
::
vector
<
std
::
string
>
({
"a"
}),
std
::
vector
<
std
::
string
>
({
"b"
}));
std
::
vector
<
std
::
string
>
({
"b"
}));
SetOp
(
&
prog
,
"OP1"
,
std
::
vector
<
std
::
string
>
({
"b"
}),
SetOp
(
&
prog
,
"OP1"
,
"op1"
,
std
::
vector
<
std
::
string
>
({
"b"
}),
std
::
vector
<
std
::
string
>
({
"c"
}));
std
::
vector
<
std
::
string
>
({
"c"
}));
SetOp
(
&
prog
,
"conv2d"
,
std
::
vector
<
std
::
string
>
({
"c"
,
"weights"
,
"bias"
}),
// conv+relu, both with MKL-DNN
std
::
vector
<
std
::
string
>
({
"f"
}));
SetOp
(
&
prog
,
"conv2d"
,
"conv1"
,
SetOp
(
&
prog
,
"relu"
,
std
::
vector
<
std
::
string
>
({
"f"
}),
std
::
vector
<
std
::
string
>
({
"c"
,
"weights"
,
"bias"
}),
std
::
vector
<
std
::
string
>
({
"g"
}));
std
::
vector
<
std
::
string
>
({
"f"
}),
true
);
SetOp
(
&
prog
,
"relu"
,
"relu1"
,
std
::
vector
<
std
::
string
>
({
"f"
}),
std
::
vector
<
std
::
string
>
({
"g"
}),
true
);
SetOp
(
&
prog
,
"OP3"
,
"op3"
,
std
::
vector
<
std
::
string
>
({
"g"
}),
std
::
vector
<
std
::
string
>
({
"h"
}));
// conv+relu, only one with MKL-DNN
SetOp
(
&
prog
,
"conv2d"
,
"conv2"
,
std
::
vector
<
std
::
string
>
({
"h"
,
"weights2"
,
"bias2"
}),
std
::
vector
<
std
::
string
>
({
"k"
}),
true
);
SetOp
(
&
prog
,
"relu"
,
"relu2"
,
std
::
vector
<
std
::
string
>
({
"k"
}),
std
::
vector
<
std
::
string
>
({
"l"
}));
return
prog
;
return
prog
;
}
}
...
@@ -88,11 +101,17 @@ TEST(ConvReLUFusePass, basic) {
...
@@ -88,11 +101,17 @@ TEST(ConvReLUFusePass, basic) {
auto
*
op
=
node
->
Op
();
auto
*
op
=
node
->
Op
();
ASSERT_TRUE
(
op
->
HasAttr
(
"use_mkldnn"
));
ASSERT_TRUE
(
op
->
HasAttr
(
"use_mkldnn"
));
EXPECT_TRUE
(
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"use_mkldnn"
)));
EXPECT_TRUE
(
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"use_mkldnn"
)));
// check if only "conv1" convolution is fused
auto
op_name
=
boost
::
get
<
std
::
string
>
(
op
->
GetAttr
(
"name"
));
if
(
op_name
==
"conv1"
)
{
ASSERT_TRUE
(
op
->
HasAttr
(
"fuse_relu"
));
ASSERT_TRUE
(
op
->
HasAttr
(
"fuse_relu"
));
bool
fuse_relu
=
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"fuse_relu"
));
bool
fuse_relu
=
boost
::
get
<
bool
>
(
op
->
GetAttr
(
"fuse_relu"
));
if
(
fuse_relu
)
{
if
(
fuse_relu
)
{
++
conv_relu_count
;
++
conv_relu_count
;
}
}
}
else
if
(
op_name
==
"conv2"
)
{
ASSERT_FALSE
(
op
->
HasAttr
(
"fuse_relu"
));
}
}
}
}
}
EXPECT_EQ
(
conv_relu_count
,
1
);
EXPECT_EQ
(
conv_relu_count
,
1
);
...
...
paddle/fluid/framework/ir/fuse_pass_base.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Records the graph being processed and the name under which this pass
// reports its fusion statistics. Both members are `mutable`, which is why
// this can be a const method.
void FusePassBase::Init(const std::string& repr, Graph* graph) const {
  graph_ = graph;
  repr_ = repr;
}
// Returns the parameter scope stored on the graph under kParamScopeAttr.
// Fails an enforce check when no graph has been set via Init() or the graph
// does not carry that attribute.
Scope* FusePassBase::param_scope() const {
  // Same guard AddStatis() uses: avoid dereferencing a null graph pointer.
  PADDLE_ENFORCE(graph_);
  PADDLE_ENFORCE(graph_->Has(kParamScopeAttr));
  return graph_->Get<framework::Scope*>(kParamScopeAttr);
}
// Stores `count_of_fused` in the graph's fuse-statistics map under this
// pass's name (`repr_`), creating the map attribute on first use. Requires
// Init() to have supplied a graph and a non-empty name.
void FusePassBase::AddStatis(int count_of_fused) const {
  using StatisMap = std::unordered_map<std::string, int>;

  PADDLE_ENFORCE(graph_);
  PADDLE_ENFORCE(!repr_.empty());

  const bool has_statis = graph_->Has(kFuseStatisAttr);
  if (!has_statis) {
    graph_->Set(kFuseStatisAttr, new StatisMap);
  }

  graph_->Get<StatisMap>(kFuseStatisAttr)[repr_] = count_of_fused;
}
// Decides how two operator nodes may be fused.
//
// With PADDLE_WITH_MKLDNN defined:
//   - FUSE_MKLDNN when both operators have "use_mkldnn" set to true,
//   - FUSE_NATIVE when neither has it set to true (missing counts as false),
//   - DO_NOT_FUSE when they disagree.
// Without PADDLE_WITH_MKLDNN the result is always FUSE_NATIVE.
FuseOptions FusePassBase::FindFuseOption(const Node& node1,
                                         const Node& node2) const {
#ifdef PADDLE_WITH_MKLDNN
  // HasAttr() is checked first so GetAttr() is never called for a missing
  // attribute.
  auto uses_mkldnn = [](const Node& node) -> bool {
    return node.Op()->HasAttr("use_mkldnn") &&
           boost::get<bool>(node.Op()->GetAttr("use_mkldnn"));
  };

  const bool node1_mkldnn = uses_mkldnn(node1);
  const bool node2_mkldnn = uses_mkldnn(node2);

  if (node1_mkldnn && node2_mkldnn)
    return FUSE_MKLDNN;
  else if (!node1_mkldnn && !node2_mkldnn)
    return FUSE_NATIVE;
  else
    return DO_NOT_FUSE;
#else
  return FUSE_NATIVE;
#endif
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/fuse_pass_base.h
浏览文件 @
049c9c7d
...
@@ -25,32 +25,24 @@ namespace ir {
...
@@ -25,32 +25,24 @@ namespace ir {
static
const
char
kParamScopeAttr
[]
=
"__param_scope__"
;
static
const
char
kParamScopeAttr
[]
=
"__param_scope__"
;
static
const
char
kFuseStatisAttr
[]
=
"__fuse_statis__"
;
static
const
char
kFuseStatisAttr
[]
=
"__fuse_statis__"
;
enum
FuseOptions
{
DO_NOT_FUSE
,
// fusing will not be done
FUSE_NATIVE
,
// fusing will be done without MKL-DNN
FUSE_MKLDNN
// fusing will be done with MKL-DNN
};
class
FusePassBase
:
public
Pass
{
class
FusePassBase
:
public
Pass
{
public:
public:
void
Init
(
const
std
::
string
&
repr
,
Graph
*
graph
)
const
{
void
Init
(
const
std
::
string
&
repr
,
Graph
*
graph
)
const
;
repr_
=
repr
;
Scope
*
param_scope
()
const
;
graph_
=
graph
;
void
AddStatis
(
int
count_of_fused
)
const
;
}
Scope
*
param_scope
()
const
{
PADDLE_ENFORCE
(
graph_
->
Has
(
kParamScopeAttr
));
return
graph_
->
Get
<
framework
::
Scope
*>
(
kParamScopeAttr
);
}
void
AddStatis
(
int
count_of_fused
)
const
{
PADDLE_ENFORCE
(
graph_
);
PADDLE_ENFORCE
(
!
repr_
.
empty
());
if
(
!
graph_
->
Has
(
kFuseStatisAttr
))
{
graph_
->
Set
(
kFuseStatisAttr
,
new
std
::
unordered_map
<
std
::
string
,
int
>
);
}
auto
&
info
=
graph_
->
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
kFuseStatisAttr
);
info
[
repr_
]
=
count_of_fused
;
}
virtual
~
FusePassBase
()
{}
virtual
~
FusePassBase
()
{}
protected:
protected:
virtual
FuseOptions
FindFuseOption
(
const
Node
&
node1
,
const
Node
&
node2
)
const
;
mutable
Graph
*
graph_
;
mutable
Graph
*
graph_
;
mutable
std
::
string
repr_
;
mutable
std
::
string
repr_
;
};
};
...
...
paddle/fluid/framework/ir/graph_helper_test.cc
浏览文件 @
049c9c7d
...
@@ -200,15 +200,15 @@ TEST(GraphHelperTest, GraphNum) {
...
@@ -200,15 +200,15 @@ TEST(GraphHelperTest, GraphNum) {
Graph
g
(
prog
);
Graph
g
(
prog
);
BuildZeroGraph
(
&
g
);
BuildZeroGraph
(
&
g
);
ASSERT_EQ
(
GraphNum
(
g
),
0
);
ASSERT_EQ
(
GraphNum
(
g
),
0
UL
);
Graph
g2
(
prog
);
Graph
g2
(
prog
);
BuildOneGraph
(
&
g2
);
BuildOneGraph
(
&
g2
);
ASSERT_EQ
(
GraphNum
(
g2
),
1
);
ASSERT_EQ
(
GraphNum
(
g2
),
1
UL
);
Graph
g3
(
prog
);
Graph
g3
(
prog
);
BuildTwoGraphs
(
&
g3
);
BuildTwoGraphs
(
&
g3
);
ASSERT_EQ
(
GraphNum
(
g3
),
2
);
ASSERT_EQ
(
GraphNum
(
g3
),
2
UL
);
}
}
}
// namespace ir
}
// namespace ir
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
049c9c7d
...
@@ -761,6 +761,51 @@ PDNode *patterns::ConvReLU::operator()(
...
@@ -761,6 +761,51 @@ PDNode *patterns::ConvReLU::operator()(
return
relu_out_var
;
return
relu_out_var
;
}
}
// Builds the pattern sequence_conv -> elementwise_add -> relu starting from
// the caller-supplied input node and returns the relu output node.
PDNode *patterns::SeqConvEltAddRelu::operator()(
    paddle::framework::ir::PDNode *seqconv_input) {
  // --- operators ---
  seqconv_input->assert_is_op_input("sequence_conv", "X");

  auto *conv_node = pattern->NewNode(seqconv_repr());
  conv_node->assert_is_op("sequence_conv");
  conv_node->assert_op_attr<bool>("paddingTrainable", false);
  conv_node->assert_op_attr<int>("contextStride", 1);

  auto *add_node = pattern->NewNode(eltadd_repr());
  add_node->assert_is_op("elementwise_add");

  auto *relu_node = pattern->NewNode(relu_repr());
  relu_node->assert_is_op("relu");

  // --- variables ---
  // Convolution filter: a persistable input.
  auto *filter_var = pattern->NewNode(seqconv_weight_repr());
  filter_var->AsInput();
  filter_var->assert_is_persistable_var();
  filter_var->assert_is_op_input("sequence_conv", "Filter");

  // Bias operand of the elementwise_add.
  auto *bias_var = pattern->NewNode(eltadd_bias_repr());
  bias_var->AsInput();
  bias_var->assert_is_op_input("elementwise_add");

  // Intermediate variables, will be removed in the IR after fuse.
  auto *conv_out_var = pattern->NewNode(seqconv_out_repr());
  conv_out_var->AsIntermediate();
  conv_out_var->assert_is_only_output_of_op("sequence_conv");
  conv_out_var->assert_is_op_input("elementwise_add");

  auto *add_out_var = pattern->NewNode(eltadd_out_repr());
  add_out_var->AsIntermediate();
  add_out_var->assert_is_only_output_of_op("elementwise_add");
  add_out_var->assert_is_only_input_of_op("relu");

  // Final output of the pattern.
  auto *result_var = pattern->NewNode(relu_out_repr());
  result_var->AsOutput();
  result_var->assert_is_op_output("relu");

  // --- edges ---
  conv_node->LinksFrom({seqconv_input, filter_var});
  conv_node->LinksTo({conv_out_var});

  add_node->LinksFrom({conv_out_var, bias_var});
  add_node->LinksTo({add_out_var});

  relu_node->LinksFrom({add_out_var});
  relu_node->LinksTo({result_var});

  return result_var;
}
PDNode
*
patterns
::
FC
::
operator
()(
paddle
::
framework
::
ir
::
PDNode
*
x
,
PDNode
*
patterns
::
FC
::
operator
()(
paddle
::
framework
::
ir
::
PDNode
*
x
,
bool
with_bias
)
{
bool
with_bias
)
{
// Create shared nodes.
// Create shared nodes.
...
@@ -966,6 +1011,79 @@ PDNode *patterns::ElewiseAddActInplaceGrad::operator()(
...
@@ -966,6 +1011,79 @@ PDNode *patterns::ElewiseAddActInplaceGrad::operator()(
return
ele_add_grad
;
return
ele_add_grad
;
}
}
// Builds the pattern conv2d -> elementwise_add, where the "Y" operand of the
// elementwise_add is a persistable variable acting as the bias, and returns
// the elementwise_add output node.
PDNode *patterns::ConvBias::operator()(
    paddle::framework::ir::PDNode *conv_input) {
  // --- operators ---
  conv_input->assert_is_op_input("conv2d", "Input");

  auto *conv_node = pattern->NewNode(conv_repr());
  conv_node->assert_is_op("conv2d");

  auto *eltwise_node = pattern->NewNode(eltwise_repr());
  eltwise_node->assert_is_op("elementwise_add");

  // --- variables ---
  // Convolution filter: a persistable input.
  auto *filter_var = pattern->NewNode(conv_weight_repr());
  filter_var->AsInput();
  filter_var->assert_is_persistable_var();
  filter_var->assert_is_op_input("conv2d", "Filter");

  // Intermediate variable, will be removed in the IR after fuse.
  auto *conv_result_var = pattern->NewNode(conv_out_repr());
  conv_result_var->AsIntermediate();
  conv_result_var->assert_is_only_output_of_op("conv2d");
  conv_result_var->assert_is_op_input("elementwise_add");

  // Bias stored in elementwise_add.
  auto *bias_var = pattern->NewNode(eltwise_bias_repr());
  bias_var->AsInput();
  bias_var->assert_is_persistable_var();
  bias_var->assert_is_op_input("elementwise_add", "Y");

  // Final output of the pattern.
  auto *result_var = pattern->NewNode(eltwise_out_repr());
  result_var->AsOutput();
  result_var->assert_is_op_output("elementwise_add");

  // --- edges ---
  conv_node->LinksFrom({conv_input, filter_var});
  conv_node->LinksTo({conv_result_var});

  eltwise_node->LinksFrom({conv_result_var, bias_var});
  eltwise_node->LinksTo({result_var});

  return result_var;
}
// Builds a pattern for a single conv2d operator with its "Input" and
// "Filter" inputs and its "Output" output; returns the output node.
PDNode *patterns::Conv::operator()() {
  auto conv_node = pattern->NewNode(conv_op_repr());
  conv_node->assert_is_op("conv2d");

  auto in_var = pattern->NewNode(conv_input_repr());
  in_var->AsInput();
  in_var->assert_is_op_input("conv2d", "Input");

  auto weights_var = pattern->NewNode(conv_filter_repr());
  weights_var->AsInput();
  weights_var->assert_is_op_input("conv2d", "Filter");

  auto out_var = pattern->NewNode(conv_output_repr());
  out_var->AsOutput();
  out_var->assert_is_op_output("conv2d", "Output");

  conv_node->LinksFrom({in_var, weights_var}).LinksTo({out_var});

  return out_var;
}
// Builds a pattern for an elementwise_add whose "X" input is the supplied
// node; returns the "Out" output node.
PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var) {
  auto add_node = pattern->NewNode(elementwise_add_op_repr());
  add_node->assert_is_op("elementwise_add");

  x_var->assert_is_op_input("elementwise_add", "X");

  // NOTE: the node registered as elementwise_add_x is bound to the "Y" input.
  auto other_operand = pattern->NewNode(elementwise_add_x_repr());
  other_operand->AsInput();
  other_operand->assert_is_op_input("elementwise_add", "Y");

  auto result_var = pattern->NewNode(elementwise_add_out_repr());
  result_var->AsOutput();
  result_var->assert_is_op_output("elementwise_add", "Out");

  add_node->LinksFrom({x_var, other_operand}).LinksTo({result_var});

  return result_var;
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
049c9c7d
...
@@ -128,6 +128,15 @@ struct PDNode {
...
@@ -128,6 +128,15 @@ struct PDNode {
const
std
::
unordered_set
<
std
::
string
>&
op_types
,
const
std
::
unordered_set
<
std
::
string
>&
op_types
,
const
std
::
string
&
argument
,
int
nth
);
const
std
::
string
&
argument
,
int
nth
);
// Adds an assertion that the matched node is an operator that has attribute
// `attr_name` holding a value of type T equal to `attr`. Returns `this` so
// that assertions can be chained.
template <typename T>
PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
  // Both arguments are copied into the closure: the assertion is stored and
  // evaluated after this call has returned.
  asserts_.emplace_back([attr_name, attr](Node* x) -> bool {
    if (!x || !x->IsOp()) {
      return false;
    }
    if (!x->Op()->HasAttr(attr_name)) {
      return false;
    }
    return boost::get<T>(x->Op()->GetAttr(attr_name)) == attr;
  });
  return this;
}
private:
private:
PDNode
(
PDPattern
*
pattern
,
const
std
::
string
&
name
=
""
,
PDNode
(
PDPattern
*
pattern
,
const
std
::
string
&
name
=
""
,
Type
type
=
Type
::
kVar
)
Type
type
=
Type
::
kVar
)
...
@@ -434,6 +443,31 @@ struct ConvReLU : public PatternBase {
...
@@ -434,6 +443,31 @@ struct ConvReLU : public PatternBase {
PATTERN_DECL_NODE
(
relu_out
);
PATTERN_DECL_NODE
(
relu_out
);
};
};
// SeqConv followed by Elementwise_Add and ReLU.
// op: seqconv + elementwise_add + relu
// Nodes declared below with PATTERN_DECL_NODE:
//   operators: seqconv, eltadd, relu
//   variables: seqconv_weight, seqconv_out, eltadd_bias, eltadd_out, relu_out
// The seqconv input is not declared as a node of this pattern; it is the
// node handed to operator() as seqconv_input.
struct SeqConvEltAddRelu : public PatternBase {
  // Forwards pattern and name_scope to PatternBase, with the literal
  // "seqconv_eltadd_relu" as the third argument.
  SeqConvEltAddRelu(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "seqconv_eltadd_relu") {}

  // Takes the node to use as the seqconv input and returns a PDNode*.
  // Only declared here; the definition is not part of this struct.
  PDNode* operator()(PDNode* seqconv_input);

  // declare operator node's name
  PATTERN_DECL_NODE(seqconv);
  PATTERN_DECL_NODE(eltadd);
  PATTERN_DECL_NODE(relu);
  // declare variable node's name
  PATTERN_DECL_NODE(seqconv_weight);
  PATTERN_DECL_NODE(seqconv_out);
  PATTERN_DECL_NODE(eltadd_bias);
  PATTERN_DECL_NODE(eltadd_out);
  PATTERN_DECL_NODE(relu_out);
};
// FC with bias
// FC with bias
// op: mul + elementwise_add
// op: mul + elementwise_add
// named nodes:
// named nodes:
...
@@ -578,6 +612,65 @@ struct ElewiseAddActInplaceGrad : public PatternBase {
...
@@ -578,6 +612,65 @@ struct ElewiseAddActInplaceGrad : public PatternBase {
PATTERN_DECL_NODE
(
d_ele_y
);
PATTERN_DECL_NODE
(
d_ele_y
);
PATTERN_DECL_NODE
(
ele_y
);
PATTERN_DECL_NODE
(
ele_y
);
};
};
// Conv with Elementwise_add as bias
// op: conv + elementwise_add
// Nodes declared below with PATTERN_DECL_NODE:
//   operators: conv, eltwise
//   variables: conv_weight, conv_out, eltwise_bias, eltwise_out
// The conv input is not declared as a node of this pattern; it is the node
// handed to operator() as conv_input.
struct ConvBias : public PatternBase {
  // Forwards pattern and name_scope to PatternBase, with the literal
  // "conv_bias" as the third argument.
  ConvBias(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "conv_bias") {}

  // Takes the node to use as the conv input and returns a PDNode*.
  // Only declared here; the definition is not part of this struct.
  PDNode* operator()(PDNode* conv_input);

  // declare operator node's name
  PATTERN_DECL_NODE(conv);
  PATTERN_DECL_NODE(eltwise);
  // declare variable node's name
  PATTERN_DECL_NODE(conv_weight);
  PATTERN_DECL_NODE(conv_out);
  PATTERN_DECL_NODE(eltwise_bias);
  PATTERN_DECL_NODE(eltwise_out);
};
// Convolution op
// Forward pass for convolution.
// conv_input, conv_bias and conv_filter are inputs.
// conv_output is the result of the operator.
// conv_residual_data is the data used by a skip connection.
// If residual connection fusion is on, the formula is:
// conv_output = conv_op(conv_filter, conv_input, conv_bias)
//             + conv_residual_data
// If the fusion is off, conv_residual_data is not added.
// NOTE(review): conv_bias is mentioned above but no conv_bias node is
// declared in this struct -- confirm whether that is intentional.
struct Conv : public PatternBase {
  // Forwards pattern and name_scope to PatternBase, with the literal
  // "convolution" as the third argument.
  Conv(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "convolution") {}

  // Takes no arguments and returns a PDNode*. Only declared here.
  PDNode* operator()();

  // Operator node followed by the variable nodes of the pattern.
  PATTERN_DECL_NODE(conv_op);
  PATTERN_DECL_NODE(conv_input);
  PATTERN_DECL_NODE(conv_filter);
  PATTERN_DECL_NODE(conv_residual_data);
  PATTERN_DECL_NODE(conv_output);
};
// ElementwiseAdd used in residual connections.
// The operator is removed when residual connection fusion is on.
// NOTE(review): the original comment read "y_var is used and convolution
// output"; presumably one operand of the add is the convolution output --
// confirm which operand (x_var passed to operator(), or the "Y" node) that is.
struct ElementwiseAdd : public PatternBase {
  // Forwards pattern and name_scope to PatternBase, with the literal
  // "elementwise_add" as the third argument.
  ElementwiseAdd(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "elementwise_add") {}

  // Takes the node to use as x_var and returns a PDNode*. Only declared here.
  PDNode* operator()(PDNode* x_var);

  // Operator node followed by the variable nodes of the pattern.
  PATTERN_DECL_NODE(elementwise_add_op);
  PATTERN_DECL_NODE(elementwise_add_x);
  PATTERN_DECL_NODE(elementwise_add_y);
  PATTERN_DECL_NODE(elementwise_add_out);
};
}
// namespace patterns
}
// namespace patterns
// Link two ir::Nodes from each other.
// Link two ir::Nodes from each other.
...
...
paddle/fluid/framework/ir/graph_test.cc
浏览文件 @
049c9c7d
...
@@ -124,7 +124,7 @@ TEST(GraphTest, Basic) {
...
@@ -124,7 +124,7 @@ TEST(GraphTest, Basic) {
ASSERT_EQ
(
n
->
outputs
.
size
(),
0UL
);
ASSERT_EQ
(
n
->
outputs
.
size
(),
0UL
);
}
}
}
}
ASSERT_EQ
(
nodes
.
size
(),
5
);
ASSERT_EQ
(
nodes
.
size
(),
5
UL
);
}
}
TEST
(
GraphTest
,
WriteAfterRead
)
{
TEST
(
GraphTest
,
WriteAfterRead
)
{
...
...
paddle/fluid/framework/ir/mkldnn_placement_pass.cc
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/mkldnn_placement_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Turns on the "use_mkldnn" attribute for every op node that already has one.
//
// Ops without a "use_mkldnn" attribute and non-op nodes are left untouched.
// The same graph object that was passed in is returned.
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Applies MKL-DNN placement strategy.";
  for (const Node* n : graph->Nodes()) {
    // Only ops that already carry the attribute are switched; the attribute
    // is never added to an op that lacks it.
    if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
      n->Op()->SetAttr("use_mkldnn", true);
    }
  }
  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Registers MKLDNNPlacementPass with the identifier mkldnn_placement_pass
// given as the first macro argument.
REGISTER_PASS(mkldnn_placement_pass,
              paddle::framework::ir::MKLDNNPlacementPass);
paddle/fluid/framework/ir/mkldnn_placement_pass.h
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Pass that applies the MKL-DNN placement strategy to a graph.
// Only the ApplyImpl override is declared here; its definition is not part of
// this class body.
class MKLDNNPlacementPass : public Pass {
 protected:
  // Takes ownership of graph and returns a graph of the same type.
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h"
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Detects seqconv + eltadd + relu subgraphs in `graph` and replaces each match
// with a single fusion_seqconv_eltadd_relu op.
//
// graph:      graph to rewrite in place; it must carry kParamScopeAttr when a
//             match is fused.
// name_scope: prefix used to name the pattern nodes.
// scope:      not read by this function; the scope used for the new ColMat
//             variable is taken from the graph's kParamScopeAttr attribute.
// Returns the number of subgraphs that were fused.
int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
  // The parameter is kept for interface compatibility only. The lambda below
  // used to declare its own local named `scope`, which shadowed it.
  (void)scope;

  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();

  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "X"))
                  ->assert_is_op_input("sequence_conv")
                  ->assert_var_not_persistable();
  patterns::SeqConvEltAddRelu fuse_pattern(pattern, name_scope);
  fuse_pattern(x);

  // Creates the fused op node and links it to its inputs and output.
  auto fuse_creator = [&](Node* seqconv, Node* input, Node* seqconv_weight,
                          Node* eltadd_bias, Node* relu_out) {
    OpDesc op_desc;
    op_desc.SetType("fusion_seqconv_eltadd_relu");
    op_desc.SetInput("X", {input->Name()});
    op_desc.SetInput("Filter", {seqconv_weight->Name()});
    op_desc.SetInput("Bias", {eltadd_bias->Name()});
    // The context attributes are copied unchanged from the original seqconv.
    op_desc.SetAttr("contextLength", seqconv->Op()->GetAttr("contextLength"));
    op_desc.SetAttr("contextStart", seqconv->Op()->GetAttr("contextStart"));
    op_desc.SetAttr("contextStride", seqconv->Op()->GetAttr("contextStride"));
    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
    auto* graph_scope = graph->Get<Scope*>(kParamScopeAttr);
    const std::string ColMat = patterns::UniqueKey("SeqConvColMat");
    op_desc.SetOutput("ColMat", {ColMat});
    op_desc.SetOutput("Out", {relu_out->Name()});
    // The ColMat output gets a uniquely named LoDTensor variable in the scope.
    graph_scope->Var(ColMat)->GetMutable<LoDTensor>();
    auto* op = graph->CreateOpNode(&op_desc);
    IR_NODE_LINK_TO(input, op);
    IR_NODE_LINK_TO(seqconv_weight, op);
    IR_NODE_LINK_TO(eltadd_bias, op);
    IR_NODE_LINK_TO(op, relu_out);
    return op;
  };

  int fusion_count{0};

  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
    GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd, eltadd, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd_bias, eltadd_bias, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd_out, eltadd_out, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(relu, relu, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, fuse_pattern);

    fuse_creator(seqconv, subgraph.at(x), seqconv_weight, eltadd_bias,
                 relu_out);
    // The three original ops and the two intermediate variables are removed;
    // the input, weight, bias and relu_out nodes stay in the graph.
    std::unordered_set<const Node*> marked_nodes(
        {seqconv, seqconv_out, eltadd, eltadd_out, relu});
    GraphSafeRemoveNodes(graph, marked_nodes);
    ++fusion_count;
  };

  gpd(graph, handler);

  return fusion_count;
}
// Runs the seqconv + eltadd + relu fusion over the graph, records how many
// subgraphs were fused, and hands the same graph object back to the caller.
std::unique_ptr<ir::Graph> SeqConvEltAddReluFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  Graph* raw_graph = graph.get();

  // Init must run before param_scope() is queried below.
  FusePassBase::Init(name_scope_, raw_graph);

  const int fused = BuildFusion(raw_graph, name_scope_, param_scope());
  AddStatis(fused);

  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Registers SeqConvEltAddReluFusePass with the identifier
// seqconv_eltadd_relu_fuse_pass given as the first macro argument.
REGISTER_PASS(seqconv_eltadd_relu_fuse_pass,
              paddle::framework::ir::SeqConvEltAddReluFusePass);
paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
class
SeqConvEltAddReluFusePass
:
public
FusePassBase
{
public:
virtual
~
SeqConvEltAddReluFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"seqconv_eltadd_relu_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/op_desc.cc
浏览文件 @
049c9c7d
...
@@ -515,20 +515,14 @@ void OpDesc::InferShape(const BlockDesc &block) const {
...
@@ -515,20 +515,14 @@ void OpDesc::InferShape(const BlockDesc &block) const {
}
}
void
OpDesc
::
InferVarType
(
BlockDesc
*
block
)
const
{
void
OpDesc
::
InferVarType
(
BlockDesc
*
block
)
const
{
// There are a few places that var type can be set.
// When VarDesc is created, default set to LOD_TENSOR.
// When output variable is created, default is set to LOD_TENSOR.
// We limit here to be the only place that operator defines its customized
// var type inference. Hence, we don't do any "default" setting here.
auto
&
info
=
OpInfoMap
::
Instance
().
Get
(
this
->
Type
());
auto
&
info
=
OpInfoMap
::
Instance
().
Get
(
this
->
Type
());
if
(
info
.
infer_var_type_
)
{
if
(
info
.
infer_var_type_
)
{
info
.
infer_var_type_
(
*
this
,
block
);
info
.
infer_var_type_
(
*
this
,
block
);
}
else
{
// all output type is LoDTensor by default
VLOG
(
10
)
<<
this
->
Type
()
<<
" has not registered InferVarType. Set output variables to "
"LOD_TENSOR"
;
for
(
auto
&
out_pair
:
this
->
outputs_
)
{
for
(
auto
&
out_var_name
:
out_pair
.
second
)
{
block
->
FindRecursiveOrCreateVar
(
out_var_name
)
.
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
}
}
}
}
}
}
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
049c9c7d
...
@@ -299,6 +299,12 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
...
@@ -299,6 +299,12 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
}
}
ParallelExecutor
::~
ParallelExecutor
()
{
ParallelExecutor
::~
ParallelExecutor
()
{
const
auto
dev_ctxs
=
platform
::
DeviceContextPool
::
Instance
().
GetAllDeviceContexts
();
for
(
auto
&
dev_ctx
:
dev_ctxs
)
{
dev_ctx
->
Wait
();
}
if
(
member_
->
own_local_scope_
)
{
if
(
member_
->
own_local_scope_
)
{
for
(
size_t
i
=
1
;
i
<
member_
->
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
1
;
i
<
member_
->
local_scopes_
.
size
();
++
i
)
{
Scope
*
local_scope
=
member_
->
local_scopes_
[
i
];
Scope
*
local_scope
=
member_
->
local_scopes_
[
i
];
...
...
paddle/fluid/framework/program_desc_test.cc
浏览文件 @
049c9c7d
...
@@ -103,7 +103,7 @@ TEST(ProgramDesc, copy_ctor) {
...
@@ -103,7 +103,7 @@ TEST(ProgramDesc, copy_ctor) {
ASSERT_EQ
(
1
,
op
->
GetBlockAttrId
(
"sub_block"
));
ASSERT_EQ
(
1
,
op
->
GetBlockAttrId
(
"sub_block"
));
found_sub_block
=
true
;
found_sub_block
=
true
;
ASSERT_EQ
(
2
,
op
->
GetBlocksAttrIds
(
"sub_blocks"
).
size
());
ASSERT_EQ
(
2
UL
,
op
->
GetBlocksAttrIds
(
"sub_blocks"
).
size
());
found_sub_blocks
=
true
;
found_sub_blocks
=
true
;
}
}
}
}
...
...
paddle/fluid/framework/reader_test.cc
浏览文件 @
049c9c7d
...
@@ -40,7 +40,7 @@ TEST(READER, decorate_chain) {
...
@@ -40,7 +40,7 @@ TEST(READER, decorate_chain) {
auto
endpoints
=
root
->
GetEndPoints
();
auto
endpoints
=
root
->
GetEndPoints
();
ASSERT_EQ
(
endpoints
.
size
(),
2U
);
ASSERT_EQ
(
endpoints
.
size
(),
2U
);
ASSERT_NE
(
endpoints
.
count
(
end_point1
.
get
()),
0UL
);
ASSERT_NE
(
endpoints
.
count
(
end_point1
.
get
()),
0UL
);
ASSERT_NE
(
endpoints
.
count
(
end_point2
.
get
()),
0
);
ASSERT_NE
(
endpoints
.
count
(
end_point2
.
get
()),
0
UL
);
}
}
{
{
...
...
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
049c9c7d
...
@@ -101,7 +101,13 @@ Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
...
@@ -101,7 +101,13 @@ Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
void
Analyzer
::
Run
(
Argument
*
argument
)
{
void
Analyzer
::
Run
(
Argument
*
argument
)
{
std
::
vector
<
std
::
string
>
passes
;
std
::
vector
<
std
::
string
>
passes
;
for
(
auto
&
pass
:
all_ir_passes_
)
{
#ifdef PADDLE_WITH_MKLDNN
if
(
use_mkldnn_
)
{
VLOG
(
3
)
<<
"Adding MKL-DNN placement pass"
;
passes
.
push_back
(
"mkldnn_placement_pass"
);
}
#endif
for
(
auto
&
pass
:
ir_passes_
)
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
passes
.
push_back
(
pass
);
passes
.
push_back
(
pass
);
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
...
@@ -117,11 +123,26 @@ void Analyzer::Run(Argument* argument) {
...
@@ -117,11 +123,26 @@ void Analyzer::Run(Argument* argument) {
}
}
}
}
// Selects every pass listed in all_ir_passes_ as the set of IR passes to run,
// replacing any previous selection. Returns *this so calls can be chained.
Analyzer& Analyzer::IncludeAllIrPasses() {
  ir_passes_ = all_ir_passes_;
  return *this;
}
Analyzer
&
Analyzer
::
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
Analyzer
&
Analyzer
::
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
disabled_ir_passes_
.
insert
(
passes
.
begin
(),
passes
.
end
());
disabled_ir_passes_
.
insert
(
passes
.
begin
(),
passes
.
end
());
return
*
this
;
return
*
this
;
}
}
// Replaces the set of IR passes to run with `passes`.
// Returns *this so calls can be chained.
Analyzer& Analyzer::IncludeIrPasses(const std::vector<std::string>& passes) {
  ir_passes_ = passes;
  return *this;
}
// Stores the MKL-DNN switch in use_mkldnn_.
// Returns *this so calls can be chained.
Analyzer& Analyzer::SetUseMkldnn(bool use_mkldnn) {
  use_mkldnn_ = use_mkldnn;
  return *this;
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
049c9c7d
...
@@ -54,6 +54,9 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -54,6 +54,9 @@ class Analyzer : public OrderedRegistry<PassManager> {
void
Run
(
Argument
*
argument
);
void
Run
(
Argument
*
argument
);
Analyzer
&
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
Analyzer
&
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
Analyzer
&
IncludeIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
Analyzer
&
IncludeAllIrPasses
();
Analyzer
&
SetUseMkldnn
(
bool
use_mkldnn
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
...
@@ -66,6 +69,7 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -66,6 +69,7 @@ class Analyzer : public OrderedRegistry<PassManager> {
// Manual update the passes here.
// Manual update the passes here.
"infer_clean_graph_pass"
,
//
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
...
@@ -76,11 +80,16 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -76,11 +80,16 @@ class Analyzer : public OrderedRegistry<PassManager> {
"conv_bn_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
"conv_bias_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
,
//
#endif
#endif
}};
}};
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
// Ir passes to run
std
::
vector
<
std
::
string
>
ir_passes_
;
// Whether Run() adds the MKL-DNN placement pass. Defaults to false so the
// flag has a defined value when SetUseMkldnn() is never called.
bool use_mkldnn_{false};
};
};
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
049c9c7d
...
@@ -77,10 +77,6 @@ bool AnalysisPredictor::Init(
...
@@ -77,10 +77,6 @@ bool AnalysisPredictor::Init(
inference_program_
=
program
;
inference_program_
=
program
;
}
}
if
(
config_
.
_use_mkldnn
)
{
executor_
->
EnableMKLDNN
(
*
inference_program_
);
}
executor_
->
Prepare
(
scope_
.
get
(),
*
inference_program_
,
0
,
executor_
->
Prepare
(
scope_
.
get
(),
*
inference_program_
,
0
,
config_
.
use_feed_fetch_ops
);
config_
.
use_feed_fetch_ops
);
...
@@ -225,10 +221,24 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -225,10 +221,24 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
argument_
.
origin_program_desc
.
reset
(
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
PADDLE_ENFORCE
(
config_
.
ir_mode
==
contrib
::
AnalysisConfig
::
IrPassMode
::
kExclude
,
switch
(
config_
.
ir_mode
)
{
"Only kExclude is supported yet."
);
case
contrib
::
AnalysisConfig
::
IrPassMode
::
kExclude
:
Analyzer
().
DisableIrPasses
(
config_
.
ir_passes
).
Run
(
&
argument_
);
Analyzer
()
.
IncludeAllIrPasses
()
.
SetUseMkldnn
(
config_
.
_use_mkldnn
)
.
DisableIrPasses
(
config_
.
ir_passes
)
.
Run
(
&
argument_
);
break
;
case
contrib
::
AnalysisConfig
::
IrPassMode
::
kInclude
:
Analyzer
()
.
SetUseMkldnn
(
config_
.
_use_mkldnn
)
.
IncludeIrPasses
(
config_
.
ir_passes
)
.
Run
(
&
argument_
);
break
;
default:
LOG
(
ERROR
)
<<
"Only kExclude and kInclude modes are supoorted yet."
;
}
CHECK
(
argument_
.
transformed_program_desc
);
CHECK
(
argument_
.
transformed_program_desc
);
VLOG
(
5
)
<<
"to prepare executor"
;
VLOG
(
5
)
<<
"to prepare executor"
;
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
049c9c7d
...
@@ -259,10 +259,17 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -259,10 +259,17 @@ struct AnalysisConfig : public NativeConfig {
kExclude
// Specify the disabled passes in `ir_passes`.
kExclude
// Specify the disabled passes in `ir_passes`.
};
};
// Switches ir_mode to kInclude and resets ir_passes to contain only
// "infer_clean_graph_pass".
void SetIncludeMode() {
  ir_mode = IrPassMode::kInclude;
  // this pass has to be run at the beginning of all fuse passes
  ir_passes = {"infer_clean_graph_pass"};
}
// Determine whether to perform graph optimization.
// Determine whether to perform graph optimization.
bool
enable_ir_optim
=
true
;
bool
enable_ir_optim
=
true
;
// Manually determine the IR passes to run.
// Manually determine the IR passes to run.
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
// passes to be excluded/included
std
::
vector
<
std
::
string
>
ir_passes
{
"embedding_fc_lstm_fuse_pass"
};
std
::
vector
<
std
::
string
>
ir_passes
{
"embedding_fc_lstm_fuse_pass"
};
// NOT stable yet.
// NOT stable yet.
...
...
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
049c9c7d
...
@@ -71,7 +71,7 @@ void profile(bool use_mkldnn = false) {
...
@@ -71,7 +71,7 @@ void profile(bool use_mkldnn = false) {
}
}
TEST
(
Analyzer_resnet50
,
profile
)
{
profile
();
}
TEST
(
Analyzer_resnet50
,
profile
)
{
profile
();
}
#if
n
def PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
TEST
(
Analyzer_resnet50
,
profile_mkldnn
)
{
profile
(
true
/* use_mkldnn */
);
}
TEST
(
Analyzer_resnet50
,
profile_mkldnn
)
{
profile
(
true
/* use_mkldnn */
);
}
#endif
#endif
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
049c9c7d
...
@@ -18,12 +18,12 @@ namespace paddle {
...
@@ -18,12 +18,12 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
using
namespace
framework
;
// NOLINT
using
namespace
framework
;
// NOLINT
static
std
::
vector
<
float
>
result_data
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
std
::
vector
<
size_t
>
lod
;
std
::
vector
<
size_t
>
lod
;
std
::
vector
<
std
::
vector
<
float
>>
rnn_link_data
;
std
::
vector
<
std
::
vector
<
float
>>
rnn_link_data
;
std
::
vector
<
float
>
result_data
;
size_t
num_samples
;
// total number of samples
size_t
num_samples
;
// total number of samples
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
batch_size
{
1
};
...
@@ -57,6 +57,7 @@ struct DataRecord {
...
@@ -57,6 +57,7 @@ struct DataRecord {
std
::
ifstream
file
(
path
);
std
::
ifstream
file
(
path
);
std
::
string
line
;
std
::
string
line
;
int
num_lines
=
0
;
int
num_lines
=
0
;
result_data
.
clear
();
while
(
std
::
getline
(
file
,
line
))
{
while
(
std
::
getline
(
file
,
line
))
{
num_lines
++
;
num_lines
++
;
std
::
vector
<
std
::
string
>
data
;
std
::
vector
<
std
::
string
>
data
;
...
@@ -135,13 +136,12 @@ TEST(Analyzer_rnn2, profile) {
...
@@ -135,13 +136,12 @@ TEST(Analyzer_rnn2, profile) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
// the first inference result
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
data
.
result_data
[
i
],
1e-3
);
EXPECT_NEAR
(
result
[
i
],
result_data
[
i
],
1e-3
);
}
}
}
}
}
}
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
049c9c7d
...
@@ -183,7 +183,13 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
...
@@ -183,7 +183,13 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
auto
fuse_statis
=
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"seqconv_eltadd_relu_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
2
);
EXPECT_EQ
(
fuse_statis
.
at
(
"seqconv_eltadd_relu_fuse"
),
6
);
EXPECT_EQ
(
num_ops
,
32
);
}
}
// Compare result of NativeConfig and AnalysisConfig
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
049c9c7d
...
@@ -50,7 +50,7 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
...
@@ -50,7 +50,7 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
auto
&
ref_out
=
ref_outputs
[
i
];
auto
&
ref_out
=
ref_outputs
[
i
];
size_t
size
=
VecReduceToInt
(
out
.
shape
);
size_t
size
=
VecReduceToInt
(
out
.
shape
);
size_t
ref_size
=
VecReduceToInt
(
ref_out
.
shape
);
size_t
ref_size
=
VecReduceToInt
(
ref_out
.
shape
);
EXPECT_GT
(
size
,
0
);
EXPECT_GT
(
size
,
0
UL
);
EXPECT_EQ
(
size
,
ref_size
);
EXPECT_EQ
(
size
,
ref_size
);
EXPECT_EQ
(
out
.
dtype
,
ref_out
.
dtype
);
EXPECT_EQ
(
out
.
dtype
,
ref_out
.
dtype
);
switch
(
out
.
dtype
)
{
switch
(
out
.
dtype
)
{
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
049c9c7d
...
@@ -86,7 +86,7 @@ function(op_library TARGET)
...
@@ -86,7 +86,7 @@ function(op_library TARGET)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"channel_send_op"
"channel_create_op"
"channel_close_op"
"channel_recv_op"
)
"fusion_seqconv_eltadd_relu_op"
"channel_send_op"
"channel_create_op"
"channel_close_op"
"channel_recv_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
return
()
endif
()
endif
()
...
@@ -284,10 +284,10 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
...
@@ -284,10 +284,10 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
...
@@ -316,7 +316,7 @@ op_library(save_op DEPS lod_tensor)
...
@@ -316,7 +316,7 @@ op_library(save_op DEPS lod_tensor)
op_library
(
load_op DEPS lod_tensor
)
op_library
(
load_op DEPS lod_tensor
)
op_library
(
save_combine_op DEPS lod_tensor
)
op_library
(
save_combine_op DEPS lod_tensor
)
op_library
(
load_combine_op DEPS lod_tensor
)
op_library
(
load_combine_op DEPS lod_tensor
)
op_library
(
concat_op DEPS concat
)
op_library
(
concat_op DEPS concat
_and_split
)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
...
@@ -348,6 +348,6 @@ cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
...
@@ -348,6 +348,6 @@ cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
cc_test
(
save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op
)
cc_test
(
save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op
)
cc_test
(
save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op
)
cc_test
(
save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
nv_test
(
nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context
)
nv_test
(
nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context
)
endif
()
endif
()
nv_test
(
dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor
)
nv_test
(
dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor
)
paddle/fluid/operators/array_to_lod_tensor_op.cc
浏览文件 @
049c9c7d
...
@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <paddle/fluid/operators/math/concat.h>
#include <paddle/fluid/operators/math/concat
_and_split
.h>
#include <numeric>
#include <numeric>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_rank_table.h"
...
...
paddle/fluid/operators/concat_op.h
浏览文件 @
049c9c7d
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -89,28 +89,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
...
@@ -89,28 +89,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
outputs
.
push_back
(
nullptr
);
outputs
.
push_back
(
nullptr
);
}
}
}
}
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
// Sometimes direct copies will be faster, this maybe need deeply analysis.
// Sometimes direct copies will be faster, this maybe need deeply analysis.
if
(
axis
==
0
&&
outs
.
size
()
<
10
)
{
if
(
axis
==
0
&&
outs
.
size
()
<
10
)
{
size_t
input_offset
=
0
;
std
::
vector
<
const
framework
::
Tensor
*>
ref_shape
;
const
auto
in_stride
=
framework
::
stride_numel
(
out_grad
->
dims
());
ref_shape
.
insert
(
ref_shape
.
begin
(),
ins
.
begin
(),
ins
.
end
());
StridedMemcpyWithAxis0
<
T
>
(
dev_ctx
,
*
out_grad
,
ref_shape
,
&
outputs
);
for
(
size_t
i
=
0
;
i
<
outs
.
size
();
++
i
)
{
auto
out_stride
=
framework
::
stride_numel
(
ins
[
i
]
->
dims
());
auto
*
out
=
outputs
[
i
];
if
(
out
!=
nullptr
)
{
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
out_stride
,
out_grad
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out_stride
[
axis
]);
}
input_offset
+=
out_stride
[
axis
];
}
}
else
{
}
else
{
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
math
::
SplitFunctor
<
DeviceContext
,
T
>
split_functor
;
paddle
::
operators
::
math
::
ConcatGradFunctor
<
DeviceContext
,
T
>
split_functor
(
dev_ctx
,
*
out_grad
,
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
),
concat_grad_functor
;
concat_grad_functor
(
dev_ctx
,
*
out_grad
,
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"X"
),
static_cast
<
int
>
(
axis
),
&
outputs
);
static_cast
<
int
>
(
axis
),
&
outputs
);
}
}
}
}
...
...
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
049c9c7d
...
@@ -300,10 +300,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -300,10 +300,10 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
bool
fuse_relu
=
ctx
.
Attr
<
bool
>
(
"fuse_relu"
);
bool
fuse_relu
=
ctx
.
Attr
<
bool
>
(
"fuse_relu"
);
bool
fuse_
eltwise
=
ctx
.
Attr
<
bool
>
(
"fuse_eltwise
"
);
bool
fuse_
residual_conn
=
ctx
.
Attr
<
bool
>
(
"fuse_residual_connection
"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
// TODO: add support for dilation
// TODO
(tpatejko)
: add support for dilation
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
dilations
.
size
()
==
2
&&
dilations
[
0
]
==
1
&&
dilations
[
1
]
==
1
,
dilations
.
size
()
==
2
&&
dilations
[
0
]
==
1
&&
dilations
[
1
]
==
1
,
"dilation in convolution is not implemented yet"
);
"dilation in convolution is not implemented yet"
);
...
@@ -369,11 +369,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -369,11 +369,11 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bias_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
x
);
bias_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
x
);
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
bias_md
,
dst_md
,
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
bias_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
,
strides
,
paddings
,
mkldnn_engine
,
fuse_relu
,
fuse_
eltwise
);
fuse_relu
,
fuse_
residual_conn
);
}
else
{
}
else
{
conv_pd
=
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
,
fuse_relu
,
fuse_
eltwise
);
mkldnn_engine
,
fuse_relu
,
fuse_
residual_conn
);
}
}
// Save conv_pd/src_memory/weights_memory for backward pass
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
...
@@ -386,8 +386,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -386,8 +386,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
T
*
output_data
=
T
*
output_data
=
nullptr
;
if
(
fuse_residual_conn
)
{
auto
residual_param
=
ctx
.
Input
<
Tensor
>
(
"ResidualData"
);
auto
residual_param_data
=
residual_param
->
data
<
T
>
();
PADDLE_ENFORCE
(
residual_param_data
!=
nullptr
,
"Provide data if you want MKLDNN conv+elementwise_add fusion"
);
PADDLE_ENFORCE_EQ
(
output
->
dims
(),
residual_param
->
dims
(),
"Output and elementwise parameter need to have the "
"same dimension sizes"
);
output
->
ShareDataWith
(
*
residual_param
);
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
}
else
{
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
handler
.
GetDstMemorySize
());
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
handler
.
GetDstMemorySize
());
}
// create reorder primitive if the input format is not the preferred one
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
auto
src_memory_p
=
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
...
@@ -424,14 +442,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -424,14 +442,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
private:
private:
mkldnn
::
primitive_attr
CreatePostOps
(
bool
fuse_relu
,
mkldnn
::
primitive_attr
CreatePostOps
(
bool
fuse_relu
,
bool
fuse_
eltwise
)
const
{
bool
fuse_
residual_conn
)
const
{
mkldnn
::
primitive_attr
conv_attr
;
mkldnn
::
primitive_attr
conv_attr
;
mkldnn
::
post_ops
post_operations
;
mkldnn
::
post_ops
post_operations
;
// Fusion with Elementwise layer relies on adding a sum post-operation with
// Fusion with Elementwise layer relies on adding a sum post-operation with
// the scale parameter. It is assumed that when fuse_eltwise is true, the
// the scale parameter. It is assumed that when fuse_residual_connection is
// Output tensor contains the data coming from residual connection. The
// true, the output tensor contains the data coming from residual
// result of this post_op is: Output = scale * Output + Conv_Out.
// connection. The result of this post_op is:
if
(
fuse_eltwise
)
{
// Output = scale * Output + Conv_Out.
if
(
fuse_residual_conn
)
{
post_operations
.
append_sum
(
1.0
f
);
post_operations
.
append_sum
(
1.0
f
);
}
}
// Fusion with ReLU layer is executed through the PostOps feature. Create a
// Fusion with ReLU layer is executed through the PostOps feature. Create a
...
@@ -452,7 +471,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -452,7 +471,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const
memory
::
desc
&
dst
,
const
std
::
vector
<
int
>&
strides
,
const
memory
::
desc
&
dst
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
mkldnn
::
engine
&
engine
,
const
bool
fuse_relu
,
const
mkldnn
::
engine
&
engine
,
const
bool
fuse_relu
,
const
bool
fuse_
eltwise
)
const
{
const
bool
fuse_
residual_conn
)
const
{
memory
::
dims
stride_dims
=
{
strides
[
0
],
strides
[
1
]};
memory
::
dims
stride_dims
=
{
strides
[
0
],
strides
[
1
]};
memory
::
dims
padding_dims
=
{
paddings
[
0
],
paddings
[
1
]};
memory
::
dims
padding_dims
=
{
paddings
[
0
],
paddings
[
1
]};
...
@@ -461,7 +480,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -461,7 +480,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
dst
,
stride_dims
,
padding_dims
,
padding_dims
,
dst
,
stride_dims
,
padding_dims
,
padding_dims
,
mkldnn
::
padding_kind
::
zero
);
mkldnn
::
padding_kind
::
zero
);
mkldnn
::
primitive_attr
conv_attr
=
CreatePostOps
(
fuse_relu
,
fuse_eltwise
);
mkldnn
::
primitive_attr
conv_attr
=
CreatePostOps
(
fuse_relu
,
fuse_residual_conn
);
auto
p_conv_pd
=
new
mkldnn
::
convolution_forward
::
primitive_desc
(
auto
p_conv_pd
=
new
mkldnn
::
convolution_forward
::
primitive_desc
(
conv_desc
,
conv_attr
,
engine
);
conv_desc
,
conv_attr
,
engine
);
...
@@ -476,7 +496,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -476,7 +496,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
mkldnn
::
engine
&
engine
,
const
bool
fuse_relu
,
const
mkldnn
::
engine
&
engine
,
const
bool
fuse_relu
,
const
bool
fuse_
eltwise
)
const
{
const
bool
fuse_
residual_conn
)
const
{
memory
::
dims
stride_dims
=
{
strides
[
0
],
strides
[
1
]};
memory
::
dims
stride_dims
=
{
strides
[
0
],
strides
[
1
]};
memory
::
dims
padding_dims
=
{
paddings
[
0
],
paddings
[
1
]};
memory
::
dims
padding_dims
=
{
paddings
[
0
],
paddings
[
1
]};
...
@@ -485,7 +505,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -485,7 +505,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bias
,
dst
,
stride_dims
,
padding_dims
,
padding_dims
,
bias
,
dst
,
stride_dims
,
padding_dims
,
padding_dims
,
mkldnn
::
padding_kind
::
zero
);
mkldnn
::
padding_kind
::
zero
);
mkldnn
::
primitive_attr
conv_attr
=
CreatePostOps
(
fuse_relu
,
fuse_eltwise
);
mkldnn
::
primitive_attr
conv_attr
=
CreatePostOps
(
fuse_relu
,
fuse_residual_conn
);
auto
p_conv_pd
=
new
mkldnn
::
convolution_forward
::
primitive_desc
(
auto
p_conv_pd
=
new
mkldnn
::
convolution_forward
::
primitive_desc
(
conv_desc
,
conv_attr
,
engine
);
conv_desc
,
conv_attr
,
engine
);
...
...
paddle/fluid/operators/conv_op.cc
浏览文件 @
049c9c7d
...
@@ -132,6 +132,11 @@ void Conv2DOpMaker::Make() {
...
@@ -132,6 +132,11 @@ void Conv2DOpMaker::Make() {
"(Tensor) The output tensor of convolution operator. "
"(Tensor) The output tensor of convolution operator. "
"The format of output tensor is also NCHW."
)
"The format of output tensor is also NCHW."
)
.
Reuse
(
"Input"
);
.
Reuse
(
"Input"
);
AddInput
(
"ResidualData"
,
"(Tensor) Tensor with residual data "
"to which convolution output will be added."
"Used with fuse_residual_connection fusion."
)
.
AsDispensable
();
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
AddAttr
<
std
::
vector
<
int
>>
(
"strides"
,
"(vector<int> default:{1, 1}), the "
"(vector<int> default:{1, 1}), the "
"strides(h_stride, w_stride) of "
"strides(h_stride, w_stride) of "
...
@@ -164,10 +169,10 @@ void Conv2DOpMaker::Make() {
...
@@ -164,10 +169,10 @@ void Conv2DOpMaker::Make() {
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"fuse_relu"
,
"(bool, default false) Only used in mkldnn kernel"
)
AddAttr
<
bool
>
(
"fuse_relu"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"fuse_
eltwise
"
,
AddAttr
<
bool
>
(
"fuse_
residual_connection
"
,
"(bool, default false) Only used in mkldnn kernel. Used "
"(bool, default false) Only used in mkldnn kernel. Used "
"whenever convolution output is
connected via skip connection
"
"whenever convolution output is
as an input to residual
"
"
to a previous layer
."
)
"
connection
."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
AddAttr
<
std
::
string
>
(
"data_format"
,
"data_format"
,
...
...
paddle/fluid/operators/detection/CMakeLists.txt
浏览文件 @
049c9c7d
...
@@ -20,7 +20,7 @@ detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu)
...
@@ -20,7 +20,7 @@ detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu)
detection_library
(
iou_similarity_op SRCS iou_similarity_op.cc
detection_library
(
iou_similarity_op SRCS iou_similarity_op.cc
iou_similarity_op.cu
)
iou_similarity_op.cu
)
detection_library
(
mine_hard_examples_op SRCS mine_hard_examples_op.cc
)
detection_library
(
mine_hard_examples_op SRCS mine_hard_examples_op.cc
)
detection_library
(
multiclass_nms_op SRCS multiclass_nms_op.cc
)
detection_library
(
multiclass_nms_op SRCS multiclass_nms_op.cc
poly_util.cc gpc.cc
)
detection_library
(
prior_box_op SRCS prior_box_op.cc prior_box_op.cu
)
detection_library
(
prior_box_op SRCS prior_box_op.cc prior_box_op.cu
)
detection_library
(
anchor_generator_op SRCS anchor_generator_op.cc
detection_library
(
anchor_generator_op SRCS anchor_generator_op.cc
anchor_generator_op.cu
)
anchor_generator_op.cu
)
...
...
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
049c9c7d
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
049c9c7d
...
@@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <cmath>
#include <cstring>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/
framework/var_type
.h"
#include "paddle/fluid/
operators/detail/safe_ref
.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
...
@@ -25,21 +27,17 @@ namespace operators {
...
@@ -25,21 +27,17 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
struct
AppendProposalsFunctor
{
static
const
double
kBBoxClipDefault
=
std
::
log
(
1000.0
/
16.0
);
LoDTensor
*
out_
;
int64_t
offset_
;
Tensor
*
to_add_
;
AppendProposalsFunctor
(
LoDTensor
*
out
,
int64_t
offset
,
Tensor
*
to_add
)
static
void
AppendProposals
(
Tensor
*
dst
,
int64_t
offset
,
const
Tensor
&
src
)
{
:
out_
(
out
),
offset_
(
offset
),
to_add_
(
to_add
)
{}
auto
*
out_data
=
dst
->
data
<
void
>
();
auto
*
to_add_data
=
src
.
data
<
void
>
();
template
<
typename
T
>
size_t
size_of_t
=
framework
::
SizeOfType
(
src
.
type
());
void
apply
()
const
{
offset
*=
size_of_t
;
auto
*
out_data
=
out_
->
data
<
T
>
();
std
::
memcpy
(
auto
*
to_add_data
=
to_add_
->
data
<
T
>
();
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
out_data
)
+
offset
),
memcpy
(
out_data
+
offset_
,
to_add_data
,
to_add_
->
numel
()
*
sizeof
(
T
));
to_add_data
,
src
.
numel
()
*
size_of_t
);
}
}
};
class
GenerateProposalsOp
:
public
framework
::
OperatorWithKernel
{
class
GenerateProposalsOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -75,8 +73,9 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
...
@@ -75,8 +73,9 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
};
};
template
<
class
T
>
template
<
class
T
>
void
BoxCoder
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
all_anchors
,
static
inline
void
BoxCoder
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
bbox_deltas
,
Tensor
*
variances
,
Tensor
*
proposals
)
{
Tensor
*
all_anchors
,
Tensor
*
bbox_deltas
,
Tensor
*
variances
,
Tensor
*
proposals
)
{
T
*
proposals_data
=
proposals
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
proposals_data
=
proposals
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
row
=
all_anchors
->
dims
()[
0
];
int64_t
row
=
all_anchors
->
dims
()[
0
];
...
@@ -108,11 +107,11 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
...
@@ -108,11 +107,11 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
anchor_center_y
;
anchor_center_y
;
bbox_width
=
std
::
exp
(
std
::
min
<
T
>
(
variances_data
[
i
*
len
+
2
]
*
bbox_width
=
std
::
exp
(
std
::
min
<
T
>
(
variances_data
[
i
*
len
+
2
]
*
bbox_deltas_data
[
i
*
len
+
2
],
bbox_deltas_data
[
i
*
len
+
2
],
std
::
log
(
1000.0
/
16.0
)
))
*
kBBoxClipDefault
))
*
anchor_width
;
anchor_width
;
bbox_height
=
std
::
exp
(
std
::
min
<
T
>
(
variances_data
[
i
*
len
+
3
]
*
bbox_height
=
std
::
exp
(
std
::
min
<
T
>
(
variances_data
[
i
*
len
+
3
]
*
bbox_deltas_data
[
i
*
len
+
3
],
bbox_deltas_data
[
i
*
len
+
3
],
std
::
log
(
1000.0
/
16.0
)
))
*
kBBoxClipDefault
))
*
anchor_height
;
anchor_height
;
}
else
{
}
else
{
bbox_center_x
=
bbox_center_x
=
...
@@ -120,10 +119,10 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
...
@@ -120,10 +119,10 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
bbox_center_y
=
bbox_center_y
=
bbox_deltas_data
[
i
*
len
+
1
]
*
anchor_height
+
anchor_center_y
;
bbox_deltas_data
[
i
*
len
+
1
]
*
anchor_height
+
anchor_center_y
;
bbox_width
=
std
::
exp
(
std
::
min
<
T
>
(
bbox_deltas_data
[
i
*
len
+
2
],
bbox_width
=
std
::
exp
(
std
::
min
<
T
>
(
bbox_deltas_data
[
i
*
len
+
2
],
std
::
log
(
1000.0
/
16.0
)
))
*
kBBoxClipDefault
))
*
anchor_width
;
anchor_width
;
bbox_height
=
std
::
exp
(
std
::
min
<
T
>
(
bbox_deltas_data
[
i
*
len
+
3
],
bbox_height
=
std
::
exp
(
std
::
min
<
T
>
(
bbox_deltas_data
[
i
*
len
+
3
],
std
::
log
(
1000.0
/
16.0
)
))
*
kBBoxClipDefault
))
*
anchor_height
;
anchor_height
;
}
}
...
@@ -136,30 +135,32 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
...
@@ -136,30 +135,32 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
}
}
template
<
class
T
>
template
<
class
T
>
void
ClipTiledBoxes
(
const
platform
::
DeviceContext
&
ctx
,
const
Tensor
&
im_info
,
static
inline
void
ClipTiledBoxes
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
boxes
)
{
const
Tensor
&
im_info
,
Tensor
*
boxes
)
{
T
*
boxes_data
=
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
boxes_data
=
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
T
zero
(
0
);
for
(
int64_t
i
=
0
;
i
<
boxes
->
numel
();
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
boxes
->
numel
();
++
i
)
{
if
(
i
%
4
==
0
)
{
if
(
i
%
4
==
0
)
{
boxes_data
[
i
]
=
boxes_data
[
i
]
=
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
1
]
-
1
),
0.0
f
);
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
1
]
-
1
),
zero
);
}
else
if
(
i
%
4
==
1
)
{
}
else
if
(
i
%
4
==
1
)
{
boxes_data
[
i
]
=
boxes_data
[
i
]
=
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
0
]
-
1
),
0.0
f
);
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
0
]
-
1
),
zero
);
}
else
if
(
i
%
4
==
2
)
{
}
else
if
(
i
%
4
==
2
)
{
boxes_data
[
i
]
=
boxes_data
[
i
]
=
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
1
]
-
1
),
0.0
f
);
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
1
]
-
1
),
zero
);
}
else
{
}
else
{
boxes_data
[
i
]
=
boxes_data
[
i
]
=
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
0
]
-
1
),
0.0
f
);
std
::
max
(
std
::
min
(
boxes_data
[
i
],
im_info_data
[
0
]
-
1
),
zero
);
}
}
}
}
}
}
template
<
class
T
>
template
<
class
T
>
void
FilterBoxes
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
boxes
,
static
inline
void
FilterBoxes
(
const
platform
::
DeviceContext
&
ctx
,
float
min_size
,
const
Tensor
&
im_info
,
Tensor
*
keep
)
{
Tensor
*
boxes
,
float
min_size
,
const
Tensor
&
im_info
,
Tensor
*
keep
)
{
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
T
*
boxes_data
=
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
boxes_data
=
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
im_scale
=
im_info_data
[
2
];
T
im_scale
=
im_info_data
[
2
];
...
@@ -185,24 +186,24 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
...
@@ -185,24 +186,24 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
keep
->
Resize
({
keep_len
});
keep
->
Resize
({
keep_len
});
}
}
bool
SortScorePairDescend
(
const
std
::
pair
<
float
,
int
>
&
pair1
,
const
std
::
pair
<
float
,
int
>
&
pair2
)
{
return
pair1
.
first
>
pair2
.
first
;
}
template
<
class
T
>
template
<
class
T
>
void
GetMaxScoreIndex
(
const
std
::
vector
<
T
>
&
scores
,
static
inline
std
::
vector
<
std
::
pair
<
T
,
int
>>
GetSortedScoreIndex
(
std
::
vector
<
std
::
pair
<
T
,
int
>>
*
sorted_indices
)
{
const
std
::
vector
<
T
>
&
scores
)
{
std
::
vector
<
std
::
pair
<
T
,
int
>>
sorted_indices
;
sorted_indices
.
reserve
(
scores
.
size
());
for
(
size_t
i
=
0
;
i
<
scores
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
scores
.
size
();
++
i
)
{
sorted_indices
->
push_back
(
std
::
make_pair
(
scores
[
i
],
i
)
);
sorted_indices
.
emplace_back
(
scores
[
i
],
i
);
}
}
// Sort the score pair according to the scores in descending order
// Sort the score pair according to the scores in descending order
std
::
stable_sort
(
sorted_indices
->
begin
(),
sorted_indices
->
end
(),
std
::
stable_sort
(
sorted_indices
.
begin
(),
sorted_indices
.
end
(),
SortScorePairDescend
);
[](
const
std
::
pair
<
T
,
int
>
&
a
,
const
std
::
pair
<
T
,
int
>
&
b
)
{
return
a
.
first
<
b
.
first
;
});
return
sorted_indices
;
}
}
template
<
class
T
>
template
<
class
T
>
T
BBoxArea
(
const
T
*
box
,
const
bool
normalized
)
{
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
normalized
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are is invalid
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
// (e.g. xmax < xmin or ymax < ymin), return 0.
...
@@ -220,7 +221,7 @@ T BBoxArea(const T *box, const bool normalized) {
...
@@ -220,7 +221,7 @@ T BBoxArea(const T *box, const bool normalized) {
}
}
template
<
class
T
>
template
<
class
T
>
T
JaccardOverlap
(
const
T
*
box1
,
const
T
*
box2
,
const
bool
normalized
)
{
static
inline
T
JaccardOverlap
(
const
T
*
box1
,
const
T
*
box2
,
bool
normalized
)
{
if
(
box2
[
0
]
>
box1
[
2
]
||
box2
[
2
]
<
box1
[
0
]
||
box2
[
1
]
>
box1
[
3
]
||
if
(
box2
[
0
]
>
box1
[
2
]
||
box2
[
2
]
<
box1
[
0
]
||
box2
[
1
]
>
box1
[
3
]
||
box2
[
3
]
<
box1
[
1
])
{
box2
[
3
]
<
box1
[
1
])
{
return
static_cast
<
T
>
(
0.
);
return
static_cast
<
T
>
(
0.
);
...
@@ -229,8 +230,8 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
...
@@ -229,8 +230,8 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
const
T
inter_ymin
=
std
::
max
(
box1
[
1
],
box2
[
1
]);
const
T
inter_ymin
=
std
::
max
(
box1
[
1
],
box2
[
1
]);
const
T
inter_xmax
=
std
::
min
(
box1
[
2
],
box2
[
2
]);
const
T
inter_xmax
=
std
::
min
(
box1
[
2
],
box2
[
2
]);
const
T
inter_ymax
=
std
::
min
(
box1
[
3
],
box2
[
3
]);
const
T
inter_ymax
=
std
::
min
(
box1
[
3
],
box2
[
3
]);
const
T
inter_w
=
std
::
max
(
0.0
f
,
inter_xmax
-
inter_xmin
+
1
);
const
T
inter_w
=
std
::
max
(
T
(
0
)
,
inter_xmax
-
inter_xmin
+
1
);
const
T
inter_h
=
std
::
max
(
0.0
f
,
inter_ymax
-
inter_ymin
+
1
);
const
T
inter_h
=
std
::
max
(
T
(
0
)
,
inter_ymax
-
inter_ymin
+
1
);
const
T
inter_area
=
inter_w
*
inter_h
;
const
T
inter_area
=
inter_w
*
inter_h
;
const
T
bbox1_area
=
BBoxArea
<
T
>
(
box1
,
normalized
);
const
T
bbox1_area
=
BBoxArea
<
T
>
(
box1
,
normalized
);
const
T
bbox2_area
=
BBoxArea
<
T
>
(
box2
,
normalized
);
const
T
bbox2_area
=
BBoxArea
<
T
>
(
box2
,
normalized
);
...
@@ -238,9 +239,21 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
...
@@ -238,9 +239,21 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
}
}
}
}
template
<
typename
T
>
static
inline
Tensor
VectorToTensor
(
const
std
::
vector
<
T
>
&
selected_indices
,
int
selected_num
)
{
Tensor
keep_nms
;
keep_nms
.
Resize
({
selected_num
});
auto
*
keep_data
=
keep_nms
.
mutable_data
<
T
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
selected_num
;
++
i
)
{
keep_data
[
i
]
=
selected_indices
[
i
];
}
return
keep_nms
;
}
template
<
class
T
>
template
<
class
T
>
Tensor
NMS
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
bbox
,
Tensor
*
scores
,
static
inline
Tensor
NMS
(
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
bbox
,
const
T
nms_threshold
,
const
float
eta
)
{
Tensor
*
scores
,
T
nms_threshold
,
float
eta
)
{
PADDLE_ENFORCE_NOT_NULL
(
bbox
);
PADDLE_ENFORCE_NOT_NULL
(
bbox
);
int64_t
num_boxes
=
bbox
->
dims
()[
0
];
int64_t
num_boxes
=
bbox
->
dims
()[
0
];
// 4: [xmin ymin xmax ymax]
// 4: [xmin ymin xmax ymax]
...
@@ -248,20 +261,18 @@ Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
...
@@ -248,20 +261,18 @@ Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
std
::
vector
<
T
>
scores_data
(
num_boxes
);
std
::
vector
<
T
>
scores_data
(
num_boxes
);
std
::
copy_n
(
scores
->
data
<
T
>
(),
num_boxes
,
scores_data
.
begin
());
std
::
copy_n
(
scores
->
data
<
T
>
(),
num_boxes
,
scores_data
.
begin
());
std
::
vector
<
std
::
pair
<
T
,
int
>>
sorted_indices
;
std
::
vector
<
std
::
pair
<
T
,
int
>>
sorted_indices
=
GetMaxScoreIndex
<
T
>
(
scores_data
,
&
sorted_indices
);
GetSortedScoreIndex
<
T
>
(
scores_data
);
std
::
vector
<
int
>
selected_indices
;
std
::
vector
<
int
>
selected_indices
;
int
selected_num
=
0
;
int
selected_num
=
0
;
T
adaptive_threshold
=
nms_threshold
;
T
adaptive_threshold
=
nms_threshold
;
const
T
*
bbox_data
=
bbox
->
data
<
T
>
();
const
T
*
bbox_data
=
bbox
->
data
<
T
>
();
bool
flag
;
while
(
sorted_indices
.
size
()
!=
0
)
{
while
(
sorted_indices
.
size
()
!=
0
)
{
int
idx
=
sorted_indices
.
front
().
second
;
int
idx
=
sorted_indices
.
back
().
second
;
flag
=
true
;
bool
flag
=
true
;
for
(
size_t
k
=
0
;
k
<
selected_indices
.
size
();
++
k
)
{
for
(
int
kept_idx
:
selected_indices
)
{
if
(
flag
)
{
if
(
flag
)
{
const
int
kept_idx
=
selected_indices
[
k
];
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
false
);
bbox_data
+
kept_idx
*
box_size
,
false
);
flag
=
(
overlap
<=
adaptive_threshold
);
flag
=
(
overlap
<=
adaptive_threshold
);
...
@@ -271,32 +282,29 @@ Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
...
@@ -271,32 +282,29 @@ Tensor NMS(const platform::DeviceContext &ctx, Tensor *bbox, Tensor *scores,
}
}
if
(
flag
)
{
if
(
flag
)
{
selected_indices
.
push_back
(
idx
);
selected_indices
.
push_back
(
idx
);
selected_num
++
;
++
selected_num
;
}
}
sorted_indices
.
erase
(
sorted_indices
.
begin
());
sorted_indices
.
erase
(
sorted_indices
.
end
());
if
(
flag
&&
eta
<
1
&&
adaptive_threshold
>
0.5
)
{
if
(
flag
&&
eta
<
1
&&
adaptive_threshold
>
0.5
)
{
adaptive_threshold
*=
eta
;
adaptive_threshold
*=
eta
;
}
}
}
}
Tensor
keep_nms
;
return
VectorToTensor
(
selected_indices
,
selected_num
);
keep_nms
.
Resize
({
selected_num
});
int
*
keep_data
=
keep_nms
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
for
(
int
i
=
0
;
i
<
selected_num
;
++
i
)
{
keep_data
[
i
]
=
selected_indices
[
i
];
}
return
keep_nms
;
}
}
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
T
>
class
GenerateProposalsKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GenerateProposalsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
scores
=
context
.
Input
<
Tensor
>
(
"Scores"
);
auto
*
scores
=
context
.
Input
<
Tensor
>
(
"Scores"
);
auto
*
bbox_deltas
=
context
.
Input
<
Tensor
>
(
"BboxDeltas"
);
auto
*
bbox_deltas
=
context
.
Input
<
Tensor
>
(
"BboxDeltas"
);
auto
*
im_info
=
context
.
Input
<
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
Tensor
>
(
"ImInfo"
);
auto
*
anchors
=
context
.
Input
<
Tensor
>
(
"Anchors"
);
auto
anchors
=
detail
::
Ref
(
context
.
Input
<
Tensor
>
(
"Anchors"
),
auto
*
variances
=
context
.
Input
<
Tensor
>
(
"Variances"
);
"Cannot find input Anchors(%s) in scope"
,
context
.
Inputs
(
"Anchors"
)[
0
]);
auto
variances
=
detail
::
Ref
(
context
.
Input
<
Tensor
>
(
"Variances"
),
"Cannot find input Variances(%s) in scope"
,
context
.
Inputs
(
"Variances"
)[
0
]);
auto
*
rpn_rois
=
context
.
Output
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_rois
=
context
.
Output
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoDTensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoDTensor
>
(
"RpnRoiProbs"
);
...
@@ -307,15 +315,16 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -307,15 +315,16 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
auto
scores_dim
=
scores
->
dims
();
auto
&
scores_dim
=
scores
->
dims
();
int64_t
num
=
scores_dim
[
0
];
int64_t
num
=
scores_dim
[
0
];
int64_t
c_score
=
scores_dim
[
1
];
int64_t
c_score
=
scores_dim
[
1
];
int64_t
h_score
=
scores_dim
[
2
];
int64_t
h_score
=
scores_dim
[
2
];
int64_t
w_score
=
scores_dim
[
3
];
int64_t
w_score
=
scores_dim
[
3
];
auto
bbox_dim
=
bbox_deltas
->
dims
();
auto
&
bbox_dim
=
bbox_deltas
->
dims
();
int64_t
c_bbox
=
bbox_dim
[
1
];
int64_t
c_bbox
=
bbox_dim
[
1
];
int64_t
h_bbox
=
bbox_dim
[
2
];
int64_t
h_bbox
=
bbox_dim
[
2
];
int64_t
w_bbox
=
bbox_dim
[
3
];
int64_t
w_bbox
=
bbox_dim
[
3
];
...
@@ -330,17 +339,17 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -330,17 +339,17 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
scores_swap
.
mutable_data
<
T
>
({
num
,
h_score
,
w_score
,
c_score
},
scores_swap
.
mutable_data
<
T
>
({
num
,
h_score
,
w_score
,
c_score
},
dev_ctx
.
GetPlace
());
dev_ctx
.
GetPlace
());
math
::
Transpose
<
DeviceContext
,
T
,
4
>
trans
;
math
::
Transpose
<
platform
::
CPU
DeviceContext
,
T
,
4
>
trans
;
std
::
vector
<
int
>
axis
=
{
0
,
2
,
3
,
1
};
std
::
vector
<
int
>
axis
=
{
0
,
2
,
3
,
1
};
trans
(
dev_ctx
,
*
bbox_deltas
,
&
bbox_deltas_swap
,
axis
);
trans
(
dev_ctx
,
*
bbox_deltas
,
&
bbox_deltas_swap
,
axis
);
trans
(
dev_ctx
,
*
scores
,
&
scores_swap
,
axis
);
trans
(
dev_ctx
,
*
scores
,
&
scores_swap
,
axis
);
framework
::
LoD
lod
;
framework
::
LoD
lod
;
std
::
vector
<
size_t
>
lod0
(
1
,
0
);
lod
.
resize
(
1
);
Tensor
*
anchor
=
const_cast
<
framework
::
Tensor
*>
(
anchors
)
;
auto
&
lod0
=
lod
[
0
]
;
anchor
->
Resize
({
anchors
->
numel
()
/
4
,
4
}
);
lod0
.
push_back
(
0
);
Tensor
*
var
=
const_cast
<
framework
::
Tensor
*>
(
variances
);
anchors
.
Resize
({
anchors
.
numel
()
/
4
,
4
}
);
var
->
Resize
({
var
->
numel
()
/
4
,
4
});
var
iances
.
Resize
({
variances
.
numel
()
/
4
,
4
});
int64_t
num_proposals
=
0
;
int64_t
num_proposals
=
0
;
for
(
int64_t
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
num
;
++
i
)
{
...
@@ -352,24 +361,17 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -352,24 +361,17 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
ProposalForOneImage
(
dev_ctx
,
im_info_slice
,
*
anchor
,
*
var
,
ProposalForOneImage
(
dev_ctx
,
im_info_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
Tensor
proposals
=
tensor_pair
.
first
;
Tensor
&
proposals
=
tensor_pair
.
first
;
Tensor
scores
=
tensor_pair
.
second
;
Tensor
&
scores
=
tensor_pair
.
second
;
framework
::
VisitDataType
(
framework
::
ToDataType
(
rpn_rois
->
type
()),
AppendProposalsFunctor
(
rpn_rois
,
4
*
num_proposals
,
&
proposals
));
framework
::
VisitDataType
(
framework
::
ToDataType
(
rpn_roi_probs
->
type
()),
AppendProposalsFunctor
(
rpn_roi_probs
,
num_proposals
,
&
scores
));
AppendProposals
(
rpn_rois
,
4
*
num_proposals
,
proposals
);
AppendProposals
(
rpn_roi_probs
,
num_proposals
,
scores
);
num_proposals
+=
proposals
.
dims
()[
0
];
num_proposals
+=
proposals
.
dims
()[
0
];
lod0
.
emplace
_back
(
num_proposals
);
lod0
.
push
_back
(
num_proposals
);
}
}
lod
.
emplace_back
(
lod0
);
rpn_rois
->
set_lod
(
lod
);
rpn_rois
->
set_lod
(
lod
);
rpn_roi_probs
->
set_lod
(
lod
);
rpn_roi_probs
->
set_lod
(
lod
);
rpn_rois
->
Resize
({
num_proposals
,
4
});
rpn_rois
->
Resize
({
num_proposals
,
4
});
...
@@ -377,7 +379,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -377,7 +379,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
}
}
std
::
pair
<
Tensor
,
Tensor
>
ProposalForOneImage
(
std
::
pair
<
Tensor
,
Tensor
>
ProposalForOneImage
(
const
DeviceContext
&
ctx
,
const
Tensor
&
im_info_slice
,
const
platform
::
CPU
DeviceContext
&
ctx
,
const
Tensor
&
im_info_slice
,
const
Tensor
&
anchors
,
const
Tensor
&
variances
,
const
Tensor
&
anchors
,
const
Tensor
&
variances
,
const
Tensor
&
bbox_deltas_slice
,
// [M, 4]
const
Tensor
&
bbox_deltas_slice
,
// [M, 4]
const
Tensor
&
scores_slice
,
// [N, 1]
const
Tensor
&
scores_slice
,
// [N, 1]
...
@@ -392,8 +394,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -392,8 +394,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
for
(
int
i
=
0
;
i
<
scores_slice
.
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
scores_slice
.
numel
();
++
i
)
{
index
[
i
]
=
i
;
index
[
i
]
=
i
;
}
}
std
::
function
<
bool
(
const
int64_t
&
,
const
int64_t
&
)
>
compare
=
auto
compare
=
[
scores_data
](
const
int64_t
&
i
,
const
int64_t
&
j
)
{
[
scores_data
](
const
int64_t
&
i
,
const
int64_t
&
j
)
{
return
scores_data
[
i
]
>
scores_data
[
j
];
return
scores_data
[
i
]
>
scores_data
[
j
];
};
};
...
@@ -452,33 +453,45 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -452,33 +453,45 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
class
GenerateProposalsOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
GenerateProposalsOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Scores"
,
"The scores of anchors should be foreground."
);
AddInput
(
"Scores"
,
AddInput
(
"BboxDeltas"
,
"bbox_deltas."
);
"(Tensor) The scores from conv is in shape (N, A, H, W), "
AddInput
(
"ImInfo"
,
"Information for image reshape."
);
"N is batch size, A is number of anchors, "
AddInput
(
"Anchors"
,
"All anchors."
);
"H and W are height and width of the feature map"
);
AddInput
(
"Variances"
,
" variances"
);
AddInput
(
"BboxDeltas"
,
"(Tensor) Bounding box deltas from conv is in "
AddOutput
(
"RpnRois"
,
"Anchors."
);
"shape (N, 4*A, H, W)."
);
AddOutput
(
"RpnRoiProbs"
,
"Anchors."
);
AddInput
(
"ImInfo"
,
AddAttr
<
int
>
(
"pre_nms_topN"
,
"pre_nms_topN"
);
"(Tensor) Information for image reshape is in shape (N, 3), "
AddAttr
<
int
>
(
"post_nms_topN"
,
"post_nms_topN"
);
"in format (height, width, scale)"
);
AddAttr
<
float
>
(
"nms_thresh"
,
"nms_thres"
);
AddInput
(
"Anchors"
,
AddAttr
<
float
>
(
"min_size"
,
"min size"
);
"(Tensor) Bounding box anchors from anchor_generator_op "
"is in shape (A, H, W, 4)."
);
AddInput
(
"Variances"
,
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
AddOutput
(
"RpnRois"
,
"(LoDTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
"(LoDTensor) Scores of proposals with shape (rois_num, 1)."
);
AddAttr
<
int
>
(
"pre_nms_topN"
,
"Number of top scoring RPN proposals to keep before "
"applying NMS."
);
AddAttr
<
int
>
(
"post_nms_topN"
,
"Number of top scoring RPN proposals to keep after "
"applying NMS"
);
AddAttr
<
float
>
(
"nms_thresh"
,
"NMS threshold used on RPN proposals."
);
AddAttr
<
float
>
(
"min_size"
,
"Proposal height and width both need to be greater "
"than this min_size."
);
AddAttr
<
float
>
(
"eta"
,
"The parameter for adaptive NMS."
);
AddAttr
<
float
>
(
"eta"
,
"The parameter for adaptive NMS."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Generate Proposals OP
This operator Generate bounding box proposals for Faster RCNN.
The propoasls are generated for a list of images based on image
This operator proposes rois according to each box with their probability to be a foreground object and
score 'Scores', bounding box regression result 'BboxDeltas' as
the box can be calculated by anchors. Bbox_deltais and scores are the output of RPN. Final proposals
well as predefined bounding box shapes 'anchors'. Greedy
could be used to train detection net.
non-maximum suppression is applied to generate the final bounding
boxes.
Scores is the probability for each box to be an object. In format of (N, A, H, W) where N is batch size, A is number
of anchors, H and W are height and width of the feature map.
BboxDeltas is the differece between predicted box locatoin and anchor location. In format of (N, 4*A, H, W)
For generating proposals, this operator transposes and resizes scores and bbox_deltas in size of (H*W*A, 1) and (H*W*A, 4) and
calculate box locations as proposals candidates. Then clip boxes to image and remove predicted boxes with small area.
Finally, apply nms to get final proposals as output.
)DOC"
);
)DOC"
);
}
}
};
};
...
@@ -490,6 +503,5 @@ namespace ops = paddle::operators;
...
@@ -490,6 +503,5 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
generate_proposals
,
ops
::
GenerateProposalsOp
,
REGISTER_OPERATOR
(
generate_proposals
,
ops
::
GenerateProposalsOp
,
ops
::
GenerateProposalsOpMaker
,
ops
::
GenerateProposalsOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
);
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
generate_proposals
,
ops
::
GenerateProposalsKernel
<
float
>
,
generate_proposals
,
ops
::
GenerateProposalsKernel
<
double
>
);
ops
::
GenerateProposalsKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
049c9c7d
...
@@ -16,10 +16,13 @@ limitations under the License. */
...
@@ -16,10 +16,13 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "cub/cub.cuh"
#include "cub/cub.cuh"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/gather.cu.h"
#include "paddle/fluid/operators/gather.cu.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -36,36 +39,38 @@ namespace {
...
@@ -36,36 +39,38 @@ namespace {
int
const
kThreadsPerBlock
=
sizeof
(
uint64_t
)
*
8
;
int
const
kThreadsPerBlock
=
sizeof
(
uint64_t
)
*
8
;
template
<
typename
T
>
static
const
double
kBBoxClipDefault
=
std
::
log
(
1000.0
/
16.0
);
__global__
void
RangeInitKernel
(
const
T
start
,
const
T
delta
,
const
int
size
,
T
*
out
)
{
struct
RangeInitFunctor
{
CUDA_1D_KERNEL_LOOP
(
i
,
size
)
{
out
[
i
]
=
start
+
i
*
delta
;
}
int
start_
;
}
int
delta_
;
int
*
out_
;
__device__
void
operator
()(
size_t
i
)
{
out_
[
i
]
=
start_
+
i
*
delta_
;
}
};
template
<
typename
T
>
template
<
typename
T
>
void
SortDescending
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
value
,
static
void
SortDescending
(
const
platform
::
CUDADeviceContext
&
ctx
,
Tensor
*
value_out
,
Tensor
*
index_out
)
{
const
Tensor
&
value
,
Tensor
*
value_out
,
int
num
=
value
.
numel
();
Tensor
*
index_out
)
{
int
num
=
static_cast
<
int
>
(
value
.
numel
());
Tensor
index_in_t
;
Tensor
index_in_t
;
int
*
idx_in
=
index_in_t
.
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
int
*
idx_in
=
index_in_t
.
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
int
block
=
512
;
platform
::
ForRange
<
platform
::
CUDADeviceContext
>
for_range
(
ctx
,
num
)
;
auto
stream
=
ctx
.
stream
(
);
for_range
(
RangeInitFunctor
{
0
,
1
,
idx_in
}
);
RangeInitKernel
<<<
DIVUP
(
num
,
block
),
block
,
0
,
stream
>>>
(
0
,
1
,
num
,
idx_in
);
int
*
idx_out
=
index_out
->
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
int
*
idx_out
=
index_out
->
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
const
T
*
keys_in
=
value
.
data
<
T
>
();
const
T
*
keys_in
=
value
.
data
<
T
>
();
T
*
keys_out
=
value_out
->
mutable_data
<
T
>
({
num
},
ctx
.
GetPlace
());
T
*
keys_out
=
value_out
->
mutable_data
<
T
>
({
num
},
ctx
.
GetPlace
());
// Determine temporary device storage requirements
// Determine temporary device storage requirements
void
*
d_temp_storage
=
NULL
;
size_t
temp_storage_bytes
=
0
;
size_t
temp_storage_bytes
=
0
;
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
d_temp_storage
,
temp_storage_bytes
,
keys_in
,
keys_out
,
idx_in
,
idx_out
,
nullptr
,
temp_storage_bytes
,
keys_in
,
keys_out
,
idx_in
,
idx_out
,
num
);
num
);
// Allocate temporary storage
// Allocate temporary storage
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
);
void
*
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
);
// Run sorting operation
// Run sorting operation
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
...
@@ -76,22 +81,27 @@ void SortDescending(const platform::CUDADeviceContext &ctx, const Tensor &value,
...
@@ -76,22 +81,27 @@ void SortDescending(const platform::CUDADeviceContext &ctx, const Tensor &value,
}
}
template
<
typename
T
>
template
<
typename
T
>
__device__
__forceinline__
T
Min
(
T
x
,
T
y
)
{
struct
BoxDecodeAndClipFunctor
{
return
x
<
y
?
x
:
y
;
const
T
*
anchor
;
}
const
T
*
deltas
;
const
T
*
var
;
template
<
typename
T
>
const
int
*
index
;
__device__
__forceinline__
T
Max
(
T
x
,
T
y
)
{
const
T
*
im_info
;
return
x
>
y
?
x
:
y
;
}
T
*
proposals
;
template
<
typename
T
>
BoxDecodeAndClipFunctor
(
const
T
*
anchor
,
const
T
*
deltas
,
const
T
*
var
,
__global__
void
BoxDecodeAndClipKernel
(
const
T
*
anchor
,
const
T
*
deltas
,
const
int
*
index
,
const
T
*
im_info
,
T
*
proposals
)
const
T
*
var
,
const
int
*
index
,
:
anchor
(
anchor
),
const
T
*
im_info
,
const
int
num
,
deltas
(
deltas
),
T
*
proposals
)
{
var
(
var
),
T
kBBoxClipDefault
=
log
(
1000.0
/
16.0
);
index
(
index
),
CUDA_1D_KERNEL_LOOP
(
i
,
num
)
{
im_info
(
im_info
),
proposals
(
proposals
)
{}
T
bbox_clip_default
{
static_cast
<
T
>
(
kBBoxClipDefault
)};
__device__
void
operator
()(
size_t
i
)
{
int
k
=
index
[
i
]
*
4
;
int
k
=
index
[
i
]
*
4
;
T
axmin
=
anchor
[
k
];
T
axmin
=
anchor
[
k
];
T
aymin
=
anchor
[
k
+
1
];
T
aymin
=
anchor
[
k
+
1
];
...
@@ -108,17 +118,17 @@ __global__ void BoxDecodeAndClipKernel(const T *anchor, const T *deltas,
...
@@ -108,17 +118,17 @@ __global__ void BoxDecodeAndClipKernel(const T *anchor, const T *deltas,
T
dxmax
=
deltas
[
k
+
2
];
T
dxmax
=
deltas
[
k
+
2
];
T
dymax
=
deltas
[
k
+
3
];
T
dymax
=
deltas
[
k
+
3
];
T
d_cx
=
0.
,
d_cy
=
0.
,
d_w
=
0.
,
d_h
=
0.
;
T
d_cx
,
d_cy
,
d_w
,
d_h
;
if
(
var
)
{
if
(
var
)
{
d_cx
=
cx
+
dxmin
*
w
*
var
[
k
];
d_cx
=
cx
+
dxmin
*
w
*
var
[
k
];
d_cy
=
cy
+
dymin
*
h
*
var
[
k
+
1
];
d_cy
=
cy
+
dymin
*
h
*
var
[
k
+
1
];
d_w
=
exp
(
Min
<
T
>
(
dxmax
*
var
[
k
+
2
],
kBBoxClipD
efault
))
*
w
;
d_w
=
exp
(
Min
(
dxmax
*
var
[
k
+
2
],
bbox_clip_d
efault
))
*
w
;
d_h
=
exp
(
Min
<
T
>
(
dymax
*
var
[
k
+
3
],
kBBoxClipD
efault
))
*
h
;
d_h
=
exp
(
Min
(
dymax
*
var
[
k
+
3
],
bbox_clip_d
efault
))
*
h
;
}
else
{
}
else
{
d_cx
=
cx
+
dxmin
*
w
;
d_cx
=
cx
+
dxmin
*
w
;
d_cy
=
cy
+
dymin
*
h
;
d_cy
=
cy
+
dymin
*
h
;
d_w
=
exp
(
Min
<
T
>
(
dxmax
,
kBBoxClipD
efault
))
*
w
;
d_w
=
exp
(
Min
(
dxmax
,
bbox_clip_d
efault
))
*
w
;
d_h
=
exp
(
Min
<
T
>
(
dymax
,
kBBoxClipD
efault
))
*
h
;
d_h
=
exp
(
Min
(
dymax
,
bbox_clip_d
efault
))
*
h
;
}
}
T
oxmin
=
d_cx
-
d_w
*
0.5
;
T
oxmin
=
d_cx
-
d_w
*
0.5
;
...
@@ -126,17 +136,21 @@ __global__ void BoxDecodeAndClipKernel(const T *anchor, const T *deltas,
...
@@ -126,17 +136,21 @@ __global__ void BoxDecodeAndClipKernel(const T *anchor, const T *deltas,
T
oxmax
=
d_cx
+
d_w
*
0.5
-
1.
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
1.
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
1.
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
1.
;
proposals
[
i
*
4
]
=
Max
<
T
>
(
Min
<
T
>
(
oxmin
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
]
=
Max
(
Min
(
oxmin
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
<
T
>
(
Min
<
T
>
(
oymin
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
(
Min
(
oymin
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
<
T
>
(
Min
<
T
>
(
oxmax
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
(
Min
(
oxmax
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
<
T
>
(
Min
<
T
>
(
oymax
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
(
Min
(
oymax
,
im_info
[
0
]
-
1.
),
0.
);
}
}
}
__device__
__forceinline__
T
Min
(
T
a
,
T
b
)
const
{
return
a
>
b
?
b
:
a
;
}
__device__
__forceinline__
T
Max
(
T
a
,
T
b
)
const
{
return
a
>
b
?
a
:
b
;
}
};
template
<
typename
T
,
int
BlockSize
>
template
<
typename
T
,
int
BlockSize
>
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
static
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
const
T
min_size
,
const
int
num
,
int
*
keep_
num
,
const
T
min_size
,
const
int
num
,
int
*
keep
)
{
int
*
keep_num
,
int
*
keep
)
{
T
im_h
=
im_info
[
0
];
T
im_h
=
im_info
[
0
];
T
im_w
=
im_info
[
1
];
T
im_w
=
im_info
[
1
];
T
im_scale
=
im_info
[
2
];
T
im_scale
=
im_info
[
2
];
...
@@ -181,7 +195,7 @@ __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
...
@@ -181,7 +195,7 @@ __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
}
}
}
}
__device__
inline
float
IoU
(
const
float
*
a
,
const
float
*
b
)
{
static
__device__
inline
float
IoU
(
const
float
*
a
,
const
float
*
b
)
{
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
width
=
max
(
right
-
left
+
1
,
0.
f
),
height
=
max
(
bottom
-
top
+
1
,
0.
f
);
float
width
=
max
(
right
-
left
+
1
,
0.
f
),
height
=
max
(
bottom
-
top
+
1
,
0.
f
);
...
@@ -191,7 +205,8 @@ __device__ inline float IoU(const float *a, const float *b) {
...
@@ -191,7 +205,8 @@ __device__ inline float IoU(const float *a, const float *b) {
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
}
}
__global__
void
NMSKernel
(
const
int
n_boxes
,
const
float
nms_overlap_thresh
,
static
__global__
void
NMSKernel
(
const
int
n_boxes
,
const
float
nms_overlap_thresh
,
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
)
{
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
)
{
const
int
row_start
=
blockIdx
.
y
;
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
const
int
col_start
=
blockIdx
.
x
;
...
@@ -234,7 +249,7 @@ __global__ void NMSKernel(const int n_boxes, const float nms_overlap_thresh,
...
@@ -234,7 +249,7 @@ __global__ void NMSKernel(const int n_boxes, const float nms_overlap_thresh,
}
}
template
<
typename
T
>
template
<
typename
T
>
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
static
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
Tensor
*
keep_out
)
{
Tensor
*
keep_out
)
{
int
boxes_num
=
proposals
.
dims
()[
0
];
int
boxes_num
=
proposals
.
dims
()[
0
];
...
@@ -247,13 +262,10 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
...
@@ -247,13 +262,10 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
const
T
*
boxes
=
proposals
.
data
<
T
>
();
const
T
*
boxes
=
proposals
.
data
<
T
>
();
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
int
size_bytes
=
boxes_num
*
col_blocks
*
sizeof
(
uint64_t
);
framework
::
Vector
<
uint64_t
>
mask
(
boxes_num
*
col_blocks
);
uint64_t
*
d_mask
=
NMSKernel
<<<
blocks
,
threads
>>>
(
reinterpret_cast
<
uint64_t
*>
(
memory
::
Alloc
(
place
,
size_bytes
));
boxes_num
,
nms_threshold
,
boxes
,
NMSKernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_threshold
,
boxes
,
d_mask
);
mask
.
CUDAMutableData
(
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
())));
uint64_t
*
h_mask
=
reinterpret_cast
<
uint64_t
*>
(
memory
::
Alloc
(
platform
::
CPUPlace
(),
size_bytes
));
memory
::
Copy
(
platform
::
CPUPlace
(),
h_mask
,
place
,
d_mask
,
size_bytes
,
0
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
...
@@ -267,7 +279,7 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
...
@@ -267,7 +279,7 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
if
(
!
(
remv
[
nblock
]
&
(
1ULL
<<
inblock
)))
{
if
(
!
(
remv
[
nblock
]
&
(
1ULL
<<
inblock
)))
{
++
num_to_keep
;
++
num_to_keep
;
keep_vec
.
push_back
(
i
);
keep_vec
.
push_back
(
i
);
uint64_t
*
p
=
&
h_
mask
[
0
]
+
i
*
col_blocks
;
uint64_t
*
p
=
&
mask
[
0
]
+
i
*
col_blocks
;
for
(
int
j
=
nblock
;
j
<
col_blocks
;
j
++
)
{
for
(
int
j
=
nblock
;
j
<
col_blocks
;
j
++
)
{
remv
[
j
]
|=
p
[
j
];
remv
[
j
]
|=
p
[
j
];
}
}
...
@@ -276,12 +288,10 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
...
@@ -276,12 +288,10 @@ void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
int
*
keep
=
keep_out
->
mutable_data
<
int
>
({
num_to_keep
},
ctx
.
GetPlace
());
int
*
keep
=
keep_out
->
mutable_data
<
int
>
({
num_to_keep
},
ctx
.
GetPlace
());
memory
::
Copy
(
place
,
keep
,
platform
::
CPUPlace
(),
keep_vec
.
data
(),
memory
::
Copy
(
place
,
keep
,
platform
::
CPUPlace
(),
keep_vec
.
data
(),
sizeof
(
int
)
*
num_to_keep
,
0
);
sizeof
(
int
)
*
num_to_keep
,
0
);
memory
::
Free
(
place
,
d_mask
);
memory
::
Free
(
platform
::
CPUPlace
(),
h_mask
);
}
}
template
<
typename
T
>
template
<
typename
T
>
std
::
pair
<
Tensor
,
Tensor
>
ProposalForOneImage
(
st
atic
st
d
::
pair
<
Tensor
,
Tensor
>
ProposalForOneImage
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
im_info
,
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
im_info
,
const
Tensor
&
anchors
,
const
Tensor
&
variances
,
const
Tensor
&
anchors
,
const
Tensor
&
variances
,
const
Tensor
&
bbox_deltas
,
// [M, 4]
const
Tensor
&
bbox_deltas
,
// [M, 4]
...
@@ -300,18 +310,20 @@ std::pair<Tensor, Tensor> ProposalForOneImage(
...
@@ -300,18 +310,20 @@ std::pair<Tensor, Tensor> ProposalForOneImage(
// 2. box decode and clipping
// 2. box decode and clipping
Tensor
proposals
;
Tensor
proposals
;
proposals
.
mutable_data
<
T
>
({
pre_nms_num
,
4
},
ctx
.
GetPlace
());
proposals
.
mutable_data
<
T
>
({
pre_nms_num
,
4
},
ctx
.
GetPlace
());
int
block
=
512
;
auto
stream
=
ctx
.
stream
();
{
BoxDecodeAndClipKernel
<
T
><<<
DIVUP
(
pre_nms_num
,
block
),
block
,
0
,
stream
>>>
(
platform
::
ForRange
<
platform
::
CUDADeviceContext
>
for_range
(
ctx
,
pre_nms_num
);
for_range
(
BoxDecodeAndClipFunctor
<
T
>
{
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
index_sort
.
data
<
int
>
(),
im_info
.
data
<
T
>
(),
pre_nms_num
,
index_sort
.
data
<
int
>
(),
im_info
.
data
<
T
>
(),
proposals
.
data
<
T
>
()});
proposals
.
data
<
T
>
());
}
// 3. filter
// 3. filter
Tensor
keep_index
,
keep_num_t
;
Tensor
keep_index
,
keep_num_t
;
keep_index
.
mutable_data
<
int
>
({
pre_nms_num
},
ctx
.
GetPlace
());
keep_index
.
mutable_data
<
int
>
({
pre_nms_num
},
ctx
.
GetPlace
());
keep_num_t
.
mutable_data
<
int
>
({
1
},
ctx
.
GetPlace
());
keep_num_t
.
mutable_data
<
int
>
({
1
},
ctx
.
GetPlace
());
min_size
=
std
::
max
(
min_size
,
1.0
f
);
min_size
=
std
::
max
(
min_size
,
1.0
f
);
auto
stream
=
ctx
.
stream
();
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
proposals
.
data
<
T
>
(),
im_info
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
proposals
.
data
<
T
>
(),
im_info
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
());
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
());
...
@@ -355,8 +367,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -355,8 +367,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
auto
*
scores
=
context
.
Input
<
Tensor
>
(
"Scores"
);
auto
*
scores
=
context
.
Input
<
Tensor
>
(
"Scores"
);
auto
*
bbox_deltas
=
context
.
Input
<
Tensor
>
(
"BboxDeltas"
);
auto
*
bbox_deltas
=
context
.
Input
<
Tensor
>
(
"BboxDeltas"
);
auto
*
im_info
=
context
.
Input
<
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
Tensor
>
(
"ImInfo"
);
auto
*
anchors
=
context
.
Input
<
Tensor
>
(
"Anchors"
);
auto
anchors
=
detail
::
Ref
(
context
.
Input
<
Tensor
>
(
"Anchors"
),
auto
*
variances
=
context
.
Input
<
Tensor
>
(
"Variances"
);
"Cannot find input Anchors(%s) in scope"
,
context
.
Inputs
(
"Anchors"
)[
0
]);
auto
variances
=
detail
::
Ref
(
context
.
Input
<
Tensor
>
(
"Variances"
),
"Cannot find input Variances(%s) in scope"
,
context
.
Inputs
(
"Variances"
)[
0
]);
auto
*
rpn_rois
=
context
.
Output
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_rois
=
context
.
Output
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoDTensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoDTensor
>
(
"RpnRoiProbs"
);
...
@@ -392,10 +408,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -392,10 +408,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
trans
(
dev_ctx
,
*
bbox_deltas
,
&
bbox_deltas_swap
,
axis
);
trans
(
dev_ctx
,
*
bbox_deltas
,
&
bbox_deltas_swap
,
axis
);
trans
(
dev_ctx
,
*
scores
,
&
scores_swap
,
axis
);
trans
(
dev_ctx
,
*
scores
,
&
scores_swap
,
axis
);
Tensor
*
anchor
=
const_cast
<
framework
::
Tensor
*>
(
anchors
);
anchors
.
Resize
({
anchors
.
numel
()
/
4
,
4
});
anchor
->
Resize
({
anchors
->
numel
()
/
4
,
4
});
variances
.
Resize
({
variances
.
numel
()
/
4
,
4
});
Tensor
*
var
=
const_cast
<
framework
::
Tensor
*>
(
variances
);
var
->
Resize
({
var
->
numel
()
/
4
,
4
});
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
context
.
GetPlace
());
context
.
GetPlace
());
...
@@ -417,12 +431,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -417,12 +431,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_info_slice
,
*
anchor
,
*
var
,
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_info_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
Tensor
proposals
=
box_score_pair
.
first
;
Tensor
&
proposals
=
box_score_pair
.
first
;
Tensor
scores
=
box_score_pair
.
second
;
Tensor
&
scores
=
box_score_pair
.
second
;
memory
::
Copy
(
place
,
rpn_rois_data
+
num_proposals
*
4
,
place
,
memory
::
Copy
(
place
,
rpn_rois_data
+
num_proposals
*
4
,
place
,
proposals
.
data
<
T
>
(),
sizeof
(
T
)
*
proposals
.
numel
(),
0
);
proposals
.
data
<
T
>
(),
sizeof
(
T
)
*
proposals
.
numel
(),
0
);
...
...
paddle/fluid/operators/detection/gpc.cc
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @file src/gpc.cpp
* @author huhan02(com@baidu.com)
* @date 2015/12/18 14:17:30
* @brief
*
* @modified by sunyipeng
* @email sunyipeng@baidu.com
* @date 2018/6/12
**/
#include "paddle/fluid/operators/detection/gpc.h"
namespace
gpc
{
/* Local minima table (LMT) entry: one node per distinct y value at which
   edge bounds start, chained in a singly linked list. */
struct lmt_shape {
  double y;               /* Y coordinate of this local minimum */
  edge_node *first_bound; /* Head of the bound list starting at y */
  lmt_shape *next;        /* Following local-minimum entry */
};
typedef lmt_shape lmt_node;
/* Scanbeam tree node: a binary search tree keyed on y, used to collect the
   distinct scanbeam y values. */
struct sbt_t_shape {
  double y;          /* Scanbeam y value held by this node */
  sbt_t_shape *less; /* Sub-tree of nodes with lower y */
  sbt_t_shape *more; /* Sub-tree of nodes with higher y */
};
typedef sbt_t_shape sb_tree;
/* Intersection table entry: a pair of intersecting edges (bundles) together
   with their intersection point, chained in a singly linked list. */
struct it_shape {
  edge_node *ie[2]; /* The two intersecting edges (bundle pair) */
  gpc_vertex point; /* Where they intersect */
  it_shape *next;   /* Following intersection table node */
};
typedef it_shape it_node;
/* Sorted edge table entry: a snapshot of one AET edge for the current
   scanbeam, linked backwards through prev. */
struct st_shape {
  edge_node *edge; /* The AET edge this entry refers to */
  double xb;       /* x at the bottom of the scanbeam */
  double xt;       /* x at the top of the scanbeam */
  double dx;       /* Change in x for a unit increase in y */
  st_shape *prev;  /* Previous edge in the sorted list */
};
typedef st_shape st_node;
/* Axis-aligned bounding box of a single contour. */
struct bbox_shape {
  double xmin; /* Smallest x coordinate */
  double ymin; /* Smallest y coordinate */
  double xmax; /* Largest x coordinate */
  double ymax; /* Largest y coordinate */
};
typedef bbox_shape bbox;
/*
===========================================================================
Global Data
===========================================================================
*/
/* Horizontal edge state transitions within a scanbeam boundary.
   Indexed as next_h_state[current state][column]; the column headings below
   give the edge position (ABOVE / BELOW / CROSS) and side (L / R) that each
   column stands for. */
const h_state next_h_state[3][6] = {
    /*          ABOVE      BELOW      CROSS   */
    /*          L   R      L   R      L   R   */
    /* NH */ {BH, TH, TH, BH, NH, NH},
    /* BH */ {NH, NH, NH, NH, TH, TH},
    /* TH */ {NH, NH, NH, NH, BH, BH}};
/*
===========================================================================
Private Functions
===========================================================================
*/
/* Free every node of the intersection table and leave *it set to NULL. */
static void reset_it(it_node **it) {
  for (it_node *following = NULL; *it != NULL; *it = following) {
    /* Remember the successor before the current head is released */
    following = (*it)->next;
    gpc_free<it_node>(*it);
  }
}
/* Free every node of the local minima table and leave *lmt set to NULL.
   Only the LMT nodes themselves are released; the bound lists they point at
   are not touched here. */
static void reset_lmt(lmt_node **lmt) {
  lmt_node *node = *lmt;
  while (node != NULL) {
    lmt_node *rest = node->next;
    gpc_free<lmt_node>(node);
    node = rest;
  }
  *lmt = NULL;
}
/* Insert bound e into the bound list whose head link is *b.
   The list is kept ordered by bot.x (primary key) and dx (secondary key);
   e is placed in front of the first existing bound that compares greater,
   or at the tail when there is none. */
static void insert_bound(edge_node **b, edge_node *e) {
  while (*b != NULL) {
    edge_node *current = *b;
    const bool goes_before =
        (e[0].bot.x < current[0].bot.x) ||
        ((e[0].bot.x == current[0].bot.x) && (e[0].dx < current[0].dx));

    if (goes_before) {
      /* Splice e in mid-list, ahead of the current bound */
      *b = e;
      e->next_bound = current;
      return;
    }

    /* Head further down the list */
    b = &(current->next_bound);
  }

  /* Reached the tail: link e here (its next_bound is left as the caller
     set it) */
  *b = e;
}
/* Return the address of the first_bound link of the LMT node for y.
   The LMT is kept in ascending y order; if no node with this y exists yet,
   a new one is allocated and linked in at the correct position. */
static edge_node **bound_list(lmt_node **lmt, double y) {
  /* Head further up the LMT while the current node lies strictly below y */
  while ((*lmt != NULL) && (y > (*lmt)->y)) {
    lmt = &((*lmt)->next);
  }

  if ((*lmt != NULL) && !(y < (*lmt)->y)) {
    /* Use this existing LMT node */
    return &((*lmt)->first_bound);
  }

  /* Either the tail was reached or the current node lies above y: create a
     new node in front of whatever follows (possibly nothing) */
  lmt_node *remainder = *lmt;
  gpc_malloc<lmt_node>(*lmt, sizeof(lmt_node),
                       const_cast<char *>("LMT insertion"));
  (*lmt)->y = y;
  (*lmt)->first_bound = NULL;
  (*lmt)->next = remainder;
  return &((*lmt)->first_bound);
}
/* Record y in the scanbeam tree rooted at *sbtree.
   A new node is allocated, and *entries incremented, only when y is not
   already present in the tree. */
static void add_to_sbtree(int *entries, sb_tree **sbtree, double y) {
  /* Descend until an empty link is found or y turns out to be a duplicate */
  while (*sbtree != NULL) {
    if ((*sbtree)->y > y) {
      /* Head into the 'less' sub-tree */
      sbtree = &((*sbtree)->less);
    } else if ((*sbtree)->y < y) {
      /* Head into the 'more' sub-tree */
      sbtree = &((*sbtree)->more);
    } else {
      /* Neither smaller nor larger: nothing to add */
      return;
    }
  }

  /* Add a new tree node here */
  gpc_malloc<sb_tree>(*sbtree, sizeof(sb_tree),
                      const_cast<char *>("scanbeam tree insertion"));
  (*sbtree)->y = y;
  (*sbtree)->less = NULL;
  (*sbtree)->more = NULL;
  ++(*entries);
}
static
void
build_sbt
(
int
*
entries
,
double
*
sbt
,
sb_tree
*
sbtree
)
{
if
(
sbtree
->
less
)
{
build_sbt
(
entries
,
sbt
,
sbtree
->
less
);
}
sbt
[
*
entries
]
=
sbtree
->
y
;
(
*
entries
)
++
;
if
(
sbtree
->
more
)
{
build_sbt
(
entries
,
sbt
,
sbtree
->
more
);
}
}
/* Release the scanbeam tree rooted at *sbtree, children before parent. */
static void free_sbtree(sb_tree **sbtree) {
  if (*sbtree == NULL) {
    return;
  }
  free_sbtree(&((*sbtree)->less));
  free_sbtree(&((*sbtree)->more));
  gpc_free<sb_tree>(*sbtree);
}
/* Count the vertices of contour c for which gpc_optimal() is true.
   A contour with a non-positive vertex count (a non-contributing contour)
   yields 0 because the loop never runs. */
static int count_optimal_vertices(gpc_vertex_list c) {
  int kept = 0;
  for (int idx = 0; idx < c.num_vertices; ++idx) {
    /* Skip superfluous vertices embedded in horizontal edges */
    if (gpc_optimal(c.vertex, idx, c.num_vertices)) {
      ++kept;
    }
  }
  return kept;
}
static
edge_node
*
build_lmt
(
lmt_node
**
lmt
,
sb_tree
**
sbtree
,
int
*
sbt_entries
,
gpc_polygon
*
p
,
int
type
,
gpc_op
op
)
{
int
c
=
0
;
int
i
=
0
;
int
min
=
0
;
int
max
=
0
;
int
num_edges
=
0
;
int
v
=
0
;
int
num_vertices
=
0
;
int
total_vertices
=
0
;
int
e_index
=
0
;
edge_node
*
e
=
NULL
;
edge_node
*
edge_table
=
NULL
;
for
(
c
=
0
;
c
<
p
->
num_contours
;
c
++
)
{
total_vertices
+=
count_optimal_vertices
(
p
->
contour
[
c
]);
}
/* Create the entire input polygon edge table in one go */
gpc_malloc
<
edge_node
>
(
edge_table
,
total_vertices
*
sizeof
(
edge_node
),
const_cast
<
char
*>
(
"edge table creation"
));
for
(
c
=
0
;
c
<
p
->
num_contours
;
c
++
)
{
if
(
p
->
contour
[
c
].
num_vertices
<
0
)
{
/* Ignore the non-contributing contour and repair the vertex count */
p
->
contour
[
c
].
num_vertices
=
-
p
->
contour
[
c
].
num_vertices
;
}
else
{
/* Perform contour optimisation */
num_vertices
=
0
;
for
(
i
=
0
;
i
<
p
->
contour
[
c
].
num_vertices
;
i
++
)
{
if
(
gpc_optimal
(
p
->
contour
[
c
].
vertex
,
i
,
p
->
contour
[
c
].
num_vertices
))
{
edge_table
[
num_vertices
].
vertex
.
x
=
p
->
contour
[
c
].
vertex
[
i
].
x
;
edge_table
[
num_vertices
].
vertex
.
y
=
p
->
contour
[
c
].
vertex
[
i
].
y
;
/* Record vertex in the scanbeam table */
add_to_sbtree
(
sbt_entries
,
sbtree
,
edge_table
[
num_vertices
].
vertex
.
y
);
num_vertices
++
;
}
}
/* Do the contour forward pass */
for
(
min
=
0
;
min
<
num_vertices
;
min
++
)
{
/* If a forward local minimum... */
if
(
gpc_fwd_min
(
edge_table
,
min
,
num_vertices
))
{
/* Search for the next local maximum... */
num_edges
=
1
;
max
=
gpc_next_index
(
min
,
num_vertices
);
while
(
gpc_not_fmax
(
edge_table
,
max
,
num_vertices
))
{
num_edges
++
;
max
=
gpc_next_index
(
max
,
num_vertices
);
}
/* Build the next edge list */
e
=
&
edge_table
[
e_index
];
e_index
+=
num_edges
;
v
=
min
;
e
[
0
].
bstate
[
BELOW
]
=
UNBUNDLED
;
e
[
0
].
bundle
[
BELOW
][
CLIP
]
=
0
;
e
[
0
].
bundle
[
BELOW
][
SUBJ
]
=
0
;
for
(
i
=
0
;
i
<
num_edges
;
i
++
)
{
e
[
i
].
xb
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
bot
.
x
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
bot
.
y
=
edge_table
[
v
].
vertex
.
y
;
v
=
gpc_next_index
(
v
,
num_vertices
);
e
[
i
].
top
.
x
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
top
.
y
=
edge_table
[
v
].
vertex
.
y
;
e
[
i
].
dx
=
(
edge_table
[
v
].
vertex
.
x
-
e
[
i
].
bot
.
x
)
/
(
e
[
i
].
top
.
y
-
e
[
i
].
bot
.
y
);
e
[
i
].
type
=
type
;
e
[
i
].
outp
[
ABOVE
]
=
NULL
;
e
[
i
].
outp
[
BELOW
]
=
NULL
;
e
[
i
].
next
=
NULL
;
e
[
i
].
prev
=
NULL
;
e
[
i
].
succ
=
((
num_edges
>
1
)
&&
(
i
<
(
num_edges
-
1
)))
?
&
(
e
[
i
+
1
])
:
NULL
;
e
[
i
].
pred
=
((
num_edges
>
1
)
&&
(
i
>
0
))
?
&
(
e
[
i
-
1
])
:
NULL
;
e
[
i
].
next_bound
=
NULL
;
e
[
i
].
bside
[
CLIP
]
=
(
op
==
GPC_DIFF
)
?
RIGHT
:
LEFT
;
e
[
i
].
bside
[
SUBJ
]
=
LEFT
;
}
insert_bound
(
bound_list
(
lmt
,
edge_table
[
min
].
vertex
.
y
),
e
);
}
}
/* Do the contour reverse pass */
for
(
min
=
0
;
min
<
num_vertices
;
min
++
)
{
/* If a reverse local minimum... */
if
(
gpc_rev_min
(
edge_table
,
min
,
num_vertices
))
{
/* Search for the previous local maximum... */
num_edges
=
1
;
max
=
gpc_prev_index
(
min
,
num_vertices
);
while
(
gpc_not_rmax
(
edge_table
,
max
,
num_vertices
))
{
num_edges
++
;
max
=
gpc_prev_index
(
max
,
num_vertices
);
}
/* Build the previous edge list */
e
=
&
edge_table
[
e_index
];
e_index
+=
num_edges
;
v
=
min
;
e
[
0
].
bstate
[
BELOW
]
=
UNBUNDLED
;
e
[
0
].
bundle
[
BELOW
][
CLIP
]
=
0
;
e
[
0
].
bundle
[
BELOW
][
SUBJ
]
=
0
;
for
(
i
=
0
;
i
<
num_edges
;
i
++
)
{
e
[
i
].
xb
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
bot
.
x
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
bot
.
y
=
edge_table
[
v
].
vertex
.
y
;
v
=
gpc_prev_index
(
v
,
num_vertices
);
e
[
i
].
top
.
x
=
edge_table
[
v
].
vertex
.
x
;
e
[
i
].
top
.
y
=
edge_table
[
v
].
vertex
.
y
;
e
[
i
].
dx
=
(
edge_table
[
v
].
vertex
.
x
-
e
[
i
].
bot
.
x
)
/
(
e
[
i
].
top
.
y
-
e
[
i
].
bot
.
y
);
e
[
i
].
type
=
type
;
e
[
i
].
outp
[
ABOVE
]
=
NULL
;
e
[
i
].
outp
[
BELOW
]
=
NULL
;
e
[
i
].
next
=
NULL
;
e
[
i
].
prev
=
NULL
;
e
[
i
].
succ
=
((
num_edges
>
1
)
&&
(
i
<
(
num_edges
-
1
)))
?
&
(
e
[
i
+
1
])
:
NULL
;
e
[
i
].
pred
=
((
num_edges
>
1
)
&&
(
i
>
0
))
?
&
(
e
[
i
-
1
])
:
NULL
;
e
[
i
].
next_bound
=
NULL
;
e
[
i
].
bside
[
CLIP
]
=
(
op
==
GPC_DIFF
)
?
RIGHT
:
LEFT
;
e
[
i
].
bside
[
SUBJ
]
=
LEFT
;
}
insert_bound
(
bound_list
(
lmt
,
edge_table
[
min
].
vertex
.
y
),
e
);
}
}
}
}
return
edge_table
;
}
// NOLINT
/* Insert edge into the active edge table (AET), a doubly linked list kept
   ordered by xb (primary key) and dx (secondary key).
   aet   address of the link to start searching from
   edge  the edge to insert; its prev and next links are overwritten
   prev  the node that precedes *aet (NULL when *aet is the list head) */
static void add_edge_to_aet(edge_node **aet, edge_node *edge,
                            edge_node *prev) {
  while (*aet != NULL) {
    edge_node *here = *aet;
    const bool goes_before =
        (edge->xb < here->xb) ||
        ((edge->xb == here->xb) && (edge->dx < here->dx));

    if (goes_before) {
      /* Insert edge here (before the AET edge) */
      edge->prev = prev;
      edge->next = here;
      here->prev = edge;
      *aet = edge;
      return;
    }

    /* Head further into the AET */
    prev = here;
    aet = &(here->next);
  }

  /* Append edge onto the tail end of the AET */
  *aet = edge;
  edge->prev = prev;
  edge->next = NULL;
}
/* Add the intersection of edge0 and edge1 at (x, y) to the intersection
   table. The new node is placed in front of the first existing node whose
   point.y is greater than y, or at the tail when there is none. */
static void add_intersection(it_node **it, edge_node *edge0, edge_node *edge1,
                             double x, double y) {
  /* Head further down the list past every node that is not above y */
  while ((*it != NULL) && !((*it)->point.y > y)) {
    it = &((*it)->next);
  }

  /* Whatever follows the insertion point (NULL at the tail) */
  it_node *following = *it;

  gpc_malloc<it_node>(*it, sizeof(it_node),
                      const_cast<char *>("IT insertion"));
  (*it)->ie[0] = edge0;
  (*it)->ie[1] = edge1;
  (*it)->point.x = x;
  (*it)->point.y = y;
  (*it)->next = following;
}
/* Insert edge into the sorted edge table (ST) for the current scanbeam.
   Walking from *st back through the prev links, every ST edge that the new
   edge crosses within the scanbeam produces an entry in the intersection
   table; the new ST node is inserted in front of the first ST edge it does
   not cross, or at the end of the chain.
   dy is the scanbeam height used to scale the intersection's y offset. */
static void add_st_edge(st_node **st, it_node **it, edge_node *edge,
                        double dy) {
  while (*st != NULL) {
    st_node *current = *st;
    const double den =
        (current->xt - current->xb) - (edge->xt - edge->xb);

    /* Stop as soon as the new edge and the ST edge don't cross */
    if ((edge->xt >= current->xt) || (edge->dx == current->dx) ||
        (fabs(den) <= DBL_EPSILON)) {
      break;
    }

    /* Compute intersection between new edge and ST edge */
    const double r = (edge->xb - current->xb) / den;
    const double x = current->xb + r * (current->xt - current->xb);
    const double y = r * dy;

    /* Insert the edge pointers and the intersection point in the IT */
    add_intersection(it, current->edge, edge, x, y);

    /* Head further into the ST */
    st = &(current->prev);
  }

  /* Insert the new node here: in front of the non-crossing ST edge, or at
     the end of the chain when *st is NULL */
  st_node *displaced = *st;
  gpc_malloc<st_node>(*st, sizeof(st_node),
                      const_cast<char *>("ST insertion"));
  (*st)->edge = edge;
  (*st)->xb = edge->xb;
  (*st)->xt = edge->xt;
  (*st)->dx = edge->dx;
  (*st)->prev = displaced;
}
/* Rebuild the intersection table *it for the current scanbeam.
   The old table is discarded, every AET edge that is a bundle head or has a
   non-zero ABOVE bundle count is fed through add_st_edge(), and the
   temporary sorted edge table is freed again before returning. */
static void build_intersection_table(it_node **it, edge_node *aet,
                                     double dy) {
  /* Start from an empty intersection table */
  reset_it(it);

  st_node *sorted = NULL;

  /* Process each AET edge */
  edge_node *cursor = aet;
  while (cursor != NULL) {
    const bool participates = (cursor->bstate[ABOVE] == BUNDLE_HEAD) ||
                              cursor->bundle[ABOVE][CLIP] ||
                              cursor->bundle[ABOVE][SUBJ];
    if (participates) {
      add_st_edge(&sorted, it, cursor, dy);
    }
    cursor = cursor->next;
  }

  /* Free the sorted edge table */
  while (sorted != NULL) {
    st_node *older = sorted->prev;
    gpc_free<st_node>(sorted);
    sorted = older;
  }
}
/* Count the valid contours in the output polygon list.
   For every active node the vertices reachable from proxy->v[LEFT] are
   counted. With more than two vertices the count is stored in `active` and
   the contour is counted; otherwise its vertex nodes are freed and `active`
   is cleared. Returns the number of valid contours. */
static int count_contours(polygon_node *polygon) {
  int valid = 0;

  while (polygon != NULL) {
    if (polygon->active) {
      /* Count the vertices in the current contour */
      int length = 0;
      for (vertex_node *w = polygon->proxy->v[LEFT]; w != NULL; w = w->next) {
        ++length;
      }

      if (length > 2) {
        /* Record valid vertex counts in the active field */
        polygon->active = length;
        ++valid;
      } else {
        /* Invalid contour: just free the heap */
        vertex_node *w = polygon->proxy->v[LEFT];
        while (w != NULL) {
          vertex_node *rest = w->next;
          gpc_free<vertex_node>(w);
          w = rest;
        }
        polygon->active = 0;
      }
    }
    polygon = polygon->next;
  }
  return valid;
}
/* Prepend the vertex (x, y) to the vertex list owned by p's proxy. */
static void add_left(polygon_node *p, double x, double y) {
  vertex_node *fresh = NULL;

  /* Create a new vertex node and set its fields */
  gpc_malloc<vertex_node>(fresh, sizeof(vertex_node),
                          const_cast<char *>("vertex node creation"));
  fresh->x = x;
  fresh->y = y;

  /* Hook the new node in front of the current left end and make it the new
     left end */
  polygon_node *owner = p->proxy;
  fresh->next = owner->v[LEFT];
  owner->v[LEFT] = fresh;
}
/* Merge p's contour onto the left end of q's contour and mark the result as
   a hole. When p and q already share a proxy only the hole flag is set.
   Every node in `list` that referred to p's old proxy is deactivated and
   redirected to q's proxy. */
static void merge_left(polygon_node *p, polygon_node *q, polygon_node *list) {
  /* Label contour as a hole */
  polygon_node *keeper = q->proxy;
  keeper->hole = 1;

  polygon_node *absorbed = p->proxy;
  if (absorbed == keeper) {
    return;
  }

  /* Assign p's vertex list to the left end of q's list */
  absorbed->v[RIGHT]->next = keeper->v[LEFT];
  keeper->v[LEFT] = absorbed->v[LEFT];

  /* Redirect any p->proxy references to q->proxy */
  while (list != NULL) {
    if (list->proxy == absorbed) {
      list->active = 0;
      list->proxy = keeper;
    }
    list = list->next;
  }
}
/* Append the vertex (x, y) to the vertex list owned by p's proxy. */
static void add_right(polygon_node *p, double x, double y) {
  vertex_node *fresh = NULL;

  /* Create a new vertex node and set its fields */
  gpc_malloc<vertex_node>(fresh, sizeof(vertex_node),
                          const_cast<char *>("vertex node creation"));
  fresh->x = x;
  fresh->y = y;
  fresh->next = NULL;

  /* Link the new node after the current right end and make it the new
     right end */
  polygon_node *owner = p->proxy;
  owner->v[RIGHT]->next = fresh;
  owner->v[RIGHT] = fresh;
}
/* Merge p's contour onto the right end of q's contour and mark the result
   as external (not a hole). When p and q already share a proxy only the
   hole flag is cleared. Every node in `list` that referred to p's old proxy
   is deactivated and redirected to q's proxy. */
static void merge_right(polygon_node *p, polygon_node *q,
                        polygon_node *list) {
  /* Label contour as external */
  polygon_node *keeper = q->proxy;
  keeper->hole = 0;

  polygon_node *absorbed = p->proxy;
  if (absorbed == keeper) {
    return;
  }

  /* Assign p's vertex list to the right end of q's list */
  keeper->v[RIGHT]->next = absorbed->v[LEFT];
  keeper->v[RIGHT] = absorbed->v[RIGHT];

  /* Redirect any p->proxy references to q->proxy */
  while (list != NULL) {
    if (list->proxy == absorbed) {
      list->active = 0;
      list->proxy = keeper;
    }
    list = list->next;
  }
}
/* Start a new output contour at the local minimum (x, y).
   A new polygon node is pushed onto the front of the list *p, given a
   single-vertex list, made its own proxy, and attached to edge->outp[ABOVE].
   The hole flag is initialised to 0 here; merge_left() / merge_right()
   overwrite it when contours are joined. Without this the field would hold
   whatever the allocator returned until the first merge. */
static void add_local_min(polygon_node **p, edge_node *edge, double x,
                          double y) {
  polygon_node *existing_min = *p;
  vertex_node *nv = NULL;

  gpc_malloc<polygon_node>(*p, sizeof(polygon_node),
                           const_cast<char *>("polygon node creation"));

  /* Create a new vertex node and set its fields */
  gpc_malloc<vertex_node>(nv, sizeof(vertex_node),
                          const_cast<char *>("vertex node creation"));
  nv->x = x;
  nv->y = y;
  nv->next = NULL;

  /* Initialise proxy to point to p itself */
  (*p)->proxy = (*p);
  (*p)->active = 1;
  /* Give the hole flag a defined value until a merge decides it */
  (*p)->hole = 0;
  (*p)->next = existing_min;

  /* Make v[LEFT] and v[RIGHT] point to new vertex nv */
  (*p)->v[LEFT] = nv;
  (*p)->v[RIGHT] = nv;

  /* Assign polygon p to the edge */
  edge->outp[ABOVE] = *p;
}
/* Return how many nodes in the list tn have an `active` value above 2. */
static int count_tristrips(polygon_node *tn) {
  int strips = 0;
  while (tn != NULL) {
    if (tn->active > 2) {
      ++strips;
    }
    tn = tn->next;
  }
  return strips;
}
/* Append a new vertex (x, y) to the tail of the vertex list whose head link
   is *t. */
void add_vertex(vertex_node **t, double x, double y) {
  /* Head down the list until the empty tail link is found */
  while (*t != NULL) {
    t = &((*t)->next);
  }

  gpc_malloc<vertex_node>(*t, sizeof(vertex_node),
                          const_cast<char *>("tristrip vertex creation"));
  (*t)->x = x;
  (*t)->y = y;
  (*t)->next = NULL;
}
/* Append vertex (x, y) to list v[s] of the polygon node e->outp[p] and bump
   that node's `active` counter by one. */
void gpc_vertex_create(edge_node *e, int p, int s, double x, double y) {
  polygon_node *strip = e->outp[p];
  add_vertex(&(strip->v[s]), x, y);
  strip->active++;
}
/* Append a new tristrip node to the tail of the list *tn, seed its LEFT
   vertex list with (x, y), and attach it to edge->outp[ABOVE]. */
static void new_tristrip(polygon_node **tn, edge_node *edge, double x,
                         double y) {
  /* Head down the list until the empty tail link is found */
  while (*tn != NULL) {
    tn = &((*tn)->next);
  }

  gpc_malloc<polygon_node>(*tn, sizeof(polygon_node),
                           const_cast<char *>("tristrip node creation"));
  (*tn)->next = NULL;
  (*tn)->v[LEFT] = NULL;
  (*tn)->v[RIGHT] = NULL;
  (*tn)->active = 1;
  add_vertex(&((*tn)->v[LEFT]), x, y);
  edge->outp[ABOVE] = *tn;
}
/* Allocate and return one axis-aligned bounding box per contour of p.
   A contour whose vertex loop does not run (non-positive num_vertices)
   keeps the initial extent of +DBL_MAX minima and -DBL_MAX maxima.
   The caller owns the returned array. */
static bbox *create_contour_bboxes(gpc_polygon *p) {
  bbox *box = NULL;

  gpc_malloc<bbox>(box, p->num_contours * sizeof(bbox),
                   const_cast<char *>("Bounding box creation"));

  /* Construct contour bounding boxes */
  for (int ci = 0; ci < p->num_contours; ++ci) {
    bbox &extent = box[ci];
    const gpc_vertex_list &ring = p->contour[ci];

    /* Initialise bounding box extent */
    extent.xmin = DBL_MAX;
    extent.ymin = DBL_MAX;
    extent.xmax = -DBL_MAX;
    extent.ymax = -DBL_MAX;

    /* Grow the box to cover every vertex */
    for (int vi = 0; vi < ring.num_vertices; ++vi) {
      const gpc_vertex &pt = ring.vertex[vi];
      if (pt.x < extent.xmin) {
        extent.xmin = pt.x;
      }
      if (pt.y < extent.ymin) {
        extent.ymin = pt.y;
      }
      if (pt.x > extent.xmax) {
        extent.xmax = pt.x;
      }
      if (pt.y > extent.ymax) {
        extent.ymax = pt.y;
      }
    }
  }
  return box;
}
/* Bounding-box pre-test that flags contours which cannot contribute.
   Every clip contour whose box overlaps no subject contour box gets its
   num_vertices negated; for GPC_INT the same is done for subject contours
   that overlap no clip contour box. */
static void minimax_test(gpc_polygon *subj, gpc_polygon *clip, gpc_op op) {
  bbox *s_bbox = create_contour_bboxes(subj);
  bbox *c_bbox = create_contour_bboxes(clip);
  int *o_table = NULL;

  gpc_malloc<int>(o_table,
                  subj->num_contours * clip->num_contours * sizeof(int),
                  const_cast<char *>("overlap table creation"));

  /* Check all subject contour bounding boxes against clip boxes.
     o_table is laid out with one row per clip contour. */
  for (int si = 0; si < subj->num_contours; ++si) {
    for (int ci = 0; ci < clip->num_contours; ++ci) {
      const bbox &sb = s_bbox[si];
      const bbox &cb = c_bbox[ci];
      const bool x_apart = (sb.xmax < cb.xmin) || (sb.xmin > cb.xmax);
      const bool y_apart = (sb.ymax < cb.ymin) || (sb.ymin > cb.ymax);
      o_table[ci * subj->num_contours + si] = (!x_apart) && (!y_apart);
    }
  }

  /* For each clip contour, search for any subject contour overlaps */
  for (int ci = 0; ci < clip->num_contours; ++ci) {
    int hit = 0;
    for (int si = 0; si < subj->num_contours; ++si) {
      hit = o_table[ci * subj->num_contours + si];
      if (hit) {
        break;
      }
    }

    if (!hit) {
      /* Flag non contributing status by negating vertex count */
      clip->contour[ci].num_vertices = -clip->contour[ci].num_vertices;
    }
  }

  if (op == GPC_INT) {
    /* For each subject contour, search for any clip contour overlaps */
    for (int si = 0; si < subj->num_contours; ++si) {
      int hit = 0;
      for (int ci = 0; ci < clip->num_contours; ++ci) {
        hit = o_table[ci * subj->num_contours + si];
        if (hit) {
          break;
        }
      }

      if (!hit) {
        /* Flag non contributing status by negating vertex count */
        subj->contour[si].num_vertices = -subj->contour[si].num_vertices;
      }
    }
  }

  gpc_free<bbox>(s_bbox);
  gpc_free<bbox>(c_bbox);
  gpc_free<int>(o_table);
}
/*
===========================================================================
Public Functions
===========================================================================
*/
/* Release everything owned by polygon p: each contour's vertex array, the
   hole flag array and the contour array. num_contours is reset to 0. The
   gpc_polygon object itself is not freed. */
void gpc_free_polygon(gpc_polygon *p) {
  int idx = 0;
  while (idx < p->num_contours) {
    gpc_free<gpc_vertex>(p->contour[idx].vertex);
    ++idx;
  }

  gpc_free<int>(p->hole);
  gpc_free<gpc_vertex_list>(p->contour);
  p->num_contours = 0;
}
/*
void gpc_read_polygon(FILE *fp, int read_hole_flags, gpc_polygon *p) {
int c = 0;
int v = 0;
fscanf(fp, "%d", &(p->num_contours));
gpc_malloc<int>(p->hole, p->num_contours * sizeof(int),
(char *)"hole flag array creation");
gpc_malloc<gpc_vertex_list>(p->contour,
p->num_contours * sizeof(gpc_vertex_list),
(char *)"contour creation");
for (c = 0; c < p->num_contours; c++) {
fscanf(fp, "%d", &(p->contour[c].num_vertices));
if (read_hole_flags) {
fscanf(fp, "%d", &(p->hole[c]));
} else {
p->hole[c] = 0; // Assume all contours to be external
}
gpc_malloc<gpc_vertex>(p->contour[c].vertex,
p->contour[c].num_vertices * sizeof(gpc_vertex),
(char *)"vertex creation");
for (v = 0; v < p->contour[c].num_vertices; v++) {
fscanf(fp, "%lf %lf", &(p->contour[c].vertex[v].x),
&(p->contour[c].vertex[v].y));
}
}
}
void gpc_write_polygon(FILE *fp, int write_hole_flags, gpc_polygon *p) {
int c = 0;
int v = 0;
fprintf(fp, "%d\n", p->num_contours);
for (c = 0; c < p->num_contours; c++) {
fprintf(fp, "%d\n", p->contour[c].num_vertices);
if (write_hole_flags) {
fprintf(fp, "%d\n", p->hole[c]);
}
for (v = 0; v < p->contour[c].num_vertices; v++) {
fprintf(fp, "% .*lf % .*lf\n", DBL_DIG, p->contour[c].vertex[v].x,
DBL_DIG, p->contour[c].vertex[v].y);
}
}
}
*/
/* Append a copy of new_contour to polygon p.
   p            polygon to extend; its hole and contour arrays are replaced
                by arrays one element longer
   new_contour  contour to add; its vertices are copied, so the caller keeps
                ownership of new_contour
   hole         hole flag stored for the new contour */
void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
  const int old_count = p->num_contours;
  int *grown_holes = NULL;
  gpc_vertex_list *grown_contours = NULL;

  /* Create an extended hole array */
  gpc_malloc<int>(grown_holes, (old_count + 1) * sizeof(int),
                  const_cast<char *>("contour hole addition"));

  /* Create an extended contour array */
  gpc_malloc<gpc_vertex_list>(grown_contours,
                              (old_count + 1) * sizeof(gpc_vertex_list),
                              const_cast<char *>("contour addition"));

  /* Copy the old contour and hole data into the extended arrays; the
     existing vertex arrays are carried over by pointer, not duplicated */
  for (int k = 0; k < old_count; ++k) {
    grown_holes[k] = p->hole[k];
    grown_contours[k] = p->contour[k];
  }

  /* Copy the new contour and hole onto the end of the extended arrays */
  grown_holes[old_count] = hole;
  gpc_vertex_list &appended = grown_contours[old_count];
  appended.num_vertices = new_contour->num_vertices;
  gpc_malloc<gpc_vertex>(appended.vertex,
                         new_contour->num_vertices * sizeof(gpc_vertex),
                         const_cast<char *>("contour addition"));
  for (int k = 0; k < new_contour->num_vertices; ++k) {
    appended.vertex[k] = new_contour->vertex[k];
  }

  /* Dispose of the old contour */
  gpc_free<gpc_vertex_list>(p->contour);
  gpc_free<int>(p->hole);

  /* Update the polygon information */
  p->num_contours = old_count + 1;
  p->hole = grown_holes;
  p->contour = grown_contours;
}
// gpc_polygon_clip
void
gpc_polygon_clip
(
gpc_op
op
,
gpc_polygon
*
subj
,
gpc_polygon
*
clip
,
gpc_polygon
*
result
)
{
sb_tree
*
sbtree
=
NULL
;
it_node
*
it
=
NULL
;
it_node
*
intersect
=
NULL
;
edge_node
*
edge
=
NULL
;
edge_node
*
prev_edge
=
NULL
;
edge_node
*
next_edge
=
NULL
;
edge_node
*
succ_edge
=
NULL
;
edge_node
*
e0
=
NULL
;
edge_node
*
e1
=
NULL
;
edge_node
*
aet
=
NULL
;
edge_node
*
c_heap
=
NULL
;
edge_node
*
s_heap
=
NULL
;
lmt_node
*
lmt
=
NULL
;
lmt_node
*
local_min
=
NULL
;
polygon_node
*
out_poly
=
NULL
;
polygon_node
*
p
=
NULL
;
polygon_node
*
q
=
NULL
;
polygon_node
*
poly
=
NULL
;
polygon_node
*
npoly
=
NULL
;
polygon_node
*
cf
=
NULL
;
vertex_node
*
vtx
=
NULL
;
vertex_node
*
nv
=
NULL
;
h_state
horiz
[
2
];
int
in
[
2
];
int
exists
[
2
];
int
parity
[
2
]
=
{
LEFT
,
LEFT
};
int
c
=
0
;
int
v
=
0
;
int
contributing
=
0
;
int
search
=
0
;
int
scanbeam
=
0
;
int
sbt_entries
=
0
;
int
vclass
=
0
;
int
bl
=
0
;
int
br
=
0
;
int
tl
=
0
;
int
tr
=
0
;
double
*
sbt
=
NULL
;
double
xb
=
0.0
;
double
px
=
0.0
;
double
yb
=
0.0
;
double
yt
=
0.0
;
double
dy
=
0.0
;
double
ix
=
0.0
;
double
iy
=
0.0
;
/* Test for trivial NULL result cases */
if
(((
subj
->
num_contours
==
0
)
&&
(
clip
->
num_contours
==
0
))
||
((
subj
->
num_contours
==
0
)
&&
((
op
==
GPC_INT
)
||
(
op
==
GPC_DIFF
)))
||
((
clip
->
num_contours
==
0
)
&&
(
op
==
GPC_INT
)))
{
result
->
num_contours
=
0
;
result
->
hole
=
NULL
;
result
->
contour
=
NULL
;
return
;
}
/* Identify potentially contributing contours */
if
(((
op
==
GPC_INT
)
||
(
op
==
GPC_DIFF
))
&&
(
subj
->
num_contours
>
0
)
&&
(
clip
->
num_contours
>
0
))
{
minimax_test
(
subj
,
clip
,
op
);
}
/* Build LMT */
if
(
subj
->
num_contours
>
0
)
{
s_heap
=
build_lmt
(
&
lmt
,
&
sbtree
,
&
sbt_entries
,
subj
,
SUBJ
,
op
);
}
if
(
clip
->
num_contours
>
0
)
{
c_heap
=
build_lmt
(
&
lmt
,
&
sbtree
,
&
sbt_entries
,
clip
,
CLIP
,
op
);
}
/* Return a NULL result if no contours contribute */
if
(
lmt
==
NULL
)
{
result
->
num_contours
=
0
;
result
->
hole
=
NULL
;
result
->
contour
=
NULL
;
reset_lmt
(
&
lmt
);
gpc_free
<
edge_node
>
(
s_heap
);
gpc_free
<
edge_node
>
(
c_heap
);
return
;
}
/* Build scanbeam table from scanbeam tree */
gpc_malloc
<
double
>
(
sbt
,
sbt_entries
*
sizeof
(
double
),
const_cast
<
char
*>
(
"sbt creation"
));
build_sbt
(
&
scanbeam
,
sbt
,
sbtree
);
scanbeam
=
0
;
free_sbtree
(
&
sbtree
);
/* Allow pointer re-use without causing memory leak */
if
(
subj
==
result
)
{
gpc_free_polygon
(
subj
);
}
if
(
clip
==
result
)
{
gpc_free_polygon
(
clip
);
}
/* Invert clip polygon for difference operation */
if
(
op
==
GPC_DIFF
)
{
parity
[
CLIP
]
=
RIGHT
;
}
local_min
=
lmt
;
// Process each scanbeam
while
(
scanbeam
<
sbt_entries
)
{
/* Set yb and yt to the bottom and top of the scanbeam */
yb
=
sbt
[
scanbeam
++
];
if
(
scanbeam
<
sbt_entries
)
{
yt
=
sbt
[
scanbeam
];
dy
=
yt
-
yb
;
}
/* === SCANBEAM BOUNDARY PROCESSING ================================ */
/* If LMT node corresponding to yb exists */
if
(
local_min
)
{
if
(
local_min
->
y
==
yb
)
{
/* Add edges starting at this local minimum to the AET */
for
(
edge
=
local_min
->
first_bound
;
edge
;
edge
=
edge
->
next_bound
)
{
add_edge_to_aet
(
&
aet
,
edge
,
NULL
);
}
local_min
=
local_min
->
next
;
}
}
/* Set dummy previous x value */
px
=
-
DBL_MAX
;
/* Create bundles within AET */
e0
=
aet
;
e1
=
aet
;
/* Set up bundle fields of first edge */
aet
->
bundle
[
ABOVE
][
aet
->
type
]
=
(
aet
->
top
.
y
!=
yb
);
aet
->
bundle
[
ABOVE
][
!
aet
->
type
]
=
0
;
aet
->
bstate
[
ABOVE
]
=
UNBUNDLED
;
for
(
next_edge
=
aet
->
next
;
next_edge
;
next_edge
=
next_edge
->
next
)
{
/* Set up bundle fields of next edge */
next_edge
->
bundle
[
ABOVE
][
next_edge
->
type
]
=
(
next_edge
->
top
.
y
!=
yb
);
next_edge
->
bundle
[
ABOVE
][
!
next_edge
->
type
]
=
0
;
next_edge
->
bstate
[
ABOVE
]
=
UNBUNDLED
;
/* Bundle edges above the scanbeam boundary if they coincide */
if
(
next_edge
->
bundle
[
ABOVE
][
next_edge
->
type
])
{
if
(
gpc_eq
(
e0
->
xb
,
next_edge
->
xb
)
&&
gpc_eq
(
e0
->
dx
,
next_edge
->
dx
)
&&
(
e0
->
top
.
y
!=
yb
))
{
next_edge
->
bundle
[
ABOVE
][
next_edge
->
type
]
^=
e0
->
bundle
[
ABOVE
][
next_edge
->
type
];
next_edge
->
bundle
[
ABOVE
][
!
next_edge
->
type
]
=
e0
->
bundle
[
ABOVE
][
!
next_edge
->
type
];
next_edge
->
bstate
[
ABOVE
]
=
BUNDLE_HEAD
;
e0
->
bundle
[
ABOVE
][
CLIP
]
=
0
;
e0
->
bundle
[
ABOVE
][
SUBJ
]
=
0
;
e0
->
bstate
[
ABOVE
]
=
BUNDLE_TAIL
;
}
e0
=
next_edge
;
}
}
horiz
[
CLIP
]
=
NH
;
horiz
[
SUBJ
]
=
NH
;
// Process each edge at this scanbeam boundary
for
(
edge
=
aet
;
edge
;
edge
=
edge
->
next
)
{
exists
[
CLIP
]
=
edge
->
bundle
[
ABOVE
][
CLIP
]
+
(
edge
->
bundle
[
BELOW
][
CLIP
]
<<
1
);
exists
[
SUBJ
]
=
edge
->
bundle
[
ABOVE
][
SUBJ
]
+
(
edge
->
bundle
[
BELOW
][
SUBJ
]
<<
1
);
if
(
exists
[
CLIP
]
||
exists
[
SUBJ
])
{
/* Set bundle side */
edge
->
bside
[
CLIP
]
=
parity
[
CLIP
];
edge
->
bside
[
SUBJ
]
=
parity
[
SUBJ
];
/* Determine contributing status and quadrant occupancies */
switch
(
op
)
{
case
GPC_DIFF
:
case
GPC_INT
:
contributing
=
(
exists
[
CLIP
]
&&
(
parity
[
SUBJ
]
||
horiz
[
SUBJ
]))
||
(
exists
[
SUBJ
]
&&
(
parity
[
CLIP
]
||
horiz
[
CLIP
]))
||
(
exists
[
CLIP
]
&&
exists
[
SUBJ
]
&&
(
parity
[
CLIP
]
==
parity
[
SUBJ
]));
br
=
(
parity
[
CLIP
])
&&
(
parity
[
SUBJ
]);
bl
=
(
parity
[
CLIP
]
^
edge
->
bundle
[
ABOVE
][
CLIP
])
&&
(
parity
[
SUBJ
]
^
edge
->
bundle
[
ABOVE
][
SUBJ
]);
tr
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
))
&&
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
));
tl
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
CLIP
])
&&
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
SUBJ
]);
break
;
case
GPC_XOR
:
contributing
=
exists
[
CLIP
]
||
exists
[
SUBJ
];
br
=
(
parity
[
CLIP
])
^
(
parity
[
SUBJ
]);
bl
=
(
parity
[
CLIP
]
^
edge
->
bundle
[
ABOVE
][
CLIP
])
^
(
parity
[
SUBJ
]
^
edge
->
bundle
[
ABOVE
][
SUBJ
]);
tr
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
))
^
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
));
tl
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
CLIP
])
^
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
SUBJ
]);
break
;
case
GPC_UNION
:
contributing
=
(
exists
[
CLIP
]
&&
(
!
parity
[
SUBJ
]
||
horiz
[
SUBJ
]))
||
(
exists
[
SUBJ
]
&&
(
!
parity
[
CLIP
]
||
horiz
[
CLIP
]))
||
(
exists
[
CLIP
]
&&
exists
[
SUBJ
]
&&
(
parity
[
CLIP
]
==
parity
[
SUBJ
]));
br
=
(
parity
[
CLIP
])
||
(
parity
[
SUBJ
]);
bl
=
(
parity
[
CLIP
]
^
edge
->
bundle
[
ABOVE
][
CLIP
])
||
(
parity
[
SUBJ
]
^
edge
->
bundle
[
ABOVE
][
SUBJ
]);
tr
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
))
||
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
));
tl
=
(
parity
[
CLIP
]
^
(
horiz
[
CLIP
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
CLIP
])
||
(
parity
[
SUBJ
]
^
(
horiz
[
SUBJ
]
!=
NH
)
^
edge
->
bundle
[
BELOW
][
SUBJ
]);
break
;
}
// Update parity
parity
[
CLIP
]
^=
edge
->
bundle
[
ABOVE
][
CLIP
];
parity
[
SUBJ
]
^=
edge
->
bundle
[
ABOVE
][
SUBJ
];
/* Update horizontal state */
if
(
exists
[
CLIP
])
{
horiz
[
CLIP
]
=
next_h_state
[
horiz
[
CLIP
]]
[((
exists
[
CLIP
]
-
1
)
<<
1
)
+
parity
[
CLIP
]];
}
if
(
exists
[
SUBJ
])
{
horiz
[
SUBJ
]
=
next_h_state
[
horiz
[
SUBJ
]]
[((
exists
[
SUBJ
]
-
1
)
<<
1
)
+
parity
[
SUBJ
]];
}
vclass
=
tr
+
(
tl
<<
1
)
+
(
br
<<
2
)
+
(
bl
<<
3
);
if
(
contributing
)
{
xb
=
edge
->
xb
;
switch
(
vclass
)
{
case
EMN
:
case
IMN
:
add_local_min
(
&
out_poly
,
edge
,
xb
,
yb
);
px
=
xb
;
cf
=
edge
->
outp
[
ABOVE
];
break
;
case
ERI
:
if
(
xb
!=
px
)
{
add_right
(
cf
,
xb
,
yb
);
px
=
xb
;
}
edge
->
outp
[
ABOVE
]
=
cf
;
cf
=
NULL
;
break
;
case
ELI
:
add_left
(
edge
->
outp
[
BELOW
],
xb
,
yb
);
px
=
xb
;
cf
=
edge
->
outp
[
BELOW
];
break
;
case
EMX
:
if
(
xb
!=
px
)
{
add_left
(
cf
,
xb
,
yb
);
px
=
xb
;
}
merge_right
(
cf
,
edge
->
outp
[
BELOW
],
out_poly
);
cf
=
NULL
;
break
;
case
ILI
:
if
(
xb
!=
px
)
{
add_left
(
cf
,
xb
,
yb
);
px
=
xb
;
}
edge
->
outp
[
ABOVE
]
=
cf
;
cf
=
NULL
;
break
;
case
IRI
:
add_right
(
edge
->
outp
[
BELOW
],
xb
,
yb
);
px
=
xb
;
cf
=
edge
->
outp
[
BELOW
];
edge
->
outp
[
BELOW
]
=
NULL
;
break
;
case
IMX
:
if
(
xb
!=
px
)
{
add_right
(
cf
,
xb
,
yb
);
px
=
xb
;
}
merge_left
(
cf
,
edge
->
outp
[
BELOW
],
out_poly
);
cf
=
NULL
;
edge
->
outp
[
BELOW
]
=
NULL
;
break
;
case
IMM
:
if
(
xb
!=
px
)
{
add_right
(
cf
,
xb
,
yb
);
px
=
xb
;
}
merge_left
(
cf
,
edge
->
outp
[
BELOW
],
out_poly
);
edge
->
outp
[
BELOW
]
=
NULL
;
add_local_min
(
&
out_poly
,
edge
,
xb
,
yb
);
cf
=
edge
->
outp
[
ABOVE
];
break
;
case
EMM
:
if
(
xb
!=
px
)
{
add_left
(
cf
,
xb
,
yb
);
px
=
xb
;
}
merge_right
(
cf
,
edge
->
outp
[
BELOW
],
out_poly
);
edge
->
outp
[
BELOW
]
=
NULL
;
add_local_min
(
&
out_poly
,
edge
,
xb
,
yb
);
cf
=
edge
->
outp
[
ABOVE
];
break
;
case
LED
:
if
(
edge
->
bot
.
y
==
yb
)
{
add_left
(
edge
->
outp
[
BELOW
],
xb
,
yb
);
}
edge
->
outp
[
ABOVE
]
=
edge
->
outp
[
BELOW
];
px
=
xb
;
break
;
case
RED
:
if
(
edge
->
bot
.
y
==
yb
)
{
add_right
(
edge
->
outp
[
BELOW
],
xb
,
yb
);
}
edge
->
outp
[
ABOVE
]
=
edge
->
outp
[
BELOW
];
px
=
xb
;
break
;
default:
break
;
}
/* End of switch */
}
/* End of contributing conditional */
}
/* End of edge exists conditional */
}
// End of AET loop
/* Delete terminating edges from the AET, otherwise compute xt */
for
(
edge
=
aet
;
edge
;
edge
=
edge
->
next
)
{
if
(
edge
->
top
.
y
==
yb
)
{
prev_edge
=
edge
->
prev
;
next_edge
=
edge
->
next
;
if
(
prev_edge
)
{
prev_edge
->
next
=
next_edge
;
}
else
{
aet
=
next_edge
;
}
if
(
next_edge
)
{
next_edge
->
prev
=
prev_edge
;
}
/* Copy bundle head state to the adjacent tail edge if required */
if
((
edge
->
bstate
[
BELOW
]
==
BUNDLE_HEAD
)
&&
prev_edge
)
{
if
(
prev_edge
->
bstate
[
BELOW
]
==
BUNDLE_TAIL
)
{
prev_edge
->
outp
[
BELOW
]
=
edge
->
outp
[
BELOW
];
prev_edge
->
bstate
[
BELOW
]
=
UNBUNDLED
;
if
(
prev_edge
->
prev
)
{
if
(
prev_edge
->
prev
->
bstate
[
BELOW
]
==
BUNDLE_TAIL
)
{
prev_edge
->
bstate
[
BELOW
]
=
BUNDLE_HEAD
;
}
}
}
}
}
else
{
if
(
edge
->
top
.
y
==
yt
)
{
edge
->
xt
=
edge
->
top
.
x
;
}
else
{
edge
->
xt
=
edge
->
bot
.
x
+
edge
->
dx
*
(
yt
-
edge
->
bot
.
y
);
}
}
}
if
(
scanbeam
<
sbt_entries
)
{
/* === SCANBEAM INTERIOR PROCESSING ============================== */
build_intersection_table
(
&
it
,
aet
,
dy
);
/* Process each node in the intersection table */
for
(
intersect
=
it
;
intersect
;
intersect
=
intersect
->
next
)
{
e0
=
intersect
->
ie
[
0
];
e1
=
intersect
->
ie
[
1
];
/* Only generate output for contributing intersections */
if
((
e0
->
bundle
[
ABOVE
][
CLIP
]
||
e0
->
bundle
[
ABOVE
][
SUBJ
])
&&
(
e1
->
bundle
[
ABOVE
][
CLIP
]
||
e1
->
bundle
[
ABOVE
][
SUBJ
]))
{
p
=
e0
->
outp
[
ABOVE
];
q
=
e1
->
outp
[
ABOVE
];
ix
=
intersect
->
point
.
x
;
iy
=
intersect
->
point
.
y
+
yb
;
in
[
CLIP
]
=
(
e0
->
bundle
[
ABOVE
][
CLIP
]
&&
!
e0
->
bside
[
CLIP
])
||
(
e1
->
bundle
[
ABOVE
][
CLIP
]
&&
e1
->
bside
[
CLIP
])
||
(
!
e0
->
bundle
[
ABOVE
][
CLIP
]
&&
!
e1
->
bundle
[
ABOVE
][
CLIP
]
&&
e0
->
bside
[
CLIP
]
&&
e1
->
bside
[
CLIP
]);
in
[
SUBJ
]
=
(
e0
->
bundle
[
ABOVE
][
SUBJ
]
&&
!
e0
->
bside
[
SUBJ
])
||
(
e1
->
bundle
[
ABOVE
][
SUBJ
]
&&
e1
->
bside
[
SUBJ
])
||
(
!
e0
->
bundle
[
ABOVE
][
SUBJ
]
&&
!
e1
->
bundle
[
ABOVE
][
SUBJ
]
&&
e0
->
bside
[
SUBJ
]
&&
e1
->
bside
[
SUBJ
]);
// Determine quadrant occupancies
switch
(
op
)
{
case
GPC_DIFF
:
case
GPC_INT
:
tr
=
(
in
[
CLIP
])
&&
(
in
[
SUBJ
]);
tl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
])
&&
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]);
br
=
(
in
[
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
&&
(
in
[
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
bl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
&&
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
break
;
case
GPC_XOR
:
tr
=
(
in
[
CLIP
])
^
(
in
[
SUBJ
]);
tl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
])
^
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]);
br
=
(
in
[
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
^
(
in
[
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
bl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
^
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
break
;
case
GPC_UNION
:
tr
=
(
in
[
CLIP
])
||
(
in
[
SUBJ
]);
tl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
])
||
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]);
br
=
(
in
[
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
||
(
in
[
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
bl
=
(
in
[
CLIP
]
^
e1
->
bundle
[
ABOVE
][
CLIP
]
^
e0
->
bundle
[
ABOVE
][
CLIP
])
||
(
in
[
SUBJ
]
^
e1
->
bundle
[
ABOVE
][
SUBJ
]
^
e0
->
bundle
[
ABOVE
][
SUBJ
]);
break
;
}
vclass
=
tr
+
(
tl
<<
1
)
+
(
br
<<
2
)
+
(
bl
<<
3
);
switch
(
vclass
)
{
case
EMN
:
add_local_min
(
&
out_poly
,
e0
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
e0
->
outp
[
ABOVE
];
break
;
case
ERI
:
if
(
p
)
{
add_right
(
p
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
p
;
e0
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
ELI
:
if
(
q
)
{
add_left
(
q
,
ix
,
iy
);
e0
->
outp
[
ABOVE
]
=
q
;
e1
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
EMX
:
if
(
p
&&
q
)
{
add_left
(
p
,
ix
,
iy
);
merge_right
(
p
,
q
,
out_poly
);
e0
->
outp
[
ABOVE
]
=
NULL
;
e1
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
IMN
:
add_local_min
(
&
out_poly
,
e0
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
e0
->
outp
[
ABOVE
];
break
;
case
ILI
:
if
(
p
)
{
add_left
(
p
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
p
;
e0
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
IRI
:
if
(
q
)
{
add_right
(
q
,
ix
,
iy
);
e0
->
outp
[
ABOVE
]
=
q
;
e1
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
IMX
:
if
(
p
&&
q
)
{
add_right
(
p
,
ix
,
iy
);
merge_left
(
p
,
q
,
out_poly
);
e0
->
outp
[
ABOVE
]
=
NULL
;
e1
->
outp
[
ABOVE
]
=
NULL
;
}
break
;
case
IMM
:
if
(
p
&&
q
)
{
add_right
(
p
,
ix
,
iy
);
merge_left
(
p
,
q
,
out_poly
);
add_local_min
(
&
out_poly
,
e0
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
e0
->
outp
[
ABOVE
];
}
break
;
case
EMM
:
if
(
p
&&
q
)
{
add_left
(
p
,
ix
,
iy
);
merge_right
(
p
,
q
,
out_poly
);
add_local_min
(
&
out_poly
,
e0
,
ix
,
iy
);
e1
->
outp
[
ABOVE
]
=
e0
->
outp
[
ABOVE
];
}
break
;
default:
break
;
}
// End of switch
}
/* End of contributing intersection conditional */
/* Swap bundle sides in response to edge crossing */
if
(
e0
->
bundle
[
ABOVE
][
CLIP
])
{
e1
->
bside
[
CLIP
]
=
!
e1
->
bside
[
CLIP
];
}
if
(
e1
->
bundle
[
ABOVE
][
CLIP
])
{
e0
->
bside
[
CLIP
]
=
!
e0
->
bside
[
CLIP
];
}
if
(
e0
->
bundle
[
ABOVE
][
SUBJ
])
{
e1
->
bside
[
SUBJ
]
=
!
e1
->
bside
[
SUBJ
];
}
if
(
e1
->
bundle
[
ABOVE
][
SUBJ
])
{
e0
->
bside
[
SUBJ
]
=
!
e0
->
bside
[
SUBJ
];
}
/* Swap e0 and e1 bundles in the AET */
prev_edge
=
e0
->
prev
;
next_edge
=
e1
->
next
;
if
(
next_edge
)
{
next_edge
->
prev
=
e0
;
}
if
(
e0
->
bstate
[
ABOVE
]
==
BUNDLE_HEAD
)
{
search
=
1
;
while
(
search
)
{
prev_edge
=
prev_edge
->
prev
;
if
(
prev_edge
)
{
if
(
prev_edge
->
bstate
[
ABOVE
]
!=
BUNDLE_TAIL
)
{
search
=
0
;
}
}
else
{
search
=
0
;
}
}
}
if
(
!
prev_edge
)
{
aet
->
prev
=
e1
;
e1
->
next
=
aet
;
aet
=
e0
->
next
;
}
else
{
prev_edge
->
next
->
prev
=
e1
;
e1
->
next
=
prev_edge
->
next
;
prev_edge
->
next
=
e0
->
next
;
}
e0
->
next
->
prev
=
prev_edge
;
e1
->
next
->
prev
=
e1
;
e0
->
next
=
next_edge
;
}
/* End of IT loop*/
// Prepare for next scanbeam
for
(
edge
=
aet
;
edge
;
edge
=
next_edge
)
{
next_edge
=
edge
->
next
;
succ_edge
=
edge
->
succ
;
if
((
edge
->
top
.
y
==
yt
)
&&
succ_edge
)
{
/* Replace AET edge by its successor */
succ_edge
->
outp
[
BELOW
]
=
edge
->
outp
[
ABOVE
];
succ_edge
->
bstate
[
BELOW
]
=
edge
->
bstate
[
ABOVE
];
succ_edge
->
bundle
[
BELOW
][
CLIP
]
=
edge
->
bundle
[
ABOVE
][
CLIP
];
succ_edge
->
bundle
[
BELOW
][
SUBJ
]
=
edge
->
bundle
[
ABOVE
][
SUBJ
];
prev_edge
=
edge
->
prev
;
if
(
prev_edge
)
{
prev_edge
->
next
=
succ_edge
;
}
else
{
aet
=
succ_edge
;
}
if
(
next_edge
)
{
next_edge
->
prev
=
succ_edge
;
}
succ_edge
->
prev
=
prev_edge
;
succ_edge
->
next
=
next_edge
;
}
else
{
/* Update this edge */
edge
->
outp
[
BELOW
]
=
edge
->
outp
[
ABOVE
];
edge
->
bstate
[
BELOW
]
=
edge
->
bstate
[
ABOVE
];
edge
->
bundle
[
BELOW
][
CLIP
]
=
edge
->
bundle
[
ABOVE
][
CLIP
];
edge
->
bundle
[
BELOW
][
SUBJ
]
=
edge
->
bundle
[
ABOVE
][
SUBJ
];
edge
->
xb
=
edge
->
xt
;
}
edge
->
outp
[
ABOVE
]
=
NULL
;
}
}
}
/* === END OF SCANBEAM PROCESSING ================================== */
// Generate result polygon from out_poly
result
->
contour
=
NULL
;
result
->
hole
=
NULL
;
result
->
num_contours
=
count_contours
(
out_poly
);
if
(
result
->
num_contours
>
0
)
{
gpc_malloc
<
int
>
(
result
->
hole
,
result
->
num_contours
*
sizeof
(
int
),
const_cast
<
char
*>
(
"hole flag table creation"
));
gpc_malloc
<
gpc_vertex_list
>
(
result
->
contour
,
result
->
num_contours
*
sizeof
(
gpc_vertex_list
),
const_cast
<
char
*>
(
"contour creation"
));
c
=
0
;
for
(
poly
=
out_poly
;
poly
;
poly
=
npoly
)
{
npoly
=
poly
->
next
;
if
(
poly
->
active
)
{
result
->
hole
[
c
]
=
poly
->
proxy
->
hole
;
result
->
contour
[
c
].
num_vertices
=
poly
->
active
;
gpc_malloc
<
gpc_vertex
>
(
result
->
contour
[
c
].
vertex
,
result
->
contour
[
c
].
num_vertices
*
sizeof
(
gpc_vertex
),
const_cast
<
char
*>
(
"vertex creation"
));
v
=
result
->
contour
[
c
].
num_vertices
-
1
;
for
(
vtx
=
poly
->
proxy
->
v
[
LEFT
];
vtx
;
vtx
=
nv
)
{
nv
=
vtx
->
next
;
result
->
contour
[
c
].
vertex
[
v
].
x
=
vtx
->
x
;
result
->
contour
[
c
].
vertex
[
v
].
y
=
vtx
->
y
;
gpc_free
<
vertex_node
>
(
vtx
);
v
--
;
}
c
++
;
}
gpc_free
<
polygon_node
>
(
poly
);
}
}
else
{
for
(
poly
=
out_poly
;
poly
;
poly
=
npoly
)
{
npoly
=
poly
->
next
;
gpc_free
<
polygon_node
>
(
poly
);
}
}
// Tidy up
reset_it
(
&
it
);
reset_lmt
(
&
lmt
);
gpc_free
<
edge_node
>
(
c_heap
);
gpc_free
<
edge_node
>
(
s_heap
);
gpc_free
<
double
>
(
sbt
);
}
// NOLINT
/*
 * Release the storage held by a tristrip set.
 *
 * Each strip's vertex array is handed to gpc_free first, then the strip
 * array itself, and finally the strip count is reset to zero.
 *
 * t  Tristrip set to release; it is dereferenced unconditionally, so it
 *    must not be NULL.
 */
void gpc_free_tristrip(gpc_tristrip *t) {
  // Vertex arrays must go before the strip array that points at them.
  for (int idx = 0; idx < t->num_strips; ++idx) {
    gpc_vertex_list *strip = &t->strip[idx];
    gpc_free<gpc_vertex>(strip->vertex);
  }

  gpc_free<gpc_vertex_list>(t->strip);
  t->num_strips = 0;
}
/*
 * Convert polygon `s` into the tristrip set `t`.
 *
 * Implemented as a GPC_DIFF tristrip clip of `s` against a clip polygon
 * that has no contours.
 *
 * s  Source polygon (passed as the subject of the clip).
 * t  Receives the resulting tristrips.
 */
void gpc_polygon_to_tristrip(gpc_polygon *s, gpc_tristrip *t) {
  // Field order is {num_contours, hole, contour}: an empty clip polygon.
  gpc_polygon empty_clip = {0, NULL, NULL};

  gpc_tristrip_clip(GPC_DIFF, s, &empty_clip, t);
}
// gpc_tristrip_clip
/*
 * Clip `subj` against `clip` using boolean operation `op` and store the
 * outcome in `result` as a set of triangle strips.
 *
 * Outline of the work done below:
 *   1. Return an empty result for the trivially empty input combinations.
 *   2. Build the local minima table (LMT) and the scanbeam table (SBT).
 *   3. For every scanbeam, process the edges of the active edge table (AET)
 *      at the lower boundary `yb`, then process the edge intersections that
 *      fall inside the scanbeam.
 *   4. Copy every strip with more than two vertices from the working list
 *      `tlist` into `result`, freeing the working nodes on the way.
 *
 * op      Boolean operation to perform.
 * subj    Subject polygon. Passed non-const to minimax_test / build_lmt.
 * clip    Clip polygon. Passed non-const to minimax_test / build_lmt.
 * result  Overwritten: `num_strips` and `strip` are always assigned. The
 *         arrays are obtained through gpc_malloc; gpc_free_tristrip is the
 *         matching release routine.
 */
void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
                       gpc_tristrip *result) {
  sb_tree *sbtree = NULL;
  it_node *it = NULL;
  it_node *intersect = NULL;
  edge_node *edge = NULL;
  edge_node *prev_edge = NULL;
  edge_node *next_edge = NULL;
  edge_node *succ_edge = NULL;
  edge_node *e0 = NULL;
  edge_node *e1 = NULL;
  edge_node *aet = NULL;
  edge_node *c_heap = NULL;
  edge_node *s_heap = NULL;
  edge_node *cf = NULL;
  lmt_node *lmt = NULL;
  lmt_node *local_min = NULL;
  polygon_node *tlist = NULL;
  polygon_node *tn = NULL;
  polygon_node *tnn = NULL;
  polygon_node *p = NULL;
  polygon_node *q = NULL;
  vertex_node *lt = NULL;
  vertex_node *ltn = NULL;
  vertex_node *rt = NULL;
  vertex_node *rtn = NULL;
  h_state horiz[2];
  vertex_type cft = NUL;
  int in[2];
  int exists[2];
  int parity[2] = {LEFT, LEFT};
  int s = 0;
  int v = 0;
  int contributing = 0;
  int search = 0;
  int scanbeam = 0;
  int sbt_entries = 0;
  int vclass = 0;
  int bl = 0;
  int br = 0;
  int tl = 0;
  int tr = 0;
  double *sbt = NULL;
  double xb = 0.0;
  // NOTE(review): px is only ever assigned -DBL_MAX and nx is never assigned
  // anywhere in this function, yet both are used as x coordinates in the
  // scanbeam interior processing below. Confirm whether gpc_p_edge /
  // gpc_n_edge are expected to update them (they are not passed to either).
  double px = 0.0;
  double nx = 0.0;
  double yb = 0.0;
  double yt = 0.0;
  double dy = 0.0;
  double ix = 0.0;
  double iy = 0.0;

  /* Test for trivial NULL result cases */
  if (((subj->num_contours == 0) && (clip->num_contours == 0)) ||
      ((subj->num_contours == 0) && ((op == GPC_INT) || (op == GPC_DIFF))) ||
      ((clip->num_contours == 0) && (op == GPC_INT))) {
    result->num_strips = 0;
    result->strip = NULL;
    return;
  }

  /* Identify potentially contributing contours */
  if (((op == GPC_INT) || (op == GPC_DIFF)) && (subj->num_contours > 0) &&
      (clip->num_contours > 0)) {
    minimax_test(subj, clip, op);
  }

  /* Build LMT */
  if (subj->num_contours > 0) {
    s_heap = build_lmt(&lmt, &sbtree, &sbt_entries, subj, SUBJ, op);
  }
  if (clip->num_contours > 0) {
    c_heap = build_lmt(&lmt, &sbtree, &sbt_entries, clip, CLIP, op);
  }

  /* Return a NULL result if no contours contribute */
  if (lmt == NULL) {
    result->num_strips = 0;
    result->strip = NULL;
    reset_lmt(&lmt);
    gpc_free<edge_node>(s_heap);
    gpc_free<edge_node>(c_heap);
    return;
  }

  /* Build scanbeam table from scanbeam tree */
  gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
                     const_cast<char *>("sbt creation"));
  build_sbt(&scanbeam, sbt, sbtree);
  // build_sbt received &scanbeam; reset it so the loop starts at entry 0.
  scanbeam = 0;
  free_sbtree(&sbtree);

  /* Invert clip polygon for difference operation */
  if (op == GPC_DIFF) {
    parity[CLIP] = RIGHT;
  }
  local_min = lmt;

  // Process each scanbeam
  while (scanbeam < sbt_entries) {
    /* Set yb and yt to the bottom and top of the scanbeam */
    yb = sbt[scanbeam++];
    if (scanbeam < sbt_entries) {
      yt = sbt[scanbeam];
      dy = yt - yb;
    }

    /* === SCANBEAM BOUNDARY PROCESSING ================================ */
    /* If LMT node corresponding to yb exists */
    if (local_min) {
      if (local_min->y == yb) {
        /* Add edges starting at this local minimum to the AET */
        for (edge = local_min->first_bound; edge; edge = edge->next_bound) {
          add_edge_to_aet(&aet, edge, NULL);
        }
        local_min = local_min->next;
      }
    }

    /* Set dummy previous x value */
    px = -DBL_MAX;

    /* Create bundles within AET */
    e0 = aet;
    e1 = aet;

    /* Set up bundle fields of first edge */
    // NOTE(review): aet is dereferenced without a NULL check here; this
    // assumes the AET is never empty at a scanbeam boundary -- confirm.
    aet->bundle[ABOVE][aet->type] = (aet->top.y != yb);
    aet->bundle[ABOVE][!aet->type] = 0;
    aet->bstate[ABOVE] = UNBUNDLED;

    for (next_edge = aet->next; next_edge; next_edge = next_edge->next) {
      /* Set up bundle fields of next edge */
      next_edge->bundle[ABOVE][next_edge->type] = (next_edge->top.y != yb);
      next_edge->bundle[ABOVE][!next_edge->type] = 0;
      next_edge->bstate[ABOVE] = UNBUNDLED;

      /* Bundle edges above the scanbeam boundary if they coincide */
      if (next_edge->bundle[ABOVE][next_edge->type]) {
        if (gpc_eq(e0->xb, next_edge->xb) && gpc_eq(e0->dx, next_edge->dx) &&
            (e0->top.y != yb)) {
          next_edge->bundle[ABOVE][next_edge->type] ^=
              e0->bundle[ABOVE][next_edge->type];
          next_edge->bundle[ABOVE][!next_edge->type] =
              e0->bundle[ABOVE][!next_edge->type];
          next_edge->bstate[ABOVE] = BUNDLE_HEAD;
          e0->bundle[ABOVE][CLIP] = 0;
          e0->bundle[ABOVE][SUBJ] = 0;
          e0->bstate[ABOVE] = BUNDLE_TAIL;
        }
        e0 = next_edge;
      }
    }
    horiz[CLIP] = NH;
    horiz[SUBJ] = NH;

    /* Process each edge at this scanbeam boundary */
    for (edge = aet; edge; edge = edge->next) {
      // Bit 0: bundle present above the boundary; bit 1: present below it.
      exists[CLIP] =
          edge->bundle[ABOVE][CLIP] + (edge->bundle[BELOW][CLIP] << 1);
      exists[SUBJ] =
          edge->bundle[ABOVE][SUBJ] + (edge->bundle[BELOW][SUBJ] << 1);

      if (exists[CLIP] || exists[SUBJ]) {
        /* Set bundle side */
        edge->bside[CLIP] = parity[CLIP];
        edge->bside[SUBJ] = parity[SUBJ];

        /* Determine contributing status and quadrant occupancies */
        switch (op) {
          case GPC_DIFF:
          case GPC_INT:
            contributing = (exists[CLIP] && (parity[SUBJ] || horiz[SUBJ])) ||
                           (exists[SUBJ] && (parity[CLIP] || horiz[CLIP])) ||
                           (exists[CLIP] && exists[SUBJ] &&
                            (parity[CLIP] == parity[SUBJ]));
            br = (parity[CLIP]) && (parity[SUBJ]);
            bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) &&
                 (parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
            tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) &&
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH));
            tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
                  edge->bundle[BELOW][CLIP]) &&
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
                  edge->bundle[BELOW][SUBJ]);
            break;
          case GPC_XOR:
            contributing = exists[CLIP] || exists[SUBJ];
            br = (parity[CLIP]) ^ (parity[SUBJ]);
            bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ^
                 (parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
            tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ^
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH));
            tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
                  edge->bundle[BELOW][CLIP]) ^
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
                  edge->bundle[BELOW][SUBJ]);
            break;
          case GPC_UNION:
            contributing = (exists[CLIP] && (!parity[SUBJ] || horiz[SUBJ])) ||
                           (exists[SUBJ] && (!parity[CLIP] || horiz[CLIP])) ||
                           (exists[CLIP] && exists[SUBJ] &&
                            (parity[CLIP] == parity[SUBJ]));
            br = (parity[CLIP]) || (parity[SUBJ]);
            bl = (parity[CLIP] ^ edge->bundle[ABOVE][CLIP]) ||
                 (parity[SUBJ] ^ edge->bundle[ABOVE][SUBJ]);
            tr = (parity[CLIP] ^ (horiz[CLIP] != NH)) ||
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH));
            tl = (parity[CLIP] ^ (horiz[CLIP] != NH) ^
                  edge->bundle[BELOW][CLIP]) ||
                 (parity[SUBJ] ^ (horiz[SUBJ] != NH) ^
                  edge->bundle[BELOW][SUBJ]);
            break;
        }

        // Update parity
        parity[CLIP] ^= edge->bundle[ABOVE][CLIP];
        parity[SUBJ] ^= edge->bundle[ABOVE][SUBJ];

        /* Update horizontal state */
        if (exists[CLIP]) {
          horiz[CLIP] = next_h_state[horiz[CLIP]]
                                    [((exists[CLIP] - 1) << 1) + parity[CLIP]];
        }
        if (exists[SUBJ]) {
          horiz[SUBJ] = next_h_state[horiz[SUBJ]]
                                    [((exists[SUBJ] - 1) << 1) + parity[SUBJ]];
        }

        // Pack the four quadrant flags into one vertex class code.
        vclass = tr + (tl << 1) + (br << 2) + (bl << 3);

        if (contributing) {
          xb = edge->xb;
          switch (vclass) {
            case EMN:
              new_tristrip(&tlist, edge, xb, yb);
              cf = edge;
              break;
            case ERI:
              edge->outp[ABOVE] = cf->outp[ABOVE];
              if (xb != cf->xb) {
                gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
              }
              cf = NULL;
              break;
            case ELI:
              gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
              edge->outp[ABOVE] = NULL;
              cf = edge;
              break;
            case EMX:
              if (xb != cf->xb) {
                gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
              }
              edge->outp[ABOVE] = NULL;
              cf = NULL;
              break;
            case IMN:
              if (cft == LED) {
                if (cf->bot.y != yb) {
                  gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
                }
                new_tristrip(&tlist, cf, cf->xb, yb);
              }
              edge->outp[ABOVE] = cf->outp[ABOVE];
              gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
              break;
            case ILI:
              new_tristrip(&tlist, edge, xb, yb);
              cf = edge;
              cft = ILI;
              break;
            case IRI:
              if (cft == LED) {
                if (cf->bot.y != yb) {
                  gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
                }
                new_tristrip(&tlist, cf, cf->xb, yb);
              }
              gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
              edge->outp[ABOVE] = NULL;
              break;
            case IMX:
              gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
              edge->outp[ABOVE] = NULL;
              cft = IMX;
              break;
            case IMM:
              gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
              edge->outp[ABOVE] = cf->outp[ABOVE];
              if (xb != cf->xb) {
                gpc_vertex_create(cf, ABOVE, RIGHT, xb, yb);
              }
              cf = edge;
              break;
            case EMM:
              gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
              edge->outp[ABOVE] = NULL;
              new_tristrip(&tlist, edge, xb, yb);
              cf = edge;
              break;
            case LED:
              if (edge->bot.y == yb) {
                gpc_vertex_create(edge, BELOW, LEFT, xb, yb);
              }
              edge->outp[ABOVE] = edge->outp[BELOW];
              cf = edge;
              cft = LED;
              break;
            case RED:
              edge->outp[ABOVE] = cf->outp[ABOVE];
              if (cft == LED) {
                if (cf->bot.y == yb) {
                  gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
                } else {
                  if (edge->bot.y == yb) {
                    gpc_vertex_create(cf, BELOW, LEFT, cf->xb, yb);
                    gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
                  }
                }
              } else {
                gpc_vertex_create(edge, BELOW, RIGHT, xb, yb);
                gpc_vertex_create(edge, ABOVE, RIGHT, xb, yb);
              }
              cf = NULL;
              break;
            default:
              break;
          }
          /* End of switch */
        }
        /* End of contributing conditional */
      }
      /* End of edge exists conditional */
    }
    // End of AET loop

    /* Delete terminating edges from the AET, otherwise compute xt */
    for (edge = aet; edge; edge = edge->next) {
      if (edge->top.y == yb) {
        prev_edge = edge->prev;
        next_edge = edge->next;
        if (prev_edge) {
          prev_edge->next = next_edge;
        } else {
          aet = next_edge;
        }
        if (next_edge) {
          next_edge->prev = prev_edge;
        }

        /* Copy bundle head state to the adjacent tail edge if required */
        if ((edge->bstate[BELOW] == BUNDLE_HEAD) && prev_edge) {
          if (prev_edge->bstate[BELOW] == BUNDLE_TAIL) {
            prev_edge->outp[BELOW] = edge->outp[BELOW];
            prev_edge->bstate[BELOW] = UNBUNDLED;
            if (prev_edge->prev) {
              if (prev_edge->prev->bstate[BELOW] == BUNDLE_TAIL) {
                prev_edge->bstate[BELOW] = BUNDLE_HEAD;
              }
            }
          }
        }
      } else {
        if (edge->top.y == yt) {
          edge->xt = edge->top.x;
        } else {
          edge->xt = edge->bot.x + edge->dx * (yt - edge->bot.y);
        }
      }
    }

    if (scanbeam < sbt_entries) {
      /* === SCANBEAM INTERIOR PROCESSING ============================== */
      build_intersection_table(&it, aet, dy);

      /* Process each node in the intersection table */
      for (intersect = it; intersect; intersect = intersect->next) {
        e0 = intersect->ie[0];
        e1 = intersect->ie[1];

        /* Only generate output for contributing intersections */
        if ((e0->bundle[ABOVE][CLIP] || e0->bundle[ABOVE][SUBJ]) &&
            (e1->bundle[ABOVE][CLIP] || e1->bundle[ABOVE][SUBJ])) {
          p = e0->outp[ABOVE];
          q = e1->outp[ABOVE];
          ix = intersect->point.x;
          // The stored intersection y is offset by yb to get the output y.
          iy = intersect->point.y + yb;

          in[CLIP] =
              (e0->bundle[ABOVE][CLIP] && !e0->bside[CLIP]) ||
              (e1->bundle[ABOVE][CLIP] && e1->bside[CLIP]) ||
              (!e0->bundle[ABOVE][CLIP] && !e1->bundle[ABOVE][CLIP] &&
               e0->bside[CLIP] && e1->bside[CLIP]);
          in[SUBJ] =
              (e0->bundle[ABOVE][SUBJ] && !e0->bside[SUBJ]) ||
              (e1->bundle[ABOVE][SUBJ] && e1->bside[SUBJ]) ||
              (!e0->bundle[ABOVE][SUBJ] && !e1->bundle[ABOVE][SUBJ] &&
               e0->bside[SUBJ] && e1->bside[SUBJ]);

          switch (op) {
            // Determine quadrant occupancies
            case GPC_DIFF:
            case GPC_INT:
              tr = (in[CLIP]) && (in[SUBJ]);
              tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) &&
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
              br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) &&
                   (in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
              bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
                    e0->bundle[ABOVE][CLIP]) &&
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
                    e0->bundle[ABOVE][SUBJ]);
              break;
            case GPC_XOR:
              tr = (in[CLIP]) ^ (in[SUBJ]);
              tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ^
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
              br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ^
                   (in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
              bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
                    e0->bundle[ABOVE][CLIP]) ^
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
                    e0->bundle[ABOVE][SUBJ]);
              break;
            case GPC_UNION:
              tr = (in[CLIP]) || (in[SUBJ]);
              tl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP]) ||
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ]);
              br = (in[CLIP] ^ e0->bundle[ABOVE][CLIP]) ||
                   (in[SUBJ] ^ e0->bundle[ABOVE][SUBJ]);
              bl = (in[CLIP] ^ e1->bundle[ABOVE][CLIP] ^
                    e0->bundle[ABOVE][CLIP]) ||
                   (in[SUBJ] ^ e1->bundle[ABOVE][SUBJ] ^
                    e0->bundle[ABOVE][SUBJ]);
              break;
          }

          vclass = tr + (tl << 1) + (br << 2) + (bl << 3);

          // NOTE(review): the cases below rely on gpc_p_edge / gpc_n_edge
          // leaving prev_edge / next_edge pointing at the neighbouring
          // contributing edge, and on px / nx holding that edge's x at iy.
          // Neither px nor nx is written in this function -- verify the
          // helpers' signatures (by-reference first argument?) and fix there.
          switch (vclass) {
            case EMN:
              new_tristrip(&tlist, e1, ix, iy);
              e0->outp[ABOVE] = e1->outp[ABOVE];
              break;
            case ERI:
              if (p) {
                gpc_p_edge(prev_edge, e0, ABOVE);
                gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
                gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
                e1->outp[ABOVE] = e0->outp[ABOVE];
                e0->outp[ABOVE] = NULL;
              }
              break;
            case ELI:
              if (q) {
                gpc_n_edge(next_edge, e1, ABOVE);
                gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
                e0->outp[ABOVE] = e1->outp[ABOVE];
                e1->outp[ABOVE] = NULL;
              }
              break;
            case EMX:
              if (p && q) {
                gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
                e0->outp[ABOVE] = NULL;
                e1->outp[ABOVE] = NULL;
              }
              break;
            case IMN:
              gpc_p_edge(prev_edge, e0, ABOVE);
              gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
              gpc_n_edge(next_edge, e1, ABOVE);
              gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
              new_tristrip(&tlist, prev_edge, px, iy);
              e1->outp[ABOVE] = prev_edge->outp[ABOVE];
              gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
              new_tristrip(&tlist, e0, ix, iy);
              next_edge->outp[ABOVE] = e0->outp[ABOVE];
              gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
              break;
            case ILI:
              if (p) {
                gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
                gpc_n_edge(next_edge, e1, ABOVE);
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
                e1->outp[ABOVE] = e0->outp[ABOVE];
                e0->outp[ABOVE] = NULL;
              }
              break;
            case IRI:
              if (q) {
                gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
                gpc_p_edge(prev_edge, e0, ABOVE);
                gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
                e0->outp[ABOVE] = e1->outp[ABOVE];
                e1->outp[ABOVE] = NULL;
              }
              break;
            case IMX:
              if (p && q) {
                gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
                gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
                e0->outp[ABOVE] = NULL;
                e1->outp[ABOVE] = NULL;
                gpc_p_edge(prev_edge, e0, ABOVE);
                gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
                new_tristrip(&tlist, prev_edge, px, iy);
                gpc_n_edge(next_edge, e1, ABOVE);
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
                next_edge->outp[ABOVE] = prev_edge->outp[ABOVE];
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
              }
              break;
            case IMM:
              if (p && q) {
                gpc_vertex_create(e0, ABOVE, RIGHT, ix, iy);
                gpc_vertex_create(e1, ABOVE, LEFT, ix, iy);
                gpc_p_edge(prev_edge, e0, ABOVE);
                gpc_vertex_create(prev_edge, ABOVE, LEFT, px, iy);
                new_tristrip(&tlist, prev_edge, px, iy);
                gpc_n_edge(next_edge, e1, ABOVE);
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
                e1->outp[ABOVE] = prev_edge->outp[ABOVE];
                gpc_vertex_create(e1, ABOVE, RIGHT, ix, iy);
                new_tristrip(&tlist, e0, ix, iy);
                next_edge->outp[ABOVE] = e0->outp[ABOVE];
                gpc_vertex_create(next_edge, ABOVE, RIGHT, nx, iy);
              }
              break;
            case EMM:
              if (p && q) {
                gpc_vertex_create(e0, ABOVE, LEFT, ix, iy);
                new_tristrip(&tlist, e1, ix, iy);
                e0->outp[ABOVE] = e1->outp[ABOVE];
              }
              break;
            default:
              break;
          }
          /* End of switch */
        }
        /* End of contributing intersection conditional */

        // Swap bundle sides in response to edge crossing
        if (e0->bundle[ABOVE][CLIP]) {
          e1->bside[CLIP] = !e1->bside[CLIP];
        }
        if (e1->bundle[ABOVE][CLIP]) {
          e0->bside[CLIP] = !e0->bside[CLIP];
        }
        if (e0->bundle[ABOVE][SUBJ]) {
          e1->bside[SUBJ] = !e1->bside[SUBJ];
        }
        if (e1->bundle[ABOVE][SUBJ]) {
          e0->bside[SUBJ] = !e0->bside[SUBJ];
        }

        /* Swap e0 and e1 bundles in the AET */
        prev_edge = e0->prev;
        next_edge = e1->next;
        if (e1->next) {
          e1->next->prev = e0;
        }

        if (e0->bstate[ABOVE] == BUNDLE_HEAD) {
          // Walk left past the rest of e0's bundle: stop at the first edge
          // that carries a bundle or is itself a bundle head (or at the
          // start of the AET).
          search = 1;
          while (search) {
            prev_edge = prev_edge->prev;
            if (prev_edge) {
              if (prev_edge->bundle[ABOVE][CLIP] ||
                  prev_edge->bundle[ABOVE][SUBJ] ||
                  (prev_edge->bstate[ABOVE] == BUNDLE_HEAD)) {
                search = 0;
              }
            } else {
              search = 0;
            }
          }
        }
        if (!prev_edge) {
          e1->next = aet;
          aet = e0->next;
        } else {
          e1->next = prev_edge->next;
          prev_edge->next = e0->next;
        }
        e0->next->prev = prev_edge;
        e1->next->prev = e1;
        e0->next = next_edge;
      }
      /* End of IT loop*/

      /* Prepare for next scanbeam */
      for (edge = aet; edge; edge = next_edge) {
        // Cache the links first: the edge may be spliced out below.
        next_edge = edge->next;
        succ_edge = edge->succ;

        if ((edge->top.y == yt) && succ_edge) {
          /* Replace AET edge by its successor */
          succ_edge->outp[BELOW] = edge->outp[ABOVE];
          succ_edge->bstate[BELOW] = edge->bstate[ABOVE];
          succ_edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
          succ_edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
          prev_edge = edge->prev;
          if (prev_edge) {
            prev_edge->next = succ_edge;
          } else {
            aet = succ_edge;
          }
          if (next_edge) {
            next_edge->prev = succ_edge;
          }
          succ_edge->prev = prev_edge;
          succ_edge->next = next_edge;
        } else {
          /* Update this edge */
          edge->outp[BELOW] = edge->outp[ABOVE];
          edge->bstate[BELOW] = edge->bstate[ABOVE];
          edge->bundle[BELOW][CLIP] = edge->bundle[ABOVE][CLIP];
          edge->bundle[BELOW][SUBJ] = edge->bundle[ABOVE][SUBJ];
          edge->xb = edge->xt;
        }
        edge->outp[ABOVE] = NULL;
      }
    }
  }
  /* === END OF SCANBEAM PROCESSING ================================== */

  // Generate result tristrip from tlist
  result->strip = NULL;
  result->num_strips = count_tristrips(tlist);
  if (result->num_strips > 0) {
    gpc_malloc<gpc_vertex_list>(result->strip,
                                result->num_strips * sizeof(gpc_vertex_list),
                                const_cast<char *>("tristrip list creation"));

    s = 0;
    for (tn = tlist; tn; tn = tnn) {
      tnn = tn->next;
      if (tn->active > 2) {
        /* Valid tristrip: copy the vertices and free the heap */
        result->strip[s].num_vertices = tn->active;
        gpc_malloc<gpc_vertex>(result->strip[s].vertex,
                               tn->active * sizeof(gpc_vertex),
                               const_cast<char *>("tristrip creation"));
        v = 0;
        // Constant-false switch kept from the original: the first branch
        // would swap the left/right vertex lists (inverted strips).
        if (0) {
          lt = tn->v[RIGHT];
          rt = tn->v[LEFT];
        } else {
          lt = tn->v[LEFT];
          rt = tn->v[RIGHT];
        }
        // Interleave the two lists: one left vertex, then one right vertex.
        while (lt || rt) {
          if (lt) {
            ltn = lt->next;
            result->strip[s].vertex[v].x = lt->x;
            result->strip[s].vertex[v].y = lt->y;
            v++;
            gpc_free<vertex_node>(lt);
            lt = ltn;
          }
          if (rt) {
            rtn = rt->next;
            result->strip[s].vertex[v].x = rt->x;
            result->strip[s].vertex[v].y = rt->y;
            v++;
            gpc_free<vertex_node>(rt);
            rt = rtn;
          }
        }
        s++;
      } else {
        /* Invalid tristrip: just free the heap */
        for (lt = tn->v[LEFT]; lt; lt = ltn) {
          ltn = lt->next;
          gpc_free<vertex_node>(lt);
        }
        for (rt = tn->v[RIGHT]; rt; rt = rtn) {
          rtn = rt->next;
          gpc_free<vertex_node>(rt);
        }
      }
      gpc_free<polygon_node>(tn);
    }
  } else {
    /* No valid tristrip at all: the working list must still be released,
       otherwise every node and its vertex lists would leak. */
    for (tn = tlist; tn; tn = tnn) {
      tnn = tn->next;
      for (lt = tn->v[LEFT]; lt; lt = ltn) {
        ltn = lt->next;
        gpc_free<vertex_node>(lt);
      }
      for (rt = tn->v[RIGHT]; rt; rt = rtn) {
        rtn = rt->next;
        gpc_free<vertex_node>(rt);
      }
      gpc_free<polygon_node>(tn);
    }
  }

  // Tidy up
  reset_it(&it);
  reset_lmt(&lmt);
  gpc_free<edge_node>(c_heap);
  gpc_free<edge_node>(s_heap);
  gpc_free<double>(sbt);
}  // NOLINT
}
// namespace gpc
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
paddle/fluid/operators/detection/gpc.h
0 → 100644
浏览文件 @
049c9c7d
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/***************************************************************************
*
* Copyright (c) 2015 Baidu.com, Inc. All Rights Reserved
*
**************************************************************************/
/**
* @file include/gpc.h
* @author huhan02(com@baidu.com)
* @date 2015/12/18 13:52:10
* @brief
*
* @modified by sunyipeng
* @email sunyipeng@baidu.com
* @date 2018/6/12
**/
#ifndef PADDLE_FLUID_OPERATORS_DETECTION_GPC_H_ // GPC_H_
#define PADDLE_FLUID_OPERATORS_DETECTION_GPC_H_ // GPC_H_
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
namespace
gpc
{
// Set operation requested from the clipper.
// The explicit values equal the implicit numbering of the declaration order.
typedef enum {
  GPC_DIFF = 0,  // Difference
  GPC_INT = 1,   // Intersection
  GPC_XOR = 2,   // Exclusive or
  GPC_UNION = 3  // Union
} gpc_op;
typedef
struct
{
// Polygon vertex structure
double
x
;
// Vertex x component
double
y
;
// vertex y component
}
gpc_vertex
;
// Vertex list structure: a counted array of vertices. It is the element type
// of both gpc_polygon::contour and gpc_tristrip::strip.
typedef struct {
  int num_vertices;    // Number of vertices in list
  gpc_vertex *vertex;  // Vertex array pointer (heap array of num_vertices)
} gpc_vertex_list;
// Polygon set structure: a collection of contours plus one flag per contour.
typedef struct {
  int num_contours;  // Number of contours in polygon
  int *hole;         // Hole / external contour flags, one per contour
                     // (0 is written for an ordinary outer contour;
                     // presumably non-zero marks a hole -- TODO confirm)
  gpc_vertex_list *contour;  // Contour array pointer
} gpc_polygon;
// Tristrip set structure: a counted array of triangle strips, each stored as
// a vertex list.
typedef struct {
  int num_strips;          // Number of tristrips
  gpc_vertex_list *strip;  // Tristrip array pointer
} gpc_tristrip;
// Side selector; used as an index into two-element arrays such as
// polygon_node::v.
typedef enum {
  LEFT = 0,
  RIGHT = 1
} gpc_left_right;
// Vertical position selector; used as an index into two-element arrays such
// as edge_node::outp.
typedef enum {
  ABOVE = 0,
  BELOW = 1
} gpc_above_below;
// Distinguishes the clip polygon from the subject polygon.
typedef enum {
  CLIP = 0,
  SUBJ = 1
} gpc_clip_subj;
// Edge intersection classes.
// The explicit values equal the implicit numbering of the declaration order
// (0..15) and must not be changed.
typedef enum {
  NUL = 0,   // Empty non-intersection
  EMX = 1,   // External maximum
  ELI = 2,   // External left intermediate
  TED = 3,   // Top edge
  ERI = 4,   // External right intermediate
  RED = 5,   // Right edge
  IMM = 6,   // Internal maximum and minimum
  IMN = 7,   // Internal minimum
  EMN = 8,   // External minimum
  EMM = 9,   // External maximum and minimum
  LED = 10,  // Left edge
  ILI = 11,  // Internal left intermediate
  BED = 12,  // Bottom edge
  IRI = 13,  // Internal right intermediate
  IMX = 14,  // Internal maximum
  FUL = 15   // Full non-intersection
} vertex_type;
// Horizontal edge states.
typedef enum {
  NH = 0,  // No horizontal edge
  BH = 1,  // Bottom horizontal edge
  TH = 2   // Top horizontal edge
} h_state;
// Edge bundle state.
typedef enum {
  UNBUNDLED = 0,    // Isolated edge not within a bundle
  BUNDLE_HEAD = 1,  // Bundle head node
  BUNDLE_TAIL = 2   // Passive bundle tail node
} bundle_state;
typedef
struct
v_shape
{
/* Internal vertex list datatype */
double
x
;
/* X coordinate component */
double
y
;
/* Y coordinate component */
struct
v_shape
*
next
;
/* Pointer to next vertex in list */
}
vertex_node
;
// Internal contour / tristrip type: one output contour (or tristrip) under
// construction, linked into a list of such nodes.
typedef struct p_shape {
  int active;             // Active flag / vertex count
  int hole;               // Hole / external contour flag
  vertex_node *v[2];      // Left and right vertex list ptrs
                          // (indexed with LEFT / RIGHT)
  struct p_shape *next;   // Pointer to next polygon contour
  struct p_shape *proxy;  // Pointer to actual structure used
} polygon_node;
// Internal edge record used by the clipper. The same node is linked into
// several structures at once: the active edge table (prev / next), the
// polygon boundary it came from (pred / succ) and the local minima table
// (next_bound).
typedef struct edge_shape {
  gpc_vertex vertex;              // Piggy-backed contour vertex data
  gpc_vertex bot;                 // Edge lower (x, y) coordinate
  gpc_vertex top;                 // Edge upper (x, y) coordinate
  double xb;                      // Scanbeam bottom x coordinate
  double xt;                      // Scanbeam top x coordinate
  double dx;                      // Change in x for a unit y increase
  int type;                       // Clip / subject edge flag
  int bundle[2][2];               // Bundle edge flags
  int bside[2];                   // Bundle left / right indicators
  bundle_state bstate[2];         // Edge bundle state
  polygon_node *outp[2];          // Output polygon / tristrip pointer
                                  // (indexed with ABOVE / BELOW)
  struct edge_shape *prev;        // Previous edge in the AET
  struct edge_shape *next;        // Next edge in the AET
  struct edge_shape *pred;        // Edge connected at the lower end
  struct edge_shape *succ;        // Edge connected at the upper end
  struct edge_shape *next_bound;  // Pointer to next bound in LMT
} edge_node;
// Tolerant equality: returns true when a and b differ by at most 1e-6.
// NOTE(review): the parameters are float while the coordinates stored in
// gpc_vertex are double, so double arguments are narrowed before the
// comparison -- confirm this loss of precision is intended.
inline bool gpc_eq(float a, float b) {
  const bool within_tolerance = fabs(a - b) <= 1e-6;
  return within_tolerance;
}
// Floating-point overload of gpc_prev_index: returns true when a and b differ
// by at most 1e-6.
// NOTE(review): despite its name this overload performs a tolerant equality
// test, not an index computation; it looks like a mis-named copy of the
// equality helper. It is kept so that existing overload resolution is not
// disturbed -- confirm whether any caller relies on it.
inline bool gpc_prev_index(float a, float b) {
  const bool within_tolerance = fabs(a - b) <= 1e-6;
  return within_tolerance;
}
// Index of the element before i in a circular sequence of n elements
// (index 0 wraps round to n - 1).
inline int gpc_prev_index(int i, int n) {
  const int shifted = i - 1 + n;
  return shifted % n;
}
// Index of the element after i in a circular sequence of n elements
// (index n - 1 wraps round to 0).
inline int gpc_next_index(int i, int n) {
  const int successor = i + 1;
  return successor % n;
}
// Returns non-zero unless both circular neighbours of vertex i (in the
// n-element array v) have exactly the same y value as vertex i.
inline int gpc_optimal(gpc_vertex *v, int i, int n) {
  const int next = (i + 1) % n;
  const int prev = (i - 1 + n) % n;
  if (v[next].y != v[i].y) {
    return 1;
  }
  return v[prev].y != v[i].y;
}
// Returns non-zero when, in the circular n-element edge array v, the vertex
// after i lies strictly above vertex i and the vertex before i lies at or
// above it.
inline int gpc_fwd_min(edge_node *v, int i, int n) {
  const int next = (i + 1) % n;
  const int prev = (i - 1 + n) % n;
  if (!(v[next].vertex.y > v[i].vertex.y)) {
    return 0;
  }
  return v[prev].vertex.y >= v[i].vertex.y;
}
// Returns non-zero while the vertex after i (circularly, in the n-element
// edge array v) lies strictly above vertex i.
inline int gpc_not_fmax(edge_node *v, int i, int n) {
  const int next = (i + 1) % n;
  return v[next].vertex.y > v[i].vertex.y;
}
// Returns non-zero when, in the circular n-element edge array v, the vertex
// after i lies at or above vertex i and the vertex before i lies strictly
// above it.
inline int gpc_rev_min(edge_node *v, int i, int n) {
  const int next = (i + 1) % n;
  const int prev = (i - 1 + n) % n;
  if (!(v[next].vertex.y >= v[i].vertex.y)) {
    return 0;
  }
  return v[prev].vertex.y > v[i].vertex.y;
}
// Returns non-zero while the vertex before i (circularly, in the n-element
// edge array v) lies strictly above vertex i.
inline int gpc_not_rmax(edge_node *v, int i, int n) {
  const int prev = (i - 1 + n) % n;
  return v[prev].vertex.y > v[i].vertex.y;
}
// Finds the nearest edge before e (following the prev links) whose
// outp[p] pointer is set and stores it in d.
//
// d is an output parameter and is therefore taken by reference. It used to
// be passed by value, which meant the located edge never reached the caller
// and the call had no effect.
//
// The x position that the original five-argument form also produced is not
// computed here; a caller that needs it evaluates
//   d->bot.x + d->dx * (j - d->bot.y)
// itself after the call.
//
// Precondition: such an edge exists before e. The walk does not check for
// the end of the list, so d->prev is dereferenced unconditionally.
inline void gpc_p_edge(edge_node *&d, edge_node *e, int p) {
  d = e;
  do {
    d = d->prev;
  } while (!d->outp[p]);
}
// Finds the nearest edge after e (following the next links) whose
// outp[p] pointer is set and stores it in d.
//
// d is an output parameter and is therefore taken by reference. It used to
// be passed by value, which meant the located edge never reached the caller
// and the call had no effect.
//
// The x position that the original five-argument form also produced is not
// computed here; a caller that needs it evaluates
//   d->bot.x + d->dx * (j - d->bot.y)
// itself after the call.
//
// Precondition: such an edge exists after e. The walk does not check for
// the end of the list, so d->next is dereferenced unconditionally.
inline void gpc_n_edge(edge_node *&d, edge_node *e, int p) {
  d = e;
  do {
    d = d->next;
  } while (!d->outp[p]);
}
// Allocates b bytes with malloc and stores the result in p.
//
//   p  receives the new block, or NULL when b is not positive.
//   b  number of bytes requested.
//   s  short description of the allocation, used only in the failure message.
//
// On allocation failure a message is written to stderr and the process is
// terminated with EXIT_FAILURE (it previously exited with status 0, which
// reported success to the parent process).
//
// The message parameter is const so that string literals can be passed
// directly; existing callers that pass a char* still compile unchanged.
// Blocks obtained here must be released with free() / gpc_free().
template <typename T>
void gpc_malloc(T *&p, int b, const char *s) {
  if (b <= 0) {
    p = NULL;
    return;
  }
  p = static_cast<T *>(malloc(b));
  if (!p) {
    fprintf(stderr, "gpc malloc failure: %s\n", s);
    exit(EXIT_FAILURE);
  }
}
// Releases a block obtained from malloc / gpc_malloc and resets the caller's
// pointer to NULL. A NULL pointer is accepted and left untouched.
template <typename T>
void gpc_free(T *&p) {
  if (!p) {
    return;
  }
  free(p);
  p = NULL;
}
/*
===========================================================================
Public Function Prototypes
===========================================================================
*/
// The functions below are only declared in this header; their definitions
// are not part of it, so the notes describe the signatures rather than the
// implementations.

// Takes a vertex list head (by address) and an (x, y) position.
// presumably appends the position to the list -- TODO confirm in gpc.cc
void add_vertex(vertex_node **t, double x, double y);

// Takes an edge, two int selectors and an (x, y) position.
// NOTE(review): p and s look like an ABOVE/BELOW and a LEFT/RIGHT selector
// respectively -- confirm against the definition.
void gpc_vertex_create(edge_node *e, int p, int s, double x, double y);

/*
void gpc_read_polygon(FILE *infile_ptr, int read_hole_flags,
gpc_polygon *polygon);
void gpc_write_polygon(FILE *outfile_ptr, int write_hole_flags,
gpc_polygon *polygon);
*/

// Takes a polygon, a contour and a hole flag for that contour.
// presumably adds the contour to the polygon -- TODO confirm
void gpc_add_contour(gpc_polygon *polygon, gpc_vertex_list *contour, int hole);

// Applies set_operation to subject_polygon and clip_polygon and writes the
// outcome to result_polygon. The caller releases the result with
// gpc_free_polygon().
void gpc_polygon_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
                      gpc_polygon *clip_polygon, gpc_polygon *result_polygon);

// Same inputs as gpc_polygon_clip(), but the outcome is written as a set of
// triangle strips.
void gpc_tristrip_clip(gpc_op set_operation, gpc_polygon *subject_polygon,
                       gpc_polygon *clip_polygon,
                       gpc_tristrip *result_tristrip);

// Takes a polygon and a tristrip to fill in.
void gpc_polygon_to_tristrip(gpc_polygon *polygon, gpc_tristrip *tristrip);

// Releases the storage owned by a polygon.
void gpc_free_polygon(gpc_polygon *polygon);

// Releases the storage owned by a tristrip set.
void gpc_free_tristrip(gpc_tristrip *tristrip);
}
// namespace gpc
#endif // PADDLE_FLUID_OPERATORS_DETECTION_GPC_H_
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
paddle/fluid/operators/detection/multiclass_nms_op.cc
浏览文件 @
049c9c7d
...
@@ -9,10 +9,11 @@ http://www.apache.org/licenses/LICENSE-2.0
...
@@ -9,10 +9,11 @@ http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/poly_util.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -20,9 +21,6 @@ namespace operators {
...
@@ -20,9 +21,6 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
constexpr
int64_t
kOutputDim
=
6
;
constexpr
int64_t
kBBoxSize
=
4
;
class
MultiClassNMSOp
:
public
framework
::
OperatorWithKernel
{
class
MultiClassNMSOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -42,10 +40,15 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
...
@@ -42,10 +40,15 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
"The rank of Input(BBoxes) must be 3."
);
"The rank of Input(BBoxes) must be 3."
);
PADDLE_ENFORCE_EQ
(
score_dims
.
size
(),
3
,
PADDLE_ENFORCE_EQ
(
score_dims
.
size
(),
3
,
"The rank of Input(Scores) must be 3."
);
"The rank of Input(Scores) must be 3."
);
PADDLE_ENFORCE_EQ
(
box_dims
[
2
],
4
,
PADDLE_ENFORCE
(
box_dims
[
2
]
==
4
||
box_dims
[
2
]
==
8
||
box_dims
[
2
]
==
16
||
"The 2nd dimension of Input(BBoxes) must be 4, "
box_dims
[
2
]
==
24
||
box_dims
[
2
]
==
32
,
"The 2nd dimension of Input(BBoxes) must be 4 or 8, "
"represents the layout of coordinate "
"represents the layout of coordinate "
"[xmin, ymin, xmax, ymax]"
);
"[xmin, ymin, xmax, ymax] or "
"4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or "
"8 points: [xi, yi] i= 1,2,...,8 or "
"12 points: [xi, yi] i= 1,2,...,12 or "
"16 points: [xi, yi] i= 1,2,...,16"
);
PADDLE_ENFORCE_EQ
(
box_dims
[
1
],
score_dims
[
2
],
PADDLE_ENFORCE_EQ
(
box_dims
[
1
],
score_dims
[
2
],
"The 1st dimensiong of Input(BBoxes) must be equal to "
"The 1st dimensiong of Input(BBoxes) must be equal to "
"3rd dimension of Input(Scores), which represents the "
"3rd dimension of Input(Scores), which represents the "
...
@@ -53,7 +56,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
...
@@ -53,7 +56,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
// Here the box_dims[0] is not the real dimension of output.
// Here the box_dims[0] is not the real dimension of output.
// It will be rewritten in the computing kernel.
// It will be rewritten in the computing kernel.
ctx
->
SetOutputDim
(
"Out"
,
{
box_dims
[
1
],
6
});
ctx
->
SetOutputDim
(
"Out"
,
{
box_dims
[
1
],
box_dims
[
2
]
+
2
});
}
}
protected:
protected:
...
@@ -128,6 +131,21 @@ static inline T JaccardOverlap(const T* box1, const T* box2,
...
@@ -128,6 +131,21 @@ static inline T JaccardOverlap(const T* box1, const T* box2,
}
}
}
}
// Intersection-over-union of two polygons given as flat coordinate arrays
// [x1, y1, x2, y2, ...] holding box_size values each.
// Returns 0 when either polygon area or the overlap area is exactly zero.
template <class T>
T PolyIoU(const T* box1, const T* box2, const size_t box_size,
          const bool normalized) {
  const T area1 = PolyArea<T>(box1, box_size, normalized);
  const T area2 = PolyArea<T>(box2, box_size, normalized);
  const T overlap = PolyOverlapArea<T>(box1, box2, box_size, normalized);
  const bool degenerate = (area1 == 0) || (area2 == 0) || (overlap == 0);
  if (degenerate) {
    // Nothing to compare: avoid a meaningless (or 0 / 0) ratio.
    return T(0.);
  }
  return overlap / (area1 + area2 - overlap);
}
template
<
typename
T
>
template
<
typename
T
>
class
MultiClassNMSKernel
:
public
framework
::
OpKernel
<
T
>
{
class
MultiClassNMSKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -137,6 +155,8 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -137,6 +155,8 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
// The total boxes for each instance.
// The total boxes for each instance.
int64_t
num_boxes
=
bbox
.
dims
()[
0
];
int64_t
num_boxes
=
bbox
.
dims
()[
0
];
// 4: [xmin ymin xmax ymax]
// 4: [xmin ymin xmax ymax]
// 8: [x1 y1 x2 y2 x3 y3 x4 y4]
// 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16
int64_t
box_size
=
bbox
.
dims
()[
1
];
int64_t
box_size
=
bbox
.
dims
()[
1
];
std
::
vector
<
T
>
scores_data
(
num_boxes
);
std
::
vector
<
T
>
scores_data
(
num_boxes
);
...
@@ -154,8 +174,19 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -154,8 +174,19 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
for
(
size_t
k
=
0
;
k
<
selected_indices
->
size
();
++
k
)
{
for
(
size_t
k
=
0
;
k
<
selected_indices
->
size
();
++
k
)
{
if
(
keep
)
{
if
(
keep
)
{
const
int
kept_idx
=
(
*
selected_indices
)[
k
];
const
int
kept_idx
=
(
*
selected_indices
)[
k
];
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
T
overlap
=
T
(
0.
);
// 4: [xmin ymin xmax ymax]
if
(
box_size
==
4
)
{
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
true
);
bbox_data
+
kept_idx
*
box_size
,
true
);
}
// 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
if
(
box_size
==
8
||
box_size
==
16
||
box_size
==
24
||
box_size
==
32
)
{
overlap
=
PolyIoU
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
box_size
,
true
);
}
keep
=
overlap
<=
adaptive_threshold
;
keep
=
overlap
<=
adaptive_threshold
;
}
else
{
}
else
{
break
;
break
;
...
@@ -228,7 +259,9 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -228,7 +259,9 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
void
MultiClassOutput
(
const
Tensor
&
scores
,
const
Tensor
&
bboxes
,
void
MultiClassOutput
(
const
Tensor
&
scores
,
const
Tensor
&
bboxes
,
const
std
::
map
<
int
,
std
::
vector
<
int
>>&
selected_indices
,
const
std
::
map
<
int
,
std
::
vector
<
int
>>&
selected_indices
,
Tensor
*
outs
)
const
{
Tensor
*
outs
)
const
{
int
predict_dim
=
scores
.
dims
()[
1
];
int64_t
predict_dim
=
scores
.
dims
()[
1
];
int64_t
box_size
=
bboxes
.
dims
()[
1
];
int64_t
out_dim
=
bboxes
.
dims
()[
1
]
+
2
;
auto
*
scores_data
=
scores
.
data
<
T
>
();
auto
*
scores_data
=
scores
.
data
<
T
>
();
auto
*
bboxes_data
=
bboxes
.
data
<
T
>
();
auto
*
bboxes_data
=
bboxes
.
data
<
T
>
();
auto
*
odata
=
outs
->
data
<
T
>
();
auto
*
odata
=
outs
->
data
<
T
>
();
...
@@ -240,11 +273,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -240,11 +273,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
const
std
::
vector
<
int
>&
indices
=
it
.
second
;
const
std
::
vector
<
int
>&
indices
=
it
.
second
;
for
(
size_t
j
=
0
;
j
<
indices
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
indices
.
size
();
++
j
)
{
int
idx
=
indices
[
j
];
int
idx
=
indices
[
j
];
const
T
*
bdata
=
bboxes_data
+
idx
*
kBBoxS
ize
;
const
T
*
bdata
=
bboxes_data
+
idx
*
box_s
ize
;
odata
[
count
*
kOutputD
im
]
=
label
;
// label
odata
[
count
*
out_d
im
]
=
label
;
// label
odata
[
count
*
kOutputD
im
+
1
]
=
sdata
[
idx
];
// score
odata
[
count
*
out_d
im
+
1
]
=
sdata
[
idx
];
// score
// xmin, ymin, xmax, ymax
// xmin, ymin, xmax, ymax
or multi-points coordinates
std
::
memcpy
(
odata
+
count
*
kOutputDim
+
2
,
bdata
,
4
*
sizeof
(
T
));
std
::
memcpy
(
odata
+
count
*
out_dim
+
2
,
bdata
,
box_size
*
sizeof
(
T
));
count
++
;
count
++
;
}
}
}
}
...
@@ -261,6 +294,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -261,6 +294,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t
class_num
=
score_dims
[
1
];
int64_t
class_num
=
score_dims
[
1
];
int64_t
predict_dim
=
score_dims
[
2
];
int64_t
predict_dim
=
score_dims
[
2
];
int64_t
box_dim
=
boxes
->
dims
()[
2
];
int64_t
box_dim
=
boxes
->
dims
()[
2
];
int64_t
out_dim
=
boxes
->
dims
()[
2
]
+
2
;
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>>>
all_indices
;
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>>>
all_indices
;
std
::
vector
<
size_t
>
batch_starts
=
{
0
};
std
::
vector
<
size_t
>
batch_starts
=
{
0
};
...
@@ -283,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -283,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
T
*
od
=
outs
->
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
T
*
od
=
outs
->
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
od
[
0
]
=
-
1
;
od
[
0
]
=
-
1
;
}
else
{
}
else
{
outs
->
mutable_data
<
T
>
({
num_kept
,
kOutputD
im
},
ctx
.
GetPlace
());
outs
->
mutable_data
<
T
>
({
num_kept
,
out_d
im
},
ctx
.
GetPlace
());
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
Tensor
ins_score
=
scores
->
Slice
(
i
,
i
+
1
);
ins_score
.
Resize
({
class_num
,
predict_dim
});
ins_score
.
Resize
({
class_num
,
predict_dim
});
...
@@ -311,10 +345,11 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -311,10 +345,11 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"BBoxes"
,
AddInput
(
"BBoxes"
,
"(Tensor) A 3-D Tensor with shape [N, M, 4] represents the "
"(Tensor) A 3-D Tensor with shape "
"[N, M, 4 or 8 16 24 32] represents the "
"predicted locations of M bounding bboxes, N is the batch size. "
"predicted locations of M bounding bboxes, N is the batch size. "
"Each bounding box has four coordinate values and the layout is "
"Each bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax]."
);
"[xmin, ymin, xmax, ymax]
, when box size equals to 4
."
);
AddInput
(
"Scores"
,
AddInput
(
"Scores"
,
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
"predicted confidence predictions. N is the batch size, C is the "
"predicted confidence predictions. N is the batch size, C is the "
...
@@ -351,8 +386,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -351,8 +386,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"detections. Each row has 6 values: "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax], No is the total "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"number of detections in this mini-batch. For each instance, "
"(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
"detections. Each row has 10 values: "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"total number of detections in this mini-batch."
"For each instance, "
"the offsets in first dimension are called LoD, the number of "
"the offsets in first dimension are called LoD, the number of "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected bbox."
);
"no detected bbox."
);
...
...
paddle/fluid/operators/detection/poly_util.cc
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef POLY_UTIL_CC_
#define POLY_UTIL_CC_
#include "paddle/fluid/operators/detection/poly_util.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
using
gpc
::
gpc_polygon_clip
;
using
gpc
::
gpc_free_polygon
;
template
<
class
T
>
void
Array2PointVec
(
const
T
*&
box
,
const
size_t
box_size
,
std
::
vector
<
Point_
<
T
>>&
vec
)
{
size_t
pts_num
=
box_size
/
2
;
vec
.
resize
(
pts_num
);
for
(
size_t
i
=
0
;
i
<
pts_num
;
i
++
)
{
vec
.
at
(
i
).
x
=
box
[
2
*
i
];
vec
.
at
(
i
).
y
=
box
[
2
*
i
+
1
];
}
}
template
<
class
T
>
void
Array2Poly
(
const
T
*&
box
,
const
size_t
box_size
,
gpc
::
gpc_polygon
&
poly
)
{
size_t
pts_num
=
box_size
/
2
;
poly
.
num_contours
=
1
;
poly
.
hole
=
(
int
*
)
malloc
(
sizeof
(
int
));
poly
.
hole
[
0
]
=
0
;
poly
.
contour
=
(
gpc
::
gpc_vertex_list
*
)
malloc
(
sizeof
(
gpc
::
gpc_vertex_list
));
poly
.
contour
->
num_vertices
=
pts_num
;
poly
.
contour
->
vertex
=
(
gpc
::
gpc_vertex
*
)
malloc
(
sizeof
(
gpc
::
gpc_vertex
)
*
pts_num
);
for
(
size_t
i
=
0
;
i
<
pts_num
;
++
i
)
{
poly
.
contour
->
vertex
[
i
].
x
=
box
[
2
*
i
];
poly
.
contour
->
vertex
[
i
].
y
=
box
[
2
*
i
+
1
];
}
}
// Builds a single-contour gpc polygon from a vector of points.
//
//   vec   source points, copied in order.
//   poly  overwritten; any storage it previously referenced is not freed.
//
// All storage is obtained through gpc::gpc_malloc, so an allocation failure
// is reported instead of being dereferenced as a null pointer. The point
// count is kept as size_t so the loop no longer compares a signed count with
// an unsigned index. The caller releases the polygon with
// gpc::gpc_free_polygon().
template <class T>
void PointVec2Poly(const std::vector<Point_<T>>& vec, gpc::gpc_polygon& poly) {
  const size_t pts_num = vec.size();
  poly.num_contours = 1;

  gpc::gpc_malloc<int>(poly.hole, sizeof(int),
                       const_cast<char*>("polygon hole flag creation"));
  poly.hole[0] = 0;  // the only contour is an ordinary (non-hole) contour

  gpc::gpc_malloc<gpc::gpc_vertex_list>(
      poly.contour, sizeof(gpc::gpc_vertex_list),
      const_cast<char*>("polygon contour creation"));
  poly.contour->num_vertices = static_cast<int>(pts_num);

  // With no points gpc_malloc leaves vertex as NULL and the loop is skipped.
  gpc::gpc_malloc<gpc::gpc_vertex>(
      poly.contour->vertex, pts_num * sizeof(gpc::gpc_vertex),
      const_cast<char*>("polygon vertex creation"));
  for (size_t i = 0; i < pts_num; ++i) {
    poly.contour->vertex[i].x = vec[i].x;
    poly.contour->vertex[i].y = vec[i].y;
  }
}
// Copies the vertices of one gpc contour into a vector of points.
// vec is resized to contour.num_vertices and overwritten.
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
                   std::vector<Point_<T>>& vec) {
  const int count = contour.num_vertices;
  vec.resize(count);
  for (int k = 0; k < count; ++k) {
    Point_<T>& dst = vec.at(k);
    dst.x = contour.vertex[k].x;
    dst.y = contour.vertex[k].y;
  }
}
// Area enclosed by a closed contour, computed by summing the cross products
// of consecutive vertices (the last vertex is paired with the first) and
// halving the absolute value. Fewer than three points yield 0.
template <class T>
T GetContourArea(std::vector<Point_<T>>& vec) {
  const size_t count = vec.size();
  if (count < 3) {
    return T(0.);
  }
  T twice_area = T(0.);
  for (size_t cur = 0; cur < count; ++cur) {
    const size_t nxt = (cur + 1) % count;
    twice_area += vec[cur].x * vec[nxt].y - vec[cur].y * vec[nxt].x;
  }
  return std::fabs(twice_area / 2.0);
}
template
<
class
T
>
T
PolyArea
(
const
T
*
box
,
const
size_t
box_size
,
const
bool
normalized
)
{
// If coordinate values are is invalid
// if area size <= 0, return 0.
std
::
vector
<
Point_
<
T
>>
vec
;
Array2PointVec
<
T
>
(
box
,
box_size
,
vec
);
return
GetContourArea
<
T
>
(
vec
);
}
template
<
class
T
>
T
PolyOverlapArea
(
const
T
*
box1
,
const
T
*
box2
,
const
size_t
box_size
,
const
bool
normalized
)
{
gpc
::
gpc_polygon
poly1
;
gpc
::
gpc_polygon
poly2
;
Array2Poly
<
T
>
(
box1
,
box_size
,
poly1
);
Array2Poly
<
T
>
(
box2
,
box_size
,
poly2
);
gpc
::
gpc_polygon
respoly
;
gpc
::
gpc_op
op
=
gpc
::
GPC_INT
;
gpc
::
gpc_polygon_clip
(
op
,
&
poly2
,
&
poly1
,
&
respoly
);
T
inter_area
=
T
(
0.
);
int
contour_num
=
respoly
.
num_contours
;
for
(
int
i
=
0
;
i
<
contour_num
;
++
i
)
{
std
::
vector
<
Point_
<
T
>>
resvec
;
Poly2PointVec
<
T
>
(
respoly
.
contour
[
i
],
resvec
);
// inter_area += std::fabs(cv::contourArea(resvec)) + 0.5f *
// (cv::arcLength(resvec, true));
inter_area
+=
GetContourArea
<
T
>
(
resvec
);
}
gpc
::
gpc_free_polygon
(
&
poly1
);
gpc
::
gpc_free_polygon
(
&
poly2
);
gpc
::
gpc_free_polygon
(
&
respoly
);
return
inter_area
;
}
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/detection/poly_util.h
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef POLY_UTIL_H_
#define POLY_UTIL_H_
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/gpc.h"
namespace
paddle
{
namespace
operators
{
// Minimal 2-D point with public x / y members.
//
// The (x, y) constructor and the copy constructor used to have empty bodies,
// so they silently discarded their arguments and left the members
// indeterminate; the copy assignment operator was declared but never
// defined. All special members now copy the coordinates, and the default
// constructor value-initialises them (0 for arithmetic T).
template <class T>
class Point_ {
 public:
  // Default constructor: both coordinates are value-initialised.
  Point_() : x(), y() {}
  // Constructs the point (_x, _y).
  Point_(T _x, T _y) : x(_x), y(_y) {}
  // Copies the coordinates of pt.
  Point_(const Point_& pt) : x(pt.x), y(pt.y) {}
  // Assigns the coordinates of pt and returns *this.
  Point_& operator=(const Point_& pt) {
    x = pt.x;
    y = pt.y;
    return *this;
  }

  T x;  //!< x coordinate of the point
  T y;  //!< y coordinate of the point
};
// Declarations of the polygon helpers. The definitions live in poly_util.cc,
// which this header includes at its end because they are templates.

// Converts a flat array [x0, y0, x1, y1, ...] of box_size values into
// box_size / 2 points; vec is resized and overwritten.
template <class T>
void Array2PointVec(const T*& box, const size_t box_size,
                    std::vector<Point_<T>>& vec);

// Builds a single-contour gpc polygon from a flat coordinate array. The
// polygon owns heap storage and is released with gpc::gpc_free_polygon().
template <class T>
void Array2Poly(const T*& box, const size_t box_size, gpc::gpc_polygon& poly);

// Builds a single-contour gpc polygon from a vector of points. The polygon
// owns heap storage and is released with gpc::gpc_free_polygon().
template <class T>
void PointVec2Poly(const std::vector<Point_<T>>& vec, gpc::gpc_polygon& poly);

// Copies the vertices of one gpc contour into vec (resized and overwritten).
template <class T>
void Poly2PointVec(const gpc::gpc_vertex_list& contour,
                   std::vector<Point_<T>>& vec);

// Area enclosed by the closed contour vec; 0 for fewer than three points.
template <class T>
T GetContourArea(std::vector<Point_<T>>& vec);

// Area of the polygon stored as a flat coordinate array of box_size values.
// normalized is not used by the current implementation.
template <class T>
T PolyArea(const T* box, const size_t box_size, const bool normalized);

// Area of the intersection of two polygons stored as flat coordinate arrays
// of box_size values each. normalized is not used by the current
// implementation.
template <class T>
T PolyOverlapArea(const T* box1, const T* box2, const size_t box_size,
                  const bool normalized);
}
// namespace operators
}
// namespace paddle
#include "paddle/fluid/operators/detection/poly_util.cc"
#endif // POLY_UTIL_H_
paddle/fluid/operators/detection/polygon_box_transform_op.cc
浏览文件 @
049c9c7d
...
@@ -41,9 +41,9 @@ class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
...
@@ -41,9 +41,9 @@ class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
for
(
int
id_w
=
0
;
id_w
<
width
;
++
id_w
)
{
for
(
int
id_w
=
0
;
id_w
<
width
;
++
id_w
)
{
id
=
id_n
*
height
*
width
+
width
*
id_h
+
id_w
;
id
=
id_n
*
height
*
width
+
width
*
id_h
+
id_w
;
if
(
id_n
%
2
==
0
)
{
if
(
id_n
%
2
==
0
)
{
out_data
[
id
]
=
id_w
-
in_data
[
id
];
out_data
[
id
]
=
id_w
*
4
-
in_data
[
id
];
}
else
{
}
else
{
out_data
[
id
]
=
id_h
-
in_data
[
id
];
out_data
[
id
]
=
id_h
*
4
-
in_data
[
id
];
}
}
}
}
}
}
...
...
paddle/fluid/operators/detection/polygon_box_transform_op.cu
浏览文件 @
049c9c7d
...
@@ -32,9 +32,9 @@ __global__ void PolygonBoxTransformKernel(const int n, const int h, const int w,
...
@@ -32,9 +32,9 @@ __global__ void PolygonBoxTransformKernel(const int n, const int h, const int w,
if
(
id_n
<
n
&&
id_h
<
h
&&
id_w
<
w
)
{
if
(
id_n
<
n
&&
id_h
<
h
&&
id_w
<
w
)
{
int
id
=
id_n
*
h
*
w
+
w
*
id_h
+
id_w
;
int
id
=
id_n
*
h
*
w
+
w
*
id_h
+
id_w
;
if
(
id_n
%
2
==
0
)
{
if
(
id_n
%
2
==
0
)
{
output
[
id
]
=
id_w
-
input
[
id
];
output
[
id
]
=
id_w
*
4
-
input
[
id
];
}
else
{
}
else
{
output
[
id
]
=
id_h
-
input
[
id
];
output
[
id
]
=
id_h
*
4
-
input
[
id
];
}
}
}
}
}
}
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
049c9c7d
...
@@ -86,7 +86,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
...
@@ -86,7 +86,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
// stub context
// stub context
s
->
response_call_back_
=
nullptr
;
s
->
response_call_back_
=
nullptr
;
platform
::
RecordEvent
record_event
(
method
,
p_ctx
);
platform
::
Record
RPC
Event
record_event
(
method
,
p_ctx
);
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/SendVariable"
,
req
,
&
cq_
);
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/SendVariable"
,
req
,
&
cq_
);
...
@@ -143,7 +143,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
...
@@ -143,7 +143,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
// stub context
// stub context
s
->
response_call_back_
=
ProcGetResponse
;
s
->
response_call_back_
=
ProcGetResponse
;
platform
::
RecordEvent
record_event
(
method
,
p_ctx
);
platform
::
Record
RPC
Event
record_event
(
method
,
p_ctx
);
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/GetVariable"
,
buf
,
&
cq_
);
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/GetVariable"
,
buf
,
&
cq_
);
...
@@ -191,7 +191,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
...
@@ -191,7 +191,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
// stub context
// stub context
s
->
response_call_back_
=
ProcGetResponse
;
s
->
response_call_back_
=
ProcGetResponse
;
platform
::
RecordEvent
record_event
(
method
,
p_ctx
);
platform
::
Record
RPC
Event
record_event
(
method
,
p_ctx
);
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
auto
call
=
s
->
stub_g_
.
PrepareUnaryCall
(
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/PrefetchVariable"
,
req
,
s
->
context_
.
get
(),
"/sendrecv.SendRecvService/PrefetchVariable"
,
req
,
...
@@ -221,7 +221,7 @@ VarHandlePtr GRPCClient::AsyncSendBatchBarrier(const std::string& ep,
...
@@ -221,7 +221,7 @@ VarHandlePtr GRPCClient::AsyncSendBatchBarrier(const std::string& ep,
sendrecv
::
VariableMessage
req
;
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
BATCH_BARRIER_MESSAGE
);
req
.
set_varname
(
BATCH_BARRIER_MESSAGE
);
platform
::
RecordEvent
record_event
(
method
,
nullptr
);
platform
::
Record
RPC
Event
record_event
(
method
,
nullptr
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
...
@@ -246,7 +246,7 @@ VarHandlePtr GRPCClient::AsyncSendFetchBarrier(const std::string& ep,
...
@@ -246,7 +246,7 @@ VarHandlePtr GRPCClient::AsyncSendFetchBarrier(const std::string& ep,
sendrecv
::
VariableMessage
req
;
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
FETCH_BARRIER_MESSAGE
);
req
.
set_varname
(
FETCH_BARRIER_MESSAGE
);
platform
::
RecordEvent
record_event
(
method
,
nullptr
);
platform
::
Record
RPC
Event
record_event
(
method
,
nullptr
);
auto
rpc
=
s
->
stub_
->
AsyncGetVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
auto
rpc
=
s
->
stub_
->
AsyncGetVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
...
@@ -271,7 +271,7 @@ VarHandlePtr GRPCClient::AsyncSendComplete(const std::string& ep,
...
@@ -271,7 +271,7 @@ VarHandlePtr GRPCClient::AsyncSendComplete(const std::string& ep,
sendrecv
::
VariableMessage
req
;
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
COMPLETE_MESSAGE
);
req
.
set_varname
(
COMPLETE_MESSAGE
);
platform
::
RecordEvent
record_event
(
method
,
nullptr
);
platform
::
Record
RPC
Event
record_event
(
method
,
nullptr
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
...
@@ -301,7 +301,7 @@ VarHandlePtr GRPCClient::AsyncCheckpointNotify(const std::string& ep,
...
@@ -301,7 +301,7 @@ VarHandlePtr GRPCClient::AsyncCheckpointNotify(const std::string& ep,
req
.
set_varname
(
CHECKPOINT_SAVE_MESSAGE
);
req
.
set_varname
(
CHECKPOINT_SAVE_MESSAGE
);
req
.
set_out_varname
(
dir
);
req
.
set_out_varname
(
dir
);
platform
::
RecordEvent
record_event
(
method
,
nullptr
);
platform
::
Record
RPC
Event
record_event
(
method
,
nullptr
);
auto
rpc
=
s
->
stub_
->
AsyncCheckpointNotify
(
s
->
context_
.
get
(),
req
,
&
cq_
);
auto
rpc
=
s
->
stub_
->
AsyncCheckpointNotify
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
...
...
paddle/fluid/operators/distributed/grpc_serde.cc
浏览文件 @
049c9c7d
...
@@ -36,7 +36,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
...
@@ -36,7 +36,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
const
platform
::
DeviceContext
&
ctx
,
const
platform
::
DeviceContext
&
ctx
,
::
grpc
::
ByteBuffer
*
msg
,
::
grpc
::
ByteBuffer
*
msg
,
const
std
::
string
&
out_name
)
{
const
std
::
string
&
out_name
)
{
platform
::
RecordEvent
record_event
(
"serial"
,
&
ctx
);
platform
::
Record
RPC
Event
record_event
(
"serial"
,
&
ctx
);
// Default DestroyCallback does nothing, When using GPU
// Default DestroyCallback does nothing, When using GPU
// the CPU buffer need to be freed.
// the CPU buffer need to be freed.
DestroyCallback
destroy_callback
=
[](
void
*
backing
)
{};
DestroyCallback
destroy_callback
=
[](
void
*
backing
)
{};
...
@@ -148,7 +148,7 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
...
@@ -148,7 +148,7 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
const
platform
::
DeviceContext
&
ctx
,
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Scope
*
scope
,
const
framework
::
Scope
*
scope
,
framework
::
Variable
**
var
)
{
framework
::
Variable
**
var
)
{
platform
::
RecordEvent
record_event
(
"deserial"
,
&
ctx
);
platform
::
Record
RPC
Event
record_event
(
"deserial"
,
&
ctx
);
operators
::
distributed
::
GRPCVariableResponse
resp
(
scope
,
&
ctx
);
operators
::
distributed
::
GRPCVariableResponse
resp
(
scope
,
&
ctx
);
PADDLE_ENFORCE
(
resp
.
Parse
(
msg
)
==
0
,
"parse bytebuffer to tensor error!"
);
PADDLE_ENFORCE
(
resp
.
Parse
(
msg
)
==
0
,
"parse bytebuffer to tensor error!"
);
*
var
=
resp
.
GetVar
();
*
var
=
resp
.
GetVar
();
...
...
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.cc
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.h"
#include <algorithm> // for min, max
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/fc_compute.h"
namespace
paddle
{
namespace
operators
{
// Checks that all required inputs/outputs exist, validates the attributes
// and the input ranks, then sets the output shapes:
//   Out    -> {x_dims[0], w_dims[1]}
//   ColMat -> {x_dims[0], w_dims[0]}
// Out shares the LoD of X.
void FusionSeqConvEltAddReluOp::InferShape(
    framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("X"),
                 "Input(X) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasInput("Filter"),
      "Input(Filter) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasInput("Bias"),
      "Input(Bias) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasOutput("Out"),
      "Output(Out) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasOutput("ColMat"),
      "Output(ColMat) of FusionSeqConvEltAddReluOp should not be null.");

  auto x_dims = ctx->GetInputDim("X");
  auto w_dims = ctx->GetInputDim("Filter");
  int context_length = ctx->Attrs().Get<int>("contextLength");
  PADDLE_ENFORCE(
      ctx->Attrs().Get<int>("contextStride") == 1,
      "Currently, FusionSeqConvEltAddReluOp only supports contextStride=1.");
  // Both X and Filter must be matrices; a single check covers both.
  PADDLE_ENFORCE(x_dims.size() == 2 && w_dims.size() == 2,
                 "Input(X, Filter) should be 2-D tensor.");
  // The filter stacks context_length copies of the input width.
  PADDLE_ENFORCE(w_dims[0] == context_length * x_dims[1],
                 "Filter's height should be context_length * "
                 "input_hidden_size .");
  // Requires contextStart > -contextLength.
  PADDLE_ENFORCE_GT(context_length + ctx->Attrs().Get<int>("contextStart"), 0,
                    "contextStart size should be smaller than contextLength.");

  ctx->SetOutputDim("Out", {x_dims[0], w_dims[1]});
  ctx->SetOutputDim("ColMat", {x_dims[0], w_dims[0]});
  ctx->ShareLoD("X", "Out");
}
// Selects the kernel from the element type of input X and the device context
// the op is executed with.
framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto* input_x = ctx.Input<framework::LoDTensor>("X");
  const auto data_type = framework::ToDataType(input_x->type());
  return framework::OpKernelType(data_type, ctx.device_context());
}
// Declares the inputs, outputs and attributes of the
// fusion_seqconv_eltadd_relu op together with their user-facing descriptions.
void FusionSeqConvEltAddReluOpMaker::Make() {
  AddInput("X",
           "(LoDTensor) the input is a LodTensor, which support "
           "variable-time length input sequence. The underlying tensor in "
           "this LoDTensor is a matrix with shape (T X M), where T is the "
           "total time steps in this mini-batch, M is the dim size of x.");
  // PaddingData only support false yet, should be ensured at pass.
  AddInput("Filter",
           "(Tensor) same as the input(Filter) of sequence conv op is an "
           "learnable parameter. "
           "This is a tensor with shape (K, N), where K is the "
           "context_length * dim size of x, N is the output feature size.");
  AddInput("Bias",
           "(Tensor) the learnable weights. shape (1, N), where N is the "
           "output feature size");
  AddOutput(
      "Out",
      "(LoDTensor) the output(Out) is a LodTensor, which support "
      "variable-time length output sequence. The underlying tensor in "
      "this LoDTensor is a matrix with shape (T, N), where, T is the "
      "total time steps in this mini-batch, N is the output feature size.");
  // ColMat is scratch space for the kernel, hence marked intermediate.
  AddOutput("ColMat",
            "(Tensor) (T, K), where T is the "
            "total time steps in this mini-batch, K is height of Filter")
      .AsIntermediate();
  AddAttr<int>("contextLength",
               "(int) the contextLength of FusionSeqConvEltAddReluOp is the "
               "height of the convolution kernel.")
      .GreaterThan(0);
  AddAttr<int>("contextStart",
               "(int, default:0) the contextStart of FusionSeqConvEltAddReluOp "
               "represents the beginning of the convolution of the number of "
               "rows of sequence, which can be negative. The negative number "
               "means to pad contextStart time-steps of zeros or learnable "
               "parameters at the beginning of each instance. The positive "
               "number means to skip contextStart time-steps of each "
               "instance.")
      .SetDefault(0);
  AddAttr<int>(
      "contextStride",
      "(int, default:1) the contextStride of FusionSeqConvEltAddReluOp "
      "represents the stride length of convolution kernel. "
      "Currently, FusionSeqConvEltAddReluOp only supports "
      "contextStride=1.")
      .SetDefault(1)
      .GreaterThan(0);
  AddComment(R"DOC(
Fusion Sequence Conv and ElementwiseAdd Operator.
)DOC");
}
// CPU kernel of fusion_seqconv_eltadd_relu.
//
// For every sequence described by the level-0 LoD of X it fills the
// corresponding rows of ColMat with shifted copies of the sequence rows
// (zero-filled where no source row is copied), then calls math::FCCompute
// with ColMat, Filter and Bias to produce Out.
template <typename T>
class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using DeviceContext = paddle::platform::CPUDeviceContext;
    auto* x = ctx.Input<LoDTensor>("X");
    auto* w = ctx.Input<Tensor>("Filter");
    auto* b = ctx.Input<Tensor>("Bias");
    auto* y = ctx.Output<LoDTensor>("Out");
    auto* col = ctx.Output<Tensor>("ColMat");

    auto x_lod = x->lod();
    auto x_dims = x->dims();
    auto w_dims = w->dims();
    PADDLE_ENFORCE_EQ(b->numel(), w_dims[1],
                      "bias size should be equal to output feature size.");
    PADDLE_ENFORCE_EQ(x_lod.size(), 1UL,
                      "Only support one level sequence now.");

    const T* x_data = x->data<T>();
    const T* w_data = w->data<T>();
    const T* b_data = b->data<T>();
    T* y_data = y->mutable_data<T>(ctx.GetPlace());
    T* col_data = col->mutable_data<T>(ctx.GetPlace());

    int context_start = ctx.Attr<int>("contextStart");
    int context_length = ctx.Attr<int>("contextLength");
    // Number of padded rows above / below; both are clamped to be >= 0.
    int up_pad = std::max(0, -context_start);
    int down_pad = std::max(0, context_start + context_length - 1);
    // im2col
    // Row widths in elements (…_w) and in bytes (…_w_sz) of X and ColMat.
    int src_mat_w = static_cast<int>(x_dims[1]);
    int src_mat_w_sz = src_mat_w * sizeof(T);
    int col_mat_w = static_cast<int>(w_dims[0]);
    int col_mat_w_sz = col_mat_w * sizeof(T);
    // Sequence i occupies rows [x_lod[0][i], x_lod[0][i + 1]).
    for (int i = 0; i < static_cast<int>(x_lod[0].size()) - 1; ++i) {
      int st = x_lod[0][i];
      int ed = x_lod[0][i + 1];
      // NOTE(review): src_data starts at the first row of the sequence and is
      // never offset by context_start — confirm behaviour for contextStart > 0.
      const T* src_data = x_data + st * src_mat_w;
      T* dst_data = col_data + st * col_mat_w;
      int seq_len = ed - st;
      if (seq_len > up_pad + down_pad) {
        // zero all up_pad and fill data
        std::memset(dst_data, 0, up_pad * col_mat_w_sz);
        // Skip the leading up_pad * src_mat_w elements of the first row; they
        // stay zero.
        dst_data = dst_data + up_pad * src_mat_w;
        int copy_size = col_mat_w_sz - up_pad * src_mat_w_sz;
        for (int j = 0; j < up_pad; ++j) {
          // blas.VCOPY?
          std::memcpy(dst_data, src_data, copy_size);
          // Next row starts one source-row earlier and copies one source-row
          // more.
          dst_data += (col_mat_w - src_mat_w);
          copy_size += src_mat_w_sz;
        }
        // fill data
        // Rows that need no padding: copy_size stays constant here while the
        // source advances one row per destination row.
        for (int j = 0; j < seq_len - up_pad - down_pad; ++j) {
          std::memcpy(dst_data, src_data, copy_size);
          dst_data += col_mat_w;
          src_data += src_mat_w;
        }
        // zero all down_pad and fill data
        std::memset(dst_data, 0, down_pad * col_mat_w_sz);
        copy_size -= src_mat_w_sz;
        // Each following row copies one source-row less; the tail stays zero.
        for (int j = 0; j < down_pad; ++j) {
          std::memcpy(dst_data, src_data, copy_size);
          dst_data += col_mat_w;
          src_data += src_mat_w;
          copy_size -= src_mat_w_sz;
        }
      } else {
        // Short sequence: seq_len <= up_pad + down_pad.
        PADDLE_ENFORCE_GE(context_length, up_pad + down_pad + 1);
        // Zero every ColMat row of this sequence before copying data in.
        std::memset(dst_data, 0, seq_len * col_mat_w_sz);
        dst_data = dst_data + up_pad * src_mat_w;
        int zero_sz = up_pad * src_mat_w_sz;
        int cur_src_sz = seq_len * src_mat_w_sz;
        // Fill from the top; each copy is capped at the size of the whole
        // sequence (cur_src_sz).
        for (int j = 0; j < std::min(up_pad, seq_len); ++j) {
          int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz);
          std::memcpy(dst_data, src_data, copy_size);
          dst_data += (col_mat_w - src_mat_w);
          zero_sz -= src_mat_w_sz;
        }
        // from bottom
        // dst_data now points one past the last ColMat row of this sequence;
        // rows are then written walking backwards.
        dst_data = col_data + ed * col_mat_w;
        src_data = x_data + st * src_mat_w;
        zero_sz = down_pad * src_mat_w_sz;
        for (int j = 1; j <= std::min(down_pad, seq_len); ++j) {
          int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz);
          // Destination: (zero_sz + copy_size) bytes before the end of the
          // current row, leaving the trailing zero_sz bytes as zeros.
          std::memcpy(dst_data - (zero_sz + copy_size) / sizeof(T),
                      src_data + std::max(seq_len - j - up_pad, 0) * src_mat_w,
                      copy_size);
          dst_data -= col_mat_w;
          zero_sz -= src_mat_w_sz;
        }
      }
    }
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
    // Out = FCCompute(ColMat, Filter, Bias) with sizes x_dims[0], w_dims[1],
    // w_dims[0]; the trailing `true` presumably enables the fused ReLU —
    // confirm against math::FCCompute.
    math::FCCompute<DeviceContext, T>(blas, x_dims[0], w_dims[1], w_dims[0],
                                      col_data, w_data, y_data, b_data, true);
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Registers the operator with its shape-inference class, its proto maker and
// the default grad-op-desc maker.
REGISTER_OPERATOR(fusion_seqconv_eltadd_relu, ops::FusionSeqConvEltAddReluOp,
                  ops::FusionSeqConvEltAddReluOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);

// Registers the CPU kernel for float and double element types.
REGISTER_OP_CPU_KERNEL(fusion_seqconv_eltadd_relu,
                       ops::FusionSeqConvEltAddReluKernel<float>,
                       ops::FusionSeqConvEltAddReluKernel<double>);
paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.h
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {

// Shorthand for the framework tensor types used by this op.
using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;

// Operator class of fusion_seqconv_eltadd_relu: declares shape inference and
// kernel-type selection; both are defined in the matching .cc file.
class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel {
 public:
  // Inherit the constructors of OperatorWithKernel.
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Validates inputs/attributes and sets the output dimensions.
  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  // Chooses the kernel type for the given execution context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

// Proto maker of fusion_seqconv_eltadd_relu: Make() declares the op's inputs,
// outputs and attributes.
class FusionSeqConvEltAddReluOpMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/gather.h
浏览文件 @
049c9c7d
...
@@ -39,11 +39,9 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -39,11 +39,9 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()));
// check index of shape 1-D
// check index of shape 1-D
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
PADDLE_ENFORCE
(
index
.
dims
().
size
()
==
1
);
int
index_size
=
index
.
dims
()[
0
];
int
64_t
index_size
=
index
.
dims
()[
0
];
auto
src_dims
=
src
.
dims
();
auto
src_dims
=
src
.
dims
();
framework
::
DDim
output_dims
(
src_dims
);
output_dims
[
0
]
=
index_size
;
const
T
*
p_src
=
src
.
data
<
T
>
();
const
T
*
p_src
=
src
.
data
<
T
>
();
const
int
*
p_index
=
index
.
data
<
int
>
();
const
int
*
p_index
=
index
.
data
<
int
>
();
...
@@ -55,7 +53,7 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
...
@@ -55,7 +53,7 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src,
const
size_t
slice_bytes
=
slice_size
*
sizeof
(
T
);
const
size_t
slice_bytes
=
slice_size
*
sizeof
(
T
);
for
(
int
i
=
0
;
i
<
index_size
;
++
i
)
{
for
(
int
64_t
i
=
0
;
i
<
index_size
;
++
i
)
{
int
index_
=
p_index
[
i
];
int
index_
=
p_index
[
i
];
memcpy
(
p_output
+
i
*
slice_size
,
p_src
+
index_
*
slice_size
,
slice_bytes
);
memcpy
(
p_output
+
i
*
slice_size
,
p_src
+
index_
*
slice_size
,
slice_bytes
);
}
}
...
...
paddle/fluid/operators/lod_tensor_to_array_op.cc
浏览文件 @
049c9c7d
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/port.h"
...
@@ -79,7 +79,7 @@ struct LoDTensorToArrayFunctor : public boost::static_visitor<void> {
...
@@ -79,7 +79,7 @@ struct LoDTensorToArrayFunctor : public boost::static_visitor<void> {
template
<
typename
DeviceContext
>
template
<
typename
DeviceContext
>
template
<
typename
T
>
template
<
typename
T
>
void
LoDTensorToArrayFunctorImpl
<
DeviceContext
>::
apply
()
{
void
LoDTensorToArrayFunctorImpl
<
DeviceContext
>::
apply
()
{
math
::
ConcatGrad
Functor
<
DeviceContext
,
T
>
func
;
math
::
Split
Functor
<
DeviceContext
,
T
>
func
;
func
(
*
dev_ctx_
,
prev_functor_
->
input_
,
prev_functor_
->
ref_inputs_
,
0
,
func
(
*
dev_ctx_
,
prev_functor_
->
input_
,
prev_functor_
->
ref_inputs_
,
0
,
&
prev_functor_
->
outputs_
);
&
prev_functor_
->
outputs_
);
}
}
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
049c9c7d
if
(
NOT WIN32
)
if
(
NOT WIN32
)
add_subdirectory
(
detail
)
add_subdirectory
(
detail
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
function
(
math_library TARGET
)
function
(
math_library TARGET
)
...
@@ -35,7 +35,7 @@ function(math_library TARGET)
...
@@ -35,7 +35,7 @@ function(math_library TARGET)
endfunction
()
endfunction
()
# please add new math_library in alphabetical order
# please add new math_library in alphabetical order
math_library
(
concat
)
math_library
(
concat
_and_split
)
math_library
(
context_project DEPS im2col math_function
)
math_library
(
context_project DEPS im2col math_function
)
math_library
(
cross_entropy
)
math_library
(
cross_entropy
)
math_library
(
cos_sim_functor
)
math_library
(
cos_sim_functor
)
...
@@ -43,8 +43,8 @@ math_library(depthwise_conv)
...
@@ -43,8 +43,8 @@ math_library(depthwise_conv)
math_library
(
im2col
)
math_library
(
im2col
)
if
(
NOT WIN32
)
# windows do not support avx functions yet.
if
(
NOT WIN32
)
# windows do not support avx functions yet.
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
lstm_compute DEPS activation_functions
)
math_library
(
lstm_compute DEPS activation_functions
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
cc_library
(
blas SRCS blas.cc DEPS cblas framework_proto device_context
)
cc_library
(
blas SRCS blas.cc DEPS cblas framework_proto device_context
)
...
@@ -58,7 +58,7 @@ math_library(sequence_pooling DEPS math_function)
...
@@ -58,7 +58,7 @@ math_library(sequence_pooling DEPS math_function)
math_library
(
sequence_scale
)
math_library
(
sequence_scale
)
math_library
(
softmax DEPS math_function
)
math_library
(
softmax DEPS math_function
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
math_library
(
matrix_bit_code
)
math_library
(
matrix_bit_code
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
math_library
(
unpooling
)
math_library
(
unpooling
)
math_library
(
vol2col
)
math_library
(
vol2col
)
...
@@ -72,9 +72,9 @@ if(WITH_GPU)
...
@@ -72,9 +72,9 @@ if(WITH_GPU)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function
)
nv_test
(
math_function_gpu_test SRCS math_function_test.cu DEPS math_function
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function
)
nv_test
(
selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function
)
endif
()
endif
()
cc_test
(
concat_test SRCS concat_test.cc DEPS concat
)
cc_test
(
concat_test SRCS concat_test.cc DEPS concat
_and_split
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
cc_library
(
jit_kernel
cc_library
(
jit_kernel
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_lstm.cc
SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_lstm.cc
DEPS cpu_info cblas
activation_functions
)
DEPS cpu_info cblas
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
paddle/fluid/operators/math/concat.cc
→
paddle/fluid/operators/math/concat
_and_split
.cc
浏览文件 @
049c9c7d
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
#include <vector>
#include <vector>
namespace
paddle
{
namespace
paddle
{
...
@@ -67,7 +67,7 @@ class ConcatFunctor<platform::CPUDeviceContext, T> {
...
@@ -67,7 +67,7 @@ class ConcatFunctor<platform::CPUDeviceContext, T> {
* each dimension must be the same, except the axis dimension.
* each dimension must be the same, except the axis dimension.
*/
*/
template
<
typename
T
>
template
<
typename
T
>
class
ConcatGrad
Functor
<
platform
::
CPUDeviceContext
,
T
>
{
class
Split
Functor
<
platform
::
CPUDeviceContext
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
input
,
...
@@ -111,7 +111,7 @@ class ConcatGradFunctor<platform::CPUDeviceContext, T> {
...
@@ -111,7 +111,7 @@ class ConcatGradFunctor<platform::CPUDeviceContext, T> {
};
};
#define DEFINE_FUNCTOR(type) \
#define DEFINE_FUNCTOR(type) \
template class ConcatFunctor<platform::CPUDeviceContext, type>; \
template class ConcatFunctor<platform::CPUDeviceContext, type>; \
template class
ConcatGrad
Functor<platform::CPUDeviceContext, type>;
template class
Split
Functor<platform::CPUDeviceContext, type>;
FOR_ALL_TYPES
(
DEFINE_FUNCTOR
);
FOR_ALL_TYPES
(
DEFINE_FUNCTOR
);
...
...
paddle/fluid/operators/math/concat.cu
→
paddle/fluid/operators/math/concat
_and_split
.cu
浏览文件 @
049c9c7d
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include <algorithm>
#include <algorithm>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/float16.h"
...
@@ -24,7 +24,7 @@ namespace operators {
...
@@ -24,7 +24,7 @@ namespace operators {
namespace
math
{
namespace
math
{
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelConcat
(
T
**
inputs
,
const
int
*
input_cols
,
int
col_size
,
__global__
void
ConcatKernel
(
T
**
inputs
,
const
int
*
input_cols
,
int
col_size
,
const
int
output_rows
,
const
int
output_cols
,
const
int
output_rows
,
const
int
output_cols
,
T
*
output
)
{
T
*
output
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -50,7 +50,7 @@ __global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
...
@@ -50,7 +50,7 @@ __global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelConcat
(
T
**
inputs_data
,
const
int
fixed_in_col
,
__global__
void
ConcatKernel
(
T
**
inputs_data
,
const
int
fixed_in_col
,
const
int
out_rows
,
const
int
out_cols
,
const
int
out_rows
,
const
int
out_cols
,
T
*
output_data
)
{
T
*
output_data
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -67,7 +67,7 @@ __global__ void KernelConcat(T** inputs_data, const int fixed_in_col,
...
@@ -67,7 +67,7 @@ __global__ void KernelConcat(T** inputs_data, const int fixed_in_col,
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelConcatGrad
(
const
T
*
input_data
,
const
int
in_row
,
__global__
void
SplitKernel
(
const
T
*
input_data
,
const
int
in_row
,
const
int
in_col
,
const
int
*
out_cols
,
const
int
in_col
,
const
int
*
out_cols
,
int
out_cols_size
,
T
**
outputs_data
)
{
int
out_cols_size
,
T
**
outputs_data
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -94,7 +94,7 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
...
@@ -94,7 +94,7 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelConcatGrad
(
const
T
*
input_data
,
const
int
in_row
,
__global__
void
SplitKernel
(
const
T
*
input_data
,
const
int
in_row
,
const
int
in_col
,
const
int
fixed_out_col
,
const
int
in_col
,
const
int
fixed_out_col
,
T
**
outputs_data
)
{
T
**
outputs_data
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -170,11 +170,11 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
...
@@ -170,11 +170,11 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
dim3
grid_size
=
dim3
(
grid_cols
,
grid_rows
,
1
);
dim3
grid_size
=
dim3
(
grid_cols
,
grid_rows
,
1
);
if
(
sameShape
)
{
if
(
sameShape
)
{
KernelConcat
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
ConcatKernel
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
dev_ins_data
,
in_col
,
out_row
,
out_col
,
output
->
data
<
T
>
());
dev_ins_data
,
in_col
,
out_row
,
out_col
,
output
->
data
<
T
>
());
}
else
{
}
else
{
const
int
*
dev_ins_col_data
=
inputs_col
.
CUDAData
(
context
.
GetPlace
());
const
int
*
dev_ins_col_data
=
inputs_col
.
CUDAData
(
context
.
GetPlace
());
KernelConcat
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
ConcatKernel
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
dev_ins_data
,
dev_ins_col_data
,
static_cast
<
int
>
(
inputs_col
.
size
()),
dev_ins_data
,
dev_ins_col_data
,
static_cast
<
int
>
(
inputs_col
.
size
()),
out_row
,
out_col
,
output
->
data
<
T
>
());
out_row
,
out_col
,
output
->
data
<
T
>
());
}
}
...
@@ -189,7 +189,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
...
@@ -189,7 +189,7 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
* each dimension must be the same, except the axis dimension.
* each dimension must be the same, except the axis dimension.
*/
*/
template
<
typename
T
>
template
<
typename
T
>
class
ConcatGrad
Functor
<
platform
::
CUDADeviceContext
,
T
>
{
class
Split
Functor
<
platform
::
CUDADeviceContext
,
T
>
{
public:
public:
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
void
operator
()(
const
platform
::
CUDADeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
input
,
...
@@ -248,11 +248,11 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -248,11 +248,11 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
dim3
grid_size
=
dim3
(
grid_cols
,
grid_rows
,
1
);
dim3
grid_size
=
dim3
(
grid_cols
,
grid_rows
,
1
);
if
(
sameShape
)
{
if
(
sameShape
)
{
KernelConcatGrad
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
SplitKernel
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
input
.
data
<
T
>
(),
in_row
,
in_col
,
out0_col
,
dev_out_gpu_data
);
input
.
data
<
T
>
(),
in_row
,
in_col
,
out0_col
,
dev_out_gpu_data
);
}
else
{
}
else
{
const
int
*
dev_outs_col_data
=
outputs_cols
.
CUDAData
(
context
.
GetPlace
());
const
int
*
dev_outs_col_data
=
outputs_cols
.
CUDAData
(
context
.
GetPlace
());
KernelConcatGrad
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
SplitKernel
<<<
grid_size
,
block_size
,
0
,
context
.
stream
()
>>>
(
input
.
data
<
T
>
(),
in_row
,
in_col
,
dev_outs_col_data
,
input
.
data
<
T
>
(),
in_row
,
in_col
,
dev_outs_col_data
,
static_cast
<
int
>
(
outputs_cols
.
size
()),
dev_out_gpu_data
);
static_cast
<
int
>
(
outputs_cols
.
size
()),
dev_out_gpu_data
);
}
}
...
@@ -264,7 +264,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -264,7 +264,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
#define DEFINE_FUNCTOR(type) \
#define DEFINE_FUNCTOR(type) \
template class ConcatFunctor<platform::CUDADeviceContext, type>; \
template class ConcatFunctor<platform::CUDADeviceContext, type>; \
template class
ConcatGrad
Functor<platform::CUDADeviceContext, type>
template class
Split
Functor<platform::CUDADeviceContext, type>
FOR_ALL_TYPES
(
DEFINE_FUNCTOR
);
FOR_ALL_TYPES
(
DEFINE_FUNCTOR
);
...
...
paddle/fluid/operators/math/concat.h
→
paddle/fluid/operators/math/concat
_and_split
.h
浏览文件 @
049c9c7d
...
@@ -54,7 +54,7 @@ class ConcatFunctor {
...
@@ -54,7 +54,7 @@ class ConcatFunctor {
* Output[1] = [[5,6]]
* Output[1] = [[5,6]]
*/
*/
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
ConcatGrad
Functor
{
class
Split
Functor
{
public:
public:
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
const
framework
::
Tensor
*>&
ref_inputs
,
const
std
::
vector
<
const
framework
::
Tensor
*>&
ref_inputs
,
...
...
paddle/fluid/operators/math/concat_test.cc
浏览文件 @
049c9c7d
...
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/concat.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
template
<
typename
DeviceContext
,
typename
Place
>
template
<
typename
DeviceContext
,
typename
Place
>
void
testConcat
()
{
void
testConcat
()
{
...
...
paddle/fluid/operators/math/fc_compute.h
浏览文件 @
049c9c7d
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
DECLARE_int32
(
paddle_num_threads
);
DECLARE_int32
(
paddle_num_threads
);
...
@@ -30,20 +31,25 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
...
@@ -30,20 +31,25 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
if
(
B
==
NULL
)
{
if
(
B
==
NULL
)
{
return
;
return
;
}
}
if
(
relu
)
{
const
auto
&
vaddrelu
=
jitkernel
::
KernelPool
::
Instance
()
.
template
Get
<
jitkernel
::
VAddReluKernel
<
T
>
>
(
N
);
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
T
*
dst
=
Y
+
i
*
N
;
vaddrelu
->
Compute
(
B
,
dst
,
dst
);
}
}
else
{
const
auto
&
vadd
=
jitkernel
::
KernelPool
::
Instance
()
.
template
Get
<
jitkernel
::
VAddKernel
<
T
>
>
(
N
);
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
#endif
#endif
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
blas
.
AXPY
(
N
,
static_cast
<
T
>
(
1
),
B
,
Y
+
i
*
N
);
T
*
dst
=
Y
+
i
*
N
;
vadd
->
Compute
(
B
,
dst
,
dst
);
}
}
if
(
!
relu
)
{
return
;
}
}
// TODO(TJ): fuse relu
LOG
(
FATAL
)
<<
"Not implemented!"
;
}
}
}
// namespace math
}
// namespace math
...
...
paddle/fluid/operators/math/jit_kernel.h
浏览文件 @
049c9c7d
...
@@ -86,6 +86,12 @@ class VAddBiasKernel : public Kernel {
...
@@ -86,6 +86,12 @@ class VAddBiasKernel : public Kernel {
virtual
void
Compute
(
const
T
a
,
const
T
*
x
,
T
*
y
)
const
=
0
;
virtual
void
Compute
(
const
T
a
,
const
T
*
x
,
T
*
y
)
const
=
0
;
};
};
// Interface of the "vector add + ReLU" JIT kernel. Implementations take two
// input arrays x and y and an output array z; see the concrete
// implementations for the element-wise definition.
template <typename T>
class VAddReluKernel : public Kernel {
 public:
  virtual void Compute(const T *x, const T *y, T *z) const = 0;
};
template
<
typename
T
>
template
<
typename
T
>
class
VActKernel
:
public
Kernel
{
class
VActKernel
:
public
Kernel
{
public:
public:
...
...
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
049c9c7d
...
@@ -378,11 +378,99 @@ class VIdentityKernelImpl : public VIdentityKernel<T> {
...
@@ -378,11 +378,99 @@ class VIdentityKernelImpl : public VIdentityKernel<T> {
void
Compute
(
const
T
*
x
,
T
*
y
)
const
override
{}
void
Compute
(
const
T
*
x
,
T
*
y
)
const
override
{}
};
};
/* VAddRelu JitKernel */
// Generic (non-vectorized) fused add + ReLU kernel:
//   z[i] = max(x[i] + y[i], 0) for i in [0, num_).
// ISA/block-specific specializations for float are generated by the
// INTRI*_FLOAT macros that follow this class.
template <typename T, platform::jit::cpu_isa_t isa, jit_block>
class VAddReluKernelImpl : public VAddReluKernel<T> {
 public:
  // d: number of elements processed by each Compute() call.
  explicit VAddReluKernelImpl(int d) : VAddReluKernel<T>() { this->num_ = d; }

  // Each element's sum is formed before its output slot is written, so z may
  // be the same buffer as x or y.
  void Compute(const T* x, const T* y, T* z) const override {
    for (int k = 0; k < this->num_; ++k) {
      T sum = x[k] + y[k];
      // Anything that is not strictly positive is clamped to zero.
      if (!(sum > 0)) {
        sum = 0;
      }
      z[k] = sum;
    }
  }
};
/* Specializes VAddReluKernelImpl<float, isa, kEQ8>::Compute with one 256-bit
 * lane: z[0..8) = max(x[0..8) + y[0..8), 0). Unaligned loads/stores are used. */
#define INTRI8_FLOAT(isa) \
  template <> \
  void VAddReluKernelImpl<float, isa, kEQ8>::Compute( \
      const float* x, const float* y, float* z) const { \
    const __m256 lhs = _mm256_loadu_ps(x); \
    const __m256 rhs = _mm256_loadu_ps(y); \
    const __m256 sum = _mm256_add_ps(lhs, rhs); \
    _mm256_storeu_ps(z, _mm256_max_ps(sum, _mm256_setzero_ps())); \
  }
/* Specializes VAddReluKernelImpl<float, isa, kEQ16>::Compute with two 256-bit
 * lanes: z[0..16) = max(x[0..16) + y[0..16), 0). Both halves are fully
 * computed before either half is stored. */
#define INTRI16_FLOAT(isa) \
  template <> \
  void VAddReluKernelImpl<float, isa, kEQ16>::Compute( \
      const float* x, const float* y, float* z) const { \
    const __m256 zero = _mm256_setzero_ps(); \
    const __m256 x_lo = _mm256_loadu_ps(x); \
    const __m256 y_lo = _mm256_loadu_ps(y); \
    const __m256 out_lo = _mm256_max_ps(_mm256_add_ps(x_lo, y_lo), zero); \
    const __m256 x_hi = _mm256_loadu_ps(x + 8); \
    const __m256 y_hi = _mm256_loadu_ps(y + 8); \
    const __m256 out_hi = _mm256_max_ps(_mm256_add_ps(x_hi, y_hi), zero); \
    _mm256_storeu_ps(z, out_lo); \
    _mm256_storeu_ps(z + 8, out_hi); \
  }
/* Specializes the constructor and Compute of
 * VAddReluKernelImpl<float, isa, block> for arbitrary lengths.
 * The constructor splits d into a vectorized prefix [0, end_) whose length is
 * a multiple of AVX_FLOAT_BLOCK and a scalar tail [end_, num_).
 * Compute writes z[i] = max(x[i] + y[i], 0) for every i in [0, num_). */
#define INTRI_COMMON_FLOAT(isa, block) \
  template <> \
  VAddReluKernelImpl<float, isa, block>::VAddReluKernelImpl(int d) \
      : VAddReluKernel<float>() { \
    this->num_ = d; \
    this->end_ = d - d % AVX_FLOAT_BLOCK; \
    this->rest_ = d - this->end_; \
  } \
  template <> \
  void VAddReluKernelImpl<float, isa, block>::Compute( \
      const float* x, const float* y, float* z) const { \
    const __m256 zero = _mm256_setzero_ps(); \
    /* vectorized prefix, AVX_FLOAT_BLOCK floats per step */ \
    for (int pos = 0; pos < this->end_; pos += AVX_FLOAT_BLOCK) { \
      const __m256 lhs = _mm256_loadu_ps(x + pos); \
      const __m256 rhs = _mm256_loadu_ps(y + pos); \
      const __m256 sum = _mm256_add_ps(lhs, rhs); \
      _mm256_storeu_ps(z + pos, _mm256_max_ps(sum, zero)); \
    } \
    /* scalar tail */ \
    for (int pos = this->end_; pos < this->num_; ++pos) { \
      float sum = x[pos] + y[pos]; \
      if (!(sum > 0)) { \
        sum = 0; \
      } \
      z[pos] = sum; \
    } \
  }
#ifdef __AVX__
INTRI8_FLOAT
(
jit
::
avx
);
INTRI16_FLOAT
(
jit
::
avx
);
INTRI_COMMON_FLOAT
(
jit
::
avx
,
kGT16
);
#endif
#ifdef __AVX2__
INTRI8_FLOAT
(
jit
::
avx2
);
INTRI16_FLOAT
(
jit
::
avx2
);
INTRI_COMMON_FLOAT
(
jit
::
avx2
,
kGT16
);
#endif
#ifdef __AVX512F__
// TODO(TJ): refine avx512
INTRI8_FLOAT
(
jit
::
avx512f
);
INTRI16_FLOAT
(
jit
::
avx512f
);
INTRI_COMMON_FLOAT
(
jit
::
avx512f
,
kGT16
);
#endif
#undef INTRI8_FLOAT
#undef INTRI16_FLOAT
#undef INTRI_COMMON_FLOAT
REGISTER_JITKERNEL
(
vmul
,
VMulKernel
);
REGISTER_JITKERNEL
(
vmul
,
VMulKernel
);
REGISTER_JITKERNEL
(
vadd
,
VAddKernel
);
REGISTER_JITKERNEL
(
vadd
,
VAddKernel
);
REGISTER_JITKERNEL
(
vscal
,
VScalKernel
);
REGISTER_JITKERNEL
(
vscal
,
VScalKernel
);
REGISTER_JITKERNEL
(
vaddb
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vaddb
,
VAddBiasKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
REGISTER_JITKERNEL
(
vrelu
,
VReluKernel
);
REGISTER_JITKERNEL
(
vaddrelu
,
VAddReluKernel
);
REGISTER_JITKERNEL
(
videntity
,
VIdentityKernel
);
REGISTER_JITKERNEL
(
videntity
,
VIdentityKernel
);
}
// namespace jitkernel
}
// namespace jitkernel
...
...
paddle/fluid/operators/math/jit_kernel_exp.cc
浏览文件 @
049c9c7d
...
@@ -27,13 +27,6 @@ limitations under the License. */
...
@@ -27,13 +27,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
#ifdef __AVX__
namespace
detail
{
__m256
Exp
(
__m256
a
);
}
// namespace detail
#endif
namespace
jitkernel
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
namespace
jit
=
platform
::
jit
;
...
@@ -69,37 +62,186 @@ FOR_EACH_ISA(MKL_FLOAT, kGT16);
...
@@ -69,37 +62,186 @@ FOR_EACH_ISA(MKL_FLOAT, kGT16);
FOR_EACH_ISA_BLOCK
(
MKL_DOUBLE
);
FOR_EACH_ISA_BLOCK
(
MKL_DOUBLE
);
#endif
#endif
#define INTRI8_FLOAT(isa) \
namespace
detail
{
#ifdef __AVX__
#define ALIGN32 __attribute__((aligned(32)))
#define _PS256_CONST(Name, Val) \
static const float _ps256_##Name[8] ALIGN32 = {Val, Val, Val, Val, \
Val, Val, Val, Val}
#define _PI256_CONST(Name, Val) \
static const int _pi256_##Name[8] ALIGN32 = {Val, Val, Val, Val, \
Val, Val, Val, Val}
_PI256_CONST
(
0x7f
,
0x7f
);
_PS256_CONST
(
one
,
1.
f
);
_PS256_CONST
(
0
p5
,
0.5
f
);
_PS256_CONST
(
exp_hi
,
88.3762626647949
f
);
_PS256_CONST
(
exp_lo
,
-
88.3762626647949
f
);
_PS256_CONST
(
cephes_LOG2EF
,
1.44269504088896341
);
_PS256_CONST
(
cephes_exp_C1
,
0.693359375
);
_PS256_CONST
(
cephes_exp_C2
,
-
2.12194440e-4
);
_PS256_CONST
(
cephes_exp_p0
,
1.9875691500E-4
);
_PS256_CONST
(
cephes_exp_p1
,
1.3981999507E-3
);
_PS256_CONST
(
cephes_exp_p2
,
8.3334519073E-3
);
_PS256_CONST
(
cephes_exp_p3
,
4.1665795894E-2
);
_PS256_CONST
(
cephes_exp_p4
,
1.6666665459E-1
);
_PS256_CONST
(
cephes_exp_p5
,
5.0000001201E-1
);
typedef
union
imm_xmm_union
{
__m256i
imm
;
__m128i
xmm
[
2
];
}
imm_xmm_union
;
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) \
{ \
imm_xmm_union u ALIGN32; \
u.imm = imm_; \
xmm0_ = u.xmm[0]; \
xmm1_ = u.xmm[1]; \
}
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) \
{ \
imm_xmm_union u ALIGN32; \
u.xmm[0] = xmm0_; \
u.xmm[1] = xmm1_; \
imm_ = u.imm; \
}
/* Defines `static inline __m256i avx2_mm256_<fn>(__m256i x, int y)`, an
 * emulation of a 256-bit integer bit operation for targets without AVX2:
 * the 256-bit value is split into two 128-bit halves, `_mm_<fn>` is applied
 * to each half with the same scalar argument y, and the halves are rejoined.
 * Relies on COPY_IMM_TO_XMM / COPY_XMM_TO_IMM being defined beforehand.
 * Every line of the body, including the embedded comment, carries a line
 * continuation so the whole function stays inside the macro. */
#define AVX2_BITOP_USING_SSE2(fn) \
  static inline __m256i avx2_mm256_##fn(__m256i x, int y) { \
    /* use SSE2 to perform the bitop AVX2 */ \
    __m128i x1, x2; \
    __m256i ret; \
    COPY_IMM_TO_XMM(x, x1, x2); \
    x1 = _mm_##fn(x1, y); \
    x2 = _mm_##fn(x2, y); \
    COPY_XMM_TO_IMM(x1, x2, ret); \
    return ret; \
  }
/* Defines `static inline __m256i avx2_mm256_<fn>(__m256i x, __m256i y)`, an
 * emulation of a 256-bit integer operation for targets without AVX2: both
 * operands are split into 128-bit halves, `_mm_<fn>` is applied pairwise, and
 * the two results are rejoined.
 * The generated function name is derived from `fn` (it used to be hard-coded
 * to avx2_mm256_add_epi32), so the macro can be instantiated for more than
 * one operation; AVX2_INTOP_USING_SSE2(add_epi32) still produces
 * avx2_mm256_add_epi32 exactly as before.
 * Relies on COPY_IMM_TO_XMM / COPY_XMM_TO_IMM being defined beforehand. */
#define AVX2_INTOP_USING_SSE2(fn) \
  static inline __m256i avx2_mm256_##fn(__m256i x, __m256i y) { \
    /* use SSE2 to perform the AVX2 integer operation */ \
    __m128i x1, x2; \
    __m128i y1, y2; \
    __m256i ret; \
    COPY_IMM_TO_XMM(x, x1, x2); \
    COPY_IMM_TO_XMM(y, y1, y2); \
    x1 = _mm_##fn(x1, y1); \
    x2 = _mm_##fn(x2, y2); \
    COPY_XMM_TO_IMM(x1, x2, ret); \
    return ret; \
  }
AVX2_BITOP_USING_SSE2
(
slli_epi32
);
AVX2_INTOP_USING_SSE2
(
add_epi32
);
/* Shared body of ExpAVX / ExpAVX2.
 * Expects a mutable `__m256 x` in the enclosing scope and declares the locals
 * tmp, fx, one, imm0, mask, z and y. On exit:
 *   - y    holds the polynomial part of the result (before scaling by 2^n),
 *   - imm0 holds fx truncated to 32-bit integers (the exponent n).
 * The caller finishes by turning imm0 into 2^n and multiplying it into y.
 * The expansion ends without a semicolon; use it as `AVXEXP_BASE;`.
 * Every line, including the embedded comments, carries a line continuation so
 * the whole sequence stays inside the macro. */
#define AVXEXP_BASE \
  __m256 tmp = _mm256_setzero_ps(), fx; \
  __m256 one = *reinterpret_cast<const __m256*>(_ps256_one); \
  __m256i imm0; \
  /* clamp the input to [exp_lo, exp_hi] */ \
  x = _mm256_min_ps(x, *reinterpret_cast<const __m256*>(_ps256_exp_hi)); \
  x = _mm256_max_ps(x, *reinterpret_cast<const __m256*>(_ps256_exp_lo)); \
  /* express exp(x) as exp(g + n*log(2)) */ \
  fx = _mm256_mul_ps(x, \
                     *reinterpret_cast<const __m256*>(_ps256_cephes_LOG2EF)); \
  fx = _mm256_add_ps(fx, *reinterpret_cast<const __m256*>(_ps256_0p5)); \
  tmp = _mm256_floor_ps(fx); \
  /* if greater, subtract 1 */ \
  __m256 mask = _mm256_cmp_ps(tmp, fx, _CMP_GT_OS); \
  mask = _mm256_and_ps(mask, one); \
  fx = _mm256_sub_ps(tmp, mask); \
  tmp = _mm256_mul_ps(fx, \
                      *reinterpret_cast<const __m256*>(_ps256_cephes_exp_C1)); \
  __m256 z = _mm256_mul_ps( \
      fx, *reinterpret_cast<const __m256*>(_ps256_cephes_exp_C2)); \
  x = _mm256_sub_ps(x, tmp); \
  x = _mm256_sub_ps(x, z); \
  z = _mm256_mul_ps(x, x); \
  /* polynomial in x, evaluated with Horner's scheme (p0 .. p5) */ \
  __m256 y = *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p0); \
  y = _mm256_mul_ps(y, x); \
  y = _mm256_add_ps(y, \
                    *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p1)); \
  y = _mm256_mul_ps(y, x); \
  y = _mm256_add_ps(y, \
                    *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p2)); \
  y = _mm256_mul_ps(y, x); \
  y = _mm256_add_ps(y, \
                    *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p3)); \
  y = _mm256_mul_ps(y, x); \
  y = _mm256_add_ps(y, \
                    *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p4)); \
  y = _mm256_mul_ps(y, x); \
  y = _mm256_add_ps(y, \
                    *reinterpret_cast<const __m256*>(_ps256_cephes_exp_p5)); \
  y = _mm256_mul_ps(y, z); \
  y = _mm256_add_ps(y, x); \
  y = _mm256_add_ps(y, one); \
  /* build 2^n */ \
  imm0 = _mm256_cvttps_epi32(fx)
// Lane-wise exp() for 8 packed floats on AVX-only targets.
// AVXEXP_BASE leaves the polynomial part in `y` and the integer exponent in
// `imm0`; here the exponent is biased by 0x7f and shifted left by 23 bits,
// the result is reinterpreted as floats (2^n) and multiplied into y.
// The two integer steps go through the SSE2-based avx2_mm256_* helpers.
__m256 ExpAVX(__m256 x) {
  AVXEXP_BASE;
  const __m256i bias = *reinterpret_cast<const __m256i*>(_pi256_0x7f);
  const __m256i biased = avx2_mm256_add_epi32(imm0, bias);
  const __m256i exponent_bits = avx2_mm256_slli_epi32(biased, 23);
  return _mm256_mul_ps(y, _mm256_castsi256_ps(exponent_bits));
}
#endif
#ifdef __AVX2__
// Lane-wise exp() for 8 packed floats on AVX2 targets.
// Same scheme as ExpAVX, but the exponent bias (+0x7f) and the 23-bit left
// shift use the native AVX2 integer intrinsics.
__m256 ExpAVX2(__m256 x) {
  AVXEXP_BASE;
  const __m256i bias = *reinterpret_cast<const __m256i*>(_pi256_0x7f);
  const __m256i biased = _mm256_add_epi32(imm0, bias);
  const __m256i exponent_bits = _mm256_slli_epi32(biased, 23);
  return _mm256_mul_ps(y, _mm256_castsi256_ps(exponent_bits));
}
#endif
}
// namespace detail
#define INTRI8_FLOAT(isa, expisa) \
template <> \
template <> \
void VExpKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
void VExpKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
const { \
const { \
__m256 tmp = _mm256_loadu_ps(x); \
__m256 tmp = _mm256_loadu_ps(x); \
_mm256_storeu_ps(y,
detail::Exp(tmp));
\
_mm256_storeu_ps(y,
expisa(tmp));
\
}
}
#define INTRI16_FLOAT(isa
)
\
#define INTRI16_FLOAT(isa
, expisa)
\
template <> \
template <> \
void VExpKernelImpl<float, isa, kEQ16>::Compute(const float* x, float* y) \
void VExpKernelImpl<float, isa, kEQ16>::Compute(const float* x, float* y) \
const { \
const { \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
tmp0 =
detail::Exp(tmp0);
\
tmp0 =
expisa(tmp0);
\
tmp1 =
detail::Exp(tmp1);
\
tmp1 =
expisa(tmp1);
\
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y + 8, tmp1); \
_mm256_storeu_ps(y + 8, tmp1); \
}
}
#ifdef __AVX__
#ifdef __AVX__
INTRI8_FLOAT
(
jit
::
avx
);
INTRI8_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI16_FLOAT
(
jit
::
avx
);
INTRI16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
#endif
#endif
#ifdef __AVX2__
#ifdef __AVX2__
INTRI8_FLOAT
(
jit
::
avx2
);
INTRI8_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx2
);
INTRI16_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
#endif
#endif
#ifdef __AVX512F__
#ifdef __AVX512F__
INTRI8_FLOAT
(
jit
::
avx512f
);
INTRI8_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx512f
);
INTRI16_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
#endif
#endif
// TODO(TJ): eq16 test and complete avx512
// TODO(TJ): eq16 test and complete avx512
...
@@ -135,26 +277,27 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -135,26 +277,27 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
std
::
shared_ptr
<
const
VExpKernel
<
T
>>
vexp_
;
std
::
shared_ptr
<
const
VExpKernel
<
T
>>
vexp_
;
};
};
#define INTRI_SIGMOID(tmp, min, max
)
\
#define INTRI_SIGMOID(tmp, min, max
, expisa)
\
tmp = _mm256_max_ps(tmp, min); \
tmp = _mm256_max_ps(tmp, min); \
tmp = _mm256_min_ps(tmp, max); \
tmp = _mm256_min_ps(tmp, max); \
tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp); \
tmp = _mm256_sub_ps(_mm256_set1_ps(0.0f), tmp); \
tmp =
detail::Exp(tmp);
\
tmp =
expisa(tmp);
\
tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); \
tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); \
tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp)
tmp = _mm256_div_ps(_mm256_set1_ps(1.0f), tmp)
#define INTRI8_FLOAT(isa
)
\
#define INTRI8_FLOAT(isa
, expisa)
\
template <> \
template <> \
void VSigmoidKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
void VSigmoidKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
const { \
const { \
/* TODO(TJ): try to use static const*/
\
__m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); \
__m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 tmp = _mm256_loadu_ps(x); \
__m256 tmp = _mm256_loadu_ps(x); \
INTRI_SIGMOID(tmp, min, max
);
\
INTRI_SIGMOID(tmp, min, max
, expisa);
\
_mm256_storeu_ps(y, tmp); \
_mm256_storeu_ps(y, tmp); \
}
}
#define INTRI16_FLOAT(isa
)
\
#define INTRI16_FLOAT(isa
, expisa)
\
template <> \
template <> \
void VSigmoidKernelImpl<float, isa, kEQ16>::Compute(const float* x, \
void VSigmoidKernelImpl<float, isa, kEQ16>::Compute(const float* x, \
float* y) const { \
float* y) const { \
...
@@ -162,13 +305,13 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -162,13 +305,13 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
INTRI_SIGMOID(tmp0, min, max
);
\
INTRI_SIGMOID(tmp0, min, max
, expisa);
\
INTRI_SIGMOID(tmp1, min, max
);
\
INTRI_SIGMOID(tmp1, min, max
, expisa);
\
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y + 8, tmp1); \
_mm256_storeu_ps(y + 8, tmp1); \
}
}
#define INTRI_GT8LT16_FLOAT(isa
)
\
#define INTRI_GT8LT16_FLOAT(isa
, expisa)
\
template <> \
template <> \
VSigmoidKernelImpl<float, isa, kGT8LT16>::VSigmoidKernelImpl(int d) \
VSigmoidKernelImpl<float, isa, kGT8LT16>::VSigmoidKernelImpl(int d) \
: VSigmoidKernel<float>() { \
: VSigmoidKernel<float>() { \
...
@@ -184,7 +327,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -184,7 +327,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
__m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); \
__m256 max = _mm256_set1_ps(SIGMOID_THRESHOLD_MAX); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 tmp = _mm256_loadu_ps(x); \
__m256 tmp = _mm256_loadu_ps(x); \
INTRI_SIGMOID(tmp, min, max
);
\
INTRI_SIGMOID(tmp, min, max
, expisa);
\
_mm256_storeu_ps(y, tmp); \
_mm256_storeu_ps(y, tmp); \
const float min_ = SIGMOID_THRESHOLD_MIN; \
const float min_ = SIGMOID_THRESHOLD_MIN; \
const float max_ = SIGMOID_THRESHOLD_MAX; \
const float max_ = SIGMOID_THRESHOLD_MAX; \
...
@@ -198,7 +341,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -198,7 +341,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
} \
} \
}
}
#define INTRI_GT16_FLOAT(isa
)
\
#define INTRI_GT16_FLOAT(isa
, expisa)
\
template <> \
template <> \
VSigmoidKernelImpl<float, isa, kGT16>::VSigmoidKernelImpl(int d) \
VSigmoidKernelImpl<float, isa, kGT16>::VSigmoidKernelImpl(int d) \
: VSigmoidKernel<float>() { \
: VSigmoidKernel<float>() { \
...
@@ -215,7 +358,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -215,7 +358,7 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
__m256 min = _mm256_set1_ps(SIGMOID_THRESHOLD_MIN); \
for (int i = 0; i < this->end_; i += AVX_FLOAT_BLOCK) { \
for (int i = 0; i < this->end_; i += AVX_FLOAT_BLOCK) { \
__m256 tmp = _mm256_loadu_ps(x + i); \
__m256 tmp = _mm256_loadu_ps(x + i); \
INTRI_SIGMOID(tmp, min, max
);
\
INTRI_SIGMOID(tmp, min, max
, expisa);
\
_mm256_storeu_ps(y + i, tmp); \
_mm256_storeu_ps(y + i, tmp); \
} \
} \
const float min_ = SIGMOID_THRESHOLD_MIN; \
const float min_ = SIGMOID_THRESHOLD_MIN; \
...
@@ -231,22 +374,20 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
...
@@ -231,22 +374,20 @@ class VSigmoidKernelImpl : public VSigmoidKernel<T> {
}
}
#ifdef __AVX__
#ifdef __AVX__
INTRI8_FLOAT
(
jit
::
avx
);
INTRI8_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI16_FLOAT
(
jit
::
avx
);
INTRI16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT16_FLOAT
(
jit
::
avx
);
INTRI_GT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
#endif
#endif
#ifdef __AVX2__
#ifdef __AVX2__
INTRI8_FLOAT
(
jit
::
avx2
);
INTRI8_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx2
);
INTRI16_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
// INTRI_GT8LT16_FLOAT(jit::avx2);
// maybe use avx at gt8lt16 and gt16
// INTRI_GT16_FLOAT(jit::avx2);
#endif
#endif
#ifdef __AVX512F__
#ifdef __AVX512F__
INTRI8_FLOAT
(
jit
::
avx512f
);
INTRI8_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx512f
);
INTRI16_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
// INTRI_GT8LT16_FLOAT(jit::avx512f);
// maybe use avx2 at gt8lt16 and gt16
// INTRI_GT16_FLOAT(jit::avx512f);
#endif
#endif
#undef INTRI8_FLOAT
#undef INTRI8_FLOAT
...
@@ -280,36 +421,36 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -280,36 +421,36 @@ class VTanhKernelImpl : public VTanhKernel<T> {
std
::
shared_ptr
<
const
VAddBiasKernel
<
T
>>
vaddbias_
;
std
::
shared_ptr
<
const
VAddBiasKernel
<
T
>>
vaddbias_
;
};
};
#define INTRI_VTANH(tmp
)
\
#define INTRI_VTANH(tmp
, expisa)
\
tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), tmp); \
tmp = _mm256_mul_ps(_mm256_set1_ps(-2.0f), tmp); \
tmp = _mm256_min_ps(tmp, _mm256_set1_ps(EXP_MAX_INPUT)); \
tmp = _mm256_min_ps(tmp, _mm256_set1_ps(EXP_MAX_INPUT)); \
tmp =
detail::Exp(tmp);
\
tmp =
expisa(tmp);
\
tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); \
tmp = _mm256_add_ps(_mm256_set1_ps(1.0f), tmp); \
tmp = _mm256_div_ps(_mm256_set1_ps(2.0f), tmp); \
tmp = _mm256_div_ps(_mm256_set1_ps(2.0f), tmp); \
tmp = _mm256_sub_ps(tmp, _mm256_set1_ps(1.0f))
tmp = _mm256_sub_ps(tmp, _mm256_set1_ps(1.0f))
#define INTRI8_FLOAT(isa
)
\
#define INTRI8_FLOAT(isa
, expisa)
\
template <> \
template <> \
void VTanhKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
void VTanhKernelImpl<float, isa, kEQ8>::Compute(const float* x, float* y) \
const { \
const { \
__m256 tmp = _mm256_loadu_ps(x); \
__m256 tmp = _mm256_loadu_ps(x); \
INTRI_VTANH(tmp
);
\
INTRI_VTANH(tmp
, expisa);
\
_mm256_storeu_ps(y, tmp); \
_mm256_storeu_ps(y, tmp); \
}
}
#define INTRI16_FLOAT(isa
)
\
#define INTRI16_FLOAT(isa
, expisa)
\
template <> \
template <> \
void VTanhKernelImpl<float, isa, kEQ16>::Compute(const float* x, float* y) \
void VTanhKernelImpl<float, isa, kEQ16>::Compute(const float* x, float* y) \
const { \
const { \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp0 = _mm256_loadu_ps(x); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
__m256 tmp1 = _mm256_loadu_ps(x + 8); \
INTRI_VTANH(tmp0
);
\
INTRI_VTANH(tmp0
, expisa);
\
INTRI_VTANH(tmp1
);
\
INTRI_VTANH(tmp1
, expisa);
\
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y, tmp0); \
_mm256_storeu_ps(y + 8, tmp1); \
_mm256_storeu_ps(y + 8, tmp1); \
}
}
#define INTRI_GT8LT16_FLOAT(isa
)
\
#define INTRI_GT8LT16_FLOAT(isa
, expisa)
\
template <> \
template <> \
VTanhKernelImpl<float, isa, kGT8LT16>::VTanhKernelImpl(int d) \
VTanhKernelImpl<float, isa, kGT8LT16>::VTanhKernelImpl(int d) \
: VTanhKernel<float>() { \
: VTanhKernel<float>() { \
...
@@ -327,7 +468,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -327,7 +468,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
void VTanhKernelImpl<float, isa, kGT8LT16>::Compute(const float* x, \
void VTanhKernelImpl<float, isa, kGT8LT16>::Compute(const float* x, \
float* y) const { \
float* y) const { \
__m256 tmp = _mm256_loadu_ps(x); \
__m256 tmp = _mm256_loadu_ps(x); \
INTRI_VTANH(tmp
);
\
INTRI_VTANH(tmp
, expisa);
\
_mm256_storeu_ps(y, tmp); \
_mm256_storeu_ps(y, tmp); \
x += AVX_FLOAT_BLOCK; \
x += AVX_FLOAT_BLOCK; \
y += AVX_FLOAT_BLOCK; \
y += AVX_FLOAT_BLOCK; \
...
@@ -337,7 +478,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -337,7 +478,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
vaddbias_->Compute(-1.f, y, y); \
vaddbias_->Compute(-1.f, y, y); \
}
}
#define INTRI_GT16_FLOAT(isa
)
\
#define INTRI_GT16_FLOAT(isa
, expisa)
\
template <> \
template <> \
VTanhKernelImpl<float, isa, kGT16>::VTanhKernelImpl(int d) \
VTanhKernelImpl<float, isa, kGT16>::VTanhKernelImpl(int d) \
: VTanhKernel<float>() { \
: VTanhKernel<float>() { \
...
@@ -356,7 +497,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -356,7 +497,7 @@ class VTanhKernelImpl : public VTanhKernel<T> {
const { \
const { \
for (int i = 0; i < this->end_; i += AVX_FLOAT_BLOCK) { \
for (int i = 0; i < this->end_; i += AVX_FLOAT_BLOCK) { \
__m256 tmp = _mm256_loadu_ps(x + i); \
__m256 tmp = _mm256_loadu_ps(x + i); \
INTRI_VTANH(tmp
);
\
INTRI_VTANH(tmp
, expisa);
\
_mm256_storeu_ps(y + i, tmp); \
_mm256_storeu_ps(y + i, tmp); \
} \
} \
x += this->end_; \
x += this->end_; \
...
@@ -368,19 +509,19 @@ class VTanhKernelImpl : public VTanhKernel<T> {
...
@@ -368,19 +509,19 @@ class VTanhKernelImpl : public VTanhKernel<T> {
}
}
#ifdef __AVX__
#ifdef __AVX__
INTRI8_FLOAT
(
jit
::
avx
);
INTRI8_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI16_FLOAT
(
jit
::
avx
);
INTRI16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
);
INTRI_GT8LT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
INTRI_GT16_FLOAT
(
jit
::
avx
);
INTRI_GT16_FLOAT
(
jit
::
avx
,
detail
::
ExpAVX
);
#endif
#endif
#ifdef __AVX2__
#ifdef __AVX2__
INTRI8_FLOAT
(
jit
::
avx2
);
INTRI8_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx2
);
INTRI16_FLOAT
(
jit
::
avx2
,
detail
::
ExpAVX2
);
// maybe use avx at gt8lt16 and gt16
// maybe use avx at gt8lt16 and gt16
#endif
#endif
#ifdef __AVX512F__
#ifdef __AVX512F__
INTRI8_FLOAT
(
jit
::
avx512f
);
INTRI8_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
INTRI16_FLOAT
(
jit
::
avx512f
);
INTRI16_FLOAT
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
// maybe use avx at gt8lt16 and gt16
// maybe use avx at gt8lt16 and gt16
#endif
#endif
...
...
paddle/fluid/operators/math/jit_kernel_lstm.cc
浏览文件 @
049c9c7d
...
@@ -25,13 +25,18 @@ limitations under the License. */
...
@@ -25,13 +25,18 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
#ifdef __AVX__
namespace
jitkernel
{
namespace
detail
{
namespace
detail
{
__m256
Exp
(
__m256
a
);
#ifdef __AVX__
}
// namespace detail
__m256
ExpAVX
(
__m256
x
);
#endif
#endif
namespace
jitkernel
{
#ifdef __AVX2__
__m256
ExpAVX2
(
__m256
x
);
#endif
}
// namespace detail
namespace
jit
=
platform
::
jit
;
namespace
jit
=
platform
::
jit
;
#ifdef __AVX__
#ifdef __AVX__
...
@@ -43,43 +48,72 @@ class AVXAct {
...
@@ -43,43 +48,72 @@ class AVXAct {
virtual
__m256
Compute
(
__m256
x
)
const
=
0
;
virtual
__m256
Compute
(
__m256
x
)
const
=
0
;
};
};
template
<
act_type
type
>
template
<
act_type
type
,
jit
::
cpu_isa_t
isa
>
class
AVXActImpl
:
public
AVXAct
{
class
AVXActImpl
:
public
AVXAct
{
public:
public:
__m256
Compute
(
__m256
x
)
const
override
{
PADDLE_THROW
(
"Unkown type!"
);
}
__m256
Compute
(
__m256
x
)
const
override
{
PADDLE_THROW
(
"Unkown type!"
);
}
};
};
template
<
>
#define AVX_SIGMOID(isa, expisa) \
__m256
AVXActImpl
<
kSigmoid
>::
Compute
(
__m256
x
)
const
{
template <> \
__m256
ones
=
_mm256_set1_ps
(
1.0
f
);
__m256 AVXActImpl<kSigmoid, isa>::Compute(__m256 x) const { \
x
=
_mm256_max_ps
(
x
,
_mm256_set1_ps
(
SIGMOID_THRESHOLD_MIN
));
__m256 ones = _mm256_set1_ps(1.0f); \
x
=
_mm256_min_ps
(
x
,
_mm256_set1_ps
(
SIGMOID_THRESHOLD_MAX
));
x = _mm256_max_ps(x, _mm256_set1_ps(SIGMOID_THRESHOLD_MIN)); \
x
=
_mm256_sub_ps
(
_mm256_set1_ps
(
0.0
f
),
x
);
x = _mm256_min_ps(x, _mm256_set1_ps(SIGMOID_THRESHOLD_MAX)); \
x
=
detail
::
Exp
(
x
);
x = _mm256_sub_ps(_mm256_set1_ps(0.0f), x); \
x
=
_mm256_add_ps
(
ones
,
x
);
x = expisa(x); \
return
_mm256_div_ps
(
ones
,
x
);
x = _mm256_add_ps(ones, x); \
}
return _mm256_div_ps(ones, x); \
}
template
<
>
#define AVX_TANH(isa, expisa) \
__m256
AVXActImpl
<
kTanh
>::
Compute
(
__m256
x
)
const
{
template <> \
__m256
ones
=
_mm256_set1_ps
(
1.0
f
);
__m256 AVXActImpl<kTanh, isa>::Compute(__m256 x) const { \
x
=
_mm256_mul_ps
(
_mm256_set1_ps
(
-
2.0
f
),
x
);
__m256 ones = _mm256_set1_ps(1.0f); \
x
=
_mm256_min_ps
(
x
,
_mm256_set1_ps
(
EXP_MAX_INPUT
));
x = _mm256_mul_ps(_mm256_set1_ps(-2.0f), x); \
x
=
detail
::
Exp
(
x
);
x = _mm256_min_ps(x, _mm256_set1_ps(EXP_MAX_INPUT)); \
x
=
_mm256_add_ps
(
ones
,
x
);
x = expisa(x); \
x
=
_mm256_div_ps
(
_mm256_set1_ps
(
2.0
f
),
x
);
x = _mm256_add_ps(ones, x); \
return
_mm256_sub_ps
(
x
,
ones
);
x = _mm256_div_ps(_mm256_set1_ps(2.0f), x); \
}
return _mm256_sub_ps(x, ones); \
}
template
<
>
#define AVX_RELU(isa) \
__m256
AVXActImpl
<
kRelu
>::
Compute
(
__m256
x
)
const
{
template <> \
return
_mm256_max_ps
(
x
,
_mm256_setzero_ps
());
__m256 AVXActImpl<kRelu, isa>::Compute(__m256 x) const { \
}
return _mm256_max_ps(x, _mm256_setzero_ps()); \
}
#define AVX_IDENTITY(isa) \
template <> \
__m256 AVXActImpl<kIdentity, isa>::Compute(__m256 x) const { \
return x; \
}
#define FOR_EACH_AVX_ISA(macro_) \
macro_(jit::avx); \
macro_(jit::avx2); \
macro_(jit::avx512f)
FOR_EACH_AVX_ISA
(
AVX_RELU
);
FOR_EACH_AVX_ISA
(
AVX_IDENTITY
);
AVX_SIGMOID
(
jit
::
avx
,
detail
::
ExpAVX
);
AVX_TANH
(
jit
::
avx
,
detail
::
ExpAVX
);
#ifdef __AVX2__
AVX_SIGMOID
(
jit
::
avx2
,
detail
::
ExpAVX2
);
AVX_SIGMOID
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
AVX_TANH
(
jit
::
avx2
,
detail
::
ExpAVX2
);
AVX_TANH
(
jit
::
avx512f
,
detail
::
ExpAVX2
);
#endif
#undef FOR_EACH_AVX_ISA
#undef AVX_IDENTITY
#undef AVX_RELU
#undef AVX_TANH
#undef AVX_SIGMOID
template
<
>
__m256
AVXActImpl
<
kIdentity
>::
Compute
(
__m256
x
)
const
{
return
x
;
}
#endif
#endif
template
<
typename
T
>
template
<
typename
T
>
...
@@ -119,23 +153,6 @@ class LSTMKernelImpl : public LSTMKernel<T> {
...
@@ -119,23 +153,6 @@ class LSTMKernelImpl : public LSTMKernel<T> {
act_cell_d_
=
GetActKernel
<
T
>
(
act_cell
,
d
);
act_cell_d_
=
GetActKernel
<
T
>
(
act_cell
,
d
);
vmul_d_
=
KernelPool
::
Instance
().
template
Get
<
VMulKernel
<
T
>
>
(
d
);
vmul_d_
=
KernelPool
::
Instance
().
template
Get
<
VMulKernel
<
T
>
>
(
d
);
vadd_d_
=
KernelPool
::
Instance
().
template
Get
<
VAddKernel
<
T
>
>
(
d
);
vadd_d_
=
KernelPool
::
Instance
().
template
Get
<
VAddKernel
<
T
>
>
(
d
);
#ifdef __AVX__
auto
GetAVXAct
=
[
&
](
const
std
::
string
&
type
)
->
std
::
unique_ptr
<
AVXAct
>
{
if
(
type
==
"sigmoid"
)
{
return
std
::
unique_ptr
<
AVXAct
>
(
new
AVXActImpl
<
kSigmoid
>
());
}
else
if
(
type
==
"relu"
)
{
return
std
::
unique_ptr
<
AVXAct
>
(
new
AVXActImpl
<
kRelu
>
());
}
else
if
(
type
==
"tanh"
)
{
return
std
::
unique_ptr
<
AVXAct
>
(
new
AVXActImpl
<
kTanh
>
());
}
else
if
(
type
==
"identity"
||
type
==
""
)
{
return
std
::
unique_ptr
<
AVXAct
>
(
new
AVXActImpl
<
kIdentity
>
());
}
PADDLE_THROW
(
"Not support type: %s"
,
type
);
};
avx_act_gate_
=
GetAVXAct
(
act_gate
);
avx_act_cand_
=
GetAVXAct
(
act_cand
);
avx_act_cell_
=
GetAVXAct
(
act_cell
);
#endif
}
}
void
ComputeCtHt
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
,
void
ComputeCtHt
(
T
*
gates
,
const
T
*
ct_1
,
T
*
ct
,
T
*
ht
,
const
T
*
wp_data
,
...
@@ -176,6 +193,27 @@ class LSTMKernelImpl : public LSTMKernel<T> {
...
@@ -176,6 +193,27 @@ class LSTMKernelImpl : public LSTMKernel<T> {
};
};
#define INTRI8_FLOAT(isa) \
#define INTRI8_FLOAT(isa) \
template <> \
LSTMKernelImpl<float, isa, kEQ8>::LSTMKernelImpl( \
const std::string& act_gate, const std::string& act_cand, \
const std::string& act_cell, int d) \
: LSTMKernel<float>() { \
auto GetAVXAct = [&](const std::string& type) -> std::unique_ptr<AVXAct> { \
if (type == "sigmoid") { \
return std::unique_ptr<AVXAct>(new AVXActImpl<kSigmoid, isa>()); \
} else if (type == "relu") { \
return std::unique_ptr<AVXAct>(new AVXActImpl<kRelu, isa>()); \
} else if (type == "tanh") { \
return std::unique_ptr<AVXAct>(new AVXActImpl<kTanh, isa>()); \
} else if (type == "identity" || type == "") { \
return std::unique_ptr<AVXAct>(new AVXActImpl<kIdentity, isa>()); \
} \
PADDLE_THROW("Not support type: %s", type); \
}; \
avx_act_gate_ = GetAVXAct(act_gate); \
avx_act_cand_ = GetAVXAct(act_cand); \
avx_act_cell_ = GetAVXAct(act_cell); \
} \
template <> \
template <> \
void LSTMKernelImpl<float, isa, kEQ8>::ComputeCtHt( \
void LSTMKernelImpl<float, isa, kEQ8>::ComputeCtHt( \
float* gates, const float* ct_1, float* ct, float* ht, \
float* gates, const float* ct_1, float* ct, float* ht, \
...
@@ -195,6 +233,20 @@ class LSTMKernelImpl : public LSTMKernel<T> {
...
@@ -195,6 +233,20 @@ class LSTMKernelImpl : public LSTMKernel<T> {
/* H_t = act_cell(C_t) * ogated */
\
/* H_t = act_cell(C_t) * ogated */
\
o = _mm256_mul_ps(avx_act_cell_->Compute(f), avx_act_gate_->Compute(o)); \
o = _mm256_mul_ps(avx_act_cell_->Compute(f), avx_act_gate_->Compute(o)); \
_mm256_storeu_ps(ht, o); \
_mm256_storeu_ps(ht, o); \
} \
template <> \
void LSTMKernelImpl<float, isa, kEQ8>::ComputeC1H1( \
float* gates, float* ct, float* ht, const float* wp_data) const { \
__m256 c, i, o; \
c = _mm256_loadu_ps(gates); \
i = _mm256_loadu_ps(gates + 8); \
o = _mm256_loadu_ps(gates + 24); \
/* C_t = igated * cgated*/
\
c = _mm256_mul_ps(avx_act_gate_->Compute(i), avx_act_cand_->Compute(c)); \
_mm256_storeu_ps(ct, c); \
/* H_t = act_cell(C_t) * ogated */
\
o = _mm256_mul_ps(avx_act_cell_->Compute(c), avx_act_gate_->Compute(o)); \
_mm256_storeu_ps(ht, o); \
}
}
// TODO(TJ): optimize keq16
// TODO(TJ): optimize keq16
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
049c9c7d
...
@@ -712,6 +712,63 @@ TEST(JitKernel, vadd) {
...
@@ -712,6 +712,63 @@ TEST(JitKernel, vadd) {
}
}
}
}
// Scalar reference for the fused add + ReLU kernel:
// z[i] = max(x[i] + y[i], 0) for i in [0, n). Does nothing when n <= 0.
void vaddrelu_ref(const int n, const float* x, const float* y, float* z) {
  for (int k = 0; k < n; ++k) {
    float sum = x[k] + y[k];
    // Anything that is not strictly positive is clamped to zero.
    if (!(sum > 0)) {
      sum = 0;
    }
    z[k] = sum;
  }
}
// Unfused baseline for the fused VAddRelu kernel: runs the given VAdd kernel
// (z = x + y) and then the given VRelu kernel in place on z.
void vaddrelu_better(
    const std::shared_ptr<
        const paddle::operators::math::jitkernel::VAddKernel<float>>& vadd,
    const std::shared_ptr<
        const paddle::operators::math::jitkernel::VReluKernel<float>>& vrelu,
    const float* x, const float* y, float* z) {
  // Step 1: elementwise sum into the output buffer.
  const auto& add_kernel = *vadd;
  add_kernel.Compute(x, y, z);
  // Step 2: ReLU applied in place on that buffer.
  const auto& relu_kernel = *vrelu;
  relu_kernel.Compute(z, z);
}
// Verifies the fused VAddRelu JIT kernel for several vector sizes and logs
// per-call timings of three implementations:
//   - refer : scalar reference (vaddrelu_ref),
//   - better: unfused VAdd + VRelu JIT kernels (vaddrelu_better),
//   - tgt   : the fused VAddReluKernel under test.
// The reference loop now calls vaddrelu_ref (it previously called vadd_ref,
// which applies no ReLU), and the unfused path writes to its own buffer
// instead of overwriting the reference output, so the fused kernel is really
// compared against the scalar reference.
TEST(JitKernel, vaddrelu) {
  namespace jit = paddle::operators::math::jitkernel;
  for (int d : {7, 8, 15, 16, 30, 256, 512}) {
    std::vector<float> x(d), y(d);
    std::vector<float> zref(d), zbetter(d), ztgt(d);
    RandomVec<float>(d, x.data());
    RandomVec<float>(d, y.data());
    const auto& ker =
        jit::KernelPool::Instance().template Get<jit::VAddReluKernel<float>>(d);
    const auto& vadd =
        jit::KernelPool::Instance().template Get<jit::VAddKernel<float>>(d);
    const auto& vrelu =
        jit::KernelPool::Instance().template Get<jit::VReluKernel<float>>(d);
    const float* x_data = x.data();
    const float* y_data = y.data();
    float* ztgt_data = ztgt.data();
    float* zref_data = zref.data();
    float* zbetter_data = zbetter.data();

    // Scalar reference.
    auto trefs = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      vaddrelu_ref(d, x_data, y_data, zref_data);
    }
    auto trefe = GetCurrentUS();

    // Unfused JIT kernels, written to a separate buffer.
    auto tmkls = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      vaddrelu_better(vadd, vrelu, x_data, y_data, zbetter_data);
    }
    auto tmkle = GetCurrentUS();

    // Fused kernel under test.
    auto ttgts = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      ker->Compute(x_data, y_data, ztgt_data);
    }
    auto ttgte = GetCurrentUS();

    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
            << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
            << "tgt takes: " << (ttgte - ttgts) / repeat;
    for (int i = 0; i < d; ++i) {
      EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
      EXPECT_NEAR(zbetter_data[i], zref_data[i], 1e-3);
    }
  }
}
TEST
(
JitKernel
,
pool
)
{
TEST
(
JitKernel
,
pool
)
{
namespace
jit
=
paddle
::
operators
::
math
::
jitkernel
;
namespace
jit
=
paddle
::
operators
::
math
::
jitkernel
;
const
int
frame_size
=
4
;
const
int
frame_size
=
4
;
...
...
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
浏览文件 @
049c9c7d
...
@@ -237,7 +237,7 @@ TEST(BlockingQueue, speed_test_mode) {
...
@@ -237,7 +237,7 @@ TEST(BlockingQueue, speed_test_mode) {
}
}
for
(
size_t
i
=
0
;
i
<
queue_size
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
queue_size
;
++
i
)
{
q2
.
Receive
(
&
b
);
q2
.
Receive
(
&
b
);
EXPECT_EQ
(
b
,
0
);
EXPECT_EQ
(
b
,
0
UL
);
}
}
EXPECT_EQ
(
q2
.
Size
(),
queue_size
);
EXPECT_EQ
(
q2
.
Size
(),
queue_size
);
}
}
paddle/fluid/operators/roi_align_op.cc
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/roi_align_op.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
// Forward operator definition for roi_align. Validates the inputs and
// attributes and derives the shape of Out from X, ROIs and the pooled size.
class ROIAlignOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that X, ROIs and Out exist, that X is 4-D and ROIs is
  // (num_rois, 4), and that the pooling attributes are positive. Out is then
  // given the shape (num_rois, X.dims[1], pooled_height, pooled_width).
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of ROIAlignOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("ROIs"),
                   "Input(ROIs) of ROIAlignOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of ROIAlignOp should not be null.");
    auto input_dims = ctx->GetInputDim("X");
    auto rois_dims = ctx->GetInputDim("ROIs");

    PADDLE_ENFORCE(input_dims.size() == 4,
                   "The format of input tensor is NCHW.");
    // Adjacent literals are concatenated by the compiler, so the first one
    // must end with a space to keep the message readable.
    PADDLE_ENFORCE(rois_dims.size() == 2,
                   "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) "
                   "given as [[x1, y1, x2, y2], …].");
    PADDLE_ENFORCE(rois_dims[1] == 4,
                   "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) "
                   "given as [[x1, y1, x2, y2], …].");

    int pooled_height = ctx->Attrs().Get<int>("pooled_height");
    int pooled_width = ctx->Attrs().Get<int>("pooled_width");
    float spatial_scale = ctx->Attrs().Get<float>("spatial_scale");

    PADDLE_ENFORCE_GT(pooled_height, 0,
                      "The pooled output height must greater than 0");
    PADDLE_ENFORCE_GT(pooled_width, 0,
                      "The pooled output width must greater than 0");
    PADDLE_ENFORCE_GT(spatial_scale, 0.0f,
                      "The spatial scale must greater than 0");

    // Start from X's dims so the result stays 4-D, then overwrite each axis.
    auto out_dims = input_dims;
    out_dims[0] = rois_dims[0];
    out_dims[1] = input_dims[1];
    out_dims[2] = pooled_height;
    out_dims[3] = pooled_width;

    ctx->SetOutputDim("Out", out_dims);
  }

 protected:
  // The kernel is selected by the data type of X and the current device.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
        ctx.device_context());
  }
};
// Gradient operator definition for roi_align. The gradient of X is given the
// same dims as X itself.
class ROIAlignGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Requires the gradient of Out as input and the gradient of X as output,
  // then copies X's dims onto the gradient of X.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "The GRAD@Out of ROIAlignGradOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")),
                   "The GRAD@X of ROIAlignGradOp should not be null.");

    const auto x_grad_name = framework::GradVarName("X");
    const auto x_dims = ctx->GetInputsDim("X");
    ctx->SetOutputsDim(x_grad_name, x_dims);
  }

 protected:
  // The kernel is selected by the data type of X and the current device.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<framework::Tensor>("X");
    return framework::OpKernelType(framework::ToDataType(x->type()),
                                   ctx.device_context());
  }
};
// Declares the inputs, output, attributes and user-facing documentation of
// the roi_align operator.
class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // NOTE: adjacent string literals are concatenated verbatim, so every
  // fragment that is followed by another one ends with a separator.
  void Make() override {
    AddInput("X",
             "(Tensor), "
             "The input of ROIAlignOp. "
             "The format of input tensor is NCHW. Where N is batch size, "
             "C is the number of input channels, "
             "H is the height of the feature, and "
             "W is the width of the feature.");
    AddInput("ROIs",
             "(LoDTensor), "
             "ROIs (Regions of Interest) to pool over. "
             "should be a 2-D LoDTensor of shape (num_rois, 4) "
             "given as [[x1, y1, x2, y2], …]. "
             "(x1, y1) is the top left coordinates, and "
             "(x2, y2) is the bottom right coordinates.");
    AddOutput("Out",
              "(Tensor), "
              "The output of ROIAlignOp is a 4-D tensor with shape "
              "(num_rois, channels, pooled_h, pooled_w).");
    AddAttr<float>("spatial_scale",
                   "(float, default 1.0), "
                   "Multiplicative spatial scale factor "
                   "to translate ROI coords from their input scale "
                   "to the scale used when pooling.")
        .SetDefault(1.0);
    AddAttr<int>("pooled_height",
                 "(int, default 1), "
                 "The pooled output height.")
        .SetDefault(1);
    AddAttr<int>("pooled_width",
                 "(int, default 1), "
                 "The pooled output width.")
        .SetDefault(1);
    AddAttr<int>("sampling_ratio",
                 "(int, default -1), "
                 "number of sampling points in the interpolation grid. "
                 "If <=0, then grid points are adaptive to roi_width "
                 "and pooled_w, likewise for height")
        .SetDefault(-1);
    AddComment(R"DOC(
**RoIAlign Operator**
Region of interest align (also known as RoI align) is to perform
bilinear interpolation on inputs of nonuniform sizes to obtain
fixed-size feature maps (e.g. 7*7)
Dividing each region proposal into equal-sized sections with
the pooled_width and pooled_height. Location remains the origin
result.
In each ROI bin, the value of the four regularly sampled locations
are computed directly through bilinear interpolation. The output is
the mean of four locations.
Thus avoid the misaligned problem.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Operator and CPU kernel registration for roi_align / roi_align_grad.
// The forward op uses the default grad-op desc maker; both the forward and
// the gradient kernels are registered for float and double.
namespace ops = paddle::operators;
REGISTER_OPERATOR(roi_align, ops::ROIAlignOp, ops::ROIAlignOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(roi_align_grad, ops::ROIAlignGradOp);
REGISTER_OP_CPU_KERNEL(
    roi_align,
    ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::CPUROIAlignOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    roi_align_grad,
    ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::CPUROIAlignGradOpKernel<paddle::platform::CPUDeviceContext, double>);
paddle/fluid/operators/roi_align_op.cu
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/roi_align_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
// Threads per block used for every kernel launch in this file.
static constexpr int kNumCUDAThreads = 512;
// Upper bound on the number of blocks in a launch.
static constexpr int kNumMaxinumNumBlocks = 4096;

// Returns the number of blocks needed to give each of N elements its own
// thread (ceil(N / kNumCUDAThreads)), capped at kNumMaxinumNumBlocks.
static inline int NumBlocks(const int N) {
  const int blocks_needed = (N + kNumCUDAThreads - 1) / kNumCUDAThreads;
  return std::min(blocks_needed, kNumMaxinumNumBlocks);
}
// Grid-stride loop header: `i` starts at this thread's global 1-D index and
// advances by the total number of launched threads until it reaches `n`, so
// a launch with fewer than `n` threads still visits every index.
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
// Bilinearly samples one height x width plane at the real-valued location
// (y, x).
//
// Returns 0 when the location lies more than one pixel before the plane or
// beyond its height/width. Negative coordinates are clamped to 0 and
// coordinates in the last row/column are snapped to that row/column.
template <class T>
__device__ T BilinearInterpolate(const T* input_data, const int height,
                                 const int width, T y, T x) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) {
    return 0;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  // Integer corners surrounding the sample point.
  int y_low = static_cast<int>(y);
  int x_low = static_cast<int>(x);
  int y_high = y_low + 1;
  int x_high = x_low + 1;

  if (y_low >= height - 1) {
    y_low = y_high = height - 1;
    y = static_cast<T>(y_low);
  }
  if (x_low >= width - 1) {
    x_low = x_high = width - 1;
    x = static_cast<T>(x_low);
  }

  // Fractional distances to the low corner and their complements.
  const T ly = y - y_low;
  const T lx = x - x_low;
  const T hy = 1. - ly;
  const T hx = 1. - lx;

  const T v1 = input_data[y_low * width + x_low];
  const T v2 = input_data[y_low * width + x_high];
  const T v3 = input_data[y_high * width + x_low];
  const T v4 = input_data[y_high * width + x_high];

  const T w1 = hy * hx;
  const T w2 = hy * lx;
  const T w3 = ly * hx;
  const T w4 = ly * lx;

  return (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
}
// Computes the four bilinear weights and the four corner indices for the
// sample location (y, x) on a height x width plane.
//
// When the location is outside the accepted range the outputs are left
// untouched, so callers must pre-initialise them. The output pointers are
// expected to refer to distinct objects.
template <class T>
__device__ void BilinearInterpolateGradient(const int height, const int width,
                                            T y, T x, T* w1, T* w2, T* w3,
                                            T* w4, int* x_low, int* x_high,
                                            int* y_low, int* y_high) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) {
    return;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  int row_lo = static_cast<int>(y);
  int col_lo = static_cast<int>(x);
  int row_hi = row_lo + 1;
  int col_hi = col_lo + 1;

  // Snap samples in the last row/column onto that row/column.
  if (row_lo >= height - 1) {
    row_lo = row_hi = height - 1;
    y = static_cast<T>(row_lo);
  }
  if (col_lo >= width - 1) {
    col_lo = col_hi = width - 1;
    x = static_cast<T>(col_lo);
  }

  const T ly = y - row_lo;
  const T lx = x - col_lo;
  const T hy = 1. - ly;
  const T hx = 1. - lx;

  *y_low = row_lo;
  *y_high = row_hi;
  *x_low = col_lo;
  *x_high = col_hi;

  *w1 = hy * hx;
  *w2 = hy * lx;
  *w3 = ly * hx;
  *w4 = ly * lx;
}
// Forward RoIAlign kernel. Each loop iteration produces one output element:
// the flat index is split into (roi n, channel c, bin row ph, bin col pw),
// and the bin value is the mean of the bilinear samples taken on a
// roi_bin_grid_h x roi_bin_grid_w grid inside the bin.
template <class T>
__global__ void GPUROIAlignForward(
    const int nthreads, const T* input_data, const T* input_rois,
    const float spatial_scale, const int channels, const int height,
    const int width, const int pooled_height, const int pooled_width,
    const int sampling_ratio, int* roi_batch_id_data, T* output_data) {
  CUDA_1D_KERNEL_LOOP(i, nthreads) {
    // Peel the coordinates off the flat index, fastest axis first.
    int rest = i;
    const int pw = rest % pooled_width;
    rest /= pooled_width;
    const int ph = rest % pooled_height;
    rest /= pooled_height;
    const int c = rest % channels;
    const int n = rest / channels;

    const T* roi = input_rois + n * kROISize;
    const int roi_batch_ind = roi_batch_id_data[n];

    // Scale the ROI corners by spatial_scale.
    const T roi_xmin = roi[0] * spatial_scale;
    const T roi_ymin = roi[1] * spatial_scale;
    const T roi_xmax = roi[2] * spatial_scale;
    const T roi_ymax = roi[3] * spatial_scale;

    // ROIs are forced to be at least 1 x 1.
    const T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
    const T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
    const T bin_size_h =
        static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    const T bin_size_w =
        static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    const T* plane =
        input_data + (roi_batch_ind * channels + c) * height * width;

    // A non-positive sampling_ratio selects an adaptive grid size.
    const int roi_bin_grid_h = (sampling_ratio > 0)
                                   ? sampling_ratio
                                   : ceil(roi_height / pooled_height);
    const int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
    const T count = roi_bin_grid_h * roi_bin_grid_w;

    T sum = 0;
    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_ymin + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_xmin + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);
        sum += BilinearInterpolate(plane, height, width, y, x);
      }
    }
    sum /= count;
    output_data[i] = sum;
  }
}
// Backward RoIAlign kernel. Each loop iteration takes the gradient of one
// output element and scatters it, weighted by the bilinear weights and
// divided by the number of samples, onto the four corner pixels of every
// sample point using atomic adds. `num_rois` is accepted but not read.
template <typename T>
__global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
                                    const T* out_grad, const int num_rois,
                                    const float spatial_scale,
                                    const int channels, const int height,
                                    const int width, const int pooled_height,
                                    const int pooled_width,
                                    const int sampling_ratio,
                                    int* roi_batch_id_data, T* input_grad) {
  CUDA_1D_KERNEL_LOOP(i, nthreads) {
    // Peel the coordinates off the flat index, fastest axis first.
    int rest = i;
    const int pw = rest % pooled_width;
    rest /= pooled_width;
    const int ph = rest % pooled_height;
    rest /= pooled_height;
    const int c = rest % channels;
    const int n = rest / channels;

    const T* roi = input_rois + n * kROISize;
    const int roi_batch_ind = roi_batch_id_data[n];

    const T roi_xmin = roi[0] * spatial_scale;
    const T roi_ymin = roi[1] * spatial_scale;
    const T roi_xmax = roi[2] * spatial_scale;
    const T roi_ymax = roi[3] * spatial_scale;

    // ROIs are forced to be at least 1 x 1.
    const T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
    const T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
    const T bin_size_h =
        static_cast<T>(roi_height) / static_cast<T>(pooled_height);
    const T bin_size_w =
        static_cast<T>(roi_width) / static_cast<T>(pooled_width);

    T* grad_plane =
        input_grad + (roi_batch_ind * channels + c) * height * width;
    const T* out_grad_plane =
        out_grad + (n * channels + c) * pooled_height * pooled_width;
    const T out_grad_this_bin = out_grad_plane[ph * pooled_width + pw];

    // A non-positive sampling_ratio selects an adaptive grid size.
    const int roi_bin_grid_h = (sampling_ratio > 0)
                                   ? sampling_ratio
                                   : ceil(roi_height / pooled_height);
    const int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
    const T count = roi_bin_grid_h * roi_bin_grid_w;

    for (int iy = 0; iy < roi_bin_grid_h; iy++) {
      const T y = roi_ymin + ph * bin_size_h +
                  static_cast<T>(iy + .5f) * bin_size_h /
                      static_cast<T>(roi_bin_grid_h);
      for (int ix = 0; ix < roi_bin_grid_w; ix++) {
        const T x = roi_xmin + pw * bin_size_w +
                    static_cast<T>(ix + .5f) * bin_size_w /
                        static_cast<T>(roi_bin_grid_w);

        // Defaults mark "sample rejected"; the helper leaves them untouched
        // for out-of-range samples.
        T w1 = 0, w2 = 0, w3 = 0, w4 = 0;
        int x_low = -1, x_high = -1, y_low = -1, y_high = -1;
        BilinearInterpolateGradient(height, width, y, x, &w1, &w2, &w3, &w4,
                                    &x_low, &x_high, &y_low, &y_high);

        const bool valid =
            x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0;
        if (!valid) {
          continue;
        }

        const T diff1 = out_grad_this_bin * w1 / count;
        const T diff2 = out_grad_this_bin * w2 / count;
        const T diff3 = out_grad_this_bin * w3 / count;
        const T diff4 = out_grad_this_bin * w4 / count;

        platform::CudaAtomicAdd(grad_plane + y_low * width + x_low, diff1);
        platform::CudaAtomicAdd(grad_plane + y_low * width + x_high, diff2);
        platform::CudaAtomicAdd(grad_plane + y_high * width + x_low, diff3);
        platform::CudaAtomicAdd(grad_plane + y_high * width + x_high, diff4);
      }
    }
  }
}
// CUDA forward kernel wrapper for roi_align: builds the ROI -> batch-image
// lookup table on the host from the ROIs' LoD, copies it to the execution
// place, and launches GPUROIAlignForward over every output element.
template <typename Place, typename T>
class GPUROIAlignOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<Tensor>("X");
    auto* rois = ctx.Input<LoDTensor>("ROIs");
    auto* out = ctx.Output<Tensor>("Out");

    const auto pooled_height = ctx.Attr<int>("pooled_height");
    const auto pooled_width = ctx.Attr<int>("pooled_width");
    const auto spatial_scale = ctx.Attr<float>("spatial_scale");
    const auto sampling_ratio = ctx.Attr<int>("sampling_ratio");

    const auto in_dims = in->dims();
    int batch_size = in_dims[0];
    int channels = in_dims[1];
    int height = in_dims[2];
    int width = in_dims[3];

    int rois_num = rois->dims()[0];
    // Nothing to do when there are no ROIs.
    if (rois_num == 0) return;

    int output_size = out->numel();

    // Host-side table: roi index -> index of the image it belongs to.
    Tensor roi_batch_id_list;
    roi_batch_id_list.Resize({rois_num});
    int* roi_batch_id_data =
        roi_batch_id_list.mutable_data<int>(platform::CPUPlace());

    auto rois_lod = rois->lod().back();
    int rois_batch_size = rois_lod.size() - 1;
    PADDLE_ENFORCE_EQ(
        rois_batch_size, batch_size,
        "The rois_batch_size and imgs batch_size must be the same.");
    int rois_num_with_lod = rois_lod[rois_batch_size];
    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
                      "The rois_num from input and lod must be the same.");

    for (int img = 0; img < rois_batch_size; ++img) {
      const size_t last = rois_lod[img + 1];
      for (size_t r = rois_lod[img]; r < last; ++r) {
        roi_batch_id_data[r] = img;
      }
    }

    Tensor roi_batch_id_list_gpu;
    framework::TensorCopySync(roi_batch_id_list, ctx.GetPlace(),
                              &roi_batch_id_list_gpu);

    T* out_data = out->mutable_data<T>(ctx.GetPlace());
    GPUROIAlignForward<T><<<NumBlocks(output_size), kNumCUDAThreads, 0,
                            ctx.cuda_device_context().stream()>>>(
        output_size, in->data<T>(), rois->data<T>(), spatial_scale, channels,
        height, width, pooled_height, pooled_width, sampling_ratio,
        roi_batch_id_list_gpu.data<int>(), out_data);
  }
};
// CUDA backward kernel wrapper for roi_align: rebuilds the ROI -> batch-image
// lookup table, zero-fills the gradient of X, and launches
// GPUROIAlignBackward over every element of the output gradient.
template <typename Place, typename T>
class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<Tensor>("X");
    auto* rois = ctx.Input<LoDTensor>("ROIs");
    auto* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* in_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

    const auto pooled_height = ctx.Attr<int>("pooled_height");
    const auto pooled_width = ctx.Attr<int>("pooled_width");
    const auto spatial_scale = ctx.Attr<float>("spatial_scale");
    const auto sampling_ratio = ctx.Attr<int>("sampling_ratio");

    int rois_num = rois->dims()[0];
    int channels = in->dims()[1];
    int height = in->dims()[2];
    int width = in->dims()[3];

    // No gradient requested for X.
    if (!in_grad) {
      return;
    }

    // Host-side table: roi index -> index of the image it belongs to.
    Tensor roi_batch_id_list;
    roi_batch_id_list.Resize({rois_num});
    int* roi_batch_id_data =
        roi_batch_id_list.mutable_data<int>(platform::CPUPlace());

    auto rois_lod = rois->lod().back();
    int rois_batch_size = rois_lod.size() - 1;
    for (int img = 0; img < rois_batch_size; ++img) {
      const size_t last = rois_lod[img + 1];
      for (size_t r = rois_lod[img]; r < last; ++r) {
        roi_batch_id_data[r] = img;
      }
    }

    Tensor roi_batch_id_list_gpu;
    framework::TensorCopySync(roi_batch_id_list, ctx.GetPlace(),
                              &roi_batch_id_list_gpu);

    // The backward kernel accumulates, so the buffer must start at zero.
    T* in_grad_data = in_grad->mutable_data<T>(ctx.GetPlace());
    math::SetConstant<Place, T> set_zero;
    set_zero(ctx.cuda_device_context(), in_grad, static_cast<T>(0));

    int output_grad_size = out_grad->numel();
    if (output_grad_size <= 0) {
      return;
    }

    GPUROIAlignBackward<T><<<NumBlocks(output_grad_size), kNumCUDAThreads, 0,
                             ctx.cuda_device_context().stream()>>>(
        output_grad_size, rois->data<T>(), out_grad->data<T>(), rois_num,
        spatial_scale, channels, height, width, pooled_height, pooled_width,
        sampling_ratio, roi_batch_id_list_gpu.data<int>(), in_grad_data);
  }
};
}
// namespace operators
}
// namespace paddle
// CUDA kernel registration for roi_align / roi_align_grad; both the forward
// and the gradient kernels are registered for float and double.
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    roi_align,
    ops::GPUROIAlignOpKernel<paddle::platform::CUDADeviceContext, float>,
    ops::GPUROIAlignOpKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    roi_align_grad,
    ops::GPUROIAlignGradOpKernel<paddle::platform::CUDADeviceContext, float>,
    ops::GPUROIAlignGradOpKernel<paddle::platform::CUDADeviceContext, double>);
paddle/fluid/operators/roi_align_op.h
0 → 100644
浏览文件 @
049c9c7d
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <limits>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
// Number of values per ROI (x1, y1, x2, y2). The same constant is reused as
// the number of corner entries stored per sample point below.
static constexpr int kROISize = 4;

// Precomputes, for every sample point of every pooled bin of one ROI, the
// four flat corner offsets (into pre_pos) and the four bilinear weights (into
// pre_w). Entries are written in (ph, pw, iy, ix) order, kROISize values per
// sample. Samples outside the accepted range get offset 0 and weight 0.
template <class T>
void PreCalcForBilinearInterpolate(
    const platform::DeviceContext& ctx, const int height, const int width,
    const int pooled_height, const int pooled_width, const int iy_upper,
    const int ix_upper, T roi_ymin, T roi_xmin, T bin_size_h, T bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, Tensor* pre_pos, Tensor* pre_w) {
  int sample_idx = 0;
  int* pre_pos_data = pre_pos->mutable_data<int>(ctx.GetPlace());
  T* pre_w_data = pre_w->mutable_data<T>(ctx.GetPlace());

  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        // calculate y of sample points
        // NOTE: y is clamped in place inside the ix loop below, exactly as
        // the clamped value is reused by later ix iterations.
        T y = roi_ymin + ph * bin_size_h +
              static_cast<T>(iy + .5f) * bin_size_h /
                  static_cast<T>(roi_bin_grid_h);
        // calculate x of sample points
        for (int ix = 0; ix < ix_upper; ix++) {
          T x = roi_xmin + pw * bin_size_w +
                static_cast<T>(ix + .5f) * bin_size_w /
                    static_cast<T>(roi_bin_grid_w);

          int* pos_slot = pre_pos_data + sample_idx * kROISize;
          T* w_slot = pre_w_data + sample_idx * kROISize;
          sample_idx += 1;

          // deal with elements out of map
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            for (int k = 0; k < kROISize; ++k) {
              pos_slot[k] = 0;
              w_slot[k] = 0;
            }
            continue;
          }

          if (y <= 0) y = 0;
          if (x <= 0) x = 0;

          int y_low = static_cast<int>(y);
          int x_low = static_cast<int>(x);
          int y_high = y_low + 1;
          int x_high = x_low + 1;

          // Snap samples in the last row/column onto that row/column.
          if (y_low >= height - 1) {
            y_low = y_high = height - 1;
            y = static_cast<T>(y_low);
          }
          if (x_low >= width - 1) {
            x_low = x_high = width - 1;
            x = static_cast<T>(x_low);
          }

          const T ly = y - y_low;
          const T lx = x - x_low;
          const T hy = 1. - ly;
          const T hx = 1. - lx;

          pos_slot[0] = y_low * width + x_low;
          pos_slot[1] = y_low * width + x_high;
          pos_slot[2] = y_high * width + x_low;
          pos_slot[3] = y_high * width + x_high;

          w_slot[0] = hy * hx;
          w_slot[1] = hy * lx;
          w_slot[2] = ly * hx;
          w_slot[3] = ly * lx;
        }
      }
    }
  }
}
// Scatters the gradient of one sample point onto its four bilinear corner
// pixels of a height x width gradient plane.
//
// Each corner receives out_grad_this_bin * weight / count. Nothing is
// written when (y, x) lies more than one pixel before the plane or beyond
// its height/width.
template <class T>
void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
                                   const T out_grad_this_bin, const T count,
                                   T* batch_grad_data) {
  const bool outside = y < -1.0 || y > height || x < -1.0 || x > width;
  if (outside) {
    return;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  int y_low = static_cast<int>(y);
  int x_low = static_cast<int>(x);
  int y_high = y_low + 1;
  int x_high = x_low + 1;

  // Snap samples in the last row/column onto that row/column.
  if (y_low >= height - 1) {
    y_low = y_high = height - 1;
    y = static_cast<T>(y_low);
  }
  if (x_low >= width - 1) {
    x_low = x_high = width - 1;
    x = static_cast<T>(x_low);
  }

  const T ly = y - y_low;
  const T lx = x - x_low;
  const T hy = 1. - ly;
  const T hx = 1. - lx;

  const T w1 = hy * hx;
  const T w2 = hy * lx;
  const T w3 = ly * hx;
  const T w4 = ly * lx;

  const T diff1 = out_grad_this_bin * w1 / count;
  const T diff2 = out_grad_this_bin * w2 / count;
  const T diff3 = out_grad_this_bin * w3 / count;
  const T diff4 = out_grad_this_bin * w4 / count;

  // Indices can still be negative for a degenerate (empty) plane.
  const bool valid = x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0;
  if (!valid) {
    return;
  }

  // Corners may coincide after snapping, so keep the accumulation order.
  batch_grad_data[y_low * width + x_low] += diff1;
  batch_grad_data[y_low * width + x_high] += diff2;
  batch_grad_data[y_high * width + x_low] += diff3;
  batch_grad_data[y_high * width + x_high] += diff4;
}
// CPU forward kernel for roi_align. For every ROI the sample-point corner
// offsets and weights are precomputed once and then reused for each channel;
// every output bin is the mean of its weighted samples.
template <typename DeviceContext, typename T>
class CPUROIAlignOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<framework::Tensor>("X");
    auto* rois = ctx.Input<framework::LoDTensor>("ROIs");
    auto* out = ctx.Output<framework::Tensor>("Out");

    const auto pooled_height = ctx.Attr<int>("pooled_height");
    const auto pooled_width = ctx.Attr<int>("pooled_width");
    const auto spatial_scale = ctx.Attr<float>("spatial_scale");
    const auto sampling_ratio = ctx.Attr<int>("sampling_ratio");

    auto& dev_ctx = ctx.template device_context<DeviceContext>();

    auto in_dims = in->dims();
    int batch_size = in_dims[0];
    int channels = in_dims[1];
    int height = in_dims[2];
    int width = in_dims[3];
    int rois_num = rois->dims()[0];

    auto in_stride = framework::stride(in_dims);
    auto roi_stride = framework::stride(rois->dims());
    auto out_stride = framework::stride(out->dims());

    const T* input_data = in->data<T>();

    // Table: roi index -> index of the image it belongs to.
    framework::Tensor roi_batch_id_list;
    roi_batch_id_list.Resize({rois_num});
    int* roi_batch_id_data =
        roi_batch_id_list.mutable_data<int>(ctx.GetPlace());

    auto rois_lod = rois->lod().back();
    int rois_batch_size = rois_lod.size() - 1;
    PADDLE_ENFORCE_EQ(
        rois_batch_size, batch_size,
        "The rois_batch_size and imgs batch_size must be the same.");
    int rois_num_with_lod = rois_lod[rois_batch_size];
    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
                      "The rois_num from input and lod must be the same.");
    for (int img = 0; img < rois_batch_size; ++img) {
      const size_t last = rois_lod[img + 1];
      for (size_t r = rois_lod[img]; r < last; ++r) {
        roi_batch_id_data[r] = img;
      }
    }

    T* output_data = out->mutable_data<T>(ctx.GetPlace());
    const T* rois_data = rois->data<T>();

    for (int n = 0; n < rois_num; ++n) {
      const T* roi = rois_data + n * roi_stride[0];
      const int roi_batch_id = roi_batch_id_data[n];

      const T roi_xmin = roi[0] * spatial_scale;
      const T roi_ymin = roi[1] * spatial_scale;
      const T roi_xmax = roi[2] * spatial_scale;
      const T roi_ymax = roi[3] * spatial_scale;

      // ROIs are forced to be at least 1 x 1.
      const T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
      const T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
      const T bin_size_h =
          static_cast<T>(roi_height) / static_cast<T>(pooled_height);
      const T bin_size_w =
          static_cast<T>(roi_width) / static_cast<T>(pooled_width);

      // A non-positive sampling_ratio selects an adaptive grid size.
      int roi_bin_grid_h = (sampling_ratio > 0)
                               ? sampling_ratio
                               : ceil(roi_height / pooled_height);
      int roi_bin_grid_w = (sampling_ratio > 0)
                               ? sampling_ratio
                               : ceil(roi_width / pooled_width);
      const T count = roi_bin_grid_h * roi_bin_grid_w;

      // One row of kROISize entries per sample point of this ROI.
      Tensor pre_pos;
      Tensor pre_w;
      int pre_size = count * out_stride[1];
      pre_pos.Resize({pre_size, kROISize});
      pre_w.Resize({pre_size, kROISize});

      PreCalcForBilinearInterpolate(
          dev_ctx, height, width, pooled_height, pooled_width, roi_bin_grid_h,
          roi_bin_grid_w, roi_ymin, roi_xmin, bin_size_h, bin_size_w,
          roi_bin_grid_h, roi_bin_grid_w, &pre_pos, &pre_w);
      const int* pre_pos_data = pre_pos.data<int>();
      const T* pre_w_data = pre_w.data<T>();

      for (int c = 0; c < channels; c++) {
        const T* channel_data =
            input_data + roi_batch_id * in_stride[0] + c * in_stride[1];

        // The precomputed table is walked from its start for every channel.
        const int* pos_it = pre_pos_data;
        const T* w_it = pre_w_data;

        for (int ph = 0; ph < pooled_height; ph++) {
          for (int pw = 0; pw < pooled_width; pw++) {
            T output_val = 0;
            for (int iy = 0; iy < roi_bin_grid_h; iy++) {
              for (int ix = 0; ix < roi_bin_grid_w; ix++) {
                for (int k = 0; k < kROISize; k++) {
                  output_val += w_it[k] * channel_data[pos_it[k]];
                }
                pos_it += kROISize;
                w_it += kROISize;
              }
            }
            output_val /= count;
            output_data[ph * pooled_width + pw] = output_val;
          }
        }
        // Advance to the next (roi, channel) output plane.
        output_data += out_stride[1];
      }
    }
  }
};
// CPU backward kernel for roi_align. The gradient of every output bin is
// scattered onto the input-gradient plane through the same sample points the
// forward pass used, weighted by the bilinear weights and divided by the
// number of samples per bin.
template <typename DeviceContext, typename T>
class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<framework::Tensor>("X");
    auto* rois = ctx.Input<framework::LoDTensor>("ROIs");
    auto* out_grad =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* in_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    auto pooled_height = ctx.Attr<int>("pooled_height");
    auto pooled_width = ctx.Attr<int>("pooled_width");
    auto spatial_scale = ctx.Attr<float>("spatial_scale");
    auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
    auto in_dims = in->dims();

    // No gradient requested for X.
    if (!in_grad) {
      return;
    }

    int channels = in_dims[1];
    int height = in_dims[2];
    int width = in_dims[3];
    int rois_num = rois->dims()[0];

    // Table: roi index -> index of the image it belongs to.
    Tensor roi_batch_id_list;
    roi_batch_id_list.Resize({rois_num});
    int* roi_batch_id_data =
        roi_batch_id_list.mutable_data<int>(ctx.GetPlace());

    auto rois_lod = rois->lod().back();
    int rois_batch_size = rois_lod.size() - 1;
    for (int n = 0; n < rois_batch_size; ++n) {
      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
        roi_batch_id_data[i] = n;
      }
    }

    const T* rois_data = rois->data<T>();
    const T* out_grad_data = out_grad->data<T>();
    T* in_grad_data = in_grad->mutable_data<T>(ctx.GetPlace());

    // The loops below only accumulate (+=) into the gradient buffer, so it
    // has to start from zero; freshly allocated memory is not guaranteed to
    // be cleared. This mirrors what the CUDA gradient kernel does.
    math::SetConstant<DeviceContext, T> set_zero;
    set_zero(ctx.template device_context<DeviceContext>(), in_grad,
             static_cast<T>(0));

    auto in_stride = framework::stride(in->dims());
    auto roi_stride = framework::stride(rois->dims());
    auto out_stride = framework::stride(out_grad->dims());

    for (int n = 0; n < rois_num; ++n) {
      int roi_batch_idx = roi_batch_id_data[n];
      T roi_xmin = rois_data[0] * spatial_scale;
      T roi_ymin = rois_data[1] * spatial_scale;
      T roi_xmax = rois_data[2] * spatial_scale;
      T roi_ymax = rois_data[3] * spatial_scale;

      // ROIs are forced to be at least 1 x 1.
      T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
      T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
      T bin_size_h =
          static_cast<T>(roi_height) / static_cast<T>(pooled_height);
      T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

      // The sampling grid depends only on the ROI, so compute it once here
      // instead of once per output bin. A non-positive sampling_ratio
      // selects an adaptive grid size.
      const int roi_bin_grid_h = (sampling_ratio > 0)
                                     ? sampling_ratio
                                     : ceil(roi_height / pooled_height);
      const int roi_bin_grid_w = (sampling_ratio > 0)
                                     ? sampling_ratio
                                     : ceil(roi_width / pooled_width);
      const T count = roi_bin_grid_h * roi_bin_grid_w;

      for (int c = 0; c < channels; ++c) {
        T* batch_grad_data =
            in_grad_data + roi_batch_idx * in_stride[0] + c * in_stride[1];
        const T* batch_out_grad_data =
            out_grad_data + n * out_stride[0] + c * out_stride[1];

        for (int ph = 0; ph < pooled_height; ++ph) {
          for (int pw = 0; pw < pooled_width; ++pw) {
            int pool_index = ph * pooled_width + pw;
            T out_grad_this_bin = batch_out_grad_data[pool_index];

            for (int iy = 0; iy < roi_bin_grid_h; iy++) {
              const T y = roi_ymin + ph * bin_size_h +
                          static_cast<T>(iy + .5f) * bin_size_h /
                              static_cast<T>(roi_bin_grid_h);
              for (int ix = 0; ix < roi_bin_grid_w; ix++) {
                const T x = roi_xmin + pw * bin_size_w +
                            static_cast<T>(ix + .5f) * bin_size_w /
                                static_cast<T>(roi_bin_grid_w);
                bilinear_interpolate_gradient(height, width, y, x,
                                              out_grad_this_bin, count,
                                              batch_grad_data);
              }
            }
          }
        }
      }
      // Advance to the next ROI row.
      rois_data += roi_stride[0];
    }
  }
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/roi_pool_op.cc
浏览文件 @
049c9c7d
...
@@ -174,4 +174,4 @@ REGISTER_OP_CPU_KERNEL(
...
@@ -174,4 +174,4 @@ REGISTER_OP_CPU_KERNEL(
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
roi_pool_grad
,
roi_pool_grad
,
ops
::
CPUROIPoolGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
CPUROIPoolGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
CPUROIPoolOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
ops
::
CPUROIPool
Grad
OpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/roi_pool_op.cu
浏览文件 @
049c9c7d
...
@@ -249,4 +249,4 @@ REGISTER_OP_CUDA_KERNEL(
...
@@ -249,4 +249,4 @@ REGISTER_OP_CUDA_KERNEL(
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
roi_pool_grad
,
roi_pool_grad
,
ops
::
GPUROIPoolGradOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
GPUROIPoolGradOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
GPUROIPoolOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
GPUROIPool
Grad
OpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/sequence_concat_op.h
浏览文件 @
049c9c7d
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/concat
_and_split
.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -106,7 +106,7 @@ class SeqConcatGradKernel : public framework::OpKernel<T> {
...
@@ -106,7 +106,7 @@ class SeqConcatGradKernel : public framework::OpKernel<T> {
}
}
}
}
math
::
ConcatGrad
Functor
<
DeviceContext
,
T
>
functor
;
math
::
Split
Functor
<
DeviceContext
,
T
>
functor
;
std
::
vector
<
const
framework
::
Tensor
*>
sliced_x_ptr
;
std
::
vector
<
const
framework
::
Tensor
*>
sliced_x_ptr
;
std
::
vector
<
framework
::
Tensor
*>
sliced_dx_ptr
;
std
::
vector
<
framework
::
Tensor
*>
sliced_dx_ptr
;
for
(
auto
&
x
:
sliced_x
)
{
for
(
auto
&
x
:
sliced_x
)
{
...
...
paddle/fluid/operators/split_op.cc
浏览文件 @
049c9c7d
...
@@ -111,11 +111,10 @@ Example:
...
@@ -111,11 +111,10 @@ Example:
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
USE_CPU_ONLY_OP
(
concat
);
REGISTER_OPERATOR
(
split
,
ops
::
SplitOp
,
ops
::
SplitOpMaker
,
ops
::
SplitGradMaker
);
REGISTER_OPERATOR
(
split
,
ops
::
SplitOp
,
ops
::
SplitOpMaker
,
ops
::
SplitGradMaker
);
REGISTER_OP_CPU_KERNEL
(
split
,
REGISTER_OP_CPU_KERNEL
(
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
,
split
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUPlace
,
int64_t
>
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
,
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUPlace
,
int
>
);
ops
::
SplitOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
);
paddle/fluid/operators/split_op.h
浏览文件 @
049c9c7d
...
@@ -17,6 +17,7 @@ limitations under the License. */
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <chrono> // NOLINT
#include <chrono> // NOLINT
#include <vector>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -28,18 +29,22 @@ class SplitOpKernel : public framework::OpKernel<T> {
...
@@ -28,18 +29,22 @@ class SplitOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
outs
=
ctx
.
MultiOutput
<
framework
::
Tensor
>
(
"Out"
);
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
());
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
int64_t
axis
=
static_cast
<
int64_t
>
(
ctx
.
Attr
<
int
>
(
"axis"
));
auto
place
=
ctx
.
GetPlace
();
auto
place
=
ctx
.
GetPlace
();
size_t
input_offset
=
0
;
std
::
vector
<
const
framework
::
Tensor
*>
shape_refer
;
for
(
auto
&
out
:
outs
)
{
for
(
size_t
j
=
0
;
j
<
outs
.
size
();
++
j
)
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
outs
[
j
]
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
());
shape_refer
.
emplace_back
(
outs
[
j
]);
StridedNumelCopyWithAxis
<
T
>
(
ctx
.
device_context
(),
axis
,
out
->
data
<
T
>
(),
}
out_stride
,
in
->
data
<
T
>
()
+
input_offset
,
in_stride
,
out_stride
[
axis
]);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
input_offset
+=
out_stride
[
axis
];
// Sometimes direct copies will be faster, this maybe need deeply analysis.
if
(
axis
==
0
&&
outs
.
size
()
<
10
)
{
StridedMemcpyWithAxis0
<
T
>
(
dev_ctx
,
*
in
,
shape_refer
,
&
outs
);
}
else
{
math
::
SplitFunctor
<
DeviceContext
,
T
>
functor
;
functor
(
dev_ctx
,
*
in
,
shape_refer
,
axis
,
&
outs
);
}
}
}
}
};
};
...
...
paddle/fluid/operators/strided_memcpy.h
浏览文件 @
049c9c7d
...
@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
...
@@ -13,8 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/detail/strided_memcpy.h"
#include "paddle/fluid/operators/detail/strided_memcpy.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -98,5 +99,26 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
...
@@ -98,5 +99,26 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
}
}
}
}
template
<
typename
T
>
inline
void
StridedMemcpyWithAxis0
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
framework
::
Tensor
&
input
,
const
std
::
vector
<
const
framework
::
Tensor
*>&
shape_refer
,
std
::
vector
<
framework
::
Tensor
*>*
outputs
)
{
const
framework
::
DDim
in_stride
=
stride_numel
(
input
.
dims
());
const
int
axis
=
0
;
size_t
input_offset
=
0
;
for
(
size_t
i
=
0
;
i
<
outputs
->
size
();
++
i
)
{
auto
out_stride
=
stride_numel
(
shape_refer
[
i
]
->
dims
());
auto
out
=
outputs
->
at
(
i
);
if
(
out
!=
nullptr
)
{
StridedNumelCopyWithAxis
<
T
>
(
dev_ctx
,
axis
,
out
->
data
<
T
>
(),
out_stride
,
input
.
data
<
T
>
()
+
input_offset
,
in_stride
,
out_stride
[
axis
]);
}
input_offset
+=
out_stride
[
axis
];
}
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/device_context.cc
浏览文件 @
049c9c7d
...
@@ -35,6 +35,16 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
...
@@ -35,6 +35,16 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
return
it
->
second
.
get
();
return
it
->
second
.
get
();
}
}
const
std
::
vector
<
const
DeviceContext
*>
DeviceContextPool
::
GetAllDeviceContexts
()
const
{
std
::
vector
<
const
DeviceContext
*>
all_device_ctx
;
all_device_ctx
.
reserve
(
device_contexts_
.
size
());
for
(
auto
&
dev_ctx
:
device_contexts_
)
{
all_device_ctx
.
emplace_back
(
dev_ctx
.
second
.
get
());
}
return
all_device_ctx
;
}
DeviceContextPool
::
DeviceContextPool
(
DeviceContextPool
::
DeviceContextPool
(
const
std
::
vector
<
platform
::
Place
>&
places
)
{
const
std
::
vector
<
platform
::
Place
>&
places
)
{
PADDLE_ENFORCE_GT
(
places
.
size
(),
0
);
PADDLE_ENFORCE_GT
(
places
.
size
(),
0
);
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
049c9c7d
...
@@ -217,6 +217,9 @@ class DeviceContextPool {
...
@@ -217,6 +217,9 @@ class DeviceContextPool {
/*! \brief Return handle of single device context. */
/*! \brief Return handle of single device context. */
platform
::
DeviceContext
*
Get
(
const
platform
::
Place
&
place
);
platform
::
DeviceContext
*
Get
(
const
platform
::
Place
&
place
);
/*! \brief Return all the device contexts. */
const
std
::
vector
<
const
DeviceContext
*>
GetAllDeviceContexts
()
const
;
template
<
typename
Place
>
template
<
typename
Place
>
const
typename
DefaultDeviceContextType
<
Place
>::
TYPE
*
GetByPlace
(
const
typename
DefaultDeviceContextType
<
Place
>::
TYPE
*
GetByPlace
(
const
Place
&
place
)
{
const
Place
&
place
)
{
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
049c9c7d
...
@@ -30,6 +30,8 @@ limitations under the License. */
...
@@ -30,6 +30,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
DEFINE_bool
(
enable_rpc_profiler
,
false
,
"Enable rpc profiler or not."
);
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
...
@@ -193,6 +195,13 @@ RecordEvent::~RecordEvent() {
...
@@ -193,6 +195,13 @@ RecordEvent::~RecordEvent() {
PopEvent
(
name_
,
dev_ctx_
);
PopEvent
(
name_
,
dev_ctx_
);
}
}
RecordRPCEvent
::
RecordRPCEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
if
(
FLAGS_enable_rpc_profiler
)
{
event_
.
reset
(
new
platform
::
RecordEvent
(
name
,
dev_ctx
));
}
}
RecordBlock
::
RecordBlock
(
int
block_id
)
RecordBlock
::
RecordBlock
(
int
block_id
)
:
is_enabled_
(
false
),
start_ns_
(
PosixInNsec
())
{
:
is_enabled_
(
false
),
start_ns_
(
PosixInNsec
())
{
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
049c9c7d
...
@@ -87,6 +87,16 @@ struct RecordEvent {
...
@@ -87,6 +87,16 @@ struct RecordEvent {
std
::
string
full_name_
;
std
::
string
full_name_
;
};
};
class
RecordRPCEvent
{
public:
// dev_ctx can be set to nullptr if device is cpu.
RecordRPCEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
~
RecordRPCEvent
()
{}
private:
std
::
unique_ptr
<
RecordEvent
>
event_
;
};
struct
RecordBlock
{
struct
RecordBlock
{
explicit
RecordBlock
(
int
block_id
);
explicit
RecordBlock
(
int
block_id
);
~
RecordBlock
();
~
RecordBlock
();
...
...
python/paddle/fluid/__init__.py
浏览文件 @
049c9c7d
...
@@ -120,6 +120,7 @@ def __bootstrap__():
...
@@ -120,6 +120,7 @@ def __bootstrap__():
read_env_flags
.
append
(
'rpc_deadline'
)
read_env_flags
.
append
(
'rpc_deadline'
)
read_env_flags
.
append
(
'rpc_server_profile_period'
)
read_env_flags
.
append
(
'rpc_server_profile_period'
)
read_env_flags
.
append
(
'rpc_server_profile_path'
)
read_env_flags
.
append
(
'rpc_server_profile_path'
)
read_env_flags
.
append
(
'enable_rpc_profiler'
)
if
core
.
is_compiled_with_cuda
():
if
core
.
is_compiled_with_cuda
():
read_env_flags
+=
[
read_env_flags
+=
[
...
...
python/paddle/fluid/layer_helper.py
浏览文件 @
049c9c7d
...
@@ -324,10 +324,19 @@ class LayerHelper(object):
...
@@ -324,10 +324,19 @@ class LayerHelper(object):
raise
ValueError
(
"no Parameter name %s found"
%
name
)
raise
ValueError
(
"no Parameter name %s found"
%
name
)
return
param
return
param
def
create_tmp_variable
(
self
,
dtype
,
stop_gradient
=
False
):
def
create_variable_for_type_inference
(
self
,
dtype
,
stop_gradient
=
False
):
"""Create a temporary variable that should be type inferred layer.
Note:
The default type will be set to LOD_TENSOR. However, when
the var is used as operator output, its type will be updated
based on operator's `VarTypeInference` implementation in
infer_var_type.
"""
return
self
.
main_program
.
current_block
().
create_var
(
return
self
.
main_program
.
current_block
().
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
self
.
name
,
'tmp'
])),
name
=
unique_name
.
generate
(
"."
.
join
([
self
.
name
,
'tmp'
])),
dtype
=
dtype
,
dtype
=
dtype
,
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
,
persistable
=
False
,
stop_gradient
=
stop_gradient
)
stop_gradient
=
stop_gradient
)
...
@@ -388,7 +397,7 @@ class LayerHelper(object):
...
@@ -388,7 +397,7 @@ class LayerHelper(object):
b
=
self
.
create_parameter
(
b
=
self
.
create_parameter
(
attr
=
bias_attr
,
shape
=
size
,
dtype
=
input_var
.
dtype
,
is_bias
=
True
)
attr
=
bias_attr
,
shape
=
size
,
dtype
=
input_var
.
dtype
,
is_bias
=
True
)
tmp
=
self
.
create_
tmp_variabl
e
(
dtype
=
input_var
.
dtype
)
tmp
=
self
.
create_
variable_for_type_inferenc
e
(
dtype
=
input_var
.
dtype
)
self
.
append_op
(
self
.
append_op
(
type
=
'elementwise_add'
,
type
=
'elementwise_add'
,
inputs
=
{
'X'
:
[
input_var
],
inputs
=
{
'X'
:
[
input_var
],
...
@@ -414,7 +423,7 @@ class LayerHelper(object):
...
@@ -414,7 +423,7 @@ class LayerHelper(object):
tmp
=
input_var
tmp
=
input_var
# NOTE(dzhwinter): some activation support inplace compution.
# NOTE(dzhwinter): some activation support inplace compution.
if
not
core
.
IsInplace
(
act_type
):
if
not
core
.
IsInplace
(
act_type
):
tmp
=
self
.
create_
tmp_variabl
e
(
dtype
=
input_var
.
dtype
)
tmp
=
self
.
create_
variable_for_type_inferenc
e
(
dtype
=
input_var
.
dtype
)
self
.
append_op
(
self
.
append_op
(
type
=
act_type
,
type
=
act_type
,
inputs
=
{
"X"
:
[
input_var
]},
inputs
=
{
"X"
:
[
input_var
]},
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
049c9c7d
...
@@ -80,8 +80,8 @@ def split_lod_tensor(input, mask, level=0):
...
@@ -80,8 +80,8 @@ def split_lod_tensor(input, mask, level=0):
"""
"""
helper
=
LayerHelper
(
'split_lod_tensor'
,
**
locals
())
helper
=
LayerHelper
(
'split_lod_tensor'
,
**
locals
())
out_true
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out_true
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
out_false
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out_false
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'split_lod_tensor'
,
type
=
'split_lod_tensor'
,
inputs
=
{
inputs
=
{
...
@@ -131,7 +131,7 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
...
@@ -131,7 +131,7 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
"""
"""
helper
=
LayerHelper
(
'merge_lod_tensor'
,
**
locals
())
helper
=
LayerHelper
(
'merge_lod_tensor'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
in_true
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
in_true
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'merge_lod_tensor'
,
type
=
'merge_lod_tensor'
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -524,7 +524,7 @@ class StaticRNN(object):
...
@@ -524,7 +524,7 @@ class StaticRNN(object):
if
not
isinstance
(
o
,
Variable
):
if
not
isinstance
(
o
,
Variable
):
raise
TypeError
(
"step output takes a Variable"
)
raise
TypeError
(
"step output takes a Variable"
)
tmp_o
=
self
.
helper
.
create_
tmp_variabl
e
(
dtype
=
o
.
dtype
)
tmp_o
=
self
.
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
o
.
dtype
)
self
.
helper
.
append_op
(
self
.
helper
.
append_op
(
type
=
'rnn_memory_helper'
,
type
=
'rnn_memory_helper'
,
inputs
=
{
'X'
:
[
o
]},
inputs
=
{
'X'
:
[
o
]},
...
@@ -606,7 +606,8 @@ class StaticRNN(object):
...
@@ -606,7 +606,8 @@ class StaticRNN(object):
pre_memories
.
append
(
mem
.
pre_mem
.
name
)
pre_memories
.
append
(
mem
.
pre_mem
.
name
)
mem_var
=
rnn_block
.
var
(
mem
.
mem
.
name
)
mem_var
=
rnn_block
.
var
(
mem
.
mem
.
name
)
assert
isinstance
(
mem_var
,
Variable
)
assert
isinstance
(
mem_var
,
Variable
)
new_mem
=
self
.
helper
.
create_tmp_variable
(
dtype
=
mem_var
.
dtype
)
new_mem
=
self
.
helper
.
create_variable_for_type_inference
(
dtype
=
mem_var
.
dtype
)
rnn_block
.
append_op
(
rnn_block
.
append_op
(
type
=
'rnn_memory_helper'
,
type
=
'rnn_memory_helper'
,
...
@@ -813,7 +814,7 @@ def max_sequence_len(rank_table):
...
@@ -813,7 +814,7 @@ def max_sequence_len(rank_table):
${out_comment}.
${out_comment}.
"""
"""
helper
=
LayerHelper
(
"max_seqence_len"
,
**
locals
())
helper
=
LayerHelper
(
"max_seqence_len"
,
**
locals
())
res
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
res
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"max_sequence_len"
,
type
=
"max_sequence_len"
,
inputs
=
{
"RankTable"
:
rank_table
},
inputs
=
{
"RankTable"
:
rank_table
},
...
@@ -884,7 +885,7 @@ def array_to_lod_tensor(x, table):
...
@@ -884,7 +885,7 @@ def array_to_lod_tensor(x, table):
lod_tensor = fluid.layers.array_to_lod_tensor(array, table)
lod_tensor = fluid.layers.array_to_lod_tensor(array, table)
"""
"""
helper
=
LayerHelper
(
"array_to_lod_tensor"
,
**
locals
())
helper
=
LayerHelper
(
"array_to_lod_tensor"
,
**
locals
())
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"array_to_lod_tensor"
,
type
=
"array_to_lod_tensor"
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -915,7 +916,7 @@ def increment(x, value=1.0, in_place=True):
...
@@ -915,7 +916,7 @@ def increment(x, value=1.0, in_place=True):
"""
"""
helper
=
LayerHelper
(
"increment"
,
**
locals
())
helper
=
LayerHelper
(
"increment"
,
**
locals
())
if
not
in_place
:
if
not
in_place
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
x
out
=
x
helper
.
append_op
(
helper
.
append_op
(
...
@@ -1012,7 +1013,7 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored):
...
@@ -1012,7 +1013,7 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored):
"""
"""
helper
=
LayerHelper
(
"less_than"
,
**
locals
())
helper
=
LayerHelper
(
"less_than"
,
**
locals
())
if
cond
is
None
:
if
cond
is
None
:
cond
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'bool'
)
cond
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'bool'
)
cond
.
stop_gradient
=
True
cond
.
stop_gradient
=
True
attrs
=
dict
()
attrs
=
dict
()
...
@@ -1051,7 +1052,7 @@ def equal(x, y, cond=None, **ignored):
...
@@ -1051,7 +1052,7 @@ def equal(x, y, cond=None, **ignored):
"""
"""
helper
=
LayerHelper
(
"equal"
,
**
locals
())
helper
=
LayerHelper
(
"equal"
,
**
locals
())
if
cond
is
None
:
if
cond
is
None
:
cond
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'bool'
)
cond
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'bool'
)
cond
.
stop_gradient
=
True
cond
.
stop_gradient
=
True
helper
.
append_op
(
helper
.
append_op
(
...
@@ -1098,7 +1099,7 @@ def array_read(array, i):
...
@@ -1098,7 +1099,7 @@ def array_read(array, i):
array
,
array
,
Variable
)
or
array
.
type
!=
core
.
VarDesc
.
VarType
.
LOD_TENSOR_ARRAY
:
Variable
)
or
array
.
type
!=
core
.
VarDesc
.
VarType
.
LOD_TENSOR_ARRAY
:
raise
TypeError
(
"array should be tensor array vairable"
)
raise
TypeError
(
"array should be tensor array vairable"
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
array
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
array
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'read_from_array'
,
type
=
'read_from_array'
,
inputs
=
{
'X'
:
[
array
],
inputs
=
{
'X'
:
[
array
],
...
@@ -1133,7 +1134,7 @@ def shrink_memory(x, i, table):
...
@@ -1133,7 +1134,7 @@ def shrink_memory(x, i, table):
usage.
usage.
"""
"""
helper
=
LayerHelper
(
'shrink_memory'
,
**
locals
())
helper
=
LayerHelper
(
'shrink_memory'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'shrink_rnn_memory'
,
type
=
'shrink_rnn_memory'
,
inputs
=
{
'X'
:
[
x
],
inputs
=
{
'X'
:
[
x
],
...
@@ -1170,7 +1171,7 @@ def array_length(array):
...
@@ -1170,7 +1171,7 @@ def array_length(array):
"""
"""
helper
=
LayerHelper
(
'array_length'
,
**
locals
())
helper
=
LayerHelper
(
'array_length'
,
**
locals
())
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'int64'
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'int64'
)
tmp
.
stop_gradient
=
True
tmp
.
stop_gradient
=
True
helper
.
append_op
(
helper
.
append_op
(
type
=
'lod_array_length'
,
inputs
=
{
'X'
:
[
array
]},
outputs
=
{
'Out'
:
[
tmp
]})
type
=
'lod_array_length'
,
inputs
=
{
'X'
:
[
array
]},
outputs
=
{
'Out'
:
[
tmp
]})
...
@@ -1590,7 +1591,7 @@ class DynamicRNN(object):
...
@@ -1590,7 +1591,7 @@ class DynamicRNN(object):
self
.
mem_dict
=
dict
()
self
.
mem_dict
=
dict
()
self
.
output_array
=
[]
self
.
output_array
=
[]
self
.
outputs
=
[]
self
.
outputs
=
[]
self
.
cond
=
self
.
helper
.
create_
tmp_variabl
e
(
dtype
=
'bool'
)
self
.
cond
=
self
.
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'bool'
)
self
.
cond
.
stop_gradient
=
False
self
.
cond
.
stop_gradient
=
False
self
.
while_op
=
While
(
self
.
cond
)
self
.
while_op
=
While
(
self
.
cond
)
self
.
input_array
=
[]
self
.
input_array
=
[]
...
@@ -1924,7 +1925,7 @@ def reorder_lod_tensor_by_rank(x, rank_table):
...
@@ -1924,7 +1925,7 @@ def reorder_lod_tensor_by_rank(x, rank_table):
helper
.
is_instance
(
'x'
,
Variable
)
helper
.
is_instance
(
'x'
,
Variable
)
helper
.
is_instance
(
'rank_table'
,
Variable
)
helper
.
is_instance
(
'rank_table'
,
Variable
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'reorder_lod_tensor_by_rank'
,
type
=
'reorder_lod_tensor_by_rank'
,
inputs
=
{
'X'
:
[
x
],
inputs
=
{
'X'
:
[
x
],
...
@@ -1958,7 +1959,7 @@ def is_empty(x, cond=None, **ignored):
...
@@ -1958,7 +1959,7 @@ def is_empty(x, cond=None, **ignored):
"""
"""
helper
=
LayerHelper
(
"is_empty"
,
**
locals
())
helper
=
LayerHelper
(
"is_empty"
,
**
locals
())
if
cond
is
None
:
if
cond
is
None
:
cond
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'bool'
)
cond
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'bool'
)
cond
.
stop_gradient
=
True
cond
.
stop_gradient
=
True
elif
not
isinstance
(
cond
,
Variable
):
elif
not
isinstance
(
cond
,
Variable
):
raise
TypeError
(
"cond takes a variable"
)
raise
TypeError
(
"cond takes a variable"
)
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
049c9c7d
...
@@ -147,10 +147,11 @@ def rpn_target_assign(bbox_pred,
...
@@ -147,10 +147,11 @@ def rpn_target_assign(bbox_pred,
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
helper
=
LayerHelper
(
'rpn_target_assign'
,
**
locals
())
# Assign target label to anchors
# Assign target label to anchors
loc_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
loc_index
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
score_index
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
score_index
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
target_label
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
target_label
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
target_bbox
=
helper
.
create_tmp_variable
(
dtype
=
anchor_box
.
dtype
)
target_bbox
=
helper
.
create_variable_for_type_inference
(
dtype
=
anchor_box
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"rpn_target_assign"
,
type
=
"rpn_target_assign"
,
inputs
=
{
inputs
=
{
...
@@ -282,7 +283,8 @@ def detection_output(loc,
...
@@ -282,7 +283,8 @@ def detection_output(loc,
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
scores
=
nn
.
transpose
(
scores
,
perm
=
[
0
,
2
,
1
])
scores
=
nn
.
transpose
(
scores
,
perm
=
[
0
,
2
,
1
])
scores
.
stop_gradient
=
True
scores
.
stop_gradient
=
True
nmsed_outs
=
helper
.
create_tmp_variable
(
dtype
=
decoded_box
.
dtype
)
nmsed_outs
=
helper
.
create_variable_for_type_inference
(
dtype
=
decoded_box
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"multiclass_nms"
,
type
=
"multiclass_nms"
,
inputs
=
{
'Scores'
:
scores
,
inputs
=
{
'Scores'
:
scores
,
...
@@ -314,7 +316,7 @@ def iou_similarity(x, y, name=None):
...
@@ -314,7 +316,7 @@ def iou_similarity(x, y, name=None):
"""
"""
helper
=
LayerHelper
(
"iou_similarity"
,
**
locals
())
helper
=
LayerHelper
(
"iou_similarity"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -351,7 +353,8 @@ def box_coder(prior_box,
...
@@ -351,7 +353,8 @@ def box_coder(prior_box,
helper
=
LayerHelper
(
"box_coder"
,
**
locals
())
helper
=
LayerHelper
(
"box_coder"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
output_box
=
helper
.
create_tmp_variable
(
dtype
=
prior_box
.
dtype
)
output_box
=
helper
.
create_variable_for_type_inference
(
dtype
=
prior_box
.
dtype
)
else
:
else
:
output_box
=
helper
.
create_variable
(
output_box
=
helper
.
create_variable
(
name
=
name
,
dtype
=
prior_box
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
prior_box
.
dtype
,
persistable
=
False
)
...
@@ -382,7 +385,7 @@ def polygon_box_transform(input, name=None):
...
@@ -382,7 +385,7 @@ def polygon_box_transform(input, name=None):
"""
"""
helper
=
LayerHelper
(
"polygon_box_transform"
,
**
locals
())
helper
=
LayerHelper
(
"polygon_box_transform"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
output
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
output
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
else
:
else
:
output
=
helper
.
create_variable
(
output
=
helper
.
create_variable
(
name
=
name
,
dtype
=
prior_box
.
input
,
persistable
=
False
)
name
=
name
,
dtype
=
prior_box
.
input
,
persistable
=
False
)
...
@@ -450,7 +453,7 @@ def detection_map(detect_res,
...
@@ -450,7 +453,7 @@ def detection_map(detect_res,
helper
=
LayerHelper
(
"detection_map"
,
**
locals
())
helper
=
LayerHelper
(
"detection_map"
,
**
locals
())
def
__create_var
(
type
):
def
__create_var
(
type
):
return
helper
.
create_
tmp_variabl
e
(
dtype
=
type
)
return
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
type
)
map_out
=
__create_var
(
'float32'
)
map_out
=
__create_var
(
'float32'
)
accum_pos_count_out
=
out_states
[
0
]
if
out_states
else
__create_var
(
'int32'
)
accum_pos_count_out
=
out_states
[
0
]
if
out_states
else
__create_var
(
'int32'
)
...
@@ -557,8 +560,9 @@ def bipartite_match(dist_matrix,
...
@@ -557,8 +560,9 @@ def bipartite_match(dist_matrix,
>>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
>>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
"""
"""
helper
=
LayerHelper
(
'bipartite_match'
,
**
locals
())
helper
=
LayerHelper
(
'bipartite_match'
,
**
locals
())
match_indices
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
match_indices
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
match_distance
=
helper
.
create_tmp_variable
(
dtype
=
dist_matrix
.
dtype
)
match_distance
=
helper
.
create_variable_for_type_inference
(
dtype
=
dist_matrix
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'bipartite_match'
,
type
=
'bipartite_match'
,
inputs
=
{
'DistMat'
:
dist_matrix
},
inputs
=
{
'DistMat'
:
dist_matrix
},
...
@@ -644,8 +648,8 @@ def target_assign(input,
...
@@ -644,8 +648,8 @@ def target_assign(input,
gt, matched_indices, mismatch_value=0)
gt, matched_indices, mismatch_value=0)
"""
"""
helper
=
LayerHelper
(
'target_assign'
,
**
locals
())
helper
=
LayerHelper
(
'target_assign'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
out_weight
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'float32'
)
out_weight
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'float32'
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'target_assign'
,
type
=
'target_assign'
,
inputs
=
{
inputs
=
{
...
@@ -816,9 +820,10 @@ def ssd_loss(location,
...
@@ -816,9 +820,10 @@ def ssd_loss(location,
conf_loss
=
nn
.
reshape
(
conf_loss
=
nn
.
reshape
(
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
actual_shape
)
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
actual_shape
)
conf_loss
.
stop_gradient
=
True
conf_loss
.
stop_gradient
=
True
neg_indices
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'int32'
)
neg_indices
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'int32'
)
dtype
=
matched_indices
.
dtype
dtype
=
matched_indices
.
dtype
updated_matched_indices
=
helper
.
create_tmp_variable
(
dtype
=
dtype
)
updated_matched_indices
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'mine_hard_examples'
,
type
=
'mine_hard_examples'
,
inputs
=
{
inputs
=
{
...
@@ -998,8 +1003,8 @@ def prior_box(input,
...
@@ -998,8 +1003,8 @@ def prior_box(input,
max_sizes
=
[
max_sizes
]
max_sizes
=
[
max_sizes
]
attrs
[
'max_sizes'
]
=
max_sizes
attrs
[
'max_sizes'
]
=
max_sizes
box
=
helper
.
create_
tmp_variabl
e
(
dtype
)
box
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
var
=
helper
.
create_
tmp_variabl
e
(
dtype
)
var
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"prior_box"
,
type
=
"prior_box"
,
inputs
=
{
"Input"
:
input
,
inputs
=
{
"Input"
:
input
,
...
@@ -1337,8 +1342,8 @@ def anchor_generator(input,
...
@@ -1337,8 +1342,8 @@ def anchor_generator(input,
'offset'
:
offset
'offset'
:
offset
}
}
anchor
=
helper
.
create_
tmp_variabl
e
(
dtype
)
anchor
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
var
=
helper
.
create_
tmp_variabl
e
(
dtype
)
var
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"anchor_generator"
,
type
=
"anchor_generator"
,
inputs
=
{
"Input"
:
input
},
inputs
=
{
"Input"
:
input
},
...
@@ -1384,7 +1389,7 @@ def roi_perspective_transform(input,
...
@@ -1384,7 +1389,7 @@ def roi_perspective_transform(input,
"""
"""
helper
=
LayerHelper
(
'roi_perspective_transform'
,
**
locals
())
helper
=
LayerHelper
(
'roi_perspective_transform'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"roi_perspective_transform"
,
type
=
"roi_perspective_transform"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
input
,
...
@@ -1418,11 +1423,15 @@ def generate_proposal_labels(rpn_rois,
...
@@ -1418,11 +1423,15 @@ def generate_proposal_labels(rpn_rois,
helper
=
LayerHelper
(
'generate_proposal_labels'
,
**
locals
())
helper
=
LayerHelper
(
'generate_proposal_labels'
,
**
locals
())
rois
=
helper
.
create_tmp_variable
(
dtype
=
rpn_rois
.
dtype
)
rois
=
helper
.
create_variable_for_type_inference
(
dtype
=
rpn_rois
.
dtype
)
labels_int32
=
helper
.
create_tmp_variable
(
dtype
=
gt_classes
.
dtype
)
labels_int32
=
helper
.
create_variable_for_type_inference
(
bbox_targets
=
helper
.
create_tmp_variable
(
dtype
=
rpn_rois
.
dtype
)
dtype
=
gt_classes
.
dtype
)
bbox_inside_weights
=
helper
.
create_tmp_variable
(
dtype
=
rpn_rois
.
dtype
)
bbox_targets
=
helper
.
create_variable_for_type_inference
(
bbox_outside_weights
=
helper
.
create_tmp_variable
(
dtype
=
rpn_rois
.
dtype
)
dtype
=
rpn_rois
.
dtype
)
bbox_inside_weights
=
helper
.
create_variable_for_type_inference
(
dtype
=
rpn_rois
.
dtype
)
bbox_outside_weights
=
helper
.
create_variable_for_type_inference
(
dtype
=
rpn_rois
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"generate_proposal_labels"
,
type
=
"generate_proposal_labels"
,
...
@@ -1504,8 +1513,10 @@ def generate_proposals(scores,
...
@@ -1504,8 +1513,10 @@ def generate_proposals(scores,
"""
"""
helper
=
LayerHelper
(
'generate_proposals'
,
**
locals
())
helper
=
LayerHelper
(
'generate_proposals'
,
**
locals
())
rpn_rois
=
helper
.
create_tmp_variable
(
dtype
=
bbox_deltas
.
dtype
)
rpn_rois
=
helper
.
create_variable_for_type_inference
(
rpn_roi_probs
=
helper
.
create_tmp_variable
(
dtype
=
scores
.
dtype
)
dtype
=
bbox_deltas
.
dtype
)
rpn_roi_probs
=
helper
.
create_variable_for_type_inference
(
dtype
=
scores
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"generate_proposals"
,
type
=
"generate_proposals"
,
inputs
=
{
inputs
=
{
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
049c9c7d
...
@@ -954,7 +954,7 @@ def read_file(reader):
...
@@ -954,7 +954,7 @@ def read_file(reader):
"""
"""
helper
=
LayerHelper
(
'read_file'
)
helper
=
LayerHelper
(
'read_file'
)
out
=
[
out
=
[
helper
.
create_
tmp_variabl
e
(
helper
.
create_
variable_for_type_inferenc
e
(
stop_gradient
=
True
,
dtype
=
'float32'
)
stop_gradient
=
True
,
dtype
=
'float32'
)
for
_
in
range
(
len
(
reader
.
desc
.
shapes
()))
for
_
in
range
(
len
(
reader
.
desc
.
shapes
()))
]
]
...
...
python/paddle/fluid/layers/layer_function_generator.py
浏览文件 @
049c9c7d
...
@@ -202,10 +202,12 @@ def generate_layer_fn(op_type):
...
@@ -202,10 +202,12 @@ def generate_layer_fn(op_type):
out_var
=
out
[
0
]
if
(
isinstance
(
out
,
list
)
or
out_var
=
out
[
0
]
if
(
isinstance
(
out
,
list
)
or
isinstance
(
out
,
tuple
))
else
out
isinstance
(
out
,
tuple
))
else
out
else
:
else
:
out_var
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
)
out_var
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
)
outputs
[
o_name
]
=
[
out_var
]
outputs
[
o_name
]
=
[
out_var
]
for
name
in
intermediate_output_names
:
for
name
in
intermediate_output_names
:
outputs
[
name
]
=
[
helper
.
create_tmp_variable
(
dtype
=
dtype
)]
outputs
[
name
]
=
[
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
)
]
helper
.
append_op
(
helper
.
append_op
(
type
=
op_type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
kwargs
)
type
=
op_type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
kwargs
)
return
helper
.
append_activation
(
out_var
)
return
helper
.
append_activation
(
out_var
)
...
@@ -229,7 +231,7 @@ def generate_layer_fn_noattr(op_type):
...
@@ -229,7 +231,7 @@ def generate_layer_fn_noattr(op_type):
def
func
(
x
,
name
=
None
):
def
func
(
x
,
name
=
None
):
helper
=
LayerHelper
(
op_type
,
**
locals
())
helper
=
LayerHelper
(
op_type
,
**
locals
())
output
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
output
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
op_type
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
output
})
helper
.
append_op
(
type
=
op_type
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
output
})
return
output
return
output
...
...
python/paddle/fluid/layers/metric_op.py
浏览文件 @
049c9c7d
...
@@ -58,11 +58,11 @@ def accuracy(input, label, k=1, correct=None, total=None):
...
@@ -58,11 +58,11 @@ def accuracy(input, label, k=1, correct=None, total=None):
"""
"""
helper
=
LayerHelper
(
"accuracy"
,
**
locals
())
helper
=
LayerHelper
(
"accuracy"
,
**
locals
())
topk_out
,
topk_indices
=
nn
.
topk
(
input
,
k
=
k
)
topk_out
,
topk_indices
=
nn
.
topk
(
input
,
k
=
k
)
acc_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"float32"
)
acc_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"float32"
)
if
correct
is
None
:
if
correct
is
None
:
correct
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
correct
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
if
total
is
None
:
if
total
is
None
:
total
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
total
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"accuracy"
,
type
=
"accuracy"
,
inputs
=
{
inputs
=
{
...
@@ -124,8 +124,8 @@ def auc(input,
...
@@ -124,8 +124,8 @@ def auc(input,
auc_out=fluid.layers.auc(input=prediction, label=label)
auc_out=fluid.layers.auc(input=prediction, label=label)
"""
"""
helper
=
LayerHelper
(
"auc"
,
**
locals
())
helper
=
LayerHelper
(
"auc"
,
**
locals
())
auc_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"float64"
)
auc_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"float64"
)
batch_auc_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"float64"
)
batch_auc_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"float64"
)
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
# for batch auc
# for batch auc
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
049c9c7d
...
@@ -96,6 +96,7 @@ __all__ = [
...
@@ -96,6 +96,7 @@ __all__ = [
'pad_constant_like'
,
'pad_constant_like'
,
'label_smooth'
,
'label_smooth'
,
'roi_pool'
,
'roi_pool'
,
'roi_align'
,
'dice_loss'
,
'dice_loss'
,
'image_resize'
,
'image_resize'
,
'image_resize_short'
,
'image_resize_short'
,
...
@@ -241,7 +242,7 @@ def fc(input,
...
@@ -241,7 +242,7 @@ def fc(input,
w
=
helper
.
create_parameter
(
w
=
helper
.
create_parameter
(
attr
=
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
False
)
attr
=
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
is_bias
=
False
)
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"mul"
,
type
=
"mul"
,
inputs
=
{
"X"
:
input_var
,
inputs
=
{
"X"
:
input_var
,
...
@@ -254,7 +255,7 @@ def fc(input,
...
@@ -254,7 +255,7 @@ def fc(input,
if
len
(
mul_results
)
==
1
:
if
len
(
mul_results
)
==
1
:
pre_bias
=
mul_results
[
0
]
pre_bias
=
mul_results
[
0
]
else
:
else
:
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sum"
,
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
inputs
=
{
"X"
:
mul_results
},
...
@@ -313,7 +314,7 @@ def embedding(input,
...
@@ -313,7 +314,7 @@ def embedding(input,
helper
=
LayerHelper
(
'embedding'
,
**
locals
())
helper
=
LayerHelper
(
'embedding'
,
**
locals
())
w
=
helper
.
create_parameter
(
w
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
size
,
dtype
=
dtype
,
is_bias
=
False
)
attr
=
helper
.
param_attr
,
shape
=
size
,
dtype
=
dtype
,
is_bias
=
False
)
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
padding_idx
=
-
1
if
padding_idx
is
None
else
padding_idx
if
padding_idx
>=
0
else
(
padding_idx
=
-
1
if
padding_idx
is
None
else
padding_idx
if
padding_idx
>=
0
else
(
size
[
0
]
+
padding_idx
)
size
[
0
]
+
padding_idx
)
helper
.
append_op
(
helper
.
append_op
(
...
@@ -417,10 +418,10 @@ def dynamic_lstm(input,
...
@@ -417,10 +418,10 @@ def dynamic_lstm(input,
bias
=
helper
.
create_parameter
(
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
hidden
=
helper
.
create_
tmp_variabl
e
(
dtype
)
hidden
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
cell
=
helper
.
create_
tmp_variabl
e
(
dtype
)
cell
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_gate
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_gate
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_cell_pre_act
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_cell_pre_act
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
batch_size
=
input
.
shape
[
0
]
batch_size
=
input
.
shape
[
0
]
if
h_0
:
if
h_0
:
...
@@ -620,12 +621,12 @@ def dynamic_lstmp(input,
...
@@ -620,12 +621,12 @@ def dynamic_lstmp(input,
bias
=
helper
.
create_parameter
(
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
projection
=
helper
.
create_
tmp_variabl
e
(
dtype
)
projection
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
cell
=
helper
.
create_
tmp_variabl
e
(
dtype
)
cell
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
ordered_proj0
=
helper
.
create_
tmp_variabl
e
(
dtype
)
ordered_proj0
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_hidden
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_hidden
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_gate
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_gate
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_cell_pre_act
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_cell_pre_act
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'lstmp'
,
type
=
'lstmp'
,
...
@@ -750,10 +751,10 @@ def dynamic_gru(input,
...
@@ -750,10 +751,10 @@ def dynamic_gru(input,
),
'The shape of h0 should be(batch_size, %d)'
%
size
),
'The shape of h0 should be(batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
inputs
[
'H0'
]
=
h_0
hidden
=
helper
.
create_
tmp_variabl
e
(
dtype
)
hidden
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_gate
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_gate
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_reset_hidden_prev
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_reset_hidden_prev
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
batch_hidden
=
helper
.
create_
tmp_variabl
e
(
dtype
)
batch_hidden
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'gru'
,
type
=
'gru'
,
...
@@ -843,9 +844,9 @@ def gru_unit(input,
...
@@ -843,9 +844,9 @@ def gru_unit(input,
weight
=
helper
.
create_parameter
(
weight
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
[
size
,
3
*
size
],
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
[
size
,
3
*
size
],
dtype
=
dtype
)
gate
=
helper
.
create_
tmp_variabl
e
(
dtype
)
gate
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
reset_hidden_pre
=
helper
.
create_
tmp_variabl
e
(
dtype
)
reset_hidden_pre
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
updated_hidden
=
helper
.
create_
tmp_variabl
e
(
dtype
)
updated_hidden
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
inputs
=
{
'Input'
:
input
,
'HiddenPrev'
:
hidden
,
'Weight'
:
weight
}
inputs
=
{
'Input'
:
input
,
'HiddenPrev'
:
hidden
,
'Weight'
:
weight
}
# create bias
# create bias
if
helper
.
bias_attr
:
if
helper
.
bias_attr
:
...
@@ -895,10 +896,14 @@ def linear_chain_crf(input, label, param_attr=None):
...
@@ -895,10 +896,14 @@ def linear_chain_crf(input, label, param_attr=None):
attr
=
helper
.
param_attr
,
attr
=
helper
.
param_attr
,
shape
=
[
size
+
2
,
size
],
shape
=
[
size
+
2
,
size
],
dtype
=
helper
.
input_dtype
())
dtype
=
helper
.
input_dtype
())
alpha
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
alpha
=
helper
.
create_variable_for_type_inference
(
emission_exps
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
dtype
=
helper
.
input_dtype
())
transition_exps
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
emission_exps
=
helper
.
create_variable_for_type_inference
(
log_likelihood
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
dtype
=
helper
.
input_dtype
())
transition_exps
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
log_likelihood
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'linear_chain_crf'
,
type
=
'linear_chain_crf'
,
inputs
=
{
"Emission"
:
[
input
],
inputs
=
{
"Emission"
:
[
input
],
...
@@ -937,7 +942,8 @@ def crf_decoding(input, param_attr, label=None):
...
@@ -937,7 +942,8 @@ def crf_decoding(input, param_attr, label=None):
"""
"""
helper
=
LayerHelper
(
'crf_decoding'
,
**
locals
())
helper
=
LayerHelper
(
'crf_decoding'
,
**
locals
())
transition
=
helper
.
get_parameter
(
param_attr
.
name
)
transition
=
helper
.
get_parameter
(
param_attr
.
name
)
viterbi_path
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
viterbi_path
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'crf_decoding'
,
type
=
'crf_decoding'
,
inputs
=
{
"Emission"
:
[
input
],
inputs
=
{
"Emission"
:
[
input
],
...
@@ -961,9 +967,9 @@ def cos_sim(X, Y):
...
@@ -961,9 +967,9 @@ def cos_sim(X, Y):
Variable: the output of cosine(X, Y).
Variable: the output of cosine(X, Y).
"""
"""
helper
=
LayerHelper
(
'cos_sim'
,
**
locals
())
helper
=
LayerHelper
(
'cos_sim'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
X
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
X
.
dtype
)
xnorm
=
helper
.
create_
tmp_variabl
e
(
dtype
=
X
.
dtype
)
xnorm
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
X
.
dtype
)
ynorm
=
helper
.
create_
tmp_variabl
e
(
dtype
=
X
.
dtype
)
ynorm
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
X
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'cos_sim'
,
type
=
'cos_sim'
,
inputs
=
{
'X'
:
[
X
],
inputs
=
{
'X'
:
[
X
],
...
@@ -1020,8 +1026,9 @@ def dropout(x,
...
@@ -1020,8 +1026,9 @@ def dropout(x,
"""
"""
helper
=
LayerHelper
(
'dropout'
,
**
locals
())
helper
=
LayerHelper
(
'dropout'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
mask
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
,
stop_gradient
=
True
)
mask
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
,
stop_gradient
=
True
)
if
(
seed
is
None
or
seed
==
0
)
and
helper
.
main_program
.
random_seed
!=
0
:
if
(
seed
is
None
or
seed
==
0
)
and
helper
.
main_program
.
random_seed
!=
0
:
seed
=
helper
.
main_program
.
random_seed
seed
=
helper
.
main_program
.
random_seed
...
@@ -1107,7 +1114,7 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
...
@@ -1107,7 +1114,7 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
cost = fluid.layers.cross_entropy(input=predict, label=label)
cost = fluid.layers.cross_entropy(input=predict, label=label)
"""
"""
helper
=
LayerHelper
(
'cross_entropy'
,
**
locals
())
helper
=
LayerHelper
(
'cross_entropy'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'cross_entropy'
,
type
=
'cross_entropy'
,
inputs
=
{
'X'
:
[
input
],
inputs
=
{
'X'
:
[
input
],
...
@@ -1154,14 +1161,14 @@ def square_error_cost(input, label):
...
@@ -1154,14 +1161,14 @@ def square_error_cost(input, label):
"""
"""
helper
=
LayerHelper
(
'square_error_cost'
,
**
locals
())
helper
=
LayerHelper
(
'square_error_cost'
,
**
locals
())
minus_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
minus_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'elementwise_sub'
,
type
=
'elementwise_sub'
,
inputs
=
{
'X'
:
[
input
],
inputs
=
{
'X'
:
[
input
],
'Y'
:
[
label
]},
'Y'
:
[
label
]},
outputs
=
{
'Out'
:
[
minus_out
]})
outputs
=
{
'Out'
:
[
minus_out
]})
square_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
square_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'square'
,
inputs
=
{
'X'
:
[
minus_out
]},
type
=
'square'
,
inputs
=
{
'X'
:
[
minus_out
]},
outputs
=
{
'Out'
:
[
square_out
]})
outputs
=
{
'Out'
:
[
square_out
]})
...
@@ -1267,12 +1274,13 @@ def chunk_eval(input,
...
@@ -1267,12 +1274,13 @@ def chunk_eval(input,
helper
=
LayerHelper
(
"chunk_eval"
,
**
locals
())
helper
=
LayerHelper
(
"chunk_eval"
,
**
locals
())
# prepare output
# prepare output
precision
=
helper
.
create_tmp_variable
(
dtype
=
"float32"
)
precision
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float32"
)
recall
=
helper
.
create_tmp_variable
(
dtype
=
"float32"
)
recall
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float32"
)
f1_score
=
helper
.
create_tmp_variable
(
dtype
=
"float32"
)
f1_score
=
helper
.
create_variable_for_type_inference
(
dtype
=
"float32"
)
num_infer_chunks
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
num_infer_chunks
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int64"
)
num_label_chunks
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
num_label_chunks
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int64"
)
num_correct_chunks
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
num_correct_chunks
=
helper
.
create_variable_for_type_inference
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"chunk_eval"
,
type
=
"chunk_eval"
,
...
@@ -1339,7 +1347,7 @@ def sequence_conv(input,
...
@@ -1339,7 +1347,7 @@ def sequence_conv(input,
filter_shape
=
[
filter_size
*
input
.
shape
[
1
],
num_filters
]
filter_shape
=
[
filter_size
*
input
.
shape
[
1
],
num_filters
]
filter_param
=
helper
.
create_parameter
(
filter_param
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
)
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_conv'
,
type
=
'sequence_conv'
,
...
@@ -1395,7 +1403,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
...
@@ -1395,7 +1403,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
"""
"""
helper
=
LayerHelper
(
'sequence_softmax'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_softmax'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
softmax_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
softmax_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sequence_softmax"
,
type
=
"sequence_softmax"
,
inputs
=
{
"X"
:
input
},
inputs
=
{
"X"
:
input
},
...
@@ -1449,7 +1457,7 @@ def softmax(input, use_cudnn=True, name=None):
...
@@ -1449,7 +1457,7 @@ def softmax(input, use_cudnn=True, name=None):
"""
"""
helper
=
LayerHelper
(
'softmax'
,
**
locals
())
helper
=
LayerHelper
(
'softmax'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
softmax_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
softmax_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"softmax"
,
type
=
"softmax"
,
inputs
=
{
"X"
:
input
},
inputs
=
{
"X"
:
input
},
...
@@ -1612,7 +1620,7 @@ def conv2d(input,
...
@@ -1612,7 +1620,7 @@ def conv2d(input,
dtype
=
dtype
,
dtype
=
dtype
,
default_initializer
=
_get_default_param_initializer
())
default_initializer
=
_get_default_param_initializer
())
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
l_type
,
type
=
l_type
,
...
@@ -1783,7 +1791,7 @@ def conv3d(input,
...
@@ -1783,7 +1791,7 @@ def conv3d(input,
dtype
=
dtype
,
dtype
=
dtype
,
default_initializer
=
_get_default_param_initializer
())
default_initializer
=
_get_default_param_initializer
())
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
l_type
,
type
=
l_type
,
...
@@ -1862,8 +1870,8 @@ def sequence_pool(input, pool_type):
...
@@ -1862,8 +1870,8 @@ def sequence_pool(input, pool_type):
"""
"""
helper
=
LayerHelper
(
'sequence_pool'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_pool'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pool_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
max_index
=
helper
.
create_
tmp_variabl
e
(
dtype
)
max_index
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sequence_pool"
,
type
=
"sequence_pool"
,
...
@@ -1899,7 +1907,7 @@ def sequence_concat(input, name=None):
...
@@ -1899,7 +1907,7 @@ def sequence_concat(input, name=None):
out = fluid.layers.sequence_concat(input=[seq1, seq2, seq3])
out = fluid.layers.sequence_concat(input=[seq1, seq2, seq3])
"""
"""
helper
=
LayerHelper
(
'sequence_concat'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_concat'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_concat'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
[
out
]})
type
=
'sequence_concat'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
[
out
]})
return
out
return
out
...
@@ -2026,7 +2034,7 @@ def sequence_slice(input, offset, length, name=None):
...
@@ -2026,7 +2034,7 @@ def sequence_slice(input, offset, length, name=None):
"""
"""
helper
=
LayerHelper
(
"sequence_slice"
,
**
locals
())
helper
=
LayerHelper
(
"sequence_slice"
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
offset
.
stop_gradient
=
True
offset
.
stop_gradient
=
True
length
.
stop_gradient
=
True
length
.
stop_gradient
=
True
...
@@ -2112,7 +2120,7 @@ def pool2d(input,
...
@@ -2112,7 +2120,7 @@ def pool2d(input,
helper
=
LayerHelper
(
l_type
,
**
locals
())
helper
=
LayerHelper
(
l_type
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pool_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
l_type
,
type
=
l_type
,
...
@@ -2180,7 +2188,7 @@ def pool3d(input,
...
@@ -2180,7 +2188,7 @@ def pool3d(input,
l_type
=
"pool3d"
l_type
=
"pool3d"
helper
=
LayerHelper
(
l_type
,
**
locals
())
helper
=
LayerHelper
(
l_type
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pool_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
l_type
,
type
=
l_type
,
...
@@ -2323,10 +2331,13 @@ def batch_norm(input,
...
@@ -2323,10 +2331,13 @@ def batch_norm(input,
mean_out
=
mean
mean_out
=
mean
# variance and variance out share the same memory
# variance and variance out share the same memory
variance_out
=
variance
variance_out
=
variance
saved_mean
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
saved_mean
=
helper
.
create_variable_for_type_inference
(
saved_variance
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
dtype
=
dtype
,
stop_gradient
=
True
)
saved_variance
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
batch_norm_out
=
input
if
in_place
else
helper
.
create_tmp_variable
(
dtype
)
batch_norm_out
=
input
if
in_place
else
helper
.
create_variable_for_type_inference
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"batch_norm"
,
type
=
"batch_norm"
,
...
@@ -2443,9 +2454,11 @@ def layer_norm(input,
...
@@ -2443,9 +2454,11 @@ def layer_norm(input,
inputs
[
'Bias'
]
=
bias
inputs
[
'Bias'
]
=
bias
# create output
# create output
mean_out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
mean_out
=
helper
.
create_variable_for_type_inference
(
variance_out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
dtype
=
dtype
,
stop_gradient
=
True
)
layer_norm_out
=
helper
.
create_tmp_variable
(
dtype
)
variance_out
=
helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
layer_norm_out
=
helper
.
create_variable_for_type_inference
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"layer_norm"
,
type
=
"layer_norm"
,
...
@@ -2632,7 +2645,7 @@ def conv2d_transpose(input,
...
@@ -2632,7 +2645,7 @@ def conv2d_transpose(input,
img_filter
=
helper
.
create_parameter
(
img_filter
=
helper
.
create_parameter
(
dtype
=
input
.
dtype
,
shape
=
filter_shape
,
attr
=
helper
.
param_attr
)
dtype
=
input
.
dtype
,
shape
=
filter_shape
,
attr
=
helper
.
param_attr
)
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
op_type
,
type
=
op_type
,
inputs
=
{
'Input'
:
[
input
],
inputs
=
{
'Input'
:
[
input
],
...
@@ -2810,7 +2823,7 @@ def conv3d_transpose(input,
...
@@ -2810,7 +2823,7 @@ def conv3d_transpose(input,
img_filter
=
helper
.
create_parameter
(
img_filter
=
helper
.
create_parameter
(
dtype
=
input
.
dtype
,
shape
=
filter_shape
,
attr
=
helper
.
param_attr
)
dtype
=
input
.
dtype
,
shape
=
filter_shape
,
attr
=
helper
.
param_attr
)
pre_bias
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
pre_bias
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
l_type
,
type
=
l_type
,
inputs
=
{
'Input'
:
[
input
],
inputs
=
{
'Input'
:
[
input
],
...
@@ -2889,7 +2902,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
...
@@ -2889,7 +2902,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
"""
"""
helper
=
LayerHelper
(
'sequence_expand'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'sequence_expand'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_expand'
,
type
=
'sequence_expand'
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -2955,7 +2968,7 @@ def sequence_expand_as(x, y, name=None):
...
@@ -2955,7 +2968,7 @@ def sequence_expand_as(x, y, name=None):
"""
"""
helper
=
LayerHelper
(
'sequence_expand_as'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'sequence_expand_as'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
tmp
=
helper
.
create_
tmp_variabl
e
(
dtype
)
tmp
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_expand_as'
,
type
=
'sequence_expand_as'
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -3000,8 +3013,8 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
...
@@ -3000,8 +3013,8 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
helper
=
LayerHelper
(
'sequence_pad'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'sequence_pad'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
length
=
helper
.
create_
tmp_variabl
e
(
dtype
)
length
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
pad_value
.
stop_gradient
=
True
pad_value
.
stop_gradient
=
True
length
.
stop_gradient
=
True
length
.
stop_gradient
=
True
...
@@ -3066,7 +3079,7 @@ def sequence_unpad(x, length, name=None):
...
@@ -3066,7 +3079,7 @@ def sequence_unpad(x, length, name=None):
helper
=
LayerHelper
(
'sequence_unpad'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'sequence_unpad'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
length
.
stop_gradient
=
True
length
.
stop_gradient
=
True
...
@@ -3165,8 +3178,9 @@ def beam_search(pre_ids,
...
@@ -3165,8 +3178,9 @@ def beam_search(pre_ids,
score_type
=
scores
.
dtype
score_type
=
scores
.
dtype
id_type
=
ids
.
dtype
id_type
=
ids
.
dtype
selected_scores
=
helper
.
create_tmp_variable
(
dtype
=
score_type
)
selected_scores
=
helper
.
create_variable_for_type_inference
(
selected_ids
=
helper
.
create_tmp_variable
(
dtype
=
id_type
)
dtype
=
score_type
)
selected_ids
=
helper
.
create_variable_for_type_inference
(
dtype
=
id_type
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'beam_search'
,
type
=
'beam_search'
,
...
@@ -3223,8 +3237,8 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
...
@@ -3223,8 +3237,8 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
ids, scores, beam_size=5, end_id=0)
ids, scores, beam_size=5, end_id=0)
"""
"""
helper
=
LayerHelper
(
'beam_search_decode'
,
**
locals
())
helper
=
LayerHelper
(
'beam_search_decode'
,
**
locals
())
sentence_ids
=
helper
.
create_
tmp_variabl
e
(
dtype
=
ids
.
dtype
)
sentence_ids
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
ids
.
dtype
)
sentence_scores
=
helper
.
create_
tmp_variabl
e
(
dtype
=
ids
.
dtype
)
sentence_scores
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
ids
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"beam_search_decode"
,
type
=
"beam_search_decode"
,
...
@@ -3354,8 +3368,8 @@ def lstm_unit(x_t,
...
@@ -3354,8 +3368,8 @@ def lstm_unit(x_t,
param_attr
=
param_attr
,
param_attr
=
param_attr
,
bias_attr
=
bias_attr
)
bias_attr
=
bias_attr
)
dtype
=
x_t
.
dtype
dtype
=
x_t
.
dtype
c
=
helper
.
create_
tmp_variabl
e
(
dtype
)
c
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
h
=
helper
.
create_
tmp_variabl
e
(
dtype
)
h
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'lstm_unit'
,
type
=
'lstm_unit'
,
...
@@ -3409,7 +3423,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
...
@@ -3409,7 +3423,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
"""
"""
helper
=
LayerHelper
(
'reduce_sum'
,
**
locals
())
helper
=
LayerHelper
(
'reduce_sum'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
dim
=
[
dim
]
dim
=
[
dim
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3466,7 +3480,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
...
@@ -3466,7 +3480,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_mean(x, dim=[0, 1]) # [4.0, 5.0]
fluid.layers.reduce_mean(x, dim=[0, 1]) # [4.0, 5.0]
"""
"""
helper
=
LayerHelper
(
'reduce_mean'
,
**
locals
())
helper
=
LayerHelper
(
'reduce_mean'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
dim
=
[
dim
]
dim
=
[
dim
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3521,7 +3535,7 @@ def reduce_max(input, dim=None, keep_dim=False, name=None):
...
@@ -3521,7 +3535,7 @@ def reduce_max(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_max(x, dim=[0, 1]) # [7.0, 8.0]
fluid.layers.reduce_max(x, dim=[0, 1]) # [7.0, 8.0]
"""
"""
helper
=
LayerHelper
(
'reduce_max'
,
**
locals
())
helper
=
LayerHelper
(
'reduce_max'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
dim
=
[
dim
]
dim
=
[
dim
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3576,7 +3590,7 @@ def reduce_min(input, dim=None, keep_dim=False, name=None):
...
@@ -3576,7 +3590,7 @@ def reduce_min(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_min(x, dim=[0, 1]) # [1.0, 2.0]
fluid.layers.reduce_min(x, dim=[0, 1]) # [1.0, 2.0]
"""
"""
helper
=
LayerHelper
(
'reduce_min'
,
**
locals
())
helper
=
LayerHelper
(
'reduce_min'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
dim
=
[
dim
]
dim
=
[
dim
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3632,7 +3646,7 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
...
@@ -3632,7 +3646,7 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
fluid.layers.reduce_prod(x, dim=[0, 1]) # [105.0, 384.0]
fluid.layers.reduce_prod(x, dim=[0, 1]) # [105.0, 384.0]
"""
"""
helper
=
LayerHelper
(
'reduce_prod'
,
**
locals
())
helper
=
LayerHelper
(
'reduce_prod'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
if
dim
is
not
None
and
not
isinstance
(
dim
,
list
):
dim
=
[
dim
]
dim
=
[
dim
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3692,7 +3706,7 @@ def split(input, num_or_sections, dim=-1, name=None):
...
@@ -3692,7 +3706,7 @@ def split(input, num_or_sections, dim=-1, name=None):
dim
],
'len(num_or_sections) must not be more than input.shape[dim].'
dim
],
'len(num_or_sections) must not be more than input.shape[dim].'
num
=
len
(
num_or_sections
)
num
=
len
(
num_or_sections
)
outs
=
[
outs
=
[
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
for
i
in
range
(
num
)
for
i
in
range
(
num
)
]
]
helper
.
append_op
(
helper
.
append_op
(
...
@@ -3749,8 +3763,8 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
...
@@ -3749,8 +3763,8 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
axis
=
0
axis
=
0
helper
=
LayerHelper
(
"l2_normalize"
,
**
locals
())
helper
=
LayerHelper
(
"l2_normalize"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
norm
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
norm
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"norm"
,
type
=
"norm"
,
inputs
=
{
"X"
:
x
},
inputs
=
{
"X"
:
x
},
...
@@ -3859,7 +3873,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
...
@@ -3859,7 +3873,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
__check_input
(
x
,
y
)
__check_input
(
x
,
y
)
helper
=
LayerHelper
(
'matmul'
,
**
locals
())
helper
=
LayerHelper
(
'matmul'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'matmul'
,
type
=
'matmul'
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -3930,8 +3944,8 @@ def topk(input, k, name=None):
...
@@ -3930,8 +3944,8 @@ def topk(input, k, name=None):
top5_values, top5_indices = layers.topk(input, k=5)
top5_values, top5_indices = layers.topk(input, k=5)
"""
"""
helper
=
LayerHelper
(
"top_k"
,
**
locals
())
helper
=
LayerHelper
(
"top_k"
,
**
locals
())
values
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
values
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
indices
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
indices
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"top_k"
,
type
=
"top_k"
,
inputs
=
{
"X"
:
[
input
]},
inputs
=
{
"X"
:
[
input
]},
...
@@ -3989,8 +4003,8 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None):
...
@@ -3989,8 +4003,8 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None):
# remove some tokens from input and labels
# remove some tokens from input and labels
if
ignored_tokens
is
not
None
and
len
(
ignored_tokens
)
>
0
:
if
ignored_tokens
is
not
None
and
len
(
ignored_tokens
)
>
0
:
erased_input
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
erased_input
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
erased_label
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
erased_label
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sequence_erase"
,
type
=
"sequence_erase"
,
...
@@ -4007,8 +4021,8 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None):
...
@@ -4007,8 +4021,8 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None):
label
=
erased_label
label
=
erased_label
# edit distance op
# edit distance op
edit_distance_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
edit_distance_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
sequence_num
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
sequence_num
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"edit_distance"
,
type
=
"edit_distance"
,
inputs
=
{
"Hyps"
:
[
input
],
inputs
=
{
"Hyps"
:
[
input
],
...
@@ -4083,7 +4097,7 @@ def ctc_greedy_decoder(input, blank, name=None):
...
@@ -4083,7 +4097,7 @@ def ctc_greedy_decoder(input, blank, name=None):
_
,
topk_indices
=
topk
(
input
,
k
=
1
)
_
,
topk_indices
=
topk
(
input
,
k
=
1
)
# ctc align op
# ctc align op
ctc_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
"int64"
)
ctc_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
"int64"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"ctc_align"
,
type
=
"ctc_align"
,
inputs
=
{
"Input"
:
[
topk_indices
]},
inputs
=
{
"Input"
:
[
topk_indices
]},
...
@@ -4133,8 +4147,8 @@ def warpctc(input, label, blank=0, norm_by_times=False):
...
@@ -4133,8 +4147,8 @@ def warpctc(input, label, blank=0, norm_by_times=False):
"""
"""
helper
=
LayerHelper
(
'warpctc'
,
**
locals
())
helper
=
LayerHelper
(
'warpctc'
,
**
locals
())
loss_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
loss_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
grad_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
grad_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'warpctc'
,
type
=
'warpctc'
,
inputs
=
{
'Logits'
:
[
input
],
inputs
=
{
'Logits'
:
[
input
],
...
@@ -4195,7 +4209,7 @@ def sequence_reshape(input, new_dim):
...
@@ -4195,7 +4209,7 @@ def sequence_reshape(input, new_dim):
x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
"""
"""
helper
=
LayerHelper
(
'sequence_reshape'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_reshape'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_reshape'
,
type
=
'sequence_reshape'
,
inputs
=
{
'X'
:
[
input
]},
inputs
=
{
'X'
:
[
input
]},
...
@@ -4292,9 +4306,9 @@ def nce(input,
...
@@ -4292,9 +4306,9 @@ def nce(input,
is_bias
=
True
,
is_bias
=
True
,
dtype
=
input
.
dtype
)
dtype
=
input
.
dtype
)
inputs
[
'Bias'
]
=
b
inputs
[
'Bias'
]
=
b
cost
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
cost
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
sample_logits
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
sample_logits
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
sample_labels
=
helper
.
create_
tmp_variabl
e
(
dtype
=
label
.
dtype
)
sample_labels
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
label
.
dtype
)
if
num_neg_samples
is
None
:
if
num_neg_samples
is
None
:
num_neg_samples
=
10
num_neg_samples
=
10
...
@@ -4370,8 +4384,8 @@ def hsigmoid(input,
...
@@ -4370,8 +4384,8 @@ def hsigmoid(input,
helper
=
LayerHelper
(
'hierarchical_sigmoid'
,
**
locals
())
helper
=
LayerHelper
(
'hierarchical_sigmoid'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
pre_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pre_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
dim
=
input
.
shape
[
1
]
dim
=
input
.
shape
[
1
]
if
num_classes
<
2
:
if
num_classes
<
2
:
raise
ValueError
(
"num_classes must not be less than 2."
)
raise
ValueError
(
"num_classes must not be less than 2."
)
...
@@ -4431,8 +4445,8 @@ def transpose(x, perm, name=None):
...
@@ -4431,8 +4445,8 @@ def transpose(x, perm, name=None):
(
idx
,
perm
[
idx
],
len
(
x
.
shape
)))
(
idx
,
perm
[
idx
],
len
(
x
.
shape
)))
helper
=
LayerHelper
(
'transpose'
,
**
locals
())
helper
=
LayerHelper
(
'transpose'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
)
x_shape
=
helper
.
create_
tmp_variabl
e
(
x
.
dtype
)
x_shape
=
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'transpose2'
,
type
=
'transpose2'
,
inputs
=
{
'X'
:
[
x
]},
inputs
=
{
'X'
:
[
x
]},
...
@@ -4574,7 +4588,7 @@ def im2sequence(input,
...
@@ -4574,7 +4588,7 @@ def im2sequence(input,
inputs
[
"Y"
]
=
input_image_size
inputs
[
"Y"
]
=
input_image_size
attrs
[
"out_stride"
]
=
out_stride
attrs
[
"out_stride"
]
=
out_stride
helper
=
LayerHelper
(
'im2sequence'
,
**
locals
())
helper
=
LayerHelper
(
'im2sequence'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'im2sequence'
,
inputs
=
inputs
,
outputs
=
{
'Out'
:
out
},
attrs
=
attrs
)
type
=
'im2sequence'
,
inputs
=
inputs
,
outputs
=
{
'Out'
:
out
},
attrs
=
attrs
)
return
out
return
out
...
@@ -4607,7 +4621,7 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
...
@@ -4607,7 +4621,7 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
filter_shape
=
[
future_context_size
+
1
,
input
.
shape
[
1
]]
filter_shape
=
[
future_context_size
+
1
,
input
.
shape
[
1
]]
filter_param
=
helper
.
create_parameter
(
filter_param
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'row_conv'
,
type
=
'row_conv'
,
inputs
=
{
'X'
:
[
input
],
inputs
=
{
'X'
:
[
input
],
...
@@ -4640,7 +4654,7 @@ def multiplex(inputs, index):
...
@@ -4640,7 +4654,7 @@ def multiplex(inputs, index):
raise
ValueError
(
"inputs should be a list object and contains at least "
raise
ValueError
(
"inputs should be a list object and contains at least "
"2 elements."
)
"2 elements."
)
out
=
helper
.
create_
tmp_variabl
e
(
inputs
[
0
].
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
inputs
[
0
].
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'multiplex'
,
type
=
'multiplex'
,
inputs
=
{
'X'
:
inputs
,
inputs
=
{
'X'
:
inputs
,
...
@@ -4711,8 +4725,8 @@ def softmax_with_cross_entropy(logits,
...
@@ -4711,8 +4725,8 @@ def softmax_with_cross_entropy(logits,
logits=fc, label=label)
logits=fc, label=label)
"""
"""
helper
=
LayerHelper
(
'softmax_with_cross_entropy'
,
**
locals
())
helper
=
LayerHelper
(
'softmax_with_cross_entropy'
,
**
locals
())
softmax
=
helper
.
create_
tmp_variabl
e
(
dtype
=
logits
.
dtype
)
softmax
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
logits
.
dtype
)
loss
=
helper
.
create_
tmp_variabl
e
(
dtype
=
logits
.
dtype
)
loss
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
logits
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'softmax_with_cross_entropy'
,
type
=
'softmax_with_cross_entropy'
,
inputs
=
{
'Logits'
:
logits
,
inputs
=
{
'Logits'
:
logits
,
...
@@ -4762,8 +4776,8 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
...
@@ -4762,8 +4776,8 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
"""
"""
helper
=
LayerHelper
(
'smooth_l1_loss'
,
**
locals
())
helper
=
LayerHelper
(
'smooth_l1_loss'
,
**
locals
())
diff
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
diff
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
loss
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
loss
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'smooth_l1_loss'
,
type
=
'smooth_l1_loss'
,
inputs
=
{
inputs
=
{
...
@@ -4796,7 +4810,7 @@ def one_hot(input, depth):
...
@@ -4796,7 +4810,7 @@ def one_hot(input, depth):
one_hot_label = layers.one_hot(input=label, depth=10)
one_hot_label = layers.one_hot(input=label, depth=10)
"""
"""
helper
=
LayerHelper
(
"one_hot"
,
**
locals
())
helper
=
LayerHelper
(
"one_hot"
,
**
locals
())
one_hot_out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'float32'
)
one_hot_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'float32'
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"one_hot"
,
type
=
"one_hot"
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -4938,8 +4952,8 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
...
@@ -4938,8 +4952,8 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
"except one unknown dimension."
)
"except one unknown dimension."
)
helper
=
LayerHelper
(
"reshape2"
,
**
locals
())
helper
=
LayerHelper
(
"reshape2"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
x_shape
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
x_shape
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"reshape2"
,
type
=
"reshape2"
,
inputs
=
inputs
,
inputs
=
inputs
,
...
@@ -4988,8 +5002,8 @@ def squeeze(input, axes, name=None):
...
@@ -4988,8 +5002,8 @@ def squeeze(input, axes, name=None):
y = layers.squeeze(input=x, axes=[1])
y = layers.squeeze(input=x, axes=[1])
"""
"""
helper
=
LayerHelper
(
"squeeze"
,
**
locals
())
helper
=
LayerHelper
(
"squeeze"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"squeeze2"
,
type
=
"squeeze2"
,
inputs
=
{
"X"
:
input
},
inputs
=
{
"X"
:
input
},
...
@@ -5025,8 +5039,8 @@ def unsqueeze(input, axes, name=None):
...
@@ -5025,8 +5039,8 @@ def unsqueeze(input, axes, name=None):
y = layers.unsqueeze(input=x, axes=[1])
y = layers.unsqueeze(input=x, axes=[1])
"""
"""
helper
=
LayerHelper
(
"unsqueeze"
,
**
locals
())
helper
=
LayerHelper
(
"unsqueeze"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
x_shape
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"unsqueeze2"
,
type
=
"unsqueeze2"
,
inputs
=
{
"X"
:
input
},
inputs
=
{
"X"
:
input
},
...
@@ -5116,7 +5130,7 @@ def lod_reset(x, y=None, target_lod=None):
...
@@ -5116,7 +5130,7 @@ def lod_reset(x, y=None, target_lod=None):
out = layers.lod_reset(x=x, y=y)
out = layers.lod_reset(x=x, y=y)
"""
"""
helper
=
LayerHelper
(
"lod_reset"
,
**
locals
())
helper
=
LayerHelper
(
"lod_reset"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
if
y
is
not
None
:
if
y
is
not
None
:
helper
.
append_op
(
helper
.
append_op
(
type
=
"lod_reset"
,
inputs
=
{
'X'
:
x
,
type
=
"lod_reset"
,
inputs
=
{
'X'
:
x
,
...
@@ -5185,8 +5199,9 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
...
@@ -5185,8 +5199,9 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
"dims of input must be 4(not %d), and it's order must be NCHW"
%
"dims of input must be 4(not %d), and it's order must be NCHW"
%
(
dims
))
(
dims
))
mid_out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
,
stop_gradient
=
True
)
mid_out
=
helper
.
create_variable_for_type_inference
(
lrn_out
=
helper
.
create_tmp_variable
(
dtype
)
dtype
=
dtype
,
stop_gradient
=
True
)
lrn_out
=
helper
.
create_variable_for_type_inference
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"lrn"
,
type
=
"lrn"
,
inputs
=
{
"X"
:
input
},
inputs
=
{
"X"
:
input
},
...
@@ -5251,7 +5266,7 @@ def pad(x, paddings, pad_value=0., name=None):
...
@@ -5251,7 +5266,7 @@ def pad(x, paddings, pad_value=0., name=None):
"""
"""
helper
=
LayerHelper
(
'pad'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'pad'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'pad'
,
type
=
'pad'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -5331,7 +5346,7 @@ def pad_constant_like(x, y, pad_value=0., name=None):
...
@@ -5331,7 +5346,7 @@ def pad_constant_like(x, y, pad_value=0., name=None):
"""
"""
helper
=
LayerHelper
(
'pad_constant_like'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'pad_constant_like'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'pad_constant_like'
,
type
=
'pad_constant_like'
,
inputs
=
{
'X'
:
x
,
inputs
=
{
'X'
:
x
,
...
@@ -5396,7 +5411,7 @@ def label_smooth(label,
...
@@ -5396,7 +5411,7 @@ def label_smooth(label,
raise
ValueError
(
"The value of epsilon must be between 0 and 1."
)
raise
ValueError
(
"The value of epsilon must be between 0 and 1."
)
helper
=
LayerHelper
(
"label_smooth"
,
**
locals
())
helper
=
LayerHelper
(
"label_smooth"
,
**
locals
())
label
.
stop_gradient
=
True
label
.
stop_gradient
=
True
smooth_label
=
helper
.
create_
tmp_variabl
e
(
dtype
)
smooth_label
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"label_smooth"
,
type
=
"label_smooth"
,
inputs
=
{
"X"
:
label
,
inputs
=
{
"X"
:
label
,
...
@@ -5428,8 +5443,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
...
@@ -5428,8 +5443,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
"""
"""
helper
=
LayerHelper
(
'roi_pool'
,
**
locals
())
helper
=
LayerHelper
(
'roi_pool'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
pool_out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
argmaxes
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'int32'
)
argmaxes
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'int32'
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"roi_pool"
,
type
=
"roi_pool"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
input
,
...
@@ -5444,6 +5459,54 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
...
@@ -5444,6 +5459,54 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
return
pool_out
return
pool_out
@templatedoc()
def roi_align(input,
              rois,
              pooled_height=1,
              pooled_width=1,
              spatial_scale=1.0,
              sampling_ratio=-1,
              name=None):
    """
    ${comment}

    Args:
        input (Variable): ${x_comment}
        rois (Variable): ROIs (Regions of Interest) to pool over.
        pooled_height (integer): ${pooled_height_comment} Default: 1
        pooled_width (integer): ${pooled_width_comment} Default: 1
        spatial_scale (float): ${spatial_scale_comment} Default: 1.0
        sampling_ratio (integer): ${sampling_ratio_comment} Default: -1

    Returns:
        Variable: ${out_comment}.

    Examples:
        .. code-block:: python

            align_out = fluid.layers.roi_align(input=x,
                                               rois=rois,
                                               pooled_height=7,
                                               pooled_width=7,
                                               spatial_scale=0.5,
                                               sampling_ratio=-1)
    """
    # Keep this as the first statement: locals() forwards exactly the call
    # arguments to LayerHelper, so no other local may be bound before it.
    helper = LayerHelper('roi_align', **locals())

    # The output variable takes the dtype the helper reports for the input.
    pooled = helper.create_variable_for_type_inference(helper.input_dtype())

    op_inputs = {"X": input, "ROIs": rois}
    op_outputs = {"Out": pooled}
    op_attrs = {
        "pooled_height": pooled_height,
        "pooled_width": pooled_width,
        "spatial_scale": spatial_scale,
        "sampling_ratio": sampling_ratio
    }
    helper.append_op(
        type="roi_align",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs)
    return pooled
def
dice_loss
(
input
,
label
,
epsilon
=
0.00001
):
def
dice_loss
(
input
,
label
,
epsilon
=
0.00001
):
"""
"""
Dice loss for comparing the similarity of two batch of data,
Dice loss for comparing the similarity of two batch of data,
...
@@ -5554,7 +5617,7 @@ def image_resize(input,
...
@@ -5554,7 +5617,7 @@ def image_resize(input,
out_h
=
int
(
input
.
shape
[
2
]
*
scale
)
out_h
=
int
(
input
.
shape
[
2
]
*
scale
)
out_w
=
int
(
input
.
shape
[
3
]
*
scale
)
out_w
=
int
(
input
.
shape
[
3
]
*
scale
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
resample_methods
[
resample
],
type
=
resample_methods
[
resample
],
inputs
=
inputs
,
inputs
=
inputs
,
...
@@ -5663,7 +5726,7 @@ def gather(input, index):
...
@@ -5663,7 +5726,7 @@ def gather(input, index):
"""
"""
helper
=
LayerHelper
(
'gather'
,
**
locals
())
helper
=
LayerHelper
(
'gather'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"gather"
,
type
=
"gather"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
input
,
...
@@ -5703,7 +5766,7 @@ def scatter(input, index, updates, name=None):
...
@@ -5703,7 +5766,7 @@ def scatter(input, index, updates, name=None):
"""
"""
helper
=
LayerHelper
(
'scatter'
,
**
locals
())
helper
=
LayerHelper
(
'scatter'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"scatter"
,
type
=
"scatter"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
input
,
...
@@ -5763,7 +5826,7 @@ def sequence_scatter(input, index, updates, name=None):
...
@@ -5763,7 +5826,7 @@ def sequence_scatter(input, index, updates, name=None):
"""
"""
helper
=
LayerHelper
(
'sequence_scatter'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_scatter'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sequence_scatter"
,
type
=
"sequence_scatter"
,
inputs
=
{
"X"
:
input
,
inputs
=
{
"X"
:
input
,
...
@@ -5793,7 +5856,7 @@ def random_crop(x, shape, seed=None):
...
@@ -5793,7 +5856,7 @@ def random_crop(x, shape, seed=None):
"""
"""
helper
=
LayerHelper
(
"random_crop"
,
**
locals
())
helper
=
LayerHelper
(
"random_crop"
,
**
locals
())
dtype
=
x
.
dtype
dtype
=
x
.
dtype
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
if
seed
is
None
:
if
seed
is
None
:
seed
=
np
.
random
.
randint
(
-
65536
,
65536
)
seed
=
np
.
random
.
randint
(
-
65536
,
65536
)
op_attrs
=
{
"shape"
:
shape
}
op_attrs
=
{
"shape"
:
shape
}
...
@@ -5839,7 +5902,7 @@ def log(x, name=None):
...
@@ -5839,7 +5902,7 @@ def log(x, name=None):
"""
"""
helper
=
LayerHelper
(
'log'
,
**
locals
())
helper
=
LayerHelper
(
'log'
,
**
locals
())
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
type
=
"log"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
helper
.
append_op
(
type
=
"log"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
return
out
...
@@ -5870,7 +5933,7 @@ def relu(x, name=None):
...
@@ -5870,7 +5933,7 @@ def relu(x, name=None):
"""
"""
helper
=
LayerHelper
(
'relu'
,
**
locals
())
helper
=
LayerHelper
(
'relu'
,
**
locals
())
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
type
=
"relu"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
helper
.
append_op
(
type
=
"relu"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
return
out
...
@@ -5909,9 +5972,9 @@ def mean_iou(input, label, num_classes):
...
@@ -5909,9 +5972,9 @@ def mean_iou(input, label, num_classes):
"""
"""
helper
=
LayerHelper
(
'mean_iou'
,
**
locals
())
helper
=
LayerHelper
(
'mean_iou'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
out_mean_iou
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'float32'
)
out_mean_iou
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'float32'
)
out_wrong
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'int32'
)
out_wrong
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'int32'
)
out_correct
=
helper
.
create_
tmp_variabl
e
(
dtype
=
'int32'
)
out_correct
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
'int32'
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"mean_iou"
,
type
=
"mean_iou"
,
inputs
=
{
"Predictions"
:
input
,
inputs
=
{
"Predictions"
:
input
,
...
@@ -6003,7 +6066,7 @@ def crop(x, shape=None, offsets=None, name=None):
...
@@ -6003,7 +6066,7 @@ def crop(x, shape=None, offsets=None, name=None):
if
offsets
is
None
:
if
offsets
is
None
:
offsets
=
[
0
]
*
len
(
x
.
shape
)
offsets
=
[
0
]
*
len
(
x
.
shape
)
out
=
helper
.
create_
tmp_variabl
e
(
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
)
ipts
=
{
'X'
:
x
}
ipts
=
{
'X'
:
x
}
attrs
=
{}
attrs
=
{}
if
isinstance
(
shape
,
Variable
):
if
isinstance
(
shape
,
Variable
):
...
@@ -6083,7 +6146,7 @@ def rank_loss(label, left, right, name=None):
...
@@ -6083,7 +6146,7 @@ def rank_loss(label, left, right, name=None):
if
not
(
isinstance
(
right
,
Variable
)):
if
not
(
isinstance
(
right
,
Variable
)):
raise
ValueError
(
"The right should be a Variable"
)
raise
ValueError
(
"The right should be a Variable"
)
out
=
helper
.
create_
tmp_variabl
e
(
"float32"
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
"float32"
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'rank_loss'
,
type
=
'rank_loss'
,
...
@@ -6129,8 +6192,8 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
...
@@ -6129,8 +6192,8 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
raise
ValueError
(
"The left should be a Variable."
)
raise
ValueError
(
"The left should be a Variable."
)
if
not
isinstance
(
right
,
Variable
):
if
not
isinstance
(
right
,
Variable
):
raise
ValueError
(
"The right should be a Variable."
)
raise
ValueError
(
"The right should be a Variable."
)
out
=
helper
.
create_
tmp_variabl
e
(
left
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
left
.
dtype
)
act
=
helper
.
create_
tmp_variabl
e
(
left
.
dtype
)
act
=
helper
.
create_
variable_for_type_inferenc
e
(
left
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'margin_rank_loss'
,
type
=
'margin_rank_loss'
,
inputs
=
{
"Label"
:
label
,
inputs
=
{
"Label"
:
label
,
...
@@ -6215,7 +6278,7 @@ def pad2d(input,
...
@@ -6215,7 +6278,7 @@ def pad2d(input,
helper
=
LayerHelper
(
'pad2d'
,
**
locals
())
helper
=
LayerHelper
(
'pad2d'
,
**
locals
())
dtype
=
helper
.
input_dtype
(
input_param_name
=
'input'
)
dtype
=
helper
.
input_dtype
(
input_param_name
=
'input'
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'pad2d'
,
type
=
'pad2d'
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -6244,7 +6307,7 @@ def elu(x, alpha=1.0, name=None):
...
@@ -6244,7 +6307,7 @@ def elu(x, alpha=1.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'elu'
,
**
locals
())
helper
=
LayerHelper
(
'elu'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'elu'
,
type
=
'elu'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6267,7 +6330,7 @@ def relu6(x, threshold=6.0, name=None):
...
@@ -6267,7 +6330,7 @@ def relu6(x, threshold=6.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'relu6'
,
**
locals
())
helper
=
LayerHelper
(
'relu6'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'relu6'
,
type
=
'relu6'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6290,7 +6353,7 @@ def pow(x, factor=1.0, name=None):
...
@@ -6290,7 +6353,7 @@ def pow(x, factor=1.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'pow'
,
**
locals
())
helper
=
LayerHelper
(
'pow'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'pow'
,
type
=
'pow'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6314,7 +6377,7 @@ def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None):
...
@@ -6314,7 +6377,7 @@ def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'stanh'
,
**
locals
())
helper
=
LayerHelper
(
'stanh'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'stanh'
,
type
=
'stanh'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6339,7 +6402,7 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None):
...
@@ -6339,7 +6402,7 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'hard_sigmoid'
,
**
locals
())
helper
=
LayerHelper
(
'hard_sigmoid'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'hard_sigmoid'
,
type
=
'hard_sigmoid'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6363,7 +6426,7 @@ def swish(x, beta=1.0, name=None):
...
@@ -6363,7 +6426,7 @@ def swish(x, beta=1.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'swish'
,
**
locals
())
helper
=
LayerHelper
(
'swish'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'swish'
,
type
=
'swish'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6415,7 +6478,7 @@ def prelu(x, mode, param_attr=None, name=None):
...
@@ -6415,7 +6478,7 @@ def prelu(x, mode, param_attr=None, name=None):
dtype
=
'float32'
,
dtype
=
'float32'
,
is_bias
=
False
,
is_bias
=
False
,
default_initializer
=
Constant
(
1.0
))
default_initializer
=
Constant
(
1.0
))
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"prelu"
,
type
=
"prelu"
,
inputs
=
{
"X"
:
x
,
inputs
=
{
"X"
:
x
,
...
@@ -6439,7 +6502,7 @@ def brelu(x, t_min=0.0, t_max=24.0, name=None):
...
@@ -6439,7 +6502,7 @@ def brelu(x, t_min=0.0, t_max=24.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'brelu'
,
**
locals
())
helper
=
LayerHelper
(
'brelu'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'brelu'
,
type
=
'brelu'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6462,7 +6525,7 @@ def leaky_relu(x, alpha=0.02, name=None):
...
@@ -6462,7 +6525,7 @@ def leaky_relu(x, alpha=0.02, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'leaky_relu'
,
**
locals
())
helper
=
LayerHelper
(
'leaky_relu'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'leaky_relu'
,
type
=
'leaky_relu'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6484,7 +6547,7 @@ def soft_relu(x, threshold=40.0, name=None):
...
@@ -6484,7 +6547,7 @@ def soft_relu(x, threshold=40.0, name=None):
output(${out_type}): ${out_comment}
output(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'soft_relu'
,
**
locals
())
helper
=
LayerHelper
(
'soft_relu'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'soft_relu'
,
type
=
'soft_relu'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6551,8 +6614,8 @@ def flatten(x, axis=1, name=None):
...
@@ -6551,8 +6614,8 @@ def flatten(x, axis=1, name=None):
if
not
(
isinstance
(
axis
,
int
))
or
axis
>
len
(
x
.
shape
)
or
axis
<
0
:
if
not
(
isinstance
(
axis
,
int
))
or
axis
>
len
(
x
.
shape
)
or
axis
<
0
:
raise
ValueError
(
"The axis should be a int, and in range [0, rank(x)]"
)
raise
ValueError
(
"The axis should be a int, and in range [0, rank(x)]"
)
out
=
helper
.
create_
tmp_variabl
e
(
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
)
x_shape
=
helper
.
create_
tmp_variabl
e
(
x
.
dtype
)
x_shape
=
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'flatten2'
,
type
=
'flatten2'
,
inputs
=
{
"X"
:
x
},
inputs
=
{
"X"
:
x
},
...
@@ -6598,7 +6661,8 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
...
@@ -6598,7 +6661,8 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0)
out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0)
"""
"""
helper
=
LayerHelper
(
'sequence_enumerate'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_enumerate'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
helper
.
input_dtype
(),
stop_gradient
=
True
)
out
=
helper
.
create_variable_for_type_inference
(
helper
.
input_dtype
(),
stop_gradient
=
True
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_enumerate'
,
type
=
'sequence_enumerate'
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -6638,9 +6702,9 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
...
@@ -6638,9 +6702,9 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
helper
=
LayerHelper
(
'sequence_mask'
,
**
locals
())
helper
=
LayerHelper
(
'sequence_mask'
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
)
else
:
else
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
,
name
=
name
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
,
name
=
name
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sequence_mask'
,
type
=
'sequence_mask'
,
...
@@ -6683,7 +6747,7 @@ def stack(x, axis=0):
...
@@ -6683,7 +6747,7 @@ def stack(x, axis=0):
if
not
isinstance
(
x
,
list
)
and
not
isinstance
(
x
,
tuple
):
if
not
isinstance
(
x
,
list
)
and
not
isinstance
(
x
,
tuple
):
x
=
[
x
]
x
=
[
x
]
out
=
helper
.
create_
tmp_variabl
e
(
x
[
0
].
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
x
[
0
].
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'stack'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Y'
:
out
},
type
=
'stack'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Y'
:
out
},
attrs
=
{
'axis'
:
axis
})
attrs
=
{
'axis'
:
axis
})
...
@@ -6721,7 +6785,7 @@ def unstack(x, axis=0, num=None):
...
@@ -6721,7 +6785,7 @@ def unstack(x, axis=0, num=None):
outs
=
[]
outs
=
[]
for
_
in
num
:
for
_
in
num
:
outs
.
append
(
helper
.
create_
tmp_variabl
e
(
x
.
dtype
))
outs
.
append
(
helper
.
create_
variable_for_type_inferenc
e
(
x
.
dtype
))
helper
.
append_op
(
helper
.
append_op
(
type
=
'unstack'
,
type
=
'unstack'
,
...
@@ -6773,7 +6837,7 @@ def expand(x, expand_times, name=None):
...
@@ -6773,7 +6837,7 @@ def expand(x, expand_times, name=None):
"""
"""
helper
=
LayerHelper
(
'expand'
,
input
=
x
,
**
locals
())
helper
=
LayerHelper
(
'expand'
,
input
=
x
,
**
locals
())
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'expand'
,
type
=
'expand'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6812,7 +6876,7 @@ def uniform_random_batch_size_like(input,
...
@@ -6812,7 +6876,7 @@ def uniform_random_batch_size_like(input,
"""
"""
helper
=
LayerHelper
(
'uniform_random_batch_size_like'
,
**
locals
())
helper
=
LayerHelper
(
'uniform_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'uniform_random_batch_size_like'
,
type
=
'uniform_random_batch_size_like'
,
...
@@ -6849,7 +6913,7 @@ def gaussian_random(shape, mean=0.0, std=1.0, seed=0, dtype='float32'):
...
@@ -6849,7 +6913,7 @@ def gaussian_random(shape, mean=0.0, std=1.0, seed=0, dtype='float32'):
"""
"""
helper
=
LayerHelper
(
'gaussian_random'
,
**
locals
())
helper
=
LayerHelper
(
'gaussian_random'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'gaussian_random'
,
type
=
'gaussian_random'
,
...
@@ -6884,7 +6948,7 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'):
...
@@ -6884,7 +6948,7 @@ def sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32'):
"""
"""
helper
=
LayerHelper
(
'sampling_id'
,
**
locals
())
helper
=
LayerHelper
(
'sampling_id'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'sampling_id'
,
type
=
'sampling_id'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6923,7 +6987,7 @@ def gaussian_random_batch_size_like(input,
...
@@ -6923,7 +6987,7 @@ def gaussian_random_batch_size_like(input,
"""
"""
helper
=
LayerHelper
(
'gaussian_random_batch_size_like'
,
**
locals
())
helper
=
LayerHelper
(
'gaussian_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'gaussian_random_batch_size_like'
,
type
=
'gaussian_random_batch_size_like'
,
...
@@ -6955,7 +7019,8 @@ def sum(x):
...
@@ -6955,7 +7019,8 @@ def sum(x):
"""
"""
helper
=
LayerHelper
(
'sum'
,
**
locals
())
helper
=
LayerHelper
(
'sum'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'x'
))
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
(
'x'
))
helper
.
append_op
(
helper
.
append_op
(
type
=
'sum'
,
type
=
'sum'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -6982,7 +7047,8 @@ def slice(input, axes, starts, ends):
...
@@ -6982,7 +7047,8 @@ def slice(input, axes, starts, ends):
"""
"""
helper
=
LayerHelper
(
'slice'
,
**
locals
())
helper
=
LayerHelper
(
'slice'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
helper
.
append_op
(
type
=
'slice'
,
type
=
'slice'
,
inputs
=
{
'Input'
:
input
},
inputs
=
{
'Input'
:
input
},
...
@@ -7008,7 +7074,8 @@ def shape(input):
...
@@ -7008,7 +7074,8 @@ def shape(input):
"""
"""
helper
=
LayerHelper
(
'shape'
,
**
locals
())
helper
=
LayerHelper
(
'shape'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
helper
.
append_op
(
type
=
'shape'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
})
type
=
'shape'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
})
...
@@ -7025,7 +7092,7 @@ def _elementwise_op(helper):
...
@@ -7025,7 +7092,7 @@ def _elementwise_op(helper):
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7059,7 +7126,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
...
@@ -7059,7 +7126,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
helper
=
LayerHelper
(
'scale'
,
**
locals
())
helper
=
LayerHelper
(
'scale'
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7125,7 +7192,7 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
...
@@ -7125,7 +7192,7 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
if
out
is
None
:
if
out
is
None
:
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7233,7 +7300,7 @@ def clip(x, min, max, name=None):
...
@@ -7233,7 +7300,7 @@ def clip(x, min, max, name=None):
helper
=
LayerHelper
(
"clip"
,
**
locals
())
helper
=
LayerHelper
(
"clip"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7265,7 +7332,7 @@ def clip_by_norm(x, max_norm, name=None):
...
@@ -7265,7 +7332,7 @@ def clip_by_norm(x, max_norm, name=None):
helper
=
LayerHelper
(
"clip_by_norm"
,
**
locals
())
helper
=
LayerHelper
(
"clip_by_norm"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7295,7 +7362,7 @@ def mean(x, name=None):
...
@@ -7295,7 +7362,7 @@ def mean(x, name=None):
helper
=
LayerHelper
(
"mean"
,
**
locals
())
helper
=
LayerHelper
(
"mean"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7325,7 +7392,7 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None):
...
@@ -7325,7 +7392,7 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None):
helper
=
LayerHelper
(
"mul"
,
**
locals
())
helper
=
LayerHelper
(
"mul"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7359,7 +7426,7 @@ def sigmoid_cross_entropy_with_logits(x, label, name=None):
...
@@ -7359,7 +7426,7 @@ def sigmoid_cross_entropy_with_logits(x, label, name=None):
helper
=
LayerHelper
(
"sigmoid_cross_entropy_with_logits"
,
**
locals
())
helper
=
LayerHelper
(
"sigmoid_cross_entropy_with_logits"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7389,7 +7456,7 @@ def maxout(x, groups, name=None):
...
@@ -7389,7 +7456,7 @@ def maxout(x, groups, name=None):
helper
=
LayerHelper
(
"maxout"
,
**
locals
())
helper
=
LayerHelper
(
"maxout"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
@@ -7428,7 +7495,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
...
@@ -7428,7 +7495,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
helper
=
LayerHelper
(
"affine_channel"
,
**
locals
())
helper
=
LayerHelper
(
"affine_channel"
,
**
locals
())
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
049c9c7d
...
@@ -152,7 +152,7 @@ def cast(x, dtype):
...
@@ -152,7 +152,7 @@ def cast(x, dtype):
result = fluid.layers.cast(x=data, dtype='float64')
result = fluid.layers.cast(x=data, dtype='float64')
"""
"""
helper
=
LayerHelper
(
'cast'
,
**
locals
())
helper
=
LayerHelper
(
'cast'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'cast'
,
type
=
'cast'
,
inputs
=
{
'X'
:
[
x
]},
inputs
=
{
'X'
:
[
x
]},
...
@@ -184,7 +184,7 @@ def concat(input, axis=0, name=None):
...
@@ -184,7 +184,7 @@ def concat(input, axis=0, name=None):
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
"""
"""
helper
=
LayerHelper
(
'concat'
,
**
locals
())
helper
=
LayerHelper
(
'concat'
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'concat'
,
type
=
'concat'
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -221,7 +221,8 @@ def sums(input, out=None):
...
@@ -221,7 +221,8 @@ def sums(input, out=None):
"""
"""
helper
=
LayerHelper
(
'sum'
,
**
locals
())
helper
=
LayerHelper
(
'sum'
,
**
locals
())
if
out
is
None
:
if
out
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'sum'
,
type
=
'sum'
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -252,7 +253,7 @@ def assign(input, output=None):
...
@@ -252,7 +253,7 @@ def assign(input, output=None):
"""
"""
helper
=
LayerHelper
(
'assign'
,
**
locals
())
helper
=
LayerHelper
(
'assign'
,
**
locals
())
if
output
is
None
:
if
output
is
None
:
output
=
helper
.
create_
tmp_variabl
e
(
dtype
=
input
.
dtype
)
output
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
input
.
dtype
)
if
isinstance
(
input
,
Variable
):
if
isinstance
(
input
,
Variable
):
helper
.
append_op
(
helper
.
append_op
(
type
=
'assign'
,
inputs
=
{
'X'
:
[
input
]},
outputs
=
{
'Out'
:
[
output
]})
type
=
'assign'
,
inputs
=
{
'X'
:
[
input
]},
outputs
=
{
'Out'
:
[
output
]})
...
@@ -311,7 +312,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
...
@@ -311,7 +312,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
helper
=
LayerHelper
(
"fill_constant"
,
**
locals
())
helper
=
LayerHelper
(
"fill_constant"
,
**
locals
())
if
out
is
None
:
if
out
is
None
:
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'fill_constant'
,
type
=
'fill_constant'
,
inputs
=
{},
inputs
=
{},
...
@@ -358,7 +359,7 @@ def fill_constant_batch_size_like(input,
...
@@ -358,7 +359,7 @@ def fill_constant_batch_size_like(input,
${out_comment}.
${out_comment}.
"""
"""
helper
=
LayerHelper
(
"fill_constant_batch_size_like"
,
**
locals
())
helper
=
LayerHelper
(
"fill_constant_batch_size_like"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'fill_constant_batch_size_like'
,
type
=
'fill_constant_batch_size_like'
,
inputs
=
{
'Input'
:
input
},
inputs
=
{
'Input'
:
input
},
...
@@ -396,7 +397,7 @@ def argmin(x, axis=0):
...
@@ -396,7 +397,7 @@ def argmin(x, axis=0):
out = fluid.layers.argmin(x=in, axis=-1)
out = fluid.layers.argmin(x=in, axis=-1)
"""
"""
helper
=
LayerHelper
(
"arg_min"
,
**
locals
())
helper
=
LayerHelper
(
"arg_min"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
VarDesc
.
VarType
.
INT64
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
VarDesc
.
VarType
.
INT64
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'arg_min'
,
type
=
'arg_min'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -427,7 +428,7 @@ def argmax(x, axis=0):
...
@@ -427,7 +428,7 @@ def argmax(x, axis=0):
out = fluid.layers.argmax(x=in, axis=-1)
out = fluid.layers.argmax(x=in, axis=-1)
"""
"""
helper
=
LayerHelper
(
"arg_max"
,
**
locals
())
helper
=
LayerHelper
(
"arg_max"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
VarDesc
.
VarType
.
INT64
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
VarDesc
.
VarType
.
INT64
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'arg_max'
,
type
=
'arg_max'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
...
@@ -477,8 +478,10 @@ def argsort(input, axis=-1, name=None):
...
@@ -477,8 +478,10 @@ def argsort(input, axis=-1, name=None):
out, indices = fluid.layers.argsort(input, axis=0)
out, indices = fluid.layers.argsort(input, axis=0)
"""
"""
helper
=
LayerHelper
(
"argsort"
,
**
locals
())
helper
=
LayerHelper
(
"argsort"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
,
stop_gradient
=
True
)
out
=
helper
.
create_variable_for_type_inference
(
ids
=
helper
.
create_tmp_variable
(
VarDesc
.
VarType
.
INT64
,
stop_gradient
=
True
)
dtype
=
input
.
dtype
,
stop_gradient
=
True
)
ids
=
helper
.
create_variable_for_type_inference
(
VarDesc
.
VarType
.
INT64
,
stop_gradient
=
True
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'argsort'
,
type
=
'argsort'
,
inputs
=
{
'X'
:
input
},
inputs
=
{
'X'
:
input
},
...
@@ -562,7 +565,7 @@ def reverse(x, axis):
...
@@ -562,7 +565,7 @@ def reverse(x, axis):
if
isinstance
(
axis
,
int
):
if
isinstance
(
axis
,
int
):
axis
=
[
axis
]
axis
=
[
axis
]
helper
=
LayerHelper
(
"reverse"
,
**
locals
())
helper
=
LayerHelper
(
"reverse"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'reverse'
,
type
=
'reverse'
,
inputs
=
{
'Input'
:
x
},
inputs
=
{
'Input'
:
x
},
...
@@ -654,7 +657,7 @@ def has_inf(x):
...
@@ -654,7 +657,7 @@ def has_inf(x):
Variable: The tensor variable storing the output, only a bool value.
Variable: The tensor variable storing the output, only a bool value.
"""
"""
helper
=
LayerHelper
(
"isinf"
,
**
locals
())
helper
=
LayerHelper
(
"isinf"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"isinf"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
helper
.
append_op
(
type
=
"isinf"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
return
out
...
@@ -670,7 +673,7 @@ def has_nan(x):
...
@@ -670,7 +673,7 @@ def has_nan(x):
Variable: The tensor variable storing the output, only a bool value.
Variable: The tensor variable storing the output, only a bool value.
"""
"""
helper
=
LayerHelper
(
"isnan"
,
**
locals
())
helper
=
LayerHelper
(
"isnan"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"isnan"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
helper
.
append_op
(
type
=
"isnan"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
return
out
...
@@ -687,6 +690,6 @@ def isfinite(x):
...
@@ -687,6 +690,6 @@ def isfinite(x):
Variable: The tensor variable storing the output, contains a bool value.
Variable: The tensor variable storing the output, contains a bool value.
"""
"""
helper
=
LayerHelper
(
"isfinite"
,
**
locals
())
helper
=
LayerHelper
(
"isfinite"
,
**
locals
())
out
=
helper
.
create_
tmp_variabl
e
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_
variable_for_type_inferenc
e
(
dtype
=
x
.
dtype
)
helper
.
append_op
(
type
=
"isfinite"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
helper
.
append_op
(
type
=
"isfinite"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
return
out
python/paddle/fluid/regularizer.py
浏览文件 @
049c9c7d
...
@@ -151,7 +151,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
...
@@ -151,7 +151,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
decay
=
block
.
create_var
(
decay
=
block
.
create_var
(
dtype
=
"float32"
,
dtype
=
"float32"
,
shape
=
param
.
shape
,
shape
=
param
.
shape
,
type
=
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
block
.
append_op
(
block
.
append_op
(
type
=
'extract_rows'
,
inputs
=
{
'X'
:
grad
},
outputs
=
{
'Out'
:
idx
})
type
=
'extract_rows'
,
inputs
=
{
'X'
:
grad
},
outputs
=
{
'Out'
:
idx
})
block
.
append_op
(
block
.
append_op
(
...
@@ -228,7 +228,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
...
@@ -228,7 +228,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
decay
=
block
.
create_var
(
decay
=
block
.
create_var
(
dtype
=
"float32"
,
dtype
=
"float32"
,
shape
=
param
.
shape
,
shape
=
param
.
shape
,
type
=
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
block
.
append_op
(
block
.
append_op
(
type
=
'extract_rows'
,
inputs
=
{
'X'
:
grad
},
outputs
=
{
'Out'
:
idx
})
type
=
'extract_rows'
,
inputs
=
{
'X'
:
grad
},
outputs
=
{
'Out'
:
idx
})
block
.
append_op
(
block
.
append_op
(
...
...
python/paddle/fluid/tests/CMakeLists.txt
浏览文件 @
049c9c7d
if
(
NOT APPLE
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/tests CACHE INTERNAL
"python tests directory"
)
set
(
PYTHON_TESTS_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
CACHE PATH
"python tests directory"
)
else
()
set
(
PYTHON_TESTS_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/tests
)
endif
(
NOT APPLE
)
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
file
(
GLOB TEST_OPS RELATIVE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
"
"test_*.py"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
...
...
python/paddle/fluid/tests/unittests/test_fusion_seqconv_eltadd_relu_op.py
0 → 100644
浏览文件 @
049c9c7d
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
random
from
op_test
import
OpTest
from
test_seq_conv
import
seqconv
class TestSeqConvEltAddRelu(OpTest):
    """Check the ``fusion_seqconv_eltadd_relu`` op against a NumPy reference.

    ``setUp`` installs a default configuration and then calls ``set_conf``,
    which subclasses override to change the LoD, the feature sizes or the
    context window before inputs and the expected output are generated.
    """

    def set_conf(self):
        """Configuration hook; the base class keeps the defaults."""
        pass

    def setUp(self):
        """Create random inputs, attributes and the expected output."""
        self.op_type = 'fusion_seqconv_eltadd_relu'

        # Defaults; any of these may be replaced by set_conf().
        self.lod = [[6, 4]]
        self.in_fea_size = 16
        self.out_fea_size = 8
        self.context_length = 4
        self.context_stride = 1
        self.context_start = 0
        self.set_conf()

        # The seqconv reference call below takes no stride argument, so
        # only a stride of 1 is accepted here.
        assert self.context_stride == 1

        total_rows = sum(self.lod[0])
        filter_shape = [
            self.in_fea_size * self.context_length, self.out_fea_size
        ]

        # The three draws stay in the order x, filter, bias so the random
        # stream is consumed exactly as before.
        x = np.random.uniform(-1, 1, [total_rows, self.in_fea_size])
        x = x.astype('float32')
        w = np.random.uniform(-1, 1, filter_shape).astype('float32')
        b = np.random.uniform(-2, 1, [1, self.out_fea_size]).astype('float32')

        # Reference: sequence convolution, then bias add, then ReLU.
        conv_out = seqconv(x, self.lod, w, self.context_length,
                           self.context_start)
        expected = np.maximum(conv_out + b, 0)

        self.inputs = {'X': (x, self.lod), 'Filter': w, 'Bias': b}
        self.attrs = {
            'contextStart': self.context_start,
            'contextLength': self.context_length,
            'contextStride': self.context_stride
        }
        self.outputs = {'Out': expected}

    def test_check_output(self):
        """Compare the op's output with the expected output from setUp."""
        self.check_output()
class TestSeqConvEltAddReluBS1(TestSeqConvEltAddRelu):
    """Variant of the base test whose LoD holds a single entry of 10."""

    def set_conf(self):
        """Replace the default LoD with ``[[10]]``."""
        self.lod = [[10]]
class TestSeqConvEltAddReluBS1Case2(TestSeqConvEltAddRelu):
    """Variant whose LoD holds a single entry of 2.

    The context length stays at the base default of 4, so this entry is
    smaller than the context length.
    """

    def set_conf(self):
        """Replace the default LoD with ``[[2]]``."""
        self.lod = [[2]]
class TestSeqConvEltAddReluCase1(TestSeqConvEltAddRelu):
    """Four-entry LoD with a context of length 3 starting at offset -2."""

    def set_conf(self):
        """Override the LoD and the context window."""
        self.lod = [[3, 5, 1, 6]]
        self.context_length = 3
        self.context_start = -2
class TestSeqConvEltAddReluCase2(TestSeqConvEltAddRelu):
    """Seven-entry LoD, input feature size 2, context 4 starting at -1."""

    def set_conf(self):
        """Override the LoD, the input feature size and the context window."""
        self.lod = [[10, 1, 2, 4, 1, 5, 6]]
        self.in_fea_size = 2
        self.context_length = 4
        self.context_start = -1
class TestSeqConvEltAddReluCase3(TestSeqConvEltAddRelu):
    """Seven-entry LoD with a context of length 5 starting at offset -4."""

    def set_conf(self):
        """Override the LoD and the context window."""
        self.lod = [[10, 1, 2, 4, 1, 5, 6]]
        self.context_length = 5
        self.context_start = -4
# Run every test case in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
049c9c7d
...
@@ -465,6 +465,16 @@ class TestBook(unittest.TestCase):
...
@@ -465,6 +465,16 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
output
)
self
.
assertIsNotNone
(
output
)
print
(
str
(
program
))
print
(
str
(
program
))
def test_roi_align(self):
    """Build a program with a ``roi_align`` layer and check it has an output."""
    program = Program()
    with program_guard(program):
        x = layers.data(name="x", shape=[256, 30, 30], dtype="float32")
        rois = layers.data(
            name="rois", shape=[4], dtype="float32", lod_level=1)
        # Arguments are passed positionally, exactly as in the original call.
        output = layers.roi_align(x, rois, 14, 14, 0.5, 2)
        self.assertIsNotNone(output)
    # Dump the constructed program's string form.
    print(str(program))
def
test_resize_bilinear
(
self
):
def
test_resize_bilinear
(
self
):
program
=
Program
()
program
=
Program
()
with
program_guard
(
program
):
with
program_guard
(
program
):
...
...
python/paddle/fluid/tests/unittests/test_polygon_box_transform.py
浏览文件 @
049c9c7d
...
@@ -37,7 +37,7 @@ def PolygonBoxRestore(input):
...
@@ -37,7 +37,7 @@ def PolygonBoxRestore(input):
indexes
=
indexes
.
repeat
(
indexes
=
indexes
.
repeat
(
[
batch_size
],
axis
=
0
)
# [batch_size, geo_channels/2, 2, h, w]
[
batch_size
],
axis
=
0
)
# [batch_size, geo_channels/2, 2, h, w]
return
indexes
.
reshape
(
return
indexes
.
reshape
(
input
.
shape
)
-
input
# [batch_size, geo_channels, h, w]
input
.
shape
)
*
4
-
input
# [batch_size, geo_channels, h, w]
class
TestPolygonBoxRestoreOp
(
OpTest
):
class
TestPolygonBoxRestoreOp
(
OpTest
):
...
...
python/paddle/fluid/tests/unittests/test_roi_align_op.py
0 → 100644
浏览文件 @
049c9c7d
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
math
import
sys
from
op_test
import
OpTest
class TestROIAlignOp(OpTest):
    """Compare the ``roi_align`` operator with a NumPy reference computation.

    ``setUp`` builds a random input, random ROIs and the expected output;
    the two ``test_*`` methods then delegate to the ``OpTest`` checkers.
    """

    def set_data(self):
        """Populate ``self.inputs``, ``self.attrs`` and ``self.outputs``."""
        self.init_test_case()
        self.make_rois()
        self.calc_roi_align()

        # Each ROI row is [batch_id, x1, y1, x2, y2]; only the four box
        # coordinates are passed as the 'ROIs' input, paired with the LoD
        # list built in make_rois.
        self.inputs = {
            'X': self.x,
            'ROIs': (self.rois[:, 1:5], self.rois_lod)
        }
        self.attrs = {
            'spatial_scale': self.spatial_scale,
            'pooled_height': self.pooled_height,
            'pooled_width': self.pooled_width,
            'sampling_ratio': self.sampling_ratio
        }
        self.outputs = {'Out': self.out_data}

    def init_test_case(self):
        """Set the problem sizes, operator attributes and the random input."""
        self.batch_size = 3
        self.channels = 3
        self.height = 8
        self.width = 6

        # n, c, h, w
        self.x_dim = (self.batch_size, self.channels, self.height, self.width)

        self.spatial_scale = 1.0 / 2.0
        self.pooled_height = 2
        self.pooled_width = 2
        # A non-positive ratio makes calc_roi_align derive the per-bin
        # sampling grid from the ROI size.
        self.sampling_ratio = -1

        self.x = np.random.random(self.x_dim).astype('float32')

    def pre_calc(self, x_i, roi_xmin, roi_ymin, roi_bin_grid_h,
                 roi_bin_grid_w, bin_size_h, bin_size_w):
        """Collect bilinear neighbours and weights for every sample point.

        Args:
            x_i: feature map of one batch item, indexed [channel, y, x].
            roi_xmin, roi_ymin: top-left corner of the scaled ROI.
            roi_bin_grid_h, roi_bin_grid_w: number of sample points per
                output bin along each axis.
            bin_size_h, bin_size_w: size of one output bin.

        Returns:
            A pair ``(bilinear_pos, bilinear_w)``:
            ``bilinear_pos[ch, ph, pw, k, :]`` holds the four neighbouring
            input values of sample ``k`` in bin ``(ph, pw)`` and
            ``bilinear_w[ph, pw, k, :]`` the matching interpolation weights.
            Samples that fall outside the feature map are skipped, which
            leaves trailing slots at zero.
        """
        count = roi_bin_grid_h * roi_bin_grid_w
        bilinear_pos = np.zeros(
            [self.channels, self.pooled_height, self.pooled_width, count, 4],
            np.float32)
        bilinear_w = np.zeros(
            [self.pooled_height, self.pooled_width, count, 4], np.float32)

        # ph walks the rows (height) and pw the columns (width), matching
        # both the array layout above and the y/x formulas below.
        for ph in range(self.pooled_height):
            for pw in range(self.pooled_width):
                c = 0
                for iy in range(roi_bin_grid_h):
                    y = roi_ymin + ph * bin_size_h + (iy + 0.5) * \
                        bin_size_h / roi_bin_grid_h
                    for ix in range(roi_bin_grid_w):
                        x = roi_xmin + pw * bin_size_w + (ix + 0.5) * \
                            bin_size_w / roi_bin_grid_w
                        if y < -1.0 or y > self.height or \
                                x < -1.0 or x > self.width:
                            continue
                        if y <= 0:
                            y = 0
                        if x <= 0:
                            x = 0
                        y_low = int(y)
                        x_low = int(x)
                        # Clamp to the last row/column so the "high"
                        # neighbour never indexes past the feature map.
                        if y_low >= self.height - 1:
                            y = y_high = y_low = self.height - 1
                        else:
                            y_high = y_low + 1
                        if x_low >= self.width - 1:
                            x = x_high = x_low = self.width - 1
                        else:
                            x_high = x_low + 1
                        ly = y - y_low
                        lx = x - x_low
                        hy = 1 - ly
                        hx = 1 - lx
                        # All channels share the same sample location, so
                        # the four neighbours are gathered with one slice
                        # assignment each instead of a per-channel loop.
                        bilinear_pos[:, ph, pw, c, 0] = x_i[:, y_low, x_low]
                        bilinear_pos[:, ph, pw, c, 1] = x_i[:, y_low, x_high]
                        bilinear_pos[:, ph, pw, c, 2] = x_i[:, y_high, x_low]
                        bilinear_pos[:, ph, pw, c, 3] = x_i[:, y_high, x_high]
                        bilinear_w[ph, pw, c, 0] = hy * hx
                        bilinear_w[ph, pw, c, 1] = hy * lx
                        bilinear_w[ph, pw, c, 2] = ly * hx
                        bilinear_w[ph, pw, c, 3] = ly * lx
                        c = c + 1
        return bilinear_pos, bilinear_w

    def calc_roi_align(self):
        """Compute the expected output ``self.out_data`` for every ROI."""
        self.out_data = np.zeros(
            (self.rois_num, self.channels, self.pooled_height,
             self.pooled_width)).astype('float32')

        for i in range(self.rois_num):
            roi = self.rois[i]
            roi_batch_id = int(roi[0])
            x_i = self.x[roi_batch_id]
            roi_xmin = roi[1] * self.spatial_scale
            roi_ymin = roi[2] * self.spatial_scale
            roi_xmax = roi[3] * self.spatial_scale
            roi_ymax = roi[4] * self.spatial_scale
            # ROIs are forced to be at least 1x1 after scaling.
            roi_width = max(roi_xmax - roi_xmin, 1)
            roi_height = max(roi_ymax - roi_ymin, 1)
            bin_size_h = float(roi_height) / float(self.pooled_height)
            bin_size_w = float(roi_width) / float(self.pooled_width)
            roi_bin_grid_h = self.sampling_ratio if self.sampling_ratio > 0 else \
                math.ceil(roi_height / self.pooled_height)
            roi_bin_grid_w = self.sampling_ratio if self.sampling_ratio > 0 else \
                math.ceil(roi_width / self.pooled_width)
            bilinear_pos, bilinear_w = self.pre_calc(
                x_i, roi_xmin, roi_ymin,
                int(roi_bin_grid_h),
                int(roi_bin_grid_w), bin_size_h, bin_size_w)
            for ch in range(self.channels):
                # Weighted sum over the four neighbours, then the average
                # over all sample points of each bin.
                align_per_bin = (bilinear_pos[ch] * bilinear_w).sum(axis=-1)
                output_val = align_per_bin.mean(axis=-1)
                self.out_data[i, ch, :, :] = output_val

    def make_rois(self):
        """Generate random ROIs: batch item ``bno`` receives ``bno + 1`` boxes.

        Sets ``self.rois`` (rows of [batch_id, x1, y1, x2, y2] as float32),
        ``self.rois_num`` and ``self.rois_lod``.
        """
        rois = []
        self.rois_lod = [[]]
        # Largest coordinate in ROI space, as a plain int so it is a valid
        # bound for np.random.randint.
        max_x = int(self.width // self.spatial_scale)
        max_y = int(self.height // self.spatial_scale)
        for bno in range(self.batch_size):
            self.rois_lod[0].append(bno + 1)
            for i in range(bno + 1):
                # randint's upper bound is exclusive, hence the "+ 1" to
                # keep the original inclusive ranges.
                x1 = np.random.randint(0, max_x - self.pooled_width + 1)
                y1 = np.random.randint(0, max_y - self.pooled_height + 1)

                x2 = np.random.randint(x1 + self.pooled_width, max_x + 1)
                y2 = np.random.randint(y1 + self.pooled_height, max_y + 1)

                roi = [bno, x1, y1, x2, y2]
                rois.append(roi)
        self.rois_num = len(rois)
        self.rois = np.array(rois).astype("float32")

    def setUp(self):
        """Select the operator under test and build its data."""
        self.op_type = "roi_align"
        self.set_data()

    def test_check_output(self):
        """Check the operator output against ``self.outputs``."""
        self.check_output()

    def test_check_grad(self):
        """Check the gradient of 'Out' with respect to input 'X'."""
        self.check_grad(['X'], 'Out')


python/paddle/fluid/tests/unittests/test_seq_conv.py
浏览文件 @
049c9c7d
...
@@ -20,6 +20,53 @@ import random
...
@@ -20,6 +20,53 @@ import random
from
op_test
import
OpTest
from
op_test
import
OpTest
def seqconv(x,
            lod,
            filter,
            context_length,
            context_start,
            padding_trainable=False,
            padding_data=None):
    """NumPy reference for a sequence convolution over packed sequences.

    Args:
        x: 2-D array of shape ``(T, M)`` holding all sequences row-wise.
        lod: nested list; ``lod[0]`` lists the length of each sequence.
        filter: matrix multiplied with the expanded ``(T, context_length * M)``
            context matrix (the name shadows the builtin but is kept because
            it is part of the signature).
        context_length: number of context positions gathered per row.
        context_start: offset of the first context position relative to the
            current row (negative values look backwards).
        padding_trainable: when true, positions that fall outside a sequence
            are filled from ``padding_data`` instead of staying zero.
        padding_data: 2-D array of padding rows; only read when
            ``padding_trainable`` is true.

    Returns:
        ``np.dot(col, filter)`` where ``col`` is the float32 context matrix.
    """
    T, M = x.shape
    col = np.zeros((T, context_length * M)).astype('float32')

    # Turn the sequence lengths into cumulative row offsets.
    offset = [0]
    for seq_len in lod[0]:
        offset.append(offset[-1] + seq_len)

    # Number of padding rows reserved for positions before a sequence start.
    begin_pad = max(0, -context_start)

    for seq_begin, seq_end in zip(offset[:-1], offset[1:]):
        seq_len = seq_end - seq_begin
        for j in range(context_length):
            # Column band of `col` that receives context position j.
            band = slice(j * M, (j + 1) * M)
            in_begin = seq_begin + context_start + j
            in_end = seq_end + context_start + j
            out_begin = seq_begin
            out_end = seq_end

            if in_begin < seq_begin:
                # The window starts before the sequence: pad the first rows.
                pad_size = min(seq_begin - in_begin, seq_len)
                if padding_trainable:
                    col[seq_begin:seq_begin + pad_size, band] = \
                        padding_data[j:j + pad_size, :]
                out_begin = seq_begin + pad_size
                in_begin = seq_begin

            if in_end > seq_end:
                # The window runs past the sequence: pad the last rows.
                pad_size = min(in_end - seq_end, seq_len)
                if padding_trainable:
                    pad_stop = begin_pad + context_start + j
                    col[seq_end - pad_size:seq_end, band] = \
                        padding_data[pad_stop - pad_size:pad_stop, :]
                in_end = seq_end
                out_end = seq_end - pad_size

            if in_end <= in_begin:
                # Nothing of the sequence is visible at this position.
                continue

            col[out_begin:out_end, band] += x[in_begin:in_end, :]

    return np.dot(col, filter)


class
TestSeqProject
(
OpTest
):
class
TestSeqProject
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_test_case
()
self
.
init_test_case
()
...
@@ -66,57 +113,9 @@ class TestSeqProject(OpTest):
...
@@ -66,57 +113,9 @@ class TestSeqProject(OpTest):
'paddingTrainable'
:
self
.
padding_trainable
,
'paddingTrainable'
:
self
.
padding_trainable
,
'contextStride'
:
self
.
context_stride
'contextStride'
:
self
.
context_stride
}
}
out
=
np
.
zeros
(
out
=
seqconv
(
x
,
self
.
lod
,
w
,
self
.
context_length
,
self
.
context_start
,
(
self
.
input_size
[
0
],
self
.
output_represention
)).
astype
(
'float32'
)
self
.
padding_trainable
,
self
.
pad_data
)
self
.
outputs
=
{
'Out'
:
out
}
self
.
outputs
=
{
'Out'
:
out
}
self
.
compute
()
def
compute
(
self
):
x
,
lod
=
self
.
inputs
[
'X'
]
filter
=
self
.
inputs
[
'Filter'
]
pading_data
=
self
.
pad_data
out
=
np
.
zeros
((
self
.
input_size
[
0
],
self
.
context_length
*
self
.
input_size
[
1
])).
astype
(
'float32'
)
offset
=
[
0
]
for
seq_len
in
lod
[
0
]:
offset
.
append
(
offset
[
-
1
]
+
seq_len
)
begin_pad
=
np
.
max
([
0
,
-
self
.
context_start
])
for
i
in
range
(
len
(
offset
)
-
1
):
for
j
in
range
(
self
.
context_length
):
in_begin
=
offset
[
i
]
+
self
.
context_start
+
j
in_end
=
offset
[
i
+
1
]
+
self
.
context_start
+
j
out_begin
=
offset
[
i
]
out_end
=
offset
[
i
+
1
]
if
in_begin
<
offset
[
i
]:
pad_size
=
np
.
min
(
[
offset
[
i
]
-
in_begin
,
offset
[
i
+
1
]
-
offset
[
i
]])
if
self
.
padding_trainable
:
sub_w
=
pading_data
[
j
:
j
+
pad_size
,
:]
out
[
offset
[
i
]:
offset
[
i
]
+
pad_size
,
j
*
self
.
input_size
[
1
]:(
j
+
1
)
*
self
.
input_size
[
1
]]
=
sub_w
out_begin
=
offset
[
i
]
+
pad_size
in_begin
=
offset
[
i
]
if
in_end
>
offset
[
i
+
1
]:
pad_size
=
np
.
min
(
[
in_end
-
offset
[
i
+
1
],
offset
[
i
+
1
]
-
offset
[
i
]])
if
self
.
padding_trainable
:
sub_w
=
pading_data
[
begin_pad
+
self
.
context_start
+
j
-
pad_size
:
begin_pad
+
self
.
context_start
+
j
,
:]
out
[
offset
[
i
+
1
]
-
pad_size
:
offset
[
i
+
1
],
j
*
self
.
input_size
[
1
]:(
j
+
1
)
*
self
.
input_size
[
1
]]
=
sub_w
in_end
=
offset
[
i
+
1
]
out_end
=
offset
[
i
+
1
]
-
pad_size
if
in_end
<=
in_begin
:
continue
in_sub
=
x
[
in_begin
:
in_end
,
:]
out
[
out_begin
:
out_end
,
j
*
self
.
input_size
[
1
]:(
j
+
1
)
*
self
.
input_size
[
1
]]
+=
in_sub
np
.
dot
(
out
,
filter
,
out
=
self
.
outputs
[
'Out'
])
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
...
...
python/paddle/fluid/tests/unittests/test_slice_var.py
浏览文件 @
049c9c7d
...
@@ -30,7 +30,6 @@ class TestSliceVar(unittest.TestCase):
...
@@ -30,7 +30,6 @@ class TestSliceVar(unittest.TestCase):
var
=
program
.
global_block
().
create_var
(
var
=
program
.
global_block
().
create_var
(
name
=
str
(
random
.
randint
(
10000
,
99999
)),
name
=
str
(
random
.
randint
(
10000
,
99999
)),
persistable
=
True
,
persistable
=
True
,
# dtype=core.VarDesc.VarType.LOD_TENSOR,
shape
=
shape
)
shape
=
shape
)
var_list
.
append
(
var
)
var_list
.
append
(
var
)
blocks
=
slice_variable
(
var_list
,
10
,
min_size
)
blocks
=
slice_variable
(
var_list
,
10
,
min_size
)
...
...
python/paddle/fluid/transpiler/inference_transpiler.py
浏览文件 @
049c9c7d
...
@@ -74,7 +74,7 @@ class InferenceTranspiler(object):
...
@@ -74,7 +74,7 @@ class InferenceTranspiler(object):
'''
'''
Transpile the program fusing elementwise_add into conv for MKLDNN
Transpile the program fusing elementwise_add into conv for MKLDNN
program. Elementwise add following convolution OP can be fused by adding
program. Elementwise add following convolution OP can be fused by adding
'fuse_
eltwise
' attribute to convolution OP and replacing its output
'fuse_
residual_connection
' attribute to convolution OP and replacing its output
Tensor with second parameter of elementwise_add.
Tensor with second parameter of elementwise_add.
The result of fuse is:
The result of fuse is:
- before:
- before:
...
@@ -92,7 +92,8 @@ class InferenceTranspiler(object):
...
@@ -92,7 +92,8 @@ class InferenceTranspiler(object):
if
current_op
.
type
in
[
'conv2d'
]:
if
current_op
.
type
in
[
'conv2d'
]:
next_op
=
self
.
block
.
ops
[
i
+
1
]
next_op
=
self
.
block
.
ops
[
i
+
1
]
if
next_op
.
type
==
'elementwise_add'
:
if
next_op
.
type
==
'elementwise_add'
:
self
.
_fuse_conv_eltwise
(
current_op
,
next_op
)
self
.
_fuse_conv_eltwise
(
i
,
current_op
,
next_op
)
self
.
block
.
_remove_op
(
i
+
1
)
# Remove old conv
self
.
block
.
_remove_op
(
i
+
1
)
# Remove elementwise_add
self
.
block
.
_remove_op
(
i
+
1
)
# Remove elementwise_add
i
=
i
+
1
i
=
i
+
1
self
.
_adjust_input
()
self
.
_adjust_input
()
...
@@ -444,7 +445,7 @@ class InferenceTranspiler(object):
...
@@ -444,7 +445,7 @@ class InferenceTranspiler(object):
outputs
=
{
"Output"
:
out_var
},
outputs
=
{
"Output"
:
out_var
},
attrs
=
attrs
)
attrs
=
attrs
)
def
_fuse_conv_eltwise
(
self
,
conv_op
,
eltwise_op
):
def
_fuse_conv_eltwise
(
self
,
index
,
conv_op
,
eltwise_op
):
'''
'''
fuse the conv op with elementwise_add
fuse the conv op with elementwise_add
...
@@ -454,9 +455,30 @@ class InferenceTranspiler(object):
...
@@ -454,9 +455,30 @@ class InferenceTranspiler(object):
:type eltwise_op: Operator
:type eltwise_op: Operator
'''
'''
conv_op
.
_set_attr
(
"fuse_eltwise"
,
True
)
eltwise_input
=
"X"
self
.
input_map
[
conv_op
.
output
(
"Output"
)[
0
]]
=
eltwise_op
.
input
(
"Y"
)[
0
]
if
eltwise_op
.
input
(
"X"
)[
0
]
==
conv_op
.
output
(
"Output"
)[
0
]:
self
.
input_map
[
eltwise_op
.
output
(
"Out"
)[
0
]]
=
eltwise_op
.
input
(
"Y"
)[
0
]
eltwise_input
=
"Y"
residual_var
=
self
.
block
.
vars
[
eltwise_op
.
input
(
eltwise_input
)[
0
]]
out_var
=
self
.
block
.
vars
[
eltwise_op
.
output
(
"Out"
)[
0
]]
filter_var
=
self
.
block
.
vars
[
conv_op
.
input
(
"Filter"
)[
0
]]
in_var
=
self
.
block
.
vars
[
conv_op
.
input
(
"Input"
)[
0
]]
bias_var
=
self
.
block
.
vars
[
conv_op
.
input
(
"Bias"
)[
0
]]
conv_op
.
_set_attr
(
"fuse_residual_connection"
,
True
)
attrs
=
{
name
:
conv_op
.
attr
(
name
)
for
name
in
conv_op
.
attr_names
}
self
.
block
.
_insert_op
(
index
,
type
=
"conv2d"
,
inputs
=
{
"Input"
:
in_var
,
"Filter"
:
filter_var
,
"Bias"
:
bias_var
,
"ResidualData"
:
residual_var
},
outputs
=
{
"Output"
:
out_var
},
attrs
=
attrs
)
def
_adjust_input
(
self
):
def
_adjust_input
(
self
):
for
i
in
range
(
len
(
self
.
block
.
ops
)):
for
i
in
range
(
len
(
self
.
block
.
ops
)):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录