Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
91756a5a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
91756a5a
编写于
10月 05, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into optimize-opyreader
上级
c5292b18
8cd17c04
变更
140
隐藏空白更改
内联
并排
Showing
140 changed file
with
5645 addition
and
4580 deletion
+5645
-4580
Dockerfile
Dockerfile
+12
-2
cmake/external/anakin.cmake
cmake/external/anakin.cmake
+1
-0
paddle/fluid/API.spec
paddle/fluid/API.spec
+37
-33
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+9
-14
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+0
-291
paddle/fluid/framework/channel_impl.h
paddle/fluid/framework/channel_impl.h
+0
-369
paddle/fluid/framework/channel_test.cc
paddle/fluid/framework/channel_test.cc
+0
-1008
paddle/fluid/framework/concurrency_test.cc
paddle/fluid/framework/concurrency_test.cc
+0
-292
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+5
-0
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+126
-0
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+35
-0
paddle/fluid/framework/details/reference_count_pass.cc
paddle/fluid/framework/details/reference_count_pass.cc
+9
-9
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-4
paddle/fluid/framework/framework.proto
paddle/fluid/framework/framework.proto
+0
-7
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+10
-6
paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
+243
-0
paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h
paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h
+18
-17
paddle/fluid/framework/ir/graph_helper.cc
paddle/fluid/framework/ir/graph_helper.cc
+70
-2
paddle/fluid/framework/ir/graph_helper.h
paddle/fluid/framework/ir/graph_helper.h
+2
-0
paddle/fluid/framework/ir/graph_helper_test.cc
paddle/fluid/framework/ir/graph_helper_test.cc
+91
-0
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+18
-0
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+17
-0
paddle/fluid/framework/ir/pass.cc
paddle/fluid/framework/ir/pass.cc
+0
-1
paddle/fluid/framework/ir/pass.h
paddle/fluid/framework/ir/pass.h
+27
-4
paddle/fluid/framework/ir/pass_builder.cc
paddle/fluid/framework/ir/pass_builder.cc
+43
-0
paddle/fluid/framework/ir/pass_builder.h
paddle/fluid/framework/ir/pass_builder.h
+49
-0
paddle/fluid/framework/ir/pass_test.cc
paddle/fluid/framework/ir/pass_test.cc
+4
-6
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+150
-0
paddle/fluid/framework/naive_executor.h
paddle/fluid/framework/naive_executor.h
+63
-0
paddle/fluid/framework/naive_executor_test.cc
paddle/fluid/framework/naive_executor_test.cc
+70
-0
paddle/fluid/framework/op_proto_maker.cc
paddle/fluid/framework/op_proto_maker.cc
+1
-3
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+0
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+13
-44
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+22
-90
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+2
-2
paddle/fluid/framework/tuple.h
paddle/fluid/framework/tuple.h
+0
-1
paddle/fluid/framework/var_desc.cc
paddle/fluid/framework/var_desc.cc
+2
-52
paddle/fluid/framework/var_desc.h
paddle/fluid/framework/var_desc.h
+0
-4
paddle/fluid/framework/var_type.h
paddle/fluid/framework/var_type.h
+0
-6
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+4
-2
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+1
-1
paddle/fluid/inference/analysis/analysis_pass.h
paddle/fluid/inference/analysis/analysis_pass.h
+0
-6
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+9
-8
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+13
-7
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+210
-31
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+49
-10
paddle/fluid/inference/api/analysis_predictor_tester.cc
paddle/fluid/inference/api/analysis_predictor_tester.cc
+67
-0
paddle/fluid/inference/api/api.cc
paddle/fluid/inference/api/api.cc
+22
-16
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-2
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+13
-9
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+14
-8
paddle/fluid/inference/api/demo_ci/run.sh
paddle/fluid/inference/api/demo_ci/run.sh
+9
-6
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+111
-0
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
+46
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+34
-3
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+52
-1
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+0
-1
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+6
-1
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+4
-1
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+281
-3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+2
-1
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+13
-0
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+3
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+125
-4
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+21
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+7
-9
paddle/fluid/operators/channel_close_op.cc
paddle/fluid/operators/channel_close_op.cc
+0
-70
paddle/fluid/operators/channel_create_op.cc
paddle/fluid/operators/channel_create_op.cc
+0
-113
paddle/fluid/operators/channel_recv_op.cc
paddle/fluid/operators/channel_recv_op.cc
+0
-98
paddle/fluid/operators/channel_send_op.cc
paddle/fluid/operators/channel_send_op.cc
+0
-76
paddle/fluid/operators/concurrency/CMakeLists.txt
paddle/fluid/operators/concurrency/CMakeLists.txt
+0
-1
paddle/fluid/operators/concurrency/channel_util.cc
paddle/fluid/operators/concurrency/channel_util.cc
+0
-111
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+4
-3
paddle/fluid/operators/conv_transpose_op.h
paddle/fluid/operators/conv_transpose_op.h
+4
-3
paddle/fluid/operators/cub_reduce.h
paddle/fluid/operators/cub_reduce.h
+322
-0
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
...fluid/operators/detection/roi_perspective_transform_op.cc
+5
-6
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
...fluid/operators/detection/roi_perspective_transform_op.cu
+3
-3
paddle/fluid/operators/distributed/grpc_client.h
paddle/fluid/operators/distributed/grpc_client.h
+1
-0
paddle/fluid/operators/distributed/request_handler.h
paddle/fluid/operators/distributed/request_handler.h
+1
-0
paddle/fluid/operators/distributed/rpc_server.h
paddle/fluid/operators/distributed/rpc_server.h
+1
-0
paddle/fluid/operators/elementwise_op.h
paddle/fluid/operators/elementwise_op.h
+1
-1
paddle/fluid/operators/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused_embedding_fc_lstm_op.cc
+604
-0
paddle/fluid/operators/fused_embedding_fc_lstm_op.h
paddle/fluid/operators/fused_embedding_fc_lstm_op.h
+41
-0
paddle/fluid/operators/fusion_gru_op.cc
paddle/fluid/operators/fusion_gru_op.cc
+3
-2
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+2
-1
paddle/fluid/operators/math/cpu_lstm_compute.cc
paddle/fluid/operators/math/cpu_lstm_compute.cc
+26
-1
paddle/fluid/operators/math/cpu_lstm_compute.h
paddle/fluid/operators/math/cpu_lstm_compute.h
+2
-19
paddle/fluid/operators/math/depthwise_conv.cu
paddle/fluid/operators/math/depthwise_conv.cu
+323
-156
paddle/fluid/operators/math/depthwise_conv.h
paddle/fluid/operators/math/depthwise_conv.h
+4
-1
paddle/fluid/operators/math/math_function.cc
paddle/fluid/operators/math/math_function.cc
+9
-0
paddle/fluid/operators/math/math_function.h
paddle/fluid/operators/math/math_function.h
+0
-12
paddle/fluid/operators/reduce_mean_op.cu
paddle/fluid/operators/reduce_mean_op.cu
+56
-9
paddle/fluid/operators/reduce_sum_op.cu
paddle/fluid/operators/reduce_sum_op.cu
+51
-9
paddle/fluid/operators/select_op.cc
paddle/fluid/operators/select_op.cc
+0
-419
paddle/fluid/operators/sequence_erase_op.cc
paddle/fluid/operators/sequence_erase_op.cc
+2
-2
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+1
-1
paddle/fluid/operators/top_k_op.cc
paddle/fluid/operators/top_k_op.cc
+0
-2
paddle/fluid/platform/dynload/cublas.h
paddle/fluid/platform/dynload/cublas.h
+1
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+10
-7
paddle/fluid/platform/dynload/curand.h
paddle/fluid/platform/dynload/curand.h
+1
-1
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+17
-3
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+0
-3
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+0
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+29
-2
paddle/fluid/string/pretty_log.h
paddle/fluid/string/pretty_log.h
+4
-4
paddle/legacy/trainer/tests/CMakeLists.txt
paddle/legacy/trainer/tests/CMakeLists.txt
+5
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+20
-4
python/CMakeLists.txt
python/CMakeLists.txt
+1
-0
python/paddle/fluid/clip.py
python/paddle/fluid/clip.py
+3
-1
python/paddle/fluid/concurrency.py
python/paddle/fluid/concurrency.py
+0
-454
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/quantize/__init__.py
python/paddle/fluid/contrib/quantize/__init__.py
+20
-0
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
+557
-0
python/paddle/fluid/contrib/tests/CMakeLists.txt
python/paddle/fluid/contrib/tests/CMakeLists.txt
+6
-0
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+279
-0
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+2
-12
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+1
-1
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+95
-8
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+2
-4
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+488
-179
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+1
-12
python/paddle/fluid/nets.py
python/paddle/fluid/nets.py
+6
-16
python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt
...tests/book/high-level-api/recognize_digits/CMakeLists.txt
+13
-3
python/paddle/fluid/tests/no_test_concurrency.py
python/paddle/fluid/tests/no_test_concurrency.py
+0
-260
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+5
-4
python/paddle/fluid/tests/unittests/dist_se_resnext.py
python/paddle/fluid/tests/unittests/dist_se_resnext.py
+1
-1
python/paddle/fluid/tests/unittests/test_conv2d_op.py
python/paddle/fluid/tests/unittests/test_conv2d_op.py
+52
-7
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+32
-40
python/paddle/fluid/tests/unittests/test_dist_ctr.py
python/paddle/fluid/tests/unittests/test_dist_ctr.py
+4
-3
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+7
-8
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
+4
-4
python/paddle/fluid/tests/unittests/test_dist_text_classification.py
...le/fluid/tests/unittests/test_dist_text_classification.py
+2
-2
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+9
-0
python/paddle/fluid/tests/unittests/test_operator_desc.py
python/paddle/fluid/tests/unittests/test_operator_desc.py
+1
-1
python/paddle/fluid/tests/unittests/test_pass_builder.py
python/paddle/fluid/tests/unittests/test_pass_builder.py
+121
-0
python/paddle/fluid/tests/unittests/transformer_model.py
python/paddle/fluid/tests/unittests/transformer_model.py
+1
-0
python/paddle/fluid/transpiler/__init__.py
python/paddle/fluid/transpiler/__init__.py
+6
-2
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+101
-11
python/setup.py.in
python/setup.py.in
+1
-0
未找到文件。
Dockerfile
浏览文件 @
91756a5a
...
@@ -24,6 +24,7 @@ COPY ./paddle/scripts/docker/root/ /root/
...
@@ -24,6 +24,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN
apt-get update
&&
\
RUN
apt-get update
&&
\
apt-get
install
-y
--allow-downgrades
patchelf
\
apt-get
install
-y
--allow-downgrades
patchelf
\
python3 python3-dev python3-pip
\
git python-pip python-dev python-opencv openssh-server bison
\
git python-pip python-dev python-opencv openssh-server bison
\
libnccl2
=
2.1.2-1+cuda8.0 libnccl-dev
=
2.1.2-1+cuda8.0
\
libnccl2
=
2.1.2-1+cuda8.0 libnccl-dev
=
2.1.2-1+cuda8.0
\
wget unzip unrar
tar
xz-utils bzip2
gzip
coreutils ntp
\
wget unzip unrar
tar
xz-utils bzip2
gzip
coreutils ntp
\
...
@@ -70,24 +71,33 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
...
@@ -70,24 +71,33 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed.
# version(1.7.1 for now), which causes building documentation failed.
RUN
easy_install
-U
pip
&&
\
RUN
pip3
install
-U
wheel
&&
\
pip3
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
easy_install
-U
pip
&&
\
pip
install
-U
wheel
&&
\
pip
install
-U
wheel
&&
\
pip
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
install
sphinx-rtd-theme
==
0.1.9 recommonmark
pip
install
sphinx-rtd-theme
==
0.1.9 recommonmark
RUN
pip
install
pre-commit
'ipython==5.3.0'
&&
\
RUN
pip3
install
pre-commit
'ipython==5.3.0'
&&
\
pip3
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3
install
opencv-python
&&
\
pip
install
pre-commit
'ipython==5.3.0'
&&
\
pip
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
install
opencv-python
pip
install
opencv-python
#For docstring checker
#For docstring checker
RUN
pip3
install
pylint pytest astroid isort
RUN
pip
install
pylint pytest astroid isort LinkChecker
RUN
pip
install
pylint pytest astroid isort LinkChecker
COPY
./python/requirements.txt /root/
COPY
./python/requirements.txt /root/
RUN
pip3
install
-r
/root/requirements.txt
RUN
pip
install
-r
/root/requirements.txt
RUN
pip
install
-r
/root/requirements.txt
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN
apt-get
install
-y
libssl-dev libffi-dev
RUN
apt-get
install
-y
libssl-dev libffi-dev
RUN
pip3
install
certifi urllib3[secure]
RUN
pip
install
certifi urllib3[secure]
RUN
pip
install
certifi urllib3[secure]
...
...
cmake/external/anakin.cmake
浏览文件 @
91756a5a
...
@@ -52,6 +52,7 @@ ExternalProject_Add(
...
@@ -52,6 +52,7 @@ ExternalProject_Add(
PREFIX
${
ANAKIN_SOURCE_DIR
}
PREFIX
${
ANAKIN_SOURCE_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
-DUSE_LOGGER=YES
-DUSE_X86_PLACE=YES
-DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO
-DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
...
...
paddle/fluid/API.spec
浏览文件 @
91756a5a
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
...
@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
...
@@ -49,7 +49,7 @@ paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], var
...
@@ -49,7 +49,7 @@ paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], var
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', '
use_mkldnn', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, Fals
e, None, False, None))
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', '
act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, Non
e, None, False, None))
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None))
...
@@ -62,14 +62,14 @@ paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label',
...
@@ -62,14 +62,14 @@ paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label',
paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None))
paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None))
paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', '
use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, Fals
e, None, None))
paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', '
act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, Tru
e, None, None))
paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', '
use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, Fals
e, None, None))
paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', '
act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, Tru
e, None, None))
paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn'], varargs=None, keywords=None, defaults=(None, None, False))
paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn'], varargs=None, keywords=None, defaults=(None, None, False))
paddle.fluid.layers.softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(None, None, True, None))
paddle.fluid.layers.softmax ArgSpec(args=['input', 'param_attr', 'bias_attr', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(None, None, True, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', '
use_mkldnn', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, Fals
e, False, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', '
name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, Tru
e, False, None))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', '
use_mkldnn', 'name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, Fals
e, False, None))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', '
name'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, Tru
e, False, None))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', '
use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False
, False, None, None, None, False, False))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', '
name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW'
, False, None, None, None, False, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
...
@@ -145,21 +145,31 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
...
@@ -145,21 +145,31 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, Non
e, None, None))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', '
act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, Tru
e, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False
, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', '
axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1
, None, None))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'
, 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False
))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'
], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'
))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sum ArgSpec(args=['x'
, 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,)
)
paddle.fluid.layers.sum ArgSpec(args=['x'
], varargs=None, keywords=None, defaults=None
)
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
@@ -223,16 +233,6 @@ paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'],
...
@@ -223,16 +233,6 @@ paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'],
paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both'))
paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both'))
paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
...
@@ -266,9 +266,9 @@ paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'asp
...
@@ -266,9 +266,9 @@ paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'asp
paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,))
paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[
], varargs='args', keywords='kwargs', defaults=None
)
paddle.fluid.layers.iou_similarity ArgSpec(args=[
'x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)
)
paddle.fluid.layers.box_coder ArgSpec(args=[
], varargs='args', keywords='kwargs', defaults=None
)
paddle.fluid.layers.box_coder ArgSpec(args=[
'prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)
)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[
], varargs='args', keywords='kwargs', defaults=None
)
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[
'input', 'name'], varargs=None, keywords=None, defaults=(None,)
)
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
...
@@ -300,13 +300,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
...
@@ -300,13 +300,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
], varargs=None, keywords=None, defaults=(None, False, 0
))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'
, 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False
))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
...
@@ -315,11 +319,11 @@ paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpo
...
@@ -315,11 +319,11 @@ paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpo
paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'
, 'use_mkldnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True, Fals
e))
paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'
], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, Tru
e))
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'
, 'use_mkldnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True, Fals
e))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'
], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', Tru
e))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -56,9 +56,9 @@ else()
...
@@ -56,9 +56,9 @@ else()
cc_test
(
mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor
)
cc_test
(
mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor
)
endif
()
endif
()
if
(
NOT WIN32
)
if
(
NOT WIN32
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
else
()
else
()
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
...
@@ -141,20 +141,22 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
...
@@ -141,20 +141,22 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog
)
cc_library
(
naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
if
(
WITH_DISTRIBUTE
)
if
(
WITH_DISTRIBUTE
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set
(
DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
executor.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
else
()
else
()
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
cc_library
(
executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass
)
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass elementwise_add_op
)
endif
()
endif
()
if
(
NOT WIN32
)
if
(
NOT WIN32
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph graph_viz_pass multi_devices_graph_pass
graph build_strategy
multi_devices_graph_print_pass multi_devices_graph_check_pass
fast_threaded_ssa_graph_executor
)
fast_threaded_ssa_graph_executor fuse_elewise_add_act_pass
)
endif
()
# NOT WIN32
endif
()
# NOT WIN32
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
...
@@ -167,15 +169,8 @@ cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
...
@@ -167,15 +169,8 @@ cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
cc_test
(
op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto
)
cc_test
(
op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto
)
cc_test
(
cow_ptr_tests SRCS details/cow_ptr_test.cc
)
cc_test
(
cow_ptr_tests SRCS details/cow_ptr_test.cc
)
# cc_test(channel_test SRCS channel_test.cc)
cc_test
(
tuple_test SRCS tuple_test.cc
)
cc_test
(
tuple_test SRCS tuple_test.cc
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
# disable test temporarily.
# TODO https://github.com/PaddlePaddle/Paddle/issues/11971
# cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op
# channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op
# conditional_block_op while_op assign_op print_op executor proto_desc)
paddle/fluid/framework/channel.h
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <condition_variable> // NOLINT
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
// Tags a channel operation. Values of this type are the argument of the
// std::function<bool(ChannelAction)> callbacks registered through
// Channel::AddToSendQ / Channel::AddToReceiveQ.
// The numeric values are fixed explicitly at 0, 1 and 2.
enum class ChannelAction {
  SEND = 0,     // NOTE(review): presumably a send operation - confirm
  RECEIVE = 1,  // NOTE(review): presumably a receive operation - confirm
  CLOSE = 2,    // NOTE(review): presumably the channel being closed - confirm
};
// Channel<T> is the abstract base class of buffered and un-buffered channels.
//
// Every member except the destructor is pure virtual, so this type only
// fixes the interface; the behaviour lives in the concrete implementation
// (see the ChannelImpl forward declaration in this header).
// Data is always passed by pointer (T*).
template <typename T>
class Channel {
 public:
  // --- State queries -------------------------------------------------------
  virtual bool CanSend() = 0;
  virtual bool CanReceive() = 0;

  // --- Data transfer -------------------------------------------------------
  // Send takes a pointer to the item to send and returns nothing.
  virtual void Send(T*) = 0;
  // Receive takes a pointer to the destination and reports a bool.
  // NOTE(review): the meaning of the returned bool is defined by the
  // implementation - confirm against ChannelImpl.
  virtual bool Receive(T*) = 0;

  // NOTE(review): presumably the buffer capacity - confirm.
  virtual size_t Cap() = 0;

  // --- Locking and lifetime -------------------------------------------------
  virtual void Lock() = 0;
  virtual void Unlock() = 0;
  virtual bool IsClosed() = 0;
  virtual void Close() = 0;

  // Virtual so that implementations can be deleted through a Channel<T>*.
  virtual ~Channel() {}

  // --- Wait-queue management ------------------------------------------------
  // Registers an entry identified by `referrer` together with its data
  // pointer, a shared condition variable and a callback that receives a
  // ChannelAction and returns bool.
  // NOTE(review): when and how `cond` and `cb` are used is up to the
  // implementation - confirm against ChannelImpl.
  virtual void AddToSendQ(const void* referrer, T* data,
                          std::shared_ptr<std::condition_variable_any> cond,
                          std::function<bool(ChannelAction)> cb) = 0;
  virtual void AddToReceiveQ(const void* referrer, T* data,
                             std::shared_ptr<std::condition_variable_any> cond,
                             std::function<bool(ChannelAction)> cb) = 0;

  // Counterparts of the two functions above; they take only the `referrer`
  // key that was passed at registration time.
  virtual void RemoveFromSendQ(const void* referrer) = 0;
  virtual void RemoveFromReceiveQ(const void* referrer) = 0;
};
// Forward declaration of the channel implementation; the definition is not
// part of this header section.
template <typename T>
class ChannelImpl;

// Allocates a ChannelImpl<T> with `new`, forwarding `buffer_size` to its
// constructor, and returns it through the abstract Channel<T> interface.
// The result is a raw pointer: nothing here releases it, so the caller is
// responsible for deleting it.
template <typename T>
Channel<T>* MakeChannel(size_t buffer_size) {
  Channel<T>* created = new ChannelImpl<T>(buffer_size);
  return created;
}
// Free-function convenience wrapper: forwards to ch->Close().
// `ch` is dereferenced unconditionally, so it must not be null.
template <typename T>
void CloseChannel(Channel<T>* ch) {
  ch->Close();
}
/*
 * The ChannelHolder class serves two main purposes:
 * 1. It acts as a unified wrapper for the different kinds of
 *    channels, i.e. buffered and unbuffered channels. This is
 *    similar to the ReaderHolder class.
 * 2. It also hides the channel's element type T (type hiding), so that
 *    ChannelHolder itself is not a template. This is similar to the
 *    PlaceHolder implementations in variable.h and tensor.h.
 */
class
ChannelHolder
{
public:
template
<
typename
T
>
void
Reset
(
size_t
buffer_size
)
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
(
buffer_size
));
}
template
<
typename
T
>
void
Send
(
T
*
data
)
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
PADDLE_ENFORCE_EQ
(
holder_
->
Type
(),
std
::
type_index
(
typeid
(
T
)),
"Channel type is not same as the type of the data being sent"
);
// Static cast should be safe because we have ensured that types are same
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
PADDLE_ENFORCE_EQ
(
channel
!=
nullptr
,
true
,
"Channel should not be null."
);
channel
->
Send
(
data
);
}
template
<
typename
T
>
bool
Receive
(
T
*
data
)
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
PADDLE_ENFORCE_EQ
(
holder_
->
Type
(),
std
::
type_index
(
typeid
(
T
)),
"Channel type is not same as the type of the data being sent"
);
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
PADDLE_ENFORCE_EQ
(
channel
!=
nullptr
,
true
,
"Channel should not be null."
);
return
channel
->
Receive
(
data
);
}
bool
IsClosed
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
return
holder_
->
IsClosed
();
}
bool
CanSend
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
return
holder_
->
CanSend
();
}
bool
CanReceive
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
return
holder_
->
CanReceive
();
}
void
close
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
holder_
->
Close
();
}
size_t
Cap
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
return
holder_
->
Cap
();
}
void
Lock
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
holder_
->
Lock
();
}
void
Unlock
()
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
holder_
->
Unlock
();
}
template
<
typename
T
>
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToSendQ
(
referrer
,
data
,
cond
,
cb
);
}
}
template
<
typename
T
>
void
AddToReceiveQ
(
const
void
*
referrer
,
T
*
data
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
ChannelAction
)
>
cb
)
{
PADDLE_ENFORCE_EQ
(
IsInitialized
(),
true
,
"The Channel hasn't been initialized"
);
Channel
<
T
>*
channel
=
static_cast
<
Channel
<
T
>*>
(
holder_
->
Ptr
());
if
(
channel
!=
nullptr
)
{
channel
->
AddToReceiveQ
(
referrer
,
data
,
cond
,
cb
);
}
}
// Withdraws the pending sends registered under `referrer` by forwarding to
// the placeholder. Raises via PADDLE_ENFORCE_EQ if the holder is
// uninitialized.
void RemoveFromSendQ(const void* referrer) {
  PADDLE_ENFORCE_EQ(IsInitialized(), true,
                    "The Channel hasn't been initialized");
  holder_->RemoveFromSendQ(referrer);
}
// Withdraws the pending receives registered under `referrer` by forwarding
// to the placeholder. Raises via PADDLE_ENFORCE_EQ if the holder is
// uninitialized.
void RemoveFromReceiveQ(const void* referrer) {
  PADDLE_ENFORCE_EQ(IsInitialized(), true,
                    "The Channel hasn't been initialized");
  holder_->RemoveFromReceiveQ(referrer);
}
// True once a placeholder has been installed in holder_.
inline bool IsInitialized() const {
  const bool has_placeholder = (holder_ != nullptr);
  return has_placeholder;
}
// Returns the element type recorded by the placeholder.
// Raises via PADDLE_ENFORCE_EQ if the holder is uninitialized.
inline const std::type_index Type() {
  PADDLE_ENFORCE_EQ(IsInitialized(), true,
                    "The Channel hasn't been initialized");
  const std::type_index element_type = holder_->Type();
  return element_type;
}
private:
/**
 * @note Placeholder is the type-erased interface ChannelHolder talks to.
 *       It hides the element type T, so T does not have to be a template
 *       parameter of ChannelHolder itself.
 */
struct Placeholder {
  virtual ~Placeholder() {}

  // Element type of the wrapped channel.
  virtual const std::type_index Type() const = 0;
  // Untyped pointer to the wrapped channel object.
  virtual void* Ptr() const = 0;

  // State queries.
  virtual bool IsClosed() = 0;
  virtual bool CanSend() = 0;
  virtual bool CanReceive() = 0;

  // Withdraw queue entries previously registered under `referrer`.
  virtual void RemoveFromSendQ(const void* referrer) = 0;
  virtual void RemoveFromReceiveQ(const void* referrer) = 0;

  // Lifecycle and locking.
  virtual void Close() = 0;
  virtual void Lock() = 0;
  virtual void Unlock() = 0;

  // Capacity of the wrapped channel.
  virtual size_t Cap() = 0;
};
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
explicit
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
channel_
.
reset
(
MakeChannel
<
T
>
(
buffer_size
));
}
virtual
const
std
::
type_index
Type
()
const
{
return
type_
;
}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
channel_
.
get
());
}
virtual
bool
IsClosed
()
{
if
(
channel_
)
{
return
channel_
->
IsClosed
();
}
return
false
;
}
virtual
bool
CanSend
()
{
if
(
channel_
)
{
return
channel_
->
CanSend
();
}
return
false
;
}
virtual
bool
CanReceive
()
{
if
(
channel_
)
{
return
channel_
->
CanReceive
();
}
return
false
;
}
virtual
void
RemoveFromSendQ
(
const
void
*
referrer
)
{
if
(
channel_
)
{
channel_
->
RemoveFromSendQ
(
referrer
);
}
}
virtual
void
RemoveFromReceiveQ
(
const
void
*
referrer
)
{
if
(
channel_
)
{
channel_
->
RemoveFromReceiveQ
(
referrer
);
}
}
virtual
void
Close
()
{
if
(
channel_
)
channel_
->
Close
();
}
virtual
size_t
Cap
()
{
if
(
channel_
)
return
channel_
->
Cap
();
else
return
-
1
;
}
virtual
void
Lock
()
{
if
(
channel_
)
channel_
->
Lock
();
}
virtual
void
Unlock
()
{
if
(
channel_
)
channel_
->
Unlock
();
}
std
::
unique_ptr
<
Channel
<
T
>>
channel_
;
const
std
::
type_index
type_
;
};
// Pointer to a PlaceholderImpl object
std
::
unique_ptr
<
Placeholder
>
holder_
;
};
}
// namespace framework
}
// namespace paddle
#include "paddle/fluid/framework/channel_impl.h"
paddle/fluid/framework/channel_impl.h
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <atomic>
#include <condition_variable> // NOLINT
#include <deque>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
// ChannelImpl<T> is the concrete Channel<T>.
//
// State guarded by mu_ (a recursive mutex):
//   buf_   - elements stored in the channel buffer (at most cap_ are pushed
//            by Send()),
//   recvq  - receivers waiting for a value,
//   sendq  - senders waiting for a receiver or buffer space,
//   closed_.
// send_ctr / recv_ctr count Send()/Receive() calls currently in flight; the
// destructor waits on destructor_cond_ until both are zero.
template <typename T>
class ChannelImpl : public paddle::framework::Channel<T> {
  friend Channel<T> *paddle::framework::MakeChannel<T>(size_t);
  friend void paddle::framework::CloseChannel<T>(Channel<T> *);

 public:
  virtual bool CanSend();
  virtual bool CanReceive();
  virtual void Send(T *);
  virtual bool Receive(T *);
  virtual size_t Cap() { return cap_; }
  virtual void Lock();
  virtual void Unlock();
  virtual bool IsClosed();
  virtual void Close();
  explicit ChannelImpl(size_t);
  virtual ~ChannelImpl();

  virtual void AddToSendQ(const void *referrer, T *data,
                          std::shared_ptr<std::condition_variable_any> cond,
                          std::function<bool(ChannelAction)> cb);
  virtual void AddToReceiveQ(const void *referrer, T *data,
                             std::shared_ptr<std::condition_variable_any> cond,
                             std::function<bool(ChannelAction)> cb);

  virtual void RemoveFromSendQ(const void *referrer);
  virtual void RemoveFromReceiveQ(const void *referrer);

 private:
  // One waiting sender or receiver.
  struct QueueMessage {
    // Sender: the value to hand over. Receiver: where to store the value.
    T *data;
    // Signalled by Notify() once the message has been handled.
    std::shared_ptr<std::condition_variable_any> cond;
    // Set by Close() before notifying, so the waiter can tell why it woke.
    bool chan_closed = false;
    // Wait() predicate; set by Notify().
    bool completed = false;
    // Key used by RemoveFromSendQ/RemoveFromReceiveQ. Messages created by a
    // blocking Send()/Receive() never assign it, so it must start out as
    // nullptr: those removal loops compare it for every queued message and
    // would otherwise read an indeterminate pointer.
    const void *referrer = nullptr;
    // TODO(thuan): figure out better way to do this
    // Optional callback; an empty one means the message is always accepted
    // by get_first_message().
    std::function<bool(ChannelAction)> callback;

    explicit QueueMessage(T *item)
        : data(item), cond(std::make_shared<std::condition_variable_any>()) {}

    QueueMessage(T *item, std::shared_ptr<std::condition_variable_any> cond)
        : data(item), cond(cond) {}

    // Blocks on cond until Notify() has been called; `lock` is released
    // while waiting.
    void Wait(std::unique_lock<std::recursive_mutex> &lock) {
      cond->wait(lock, [this]() { return completed; });
    }

    // Marks the message as handled and wakes every waiter on cond.
    void Notify() {
      completed = true;
      cond->notify_all();
    }
  };

  // Bookkeeping on every exit path of Send(): one fewer sender in flight.
  void send_return() {
    send_ctr--;
    destructor_cond_.notify_all();
  }

  // Bookkeeping on every exit path of Receive(); passes `value` through so
  // it can be used directly in a return statement.
  bool recv_return(bool value) {
    recv_ctr--;
    destructor_cond_.notify_all();
    return value;
  }

  // Pops messages off `queue` until one is usable and returns it, or returns
  // nullptr once the queue is empty.
  std::shared_ptr<QueueMessage> get_first_message(
      std::deque<std::shared_ptr<QueueMessage>> *queue, ChannelAction action) {
    while (!queue->empty()) {
      // Check whether this message was added by Select
      // If this was added by Select then execute the callback
      // to check if you can execute this message. The callback
      // can return false if some other case was executed in Select.
      // In that case just discard this QueueMessage and process next.
      std::shared_ptr<QueueMessage> m = queue->front();
      queue->pop_front();
      if (m->callback == nullptr || m->callback(action)) return m;
    }
    return nullptr;
  }

  size_t cap_;
  std::recursive_mutex mu_;
  bool closed_;
  std::deque<T> buf_;
  std::deque<std::shared_ptr<QueueMessage>> recvq;
  std::deque<std::shared_ptr<QueueMessage>> sendq;
  std::atomic<unsigned> send_ctr{0};
  std::atomic<unsigned> recv_ctr{0};
  std::condition_variable_any destructor_cond_;
};
// Creates an open channel whose buffer holds at most `capacity` elements;
// Send() bypasses the buffer entirely when capacity is 0.
template <typename T>
ChannelImpl<T>::ChannelImpl(size_t capacity)
    : cap_(capacity), closed_(false), send_ctr(0), recv_ctr(0) {
  // NOTE: capacity is a size_t, so this comparison with 0 always holds.
  PADDLE_ENFORCE_GE(capacity, 0);
}
// True when the channel is open and either a receiver is queued or the
// buffer still has room.
template <typename T>
bool ChannelImpl<T>::CanSend() {
  std::lock_guard<std::recursive_mutex> lock{mu_};
  if (closed_) {
    return false;
  }
  return !recvq.empty() || buf_.size() < cap_;
}
// False when the channel is closed with an empty buffer; otherwise true if
// a sender is queued or the buffer holds at least one element.
template <typename T>
bool ChannelImpl<T>::CanReceive() {
  std::lock_guard<std::recursive_mutex> lock{mu_};
  if (closed_ && buf_.empty()) {
    return false;
  }
  return !sendq.empty() || buf_.size() > 0;
}
// Sends *item (moved from) through the channel.
//
// Order of preference:
//   1. hand the value directly to a queued receiver,
//   2. store it in the buffer if there is room,
//   3. otherwise enqueue in sendq and block until a receiver, or Close(),
//      completes the message.
// Throws via PADDLE_THROW when the channel is closed, either on entry or
// while blocked.
//
// The retry after a fruitless get_first_message() is a loop rather than a
// recursive call. A recursive call would lock mu_ a second time, and the
// condition-variable wait in step 3 releases only one level of a recursive
// mutex, so the blocked sender would keep mu_ locked and no receiver could
// ever get in. It would also skip the outer send_return() when the inner
// call throws.
template <typename T>
void ChannelImpl<T>::Send(T *item) {
  send_ctr++;
  std::unique_lock<std::recursive_mutex> lock{mu_};

  for (;;) {
    // If channel is closed, throw exception
    if (closed_) {
      send_return();
      lock.unlock();
      PADDLE_THROW("Cannot send on closed channel");
    }

    if (recvq.empty()) break;

    // If there is a receiver, directly pass the value we want
    // to send to the receiver, bypassing the channel buffer if any
    std::shared_ptr<QueueMessage> m =
        get_first_message(&recvq, ChannelAction::SEND);
    if (m != nullptr) {
      *(m->data) = std::move(*item);
      m->Notify();
      send_return();
      return;
    }
    // Every queued receiver was discarded by its callback; re-evaluate the
    // channel state from the top.
  }

  // Unbuffered channel will always bypass this
  // If buffered channel has space in buffer,
  // write the element to the buffer.
  if (buf_.size() < cap_) {
    // Copy to buffer
    buf_.push_back(std::move(*item));
    send_return();
    return;
  }

  // Block on channel, because some receiver will complete
  // the operation for us
  auto m = std::make_shared<QueueMessage>(item);
  sendq.push_back(m);
  m->Wait(lock);
  if (m->chan_closed) {
    send_return();
    lock.unlock();
    PADDLE_THROW("Cannot send on closed channel");
  }
  send_return();
}
// Receives one element into *item.
//
// Returns false when the channel is closed and nothing is left in the
// buffer, either on entry or when Close() wakes a blocked receiver.
// Returns true once a value has been stored in *item.
//
// The retry after a fruitless get_first_message() on an unbuffered channel
// is a loop rather than a recursive call. A recursive call would lock mu_ a
// second time, and the condition-variable wait at the end releases only one
// level of a recursive mutex, so the blocked receiver would keep mu_ locked
// and no sender could ever get in.
template <typename T>
bool ChannelImpl<T>::Receive(T *item) {
  recv_ctr++;
  std::unique_lock<std::recursive_mutex> lock{mu_};

  for (;;) {
    // If channel is closed and buffer is empty or
    // channel is unbuffered
    if (closed_ && buf_.empty()) return recv_return(false);

    if (sendq.empty()) break;

    // If there is a sender, directly receive the value we want
    // from the sender. In case of a buffered channel, read from
    // buffer and move front of send queue to the buffer
    std::shared_ptr<QueueMessage> m =
        get_first_message(&sendq, ChannelAction::RECEIVE);

    if (buf_.size() > 0) {
      // Case 1 : Channel is Buffered
      // Do Data transfer from front of buffer
      // and add a QueueMessage to the buffer
      *item = std::move(buf_.front());
      buf_.pop_front();
      // If first message from sendq is not null
      // add it to the buffer and notify it
      if (m != nullptr) {
        // Copy to buffer
        buf_.push_back(std::move(*(m->data)));
        m->Notify();
      }
      // Ignore if there is no first message
      return recv_return(true);
    }

    // Case 2: Channel is Unbuffered
    // Do data transfer from front of SendQ
    if (m != nullptr) {
      *item = std::move(*(m->data));
      m->Notify();
      return recv_return(true);
    }
    // Every queued sender was discarded by its callback; re-evaluate the
    // channel state from the top.
  }

  // If this is a buffered channel and there are items in buffer
  if (buf_.size() > 0) {
    // Directly read from buffer
    *item = std::move(buf_.front());
    buf_.pop_front();
    return recv_return(true);
  }

  // No sender available, block on this channel
  // Some sender will complete the operation for us
  auto m = std::make_shared<QueueMessage>(item);
  recvq.push_back(m);
  m->Wait(lock);

  return recv_return(!m->chan_closed);
}
// Acquires the channel mutex on behalf of the caller; must be balanced by a
// matching Unlock().
template <typename T>
void ChannelImpl<T>::Lock() {
  mu_.lock();
}
// Releases one level of the channel mutex previously taken with Lock().
template <typename T>
void ChannelImpl<T>::Unlock() {
  mu_.unlock();
}
// Reads closed_ under the channel mutex.
template <typename T>
bool ChannelImpl<T>::IsClosed() {
  std::lock_guard<std::recursive_mutex> lock{mu_};
  const bool closed = closed_;
  return closed;
}
// Marks the channel closed and wakes every queued receiver and sender.
// Each woken message has chan_closed set and, if it carries a callback, the
// callback is invoked with ChannelAction::CLOSE before the notification.
// Closing an already closed channel is a no-op.
template <typename T>
void ChannelImpl<T>::Close() {
  std::unique_lock<std::recursive_mutex> lock{mu_};

  // TODO(abhinavarora): closing an already closed channel should panic
  if (closed_) {
    return;
  }
  closed_ = true;

  // Drains one wait queue, flagging and notifying each entry in FIFO order.
  auto wake_all = [](std::deque<std::shared_ptr<QueueMessage>> *waiters) {
    while (!waiters->empty()) {
      std::shared_ptr<QueueMessage> m = waiters->front();
      waiters->pop_front();
      m->chan_closed = true;

      // Execute callback function (if any)
      if (m->callback != nullptr) {
        m->callback(ChannelAction::CLOSE);
      }

      m->Notify();
    }
  };

  // Empty the readers first, then the senders.
  wake_all(&recvq);
  wake_all(&sendq);
}
// Appends a non-blocking send request to sendq. The message shares the
// caller's condition variable and records `referrer` and `cb` so it can be
// matched by RemoveFromSendQ() and vetted by get_first_message().
template <typename T>
void ChannelImpl<T>::AddToSendQ(
    const void *referrer, T *data,
    std::shared_ptr<std::condition_variable_any> cond,
    std::function<bool(ChannelAction)> cb) {
  std::lock_guard<std::recursive_mutex> lock{mu_};

  auto entry = std::make_shared<QueueMessage>(data, cond);
  entry->referrer = referrer;
  entry->callback = cb;
  sendq.push_back(entry);
}
// Appends a non-blocking receive request to recvq. The message shares the
// caller's condition variable and records `referrer` and `cb` so it can be
// matched by RemoveFromReceiveQ() and vetted by get_first_message().
template <typename T>
void ChannelImpl<T>::AddToReceiveQ(
    const void *referrer, T *data,
    std::shared_ptr<std::condition_variable_any> cond,
    std::function<bool(ChannelAction)> cb) {
  std::lock_guard<std::recursive_mutex> lock{mu_};

  auto entry = std::make_shared<QueueMessage>(data, cond);
  entry->referrer = referrer;
  entry->callback = cb;
  recvq.push_back(entry);
}
// Erases every sendq entry whose referrer equals `referrer`.
template <typename T>
void ChannelImpl<T>::RemoveFromSendQ(const void *referrer) {
  std::lock_guard<std::recursive_mutex> lock{mu_};

  auto it = sendq.begin();
  while (it != sendq.end()) {
    const bool matches = ((*it)->referrer == referrer);
    if (matches) {
      // erase() returns the iterator following the removed element.
      it = sendq.erase(it);
    } else {
      ++it;
    }
  }
}
// Erases every recvq entry whose referrer equals `referrer`.
template <typename T>
void ChannelImpl<T>::RemoveFromReceiveQ(const void *referrer) {
  std::lock_guard<std::recursive_mutex> lock{mu_};

  auto it = recvq.begin();
  while (it != recvq.end()) {
    const bool matches = ((*it)->referrer == referrer);
    if (matches) {
      // erase() returns the iterator following the removed element.
      it = recvq.erase(it);
    } else {
      ++it;
    }
  }
}
// Closes the channel, which wakes all blocked senders and receivers, and
// then waits until every in-flight Send()/Receive() has finished
// (send_ctr == 0 && recv_ctr == 0) before the members are destroyed.
template <typename T>
ChannelImpl<T>::~ChannelImpl() {
  Close();

  // The channel has been closed, so no new readers or writers are accepted;
  // wait here for the existing ones to leave.
  std::unique_lock<std::recursive_mutex> lock{mu_};
  auto all_callers_gone = [this]() { return send_ctr == 0 && recv_ctr == 0; };
  destructor_cond_.wait(lock, all_callers_gone);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/channel_test.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <chrono> // NOLINT
#include <thread> // NOLINT
#include "gtest/gtest.h"
using
paddle
::
framework
::
Channel
;
using
paddle
::
framework
::
ChannelHolder
;
using
paddle
::
framework
::
MakeChannel
;
using
paddle
::
framework
::
CloseChannel
;
// Cap() must report the capacity passed to MakeChannel, for both a buffered
// and an unbuffered channel.
TEST(Channel, ChannelCapacityTest) {
  const size_t buffer_size = 10;

  auto buffered = MakeChannel<size_t>(buffer_size);
  EXPECT_EQ(buffered->Cap(), buffer_size);
  CloseChannel(buffered);
  delete buffered;

  auto unbuffered = MakeChannel<size_t>(0);
  EXPECT_EQ(unbuffered->Cap(), 0U);
  CloseChannel(unbuffered);
  delete unbuffered;
}
// Helper: a producer thread sends 0..num_items-1 while this thread receives
// them, checking that values arrive in sending order. Takes ownership of
// `ch` and deletes it at the end.
void RecevingOrderEqualToSendingOrder(Channel<int> *ch, int num_items) {
  const auto pause = std::chrono::milliseconds(200);
  unsigned sum_send = 0;

  std::thread producer([&]() {
    for (int value = 0; value < num_items; value++) {
      ch->Send(&value);
      sum_send += value;
    }
  });

  std::this_thread::sleep_for(pause);
  for (int expected = 0; expected < num_items; expected++) {
    int recv = -1;
    EXPECT_EQ(ch->Receive(&recv), true);
    EXPECT_EQ(recv, expected);
  }

  std::this_thread::sleep_for(pause);
  CloseChannel(ch);
  producer.join();

  // Sum of 0..num_items-1.
  unsigned expected_sum = (num_items * (num_items - 1)) / 2;
  EXPECT_EQ(sum_send, expected_sum);
  delete ch;
}
// Filling a buffered channel up to its capacity and draining it again must
// work from a single thread, i.e. without blocking.
TEST(Channel, SufficientBufferSizeDoesntBlock) {
  const size_t buffer_size = 10;
  auto ch = MakeChannel<size_t>(buffer_size);

  for (size_t value = 0; value < buffer_size; ++value) {
    ch->Send(&value);
  }

  size_t out;
  for (size_t expected = 0; expected < buffer_size; ++expected) {
    EXPECT_EQ(ch->Receive(&out), true);  // should not block
    EXPECT_EQ(out, expected);
  }

  CloseChannel(ch);
  delete ch;
}
// This tests the behaviour of a channel after it has been closed:
// Send must throw paddle::platform::EnforceNotMet, and Receive must return
// false once the queue is empty.
// By creating separate threads for sending and receiving, we make this
// function able to test both buffered and unbuffered channels.
void SendReceiveWithACloseChannelShouldPanic(Channel<size_t> *ch) {
  const size_t data = 5;

  // Phase 1: one send and one receive on the open channel must pair up.
  std::thread send_thread{[&]() {
    size_t i = data;
    ch->Send(&i);  // should not block
  }};

  std::thread recv_thread{[&]() {
    size_t i;
    EXPECT_EQ(ch->Receive(&i), true);  // should not block
    EXPECT_EQ(i, data);
  }};

  send_thread.join();
  recv_thread.join();

  // Phase 2: after closing, send should panic. Receive should
  // also return false as there is no data in queue.
  CloseChannel(ch);

  send_thread = std::thread{[&]() {
    size_t i = data;
    bool is_exception = false;
    try {
      ch->Send(&i);
    } catch (const paddle::platform::EnforceNotMet &) {
      // Caught by const reference: no copy of the exception object.
      is_exception = true;
    }
    EXPECT_EQ(is_exception, true);
  }};

  recv_thread = std::thread{[&]() {
    size_t i;
    // should return false because channel is closed and queue is empty
    EXPECT_EQ(ch->Receive(&i), false);
  }};

  send_thread.join();
  recv_thread.join();
}
// Runs the closed-channel checks against a buffered channel.
TEST(Channel, SendReceiveClosedBufferedChannelPanics) {
  size_t buffer_size = 10;
  auto buffered = MakeChannel<size_t>(buffer_size);
  SendReceiveWithACloseChannelShouldPanic(buffered);
  delete buffered;
}
// Runs the closed-channel checks against an unbuffered channel.
TEST(Channel, SendReceiveClosedUnBufferedChannelPanics) {
  auto unbuffered = MakeChannel<size_t>(0);
  SendReceiveWithACloseChannelShouldPanic(unbuffered);
  delete unbuffered;
}
// Values still sitting in the buffer when the channel is closed must remain
// receivable; only after the buffer is drained does Receive return false.
TEST(Channel, ReceiveFromBufferedChannelReturnResidualValuesTest) {
  const size_t buffer_size = 10;
  const size_t half = buffer_size / 2;
  auto ch = MakeChannel<size_t>(buffer_size);

  for (size_t value = 0; value < buffer_size; ++value) {
    ch->Send(&value);  // sending should not block
  }

  size_t out;

  // Drain the first half while the channel is still open.
  for (size_t expected = 0; expected < half; ++expected) {
    EXPECT_EQ(ch->Receive(&out), true);  // receiving should not block
    EXPECT_EQ(out, expected);
  }

  CloseChannel(ch);

  // The second half is still delivered after the close.
  for (size_t expected = half; expected < buffer_size; ++expected) {
    EXPECT_EQ(ch->Receive(&out), true);  // receiving should return residual values
    EXPECT_EQ(out, expected);
  }

  // Closed and empty: every further receive fails.
  for (size_t attempt = 0; attempt < buffer_size; ++attempt) {
    EXPECT_EQ(ch->Receive(&out), false);
  }

  delete ch;
}
// A sender writes twice as many values as the buffer holds and nobody
// receives. The first buffer_size sends must succeed; every later send is
// expected to throw EnforceNotMet, which happens once the main thread closes
// the channel.
TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
  const size_t buffer_size = 10;
  auto ch = MakeChannel<size_t>(buffer_size);

  std::thread t([&]() {
    // Try to write more than buffer size.
    for (size_t i = 0; i < 2 * buffer_size; ++i) {
      if (i < buffer_size) {
        ch->Send(&i);  // fits into the buffer, should not block
      } else {
        // Blocks until the channel is closed, then must throw.
        bool is_exception = false;
        try {
          ch->Send(&i);
        } catch (const paddle::platform::EnforceNotMet &) {
          // Caught by const reference: no copy of the exception object.
          is_exception = true;
        }
        EXPECT_EQ(is_exception, true);
      }
    }
  });

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
  CloseChannel(ch);
  t.join();
  delete ch;
}
// Ordering check on an unbuffered channel. The helper deletes the channel.
TEST(Channel, RecevingOrderEqualToSendingOrderWithUnBufferedChannel) {
  auto unbuffered = MakeChannel<int>(0);
  RecevingOrderEqualToSendingOrder(unbuffered, 20);
}
// Ordering check when fewer items are sent than the buffer can hold.
// The helper deletes the channel.
TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel1) {
  auto buffered = MakeChannel<int>(10);
  RecevingOrderEqualToSendingOrder(buffered, 5);
}
// Ordering check when exactly as many items are sent as the buffer can
// hold. The helper deletes the channel.
TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel2) {
  auto buffered = MakeChannel<int>(10);
  RecevingOrderEqualToSendingOrder(buffered, 10);
}
// Ordering check when more items are sent than the buffer can hold.
// The helper deletes the channel.
TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) {
  auto buffered = MakeChannel<int>(10);
  RecevingOrderEqualToSendingOrder(buffered, 20);
}
// Helper: starts several receivers on an empty channel, checks that they
// are all blocked, closes the channel and checks that every receiver woke up
// with Receive() == false. Does not delete `ch`.
void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
  const size_t kNumThreads = 5;
  const auto pause = std::chrono::milliseconds(200);
  std::thread workers[kNumThreads];
  bool finished[kNumThreads];

  // Launch threads that try to read and are blocked because of no writers
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    finished[idx] = false;
    workers[idx] = std::thread(
        [&](bool *done) {
          int data;
          EXPECT_EQ(ch->Receive(&data), false);
          *done = true;
        },
        &finished[idx]);
  }
  std::this_thread::sleep_for(pause);  // wait 0.2 sec

  // Verify that all the threads are blocked
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    EXPECT_EQ(finished[idx], false);
  }

  // Explicitly close the channel
  // This should unblock all receivers
  CloseChannel(ch);

  std::this_thread::sleep_for(pause);  // wait 0.2 sec

  // Verify that all threads got unblocked
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    EXPECT_EQ(finished[idx], true);
  }

  for (auto &worker : workers) worker.join();
}
// Helper: starts several senders with no receiver, checks how many are
// blocked, closes the channel and checks that every sender returned.
//
// ch         : channel under test; not deleted here.
// isBuffered : true when `ch` was created with capacity 1. In that case one
//              send fits into the buffer, so at least 4 threads must block
//              and exactly 1 send must succeed.
void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
  const size_t kNumThreads = 5;
  std::thread t[kNumThreads];
  bool thread_ended[kNumThreads];
  bool send_success[kNumThreads];

  // Launches threads that try to write and are blocked because of no readers
  for (size_t i = 0; i < kNumThreads; i++) {
    thread_ended[i] = false;
    send_success[i] = false;
    t[i] = std::thread(
        [&](bool *ended, bool *success) {
          int data = 10;
          bool is_exception = false;
          try {
            ch->Send(&data);
          } catch (const paddle::platform::EnforceNotMet &) {
            // Caught by const reference: no copy of the exception object.
            is_exception = true;
          }
          *success = !is_exception;
          *ended = true;
        },
        &thread_ended[i], &send_success[i]);
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  if (isBuffered) {
    // If ch is Buffered, at least 4 threads must be blocked.
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (!thread_ended[i]) ct++;
    }
    EXPECT_GE(ct, 4);
  } else {
    // If ch is UnBuffered, all the threads should be blocked.
    for (size_t i = 0; i < kNumThreads; i++) {
      EXPECT_EQ(thread_ended[i], false);
    }
  }

  // Explicitly close the channel
  // This should unblock all senders
  CloseChannel(ch);

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  // Verify that all threads got unblocked
  for (size_t i = 0; i < kNumThreads; i++) {
    EXPECT_EQ(thread_ended[i], true);
  }

  if (isBuffered) {
    // Verify that only 1 send was successful
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (send_success[i]) ct++;
    }
    // Only 1 send must be successful
    EXPECT_EQ(ct, 1);
  }

  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
}
// Closing a buffered channel must wake up every receiver that is blocked
// waiting on it.
TEST(Channel, BufferedChannelCloseUnblocksReceiversTest) {
  auto buffered = MakeChannel<int>(1);
  ChannelCloseUnblocksReceiversTest(buffered);
  delete buffered;
}
// Closing a buffered channel must wake up every sender that is blocked
// waiting for free space in the buffer.
TEST(Channel, BufferedChannelCloseUnblocksSendersTest) {
  auto buffered = MakeChannel<int>(1);
  ChannelCloseUnblocksSendersTest(buffered, true);
  delete buffered;
}
// Closing an unbuffered channel must wake up every receiver that is blocked
// waiting for a sender.
TEST(Channel, UnbufferedChannelCloseUnblocksReceiversTest) {
  auto unbuffered = MakeChannel<int>(0);
  ChannelCloseUnblocksReceiversTest(unbuffered);
  delete unbuffered;
}
// Closing an unbuffered channel must wake up every sender that is blocked
// waiting for a receiver.
TEST(Channel, UnbufferedChannelCloseUnblocksSendersTest) {
  auto unbuffered = MakeChannel<int>(0);
  ChannelCloseUnblocksSendersTest(unbuffered, false);
  delete unbuffered;
}
// On an unbuffered channel a sender attempts 4 sends while only 3 receives
// are performed. The first three sends (0 + 1 + 2 == 3) must complete; the
// fourth stays blocked until the channel is closed, at which point Send
// throws and the value is not counted.
TEST(Channel, UnbufferedLessReceiveMoreSendTest) {
  auto ch = MakeChannel<int>(0);
  unsigned sum_send = 0;

  // Send should block after three iterations
  // since we only have three receivers.
  std::thread t([&]() {
    // Try to send more number of times
    // than receivers
    for (int i = 0; i < 4; i++) {
      try {
        ch->Send(&i);
        sum_send += i;
      } catch (const paddle::platform::EnforceNotMet &) {
        // Expected for the send that is interrupted by CloseChannel();
        // caught by const reference to avoid copying the exception.
      }
    }
  });

  for (int i = 0; i < 3; i++) {
    int recv;
    ch->Receive(&recv);
    EXPECT_EQ(recv, i);
  }

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
  EXPECT_EQ(sum_send, 3U);

  CloseChannel(ch);
  t.join();
  delete ch;
}
// On an unbuffered channel a receiver wants 8 values. After the first 5
// sends (0..4, sum 10) it must be blocked; after 3 more sends (5..7) both
// sums must reach 28.
TEST(Channel, UnbufferedMoreReceiveLessSendTest) {
  auto ch = MakeChannel<int>(0);
  unsigned sum_send = 0;
  unsigned sum_receive = 0;

  // The receiver should block after 5
  // iterations, since there are only 5 senders.
  std::thread consumer([&]() {
    for (int expected = 0; expected < 8; expected++) {
      int recv;
      ch->Receive(&recv);  // should block after the fifth iteration.
      EXPECT_EQ(recv, expected);
      sum_receive += expected;
    }
  });

  for (int value = 0; value < 5; value++) {
    ch->Send(&value);
    sum_send += value;
  }

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec
  EXPECT_EQ(sum_send, 10U);
  EXPECT_EQ(sum_receive, 10U);

  // send three more elements
  for (int value = 5; value < 8; value++) {
    ch->Send(&value);
    sum_send += value;
  }

  CloseChannel(ch);
  consumer.join();
  EXPECT_EQ(sum_send, 28U);
  EXPECT_EQ(sum_receive, 28U);
  delete ch;
}
// This tests that destroying a channel unblocks
// any senders waiting for channel to have write space.
//
// ch         : channel under test; this helper deletes it.
// isBuffered : true when `ch` was created with capacity 1. Then one send
//              fits into the buffer, so at least 4 threads must block and
//              exactly 1 send must succeed; on an unbuffered channel no send
//              may succeed.
void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
  const size_t kNumThreads = 5;
  std::thread t[kNumThreads];
  bool thread_ended[kNumThreads];
  bool send_success[kNumThreads];

  // Launches threads that try to write and are blocked because of no readers
  for (size_t i = 0; i < kNumThreads; i++) {
    thread_ended[i] = false;
    send_success[i] = false;
    t[i] = std::thread(
        [&](bool *ended, bool *success) {
          int data = 10;
          bool is_exception = false;
          try {
            ch->Send(&data);
          } catch (const paddle::platform::EnforceNotMet &) {
            // Caught by const reference: no copy of the exception object.
            is_exception = true;
          }
          *success = !is_exception;
          *ended = true;
        },
        &thread_ended[i], &send_success[i]);
  }

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec

  if (isBuffered) {
    // If channel is buffered, verify that at least 4 threads are blocked
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (thread_ended[i] == false) ct++;
    }
    // At least 4 threads must be blocked
    EXPECT_GE(ct, 4);
  } else {
    // Verify that all the threads are blocked
    for (size_t i = 0; i < kNumThreads; i++) {
      EXPECT_EQ(thread_ended[i], false);
    }
  }

  // Explicitly destroy the channel
  delete ch;
  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  // Verify that all threads got unblocked
  for (size_t i = 0; i < kNumThreads; i++) {
    EXPECT_EQ(thread_ended[i], true);
  }

  // Count number of successful sends
  int ct = 0;
  for (size_t i = 0; i < kNumThreads; i++) {
    if (send_success[i]) ct++;
  }

  if (isBuffered) {
    // Only 1 send must be successful
    EXPECT_EQ(ct, 1);
  } else {
    // In unbuffered channel, no send should be successful
    EXPECT_EQ(ct, 0);
  }

  // Join all threads
  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
}
// Destroying a channel must also wake up every receiver that is blocked on
// it; each of them is expected to see Receive() == false.
// This helper deletes `ch`.
void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
  const size_t kNumThreads = 5;
  std::thread workers[kNumThreads];
  bool finished[kNumThreads];

  // Launch threads that try to read and are blocked because of no writers
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    finished[idx] = false;
    workers[idx] = std::thread(
        [&](bool *done) {
          int data;
          // All reads should return false
          EXPECT_EQ(ch->Receive(&data), false);
          *done = true;
        },
        &finished[idx]);
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(100));  // wait

  // Verify that all threads are blocked
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    EXPECT_EQ(finished[idx], false);
  }

  // delete the channel
  delete ch;
  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  // Verify that all threads got unblocked
  for (size_t idx = 0; idx < kNumThreads; idx++) {
    EXPECT_EQ(finished[idx], true);
  }

  for (auto &worker : workers) worker.join();
}
// Destroying a buffered channel must wake up blocked receivers.
// The helper deletes the channel.
TEST(Channel, BufferedChannelDestroyUnblocksReceiversTest) {
  size_t buffer_size = 1;
  auto buffered = MakeChannel<int>(buffer_size);
  ChannelDestroyUnblockReceivers(buffered);
}
// Destroying a buffered channel must wake up blocked senders.
// The helper deletes the channel.
TEST(Channel, BufferedChannelDestroyUnblocksSendersTest) {
  size_t buffer_size = 1;
  auto buffered = MakeChannel<int>(buffer_size);
  ChannelDestroyUnblockSenders(buffered, true);
}
// Destroying an unbuffered channel must wake up every receiver that is
// blocked waiting for a sender. The helper deletes the channel.
TEST(Channel, UnbufferedChannelDestroyUnblocksReceiversTest) {
  auto unbuffered = MakeChannel<int>(0);
  ChannelDestroyUnblockReceivers(unbuffered);
}
// Destroying an unbuffered channel must wake up every blocked sender.
// The helper deletes the channel.
TEST(Channel, UnbufferedChannelDestroyUnblocksSendersTest) {
  auto unbuffered = MakeChannel<int>(0);
  ChannelDestroyUnblockSenders(unbuffered, false);
}
// ChannelHolder::Cap() must report the capacity given to Reset<T>(), for
// both a buffered and an unbuffered channel.
TEST(ChannelHolder, ChannelHolderCapacityTest) {
  const size_t buffer_size = 10;

  ChannelHolder *buffered = new ChannelHolder();
  buffered->Reset<int>(buffer_size);
  EXPECT_EQ(buffered->Cap(), buffer_size);
  delete buffered;

  ChannelHolder *unbuffered = new ChannelHolder();
  unbuffered->Reset<int>(0);
  EXPECT_EQ(unbuffered->Cap(), 0U);
  delete unbuffered;
}
// Helper: a producer thread sends 0..4 through the holder while this thread
// receives and checks them in order; afterwards the channel is closed.
// Does not delete `ch`.
void ChannelHolderSendReceive(ChannelHolder *ch) {
  unsigned sum_send = 0;

  std::thread producer([&]() {
    for (int value = 0; value < 5; value++) {
      ch->Send(&value);
      sum_send += value;
    }
  });

  for (int expected = 0; expected < 5; expected++) {
    int recv;
    EXPECT_EQ(ch->Receive(&recv), true);
    EXPECT_EQ(recv, expected);
  }

  ch->close();
  producer.join();

  // 0 + 1 + 2 + 3 + 4
  EXPECT_EQ(sum_send, 10U);
}
// Send/receive round trip through a ChannelHolder wrapping a buffered
// channel.
TEST(ChannelHolder, ChannelHolderBufferedSendReceiveTest) {
  ChannelHolder *holder = new ChannelHolder();
  holder->Reset<int>(10);
  ChannelHolderSendReceive(holder);
  delete holder;
}
// Send/receive round trip through a ChannelHolder wrapping an unbuffered
// channel.
TEST(ChannelHolder, ChannelHolderUnBufferedSendReceiveTest) {
  ChannelHolder *holder = new ChannelHolder();
  holder->Reset<int>(0);
  ChannelHolderSendReceive(holder);
  delete holder;
}
// A ChannelHolder on which Reset<T>() was never called must report
// IsInitialized() == false, and Send, Receive and Type must each throw
// paddle::platform::EnforceNotMet.
TEST(ChannelHolder, ChannelUninitializedTest) {
  ChannelHolder *ch = new ChannelHolder();
  EXPECT_EQ(ch->IsInitialized(), false);

  int i = 10;

  // Exceptions are caught by const reference to avoid copying them.
  bool send_exception = false;
  try {
    ch->Send(&i);
  } catch (const paddle::platform::EnforceNotMet &) {
    send_exception = true;
  }
  EXPECT_EQ(send_exception, true);

  bool recv_exception = false;
  try {
    ch->Receive(&i);
  } catch (const paddle::platform::EnforceNotMet &) {
    recv_exception = true;
  }
  EXPECT_EQ(recv_exception, true);

  bool is_exception = false;
  try {
    ch->Type();
  } catch (const paddle::platform::EnforceNotMet &) {
    is_exception = true;
  }
  EXPECT_EQ(is_exception, true);

  delete ch;
}
// After Reset<T>() the holder is initialized, and it must stay initialized
// even after the channel has been closed.
TEST(ChannelHolder, ChannelInitializedTest) {
  ChannelHolder *holder = new ChannelHolder();
  holder->Reset<int>(2);
  EXPECT_EQ(holder->IsInitialized(), true);

  // Channel should remain initialized even after close
  holder->close();
  EXPECT_EQ(holder->IsInitialized(), true);

  delete holder;
}
// Sending a value whose type differs from the channel's element type must
// throw paddle::platform::EnforceNotMet, for unbuffered and buffered
// channels alike.
TEST(ChannelHolder, TypeMismatchSendTest) {
  // Test with unbuffered channel
  ChannelHolder *ch = new ChannelHolder();
  ch->Reset<int>(0);
  bool is_exception = false;
  bool boolean_data = true;
  try {
    ch->Send(&boolean_data);
  } catch (const paddle::platform::EnforceNotMet &) {
    // Caught by const reference: no copy of the exception object.
    is_exception = true;
  }
  EXPECT_EQ(is_exception, true);
  delete ch;

  // Test with Buffered Channel
  ch = new ChannelHolder();
  ch->Reset<float>(10);
  is_exception = false;
  int int_data = 23;
  try {
    ch->Send(&int_data);
  } catch (const paddle::platform::EnforceNotMet &) {
    is_exception = true;
  }
  EXPECT_EQ(is_exception, true);
  delete ch;
}
// Verifies that receiving into a variable whose type differs from the
// channel's element type raises paddle::platform::EnforceNotMet, for both a
// capacity-0 and a capacity-10 channel.
TEST(ChannelHolder, TypeMismatchReceiveTest) {
  // Test with unbuffered channel: int channel, bool destination.
  ChannelHolder *ch = new ChannelHolder();
  ch->Reset<int>(0);
  bool is_exception = false;
  // The destination is a bool (the previous name `float_data` was
  // misleading); it is initialized so no indeterminate value is ever read.
  bool bool_data = false;
  try {
    ch->Receive(&bool_data);
  } catch (const paddle::platform::EnforceNotMet &) {
    // Caught by const reference to avoid copying/slicing the exception.
    is_exception = true;
  }
  EXPECT_EQ(is_exception, true);
  delete ch;

  // Test with buffered channel: float channel, int destination.
  ch = new ChannelHolder();
  ch->Reset<float>(10);
  is_exception = false;
  int int_data = 23;
  try {
    ch->Receive(&int_data);
  } catch (const paddle::platform::EnforceNotMet &) {
    is_exception = true;
  }
  EXPECT_EQ(is_exception, true);
  delete ch;
}
// Helper: starts five receiver threads on `ch`, checks that none of them has
// returned after 200 ms (nobody is sending), closes the channel, and checks
// that every receiver has returned 200 ms later. Each Receive is expected to
// return false. The channel is closed but not deleted here.
void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
  const size_t kNumThreads = 5;
  const std::chrono::milliseconds kSettleTime(200);  // 0.2 sec
  std::thread workers[kNumThreads];
  bool finished[kNumThreads];

  // Body of every receiver thread; `done` is that thread's completion flag.
  auto receiver = [ch](bool *done) {
    int received;
    EXPECT_EQ(ch->Receive(&received), false);
    *done = true;
  };

  // Launch threads that try to read and block because there are no writers.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    finished[idx] = false;
    workers[idx] = std::thread(receiver, &finished[idx]);
  }

  std::this_thread::sleep_for(kSettleTime);

  // Every receiver must still be blocked.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    EXPECT_EQ(finished[idx], false);
  }

  // Explicitly closing the channel should unblock all receivers.
  ch->close();

  std::this_thread::sleep_for(kSettleTime);

  // Every receiver must have returned by now.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    EXPECT_EQ(finished[idx], true);
  }

  for (auto &worker : workers) worker.join();
}
// Helper: starts five sender threads on `ch`, checks how many are blocked
// after 200 ms (nobody is receiving), closes the channel, and checks that
// every sender has returned 200 ms later.
//
//   ch          channel holder under test; it is closed but not deleted here.
//   isBuffered  selects the expectations: when true, at least 4 senders must
//               be blocked before the close and exactly 1 send must have
//               succeeded overall (callers in this file pass a capacity-1
//               channel); when false, all senders must be blocked before
//               the close.
//
// A send counts as successful when Send() returns without throwing
// paddle::platform::EnforceNotMet.
void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
  const size_t kNumThreads = 5;
  std::thread t[kNumThreads];
  bool thread_ended[kNumThreads];
  bool send_success[kNumThreads];

  // Launches threads that try to write and are blocked because of no readers
  for (size_t i = 0; i < kNumThreads; i++) {
    thread_ended[i] = false;
    send_success[i] = false;
    t[i] = std::thread(
        [&](bool *ended, bool *success) {
          int data = 10;
          bool is_exception = false;
          try {
            ch->Send(&data);
          } catch (const paddle::platform::EnforceNotMet &) {
            // Caught by const reference: no copy, no slicing.
            is_exception = true;
          }
          *success = !is_exception;
          *ended = true;
        },
        &thread_ended[i], &send_success[i]);
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  if (isBuffered) {
    // If ch is buffered, at least 4 threads must be blocked.
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (!thread_ended[i]) ct++;
    }
    EXPECT_GE(ct, 4);
  } else {
    // If ch is unbuffered, all the threads should be blocked.
    for (size_t i = 0; i < kNumThreads; i++) {
      EXPECT_EQ(thread_ended[i], false);
    }
  }

  // Explicitly close the channel
  // This should unblock all senders
  ch->close();

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  // Verify that all threads got unblocked
  for (size_t i = 0; i < kNumThreads; i++) {
    EXPECT_EQ(thread_ended[i], true);
  }

  if (isBuffered) {
    // Verify that only 1 send was successful
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (send_success[i]) ct++;
    }
    // Only 1 send must be successful
    EXPECT_EQ(ct, 1);
  }

  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
}
// Closing a ChannelHolder must unblock any receivers waiting on the channel.
// The scenario is run once on a capacity-1 channel and once on a capacity-0
// channel.
TEST(ChannelHolder, ChannelHolderCloseUnblocksReceiversTest) {
  // Buffered channel
  auto *buffered = new ChannelHolder();
  buffered->Reset<int>(1);
  ChannelHolderCloseUnblocksReceiversTest(buffered);
  delete buffered;

  // Unbuffered channel
  auto *unbuffered = new ChannelHolder();
  unbuffered->Reset<int>(0);
  ChannelHolderCloseUnblocksReceiversTest(unbuffered);
  delete unbuffered;
}
// Closing a ChannelHolder must unblock any senders waiting for the channel
// to have write space. The scenario is run once on a capacity-1 channel and
// once on a capacity-0 channel.
// NOTE(review): this test is registered under the suite name `Channel`
// while the neighbouring ChannelHolder tests use `ChannelHolder`; the name is
// kept as-is here - confirm whether that is intentional.
TEST(Channel, ChannelHolderCloseUnblocksSendersTest) {
  // Buffered channel
  auto *buffered = new ChannelHolder();
  buffered->Reset<int>(1);
  ChannelHolderCloseUnblocksSendersTest(buffered, true);
  delete buffered;

  // Unbuffered channel
  auto *unbuffered = new ChannelHolder();
  unbuffered->Reset<int>(0);
  ChannelHolderCloseUnblocksSendersTest(unbuffered, false);
  delete unbuffered;
}
// Helper: checks that destroying a ChannelHolder unblocks any senders
// waiting on the channel.
//
// Starts five sender threads on `ch`, checks how many are blocked after
// 200 ms, deletes `ch`, and checks that every sender has returned 200 ms
// later.
//
//   ch          channel holder under test; it is DELETED by this function,
//               so the caller must not use or delete it afterwards.
//   isBuffered  selects the expectations: when true, at least 4 senders must
//               be blocked before the delete and exactly 1 send must have
//               succeeded; when false, all senders must be blocked and no
//               send may have succeeded.
//
// A send counts as successful when Send() returns without throwing
// paddle::platform::EnforceNotMet.
void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
  const size_t kNumThreads = 5;
  std::thread t[kNumThreads];
  bool thread_ended[kNumThreads];
  bool send_success[kNumThreads];

  // Launches threads that try to write and are blocked because of no readers
  for (size_t i = 0; i < kNumThreads; i++) {
    thread_ended[i] = false;
    send_success[i] = false;
    t[i] = std::thread(
        [&](bool *ended, bool *success) {
          int data = 10;
          bool is_exception = false;
          try {
            ch->Send(&data);
          } catch (const paddle::platform::EnforceNotMet &) {
            // Caught by const reference: no copy, no slicing.
            is_exception = true;
          }
          *success = !is_exception;
          *ended = true;
        },
        &thread_ended[i], &send_success[i]);
  }

  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait 0.2 sec

  if (isBuffered) {
    // If channel is buffered, verify that at least 4 threads are blocked
    int ct = 0;
    for (size_t i = 0; i < kNumThreads; i++) {
      if (thread_ended[i] == false) ct++;
    }
    // At least 4 threads must be blocked
    EXPECT_GE(ct, 4);
  } else {
    // Verify that all the threads are blocked
    for (size_t i = 0; i < kNumThreads; i++) {
      EXPECT_EQ(thread_ended[i], false);
    }
  }

  // Explicitly destroy the channel
  delete ch;
  std::this_thread::sleep_for(std::chrono::milliseconds(200));  // wait

  // Verify that all threads got unblocked
  for (size_t i = 0; i < kNumThreads; i++) {
    EXPECT_EQ(thread_ended[i], true);
  }

  // Count number of successful sends
  int ct = 0;
  for (size_t i = 0; i < kNumThreads; i++) {
    if (send_success[i]) ct++;
  }

  if (isBuffered) {
    // Only 1 send must be successful
    EXPECT_EQ(ct, 1);
  } else {
    // In unbuffered channel, no send should be successful
    EXPECT_EQ(ct, 0);
  }

  // Join all threads
  for (size_t i = 0; i < kNumThreads; i++) t[i].join();
}
// Helper: checks that destroying a ChannelHolder also unblocks any receivers
// waiting on the channel.
//
// Starts five receiver threads on `ch`, checks that none has returned after
// 200 ms, deletes `ch`, and checks that every receiver has returned 200 ms
// later. Each Receive is expected to return false. `ch` is DELETED here, so
// the caller must not use or delete it afterwards.
void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
  const size_t kNumThreads = 5;
  const std::chrono::milliseconds kSettleTime(200);
  std::thread workers[kNumThreads];
  bool finished[kNumThreads];

  // Body of every receiver thread; `done` is that thread's completion flag.
  auto receiver = [ch](bool *done) {
    int received;
    // All reads should return false
    EXPECT_EQ(ch->Receive(&received), false);
    *done = true;
  };

  // Launch threads that try to read and block because there are no writers.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    finished[idx] = false;
    workers[idx] = std::thread(receiver, &finished[idx]);
  }

  std::this_thread::sleep_for(kSettleTime);

  // Every receiver must still be blocked.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    EXPECT_EQ(finished[idx], false);
  }

  // Destroy the channel holder.
  delete ch;

  std::this_thread::sleep_for(kSettleTime);

  // Every receiver must have returned by now.
  for (size_t idx = 0; idx < kNumThreads; ++idx) {
    EXPECT_EQ(finished[idx], true);
  }

  for (auto &worker : workers) worker.join();
}
// Destroying a ChannelHolder must unblock waiting receivers, for both a
// capacity-1 and a capacity-0 channel. The helper deletes the holder it is
// given, so no delete appears in this test.
TEST(ChannelHolder, ChannelHolderDestroyUnblocksReceiversTest) {
  // Buffered channel
  auto *buffered = new ChannelHolder();
  buffered->Reset<int>(1);
  // `buffered` is deleted inside ChannelHolderDestroyUnblockReceivers.
  ChannelHolderDestroyUnblockReceivers(buffered);

  // Unbuffered channel
  auto *unbuffered = new ChannelHolder();
  unbuffered->Reset<int>(0);
  // `unbuffered` is deleted inside ChannelHolderDestroyUnblockReceivers.
  ChannelHolderDestroyUnblockReceivers(unbuffered);
}
// Destroying a ChannelHolder must unblock waiting senders, for both a
// capacity-1 and a capacity-0 channel. The helper deletes the holder it is
// given, so no delete appears in this test.
TEST(ChannelHolder, ChannelHolderDestroyUnblocksSendersTest) {
  // Buffered channel
  auto *buffered = new ChannelHolder();
  buffered->Reset<int>(1);
  // `buffered` is deleted inside ChannelHolderDestroyUnblockSenders.
  ChannelHolderDestroyUnblockSenders(buffered, true);

  // Unbuffered channel
  auto *unbuffered = new ChannelHolder();
  unbuffered->Reset<int>(0);
  // `unbuffered` is deleted inside ChannelHolderDestroyUnblockSenders.
  ChannelHolderDestroyUnblockSenders(unbuffered, false);
}
// Helper: exercises closing a ChannelHolder from several threads while other
// threads send to and receive from it.
//
// Of the 15 threads, the first third send the value 10, the second third
// receive (and check the value when Receive returns true), and the last third
// call close() if IsClosed() is false. After 100 ms every thread must have
// finished and the channel must report closed. `ch` is DELETED here.
//
// NOTE(review): unlike the other sender helpers in this file, Send() is not
// wrapped in a try/catch here; if Send() throws on an already-closed channel
// the exception would escape the thread - confirm this cannot happen.
void ChannelHolderManyTimesClose(ChannelHolder *ch) {
  const int kNumThreads = 15;
  const size_t kTotal = kNumThreads;
  const size_t kSendersEnd = kNumThreads / 3;        // senders:   [0, 5)
  const size_t kReceiversEnd = 2 * kNumThreads / 3;  // receivers: [5, 10)
  std::thread workers[kNumThreads];                  // closers:   [10, 15)
  bool finished[kNumThreads];

  auto sender = [ch](bool *done) {
    int payload = 10;
    ch->Send(&payload);
    *done = true;
  };
  auto receiver = [ch](bool *done) {
    int received;
    if (ch->Receive(&received)) {
      EXPECT_EQ(received, 10);
    }
    *done = true;
  };
  auto closer = [ch](bool *done) {
    if (!ch->IsClosed()) {
      ch->close();
    }
    *done = true;
  };

  size_t idx = 0;
  // Launch threads that try to send data to the channel.
  for (; idx < kSendersEnd; ++idx) {
    finished[idx] = false;
    workers[idx] = std::thread(sender, &finished[idx]);
  }
  // Launch threads that try to receive data from the channel.
  for (; idx < kReceiversEnd; ++idx) {
    finished[idx] = false;
    workers[idx] = std::thread(receiver, &finished[idx]);
  }
  // Launch threads that try to close the channel.
  for (; idx < kTotal; ++idx) {
    finished[idx] = false;
    workers[idx] = std::thread(closer, &finished[idx]);
  }

  std::this_thread::sleep_for(std::chrono::milliseconds(100));  // wait

  // Verify that all threads are unblocked
  for (size_t k = 0; k < kTotal; ++k) {
    EXPECT_EQ(finished[k], true);
  }
  EXPECT_TRUE(ch->IsClosed());

  // Delete the channel holder, then reap the threads.
  delete ch;
  for (auto &worker : workers) worker.join();
}
// Runs the concurrent send/receive/close scenario on a capacity-10 int
// channel. The helper deletes the holder it is given.
TEST(ChannelHolder, ChannelHolderManyTimesCloseTest) {
  // Buffered channel
  auto *holder = new ChannelHolder();
  holder->Reset<int>(10);
  // `holder` is deleted inside ChannelHolderManyTimesClose.
  ChannelHolderManyTimesClose(holder);
}
paddle/fluid/framework/concurrency_test.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thread> // NOLINT
#include "gtest/gtest.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
// Operators that the programs built in this file reference by name.
USE_NO_KERNEL_OP(go);
USE_NO_KERNEL_OP(channel_close);
USE_NO_KERNEL_OP(channel_create);
USE_NO_KERNEL_OP(channel_recv);
USE_NO_KERNEL_OP(channel_send);
USE_NO_KERNEL_OP(elementwise_add);
USE_NO_KERNEL_OP(select);
USE_NO_KERNEL_OP(conditional_block);
USE_NO_KERNEL_OP(equal);
USE_NO_KERNEL_OP(assign);
USE_NO_KERNEL_OP(while);
USE_NO_KERNEL_OP(print);

// Short namespace aliases used throughout this file.
namespace f = paddle::framework;
namespace p = paddle::platform;
namespace
paddle
{
namespace
framework
{
// Obtains variable `name` from `scope` via Scope::Var, resizes its LoDTensor
// to dim [1], allocates T storage on `place` and stores `value` as the only
// element. Returns the tensor.
template <typename T>
LoDTensor *CreateVariable(Scope *scope, const p::CPUPlace &place,
                          std::string name, T value) {
  LoDTensor *holder = scope->Var(name)->GetMutable<LoDTensor>();
  holder->Resize({1});
  T *storage = holder->mutable_data<T>(place);
  storage[0] = value;
  return holder;
}
// Appends a new op of the given `type` to `block` and wires it up: every
// entry of `inputs` / `outputs` is set as a named input / output, and `attrs`
// becomes the op's attribute map.
void AddOp(const std::string &type, const VariableNameMap &inputs,
           const VariableNameMap &outputs, AttributeMap attrs,
           BlockDesc *block) {
  auto new_op = block->AppendOp();
  new_op->SetType(type);

  for (auto &slot : inputs) new_op->SetInput(slot.first, slot.second);
  for (auto &slot : outputs) new_op->SetOutput(slot.first, slot.second);

  new_op->SetAttrMap(attrs);
}
// Adds one select case to `casesBlock`.
//
// A new sub-block is appended to `program` (parented by `casesBlock`) and
// populated by `func`. The scope then receives the variables
// "caseCond<caseId>" (false), "caseCondX<caseId>" (caseId) and `caseVarName`
// (caseId), plus "step_scope". Finally two ops are appended to `casesBlock`:
// an "equal" op comparing "caseCondX<caseId>" with "caseToExecute" into
// "caseCond<caseId>", and a "conditional_block" op that runs the new
// sub-block under that condition.
//
// `caseType` and `caseChannel` are accepted but not used by this function.
void AddCase(ProgramDesc *program, Scope *scope, p::CPUPlace *place,
             BlockDesc *casesBlock, int caseId, int caseType,
             std::string caseChannel, std::string caseVarName,
             std::function<void(BlockDesc *, Scope *)> func) {
  const std::string idSuffix = std::to_string(caseId);
  std::string condName = "caseCond" + idSuffix;
  std::string condXName = "caseCondX" + idSuffix;

  // Build the body of the case first.
  BlockDesc *bodyBlock = program->AppendBlock(*casesBlock);
  func(bodyBlock, scope);

  CreateVariable(scope, *place, condName, false);
  CreateVariable(scope, *place, condXName, caseId);
  CreateVariable(scope, *place, caseVarName, caseId);

  scope->Var("step_scope");

  // condName = (condXName == caseToExecute)
  AddOp("equal", {{"X", {condXName}}, {"Y", {"caseToExecute"}}},
        {{"Out", {condName}}}, {}, casesBlock);

  // Run the case body only when the condition holds.
  AddOp("conditional_block", {{"X", {condName}}, {"Params", {}}},
        {{"Out", {}}, {"Scope", {"step_scope"}}},
        {{"sub_block", bodyBlock}, {"is_scalar_condition", true}},
        casesBlock);
}
// Appends to `parentBlock` a "while" op whose body runs a "select" op over
// two channels and thereby steps through the fibonacci sequence.
//
//   scope         receives the helper variables created here (loop
//                 condition, fibonacci state, case selector, ...).
//   place         place on which the helper variables are allocated.
//   program       program that receives the new sub-blocks.
//   parentBlock   block that receives the final "while" op.
//   dataChanName  channel the current fibonacci value is sent to (case 0).
//   quitChanName  channel whose receive ends the loop (case 1).
//
// Blocks created: whileBlock (child of parentBlock) holds the "assign" and
// "select" ops; casesBlock (child of whileBlock) holds the per-case
// "equal"/"conditional_block" ops added by AddCase.
void AddFibonacciSelect(Scope *scope, p::CPUPlace *place, ProgramDesc *program,
                        BlockDesc *parentBlock, std::string dataChanName,
                        std::string quitChanName) {
  BlockDesc *whileBlock = program->AppendBlock(*parentBlock);

  CreateVariable(scope, *place, "whileExitCond", true);
  CreateVariable(scope, *place, "caseToExecute", -1);
  CreateVariable(scope, *place, "case1var", 0);

  CreateVariable(scope, *place, "xtemp", 0);

  // TODO(thuan): Need to create fibXToSend, since channel send moves the
  // actual data, which causes the data to be no longer accessible to do the
  // fib calculation
  // TODO(abhinav): Change channel send to do a copy instead of a move!
  CreateVariable(scope, *place, "fibXToSend", 0);

  CreateVariable(scope, *place, "fibX", 0);
  CreateVariable(scope, *place, "fibY", 1);
  CreateVariable(scope, *place, "quitVar", 0);

  BlockDesc *casesBlock = program->AppendBlock(*whileBlock);
  // (An unused std::function local named `f` used to be declared here; it
  // did nothing and shadowed the file-level namespace alias `f`.)

  // TODO(thuan): Remove this once we change channel send to do a copy instead
  // of move
  AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"fibXToSend"}}}, {}, whileBlock);

  // Case 0: Send to dataChanName.
  // Body: xtemp = fibX; fibX = fibY; fibY = xtemp + fibY.
  std::function<void(BlockDesc * caseBlock, Scope * scope)> case0Func =
      [](BlockDesc *caseBlock, Scope *scope) {
        AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"xtemp"}}}, {}, caseBlock);
        AddOp("assign", {{"X", {"fibY"}}}, {{"Out", {"fibX"}}}, {}, caseBlock);
        AddOp("elementwise_add", {{"X", {"xtemp"}}, {"Y", {"fibY"}}},
              {{"Out", {"fibY"}}}, {}, caseBlock);
      };
  AddCase(program, scope, place, casesBlock, 0, 1, dataChanName, "fibXToSend",
          case0Func);
  std::string case0Config =
      std::string("0,1,") + dataChanName + std::string(",fibXToSend");

  // Case 1: Receive from quitChanName
  std::function<void(BlockDesc * caseBlock, Scope * scope)> case1Func =
      [place](BlockDesc *caseBlock, Scope *scope) {
        // Exit the while loop after we receive from quit channel.
        // We assign a false to "whileExitCond" variable, which will
        // break out of while_op loop
        CreateVariable(scope, *place, "whileFalse", false);
        AddOp("assign", {{"X", {"whileFalse"}}}, {{"Out", {"whileExitCond"}}},
              {}, caseBlock);
      };
  AddCase(program, scope, place, casesBlock, 1, 2, quitChanName, "quitVar",
          case1Func);
  std::string case1Config =
      std::string("1,2,") + quitChanName + std::string(",quitVar");

  // Select block
  AddOp("select",
        {{"X", {dataChanName, quitChanName}},
         {"case_to_execute", {"caseToExecute"}}},
        {{"Out", {}}},
        {{"sub_block", casesBlock},
         {"cases", std::vector<std::string>{case0Config, case1Config}}},
        whileBlock);

  scope->Var("stepScopes");
  AddOp("while",
        {{"X", {dataChanName, quitChanName}}, {"Condition", {"whileExitCond"}}},
        {{"Out", {}}, {"StepScopes", {"stepScopes"}}},
        {{"sub_block", whileBlock}}, parentBlock);
}
// Builds a program in which a "go" op sends variable "x0" (99) through a
// capacity-10 channel and the main block receives it into "result", then
// runs the program and checks that "result" changed from 0 to 99.
TEST(Concurrency, Go_Op) {
  Scope scope;
  p::CPUPlace place;

  // Initialize scope variables
  p::CPUDeviceContext ctx(place);

  // Channel variable
  scope.Var("Channel");

  // "x0" will be put into the channel, "result" will be pulled from it.
  CreateVariable(&scope, place, "Status", false);
  CreateVariable(&scope, place, "x0", 99);
  CreateVariable(&scope, place, "result", 0);

  framework::Executor executor(place);
  ProgramDesc program;
  BlockDesc *mainBlock = program.MutableBlock(0);

  // channel_create
  AddOp("channel_create", {}, {{"Out", {"Channel"}}},
        {{"capacity", 10}, {"data_type", f::proto::VarType::LOD_TENSOR}},
        mainBlock);

  // Body of the go routine: send "x0" on the channel.
  BlockDesc *routineBlock = program.AppendBlock(program.Block(0));
  AddOp("channel_send", {{"Channel", {"Channel"}}, {"X", {"x0"}}},
        {{"Status", {"Status"}}}, {}, routineBlock);

  // go
  AddOp("go", {{"X", {"Channel", "x0"}}}, {}, {{"sub_block", routineBlock}},
        mainBlock);

  // channel_recv
  AddOp("channel_recv", {{"Channel", {"Channel"}}},
        {{"Status", {"Status"}}, {"Out", {"result"}}}, {}, mainBlock);

  // channel_close
  AddOp("channel_close", {{"Channel", {"Channel"}}}, {}, {}, mainBlock);

  // Before running, "result" must still hold its initial value 0.
  const LoDTensor &resultTensor =
      (scope.FindVar("result"))->Get<LoDTensor>();
  auto *before = resultTensor.data<int>();
  EXPECT_EQ(before[0], 0);

  executor.Run(program, &scope, 0, true, true);

  // After the run, the value sent by the go routine (99) must have been
  // received into "result".
  auto *after = resultTensor.data<int>();
  EXPECT_EQ(after[0], 99);
}
/**
 * This test implements the fibonacci function using go_op and select_op.
 *
 * A go routine receives ten values from the data channel into "currentXFib"
 * (printing each one) and then sends on the quit channel; the main block runs
 * the while/select loop built by AddFibonacciSelect. After the run,
 * "currentXFib" is expected to hold 34.
 */
TEST(Concurrency, Select) {
  Scope scope;
  p::CPUPlace place;

  // Initialize scope variables
  p::CPUDeviceContext ctx(place);

  CreateVariable(&scope, place, "Status", false);
  CreateVariable(&scope, place, "result", 0);
  CreateVariable(&scope, place, "currentXFib", 0);

  framework::Executor executor(place);
  ProgramDesc program;
  BlockDesc *mainBlock = program.MutableBlock(0);

  // Data channel (capacity 0)
  std::string dataChanName = "Channel";
  scope.Var(dataChanName);
  AddOp("channel_create", {}, {{"Out", {dataChanName}}},
        {{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}},
        mainBlock);

  // Quit channel (capacity 0)
  std::string quitChanName = "Quit";
  scope.Var(quitChanName);
  AddOp("channel_create", {}, {{"Out", {quitChanName}}},
        {{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}},
        mainBlock);

  // Go routine body: receive and print ten fibonacci values.
  CreateVariable(&scope, place, "xReceiveVar", 0);

  BlockDesc *routineBlock = program.AppendBlock(program.Block(0));
  const AttributeMap printAttrs = {{"first_n", 100},
                                   {"summarize", -1},
                                   {"print_tensor_name", false},
                                   {"print_tensor_type", true},
                                   {"print_tensor_shape", false},
                                   {"print_tensor_lod", false},
                                   {"print_phase", std::string("FORWARD")},
                                   {"message", std::string("X: ")}};
  for (int step = 0; step < 10; ++step) {
    AddOp("channel_recv", {{"Channel", {dataChanName}}},
          {{"Status", {"Status"}}, {"Out", {"currentXFib"}}}, {},
          routineBlock);
    AddOp("print", {{"In", {"currentXFib"}}}, {{"Out", {"currentXFib"}}},
          printAttrs, routineBlock);
  }

  // ... then signal the select loop to quit.
  CreateVariable(&scope, place, "quitSignal", 0);
  AddOp("channel_send", {{"Channel", {quitChanName}}, {"X", {"quitSignal"}}},
        {{"Status", {"Status"}}}, {}, routineBlock);

  // go
  AddOp("go", {{"X", {dataChanName, quitChanName}}}, {},
        {{"sub_block", routineBlock}}, mainBlock);

  AddFibonacciSelect(&scope, &place, &program, mainBlock, dataChanName,
                     quitChanName);

  // channel_close for both channels
  AddOp("channel_close", {{"Channel", {dataChanName}}}, {}, {}, mainBlock);
  AddOp("channel_close", {{"Channel", {quitChanName}}}, {}, {}, mainBlock);

  executor.Run(program, &scope, 0, true, true);

  // After the run, "currentXFib" should be equal to 34
  // (which is 10 loops through fibonacci sequence).
  const LoDTensor &fibTensor =
      (scope.FindVar("currentXFib"))->Get<LoDTensor>();
  auto *finalData = fibTensor.data<int>();
  EXPECT_EQ(finalData[0], 34);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
...
@@ -54,3 +54,8 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu
# device_context reduce_op_handle )
# device_context reduce_op_handle )
cc_library
(
fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
cc_library
(
fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context
)
DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context
)
cc_library
(
build_strategy SRCS build_strategy.cc DEPS
graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass
)
paddle/fluid/framework/details/build_strategy.cc
0 → 100644
浏览文件 @
91756a5a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
ParallelExecutorPassBuilder
:
public
ir
::
PassBuilder
{
public:
explicit
ParallelExecutorPassBuilder
(
const
BuildStrategy
&
strategy
)
:
ir
::
PassBuilder
(),
strategy_
(
strategy
)
{
// Add a graph viz pass to record a graph.
if
(
!
strategy_
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
AppendPass
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy_
.
debug_graphviz_path_
.
c_str
(),
"_original_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
}
// Add op fusion.
if
(
strategy
.
fuse_elewise_add_act_ops_
)
{
auto
fuse_elewise_add_act_pass
=
AppendPass
(
"fuse_elewise_add_act_pass"
);
// Add a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
AppendPass
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
}
}
// Convert graph to run on multi-devices.
auto
multi_devices_pass
=
AppendPass
(
"multi_devices_pass"
);
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
&
strategy_
);
// Add a graph print pass to record a graph with device info.
if
(
!
strategy_
.
debug_graphviz_path_
.
empty
())
{
auto
multi_devices_print_pass
=
AppendPass
(
"multi_devices_print_pass"
);
multi_devices_print_pass
->
SetNotOwned
<
const
std
::
string
>
(
"debug_graphviz_path"
,
&
strategy_
.
debug_graphviz_path_
);
multi_devices_print_pass
->
Set
<
details
::
GraphvizSSAGraphPrinter
>
(
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
}
private:
BuildStrategy
strategy_
;
};
// Builds a fresh ParallelExecutorPassBuilder from this strategy, stores it in
// pass_builder_ (replacing any previous builder) and returns it.
std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
    const {
  std::shared_ptr<ir::PassBuilder> builder(
      new ParallelExecutorPassBuilder(*this));
  pass_builder_ = builder;
  return builder;
}
// Builds an ir::Graph from `main_program` and runs every pass of
// pass_builder_ over it in order, returning the resulting graph.
//
// If no pass builder exists yet, a default one is created from this strategy.
// Before "multi_devices_pass" is applied, its "places", "loss_var_name",
// "params" and "local_scopes" attributes (and "nccl_ctxs" in CUDA builds) are
// erased and re-set as non-owned pointers to the arguments of this call, so
// the arguments must outlive the call. In CUDA builds "nccl_ctxs" is set to
// `nccl_ctxs` when `use_cuda` is true and to nullptr otherwise.
std::unique_ptr<ir::Graph> BuildStrategy::Apply(
    const ProgramDesc &main_program,
    const std::vector<platform::Place> &places,
    const std::string &loss_var_name,
    const std::unordered_set<std::string> &param_names,
    const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
    const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
#else
    const bool use_cuda) const {
#endif
  // Create a default one if not initialized by user.
  if (!pass_builder_) {
    CreatePassesFromStrategy();
  }

  std::unique_ptr<ir::Graph> result(new ir::Graph(main_program));

  for (auto &current : pass_builder_->AllPasses()) {
    if (current->Type() == "multi_devices_pass") {
      // Rebind the per-call inputs; Erase first so stale pointers from an
      // earlier call are dropped.
      current->Erase("places");
      current->SetNotOwned<const std::vector<platform::Place>>("places",
                                                               &places);
      current->Erase("loss_var_name");
      current->SetNotOwned<const std::string>("loss_var_name",
                                              &loss_var_name);
      current->Erase("params");
      current->SetNotOwned<const std::unordered_set<std::string>>(
          "params", &param_names);
      current->Erase("local_scopes");
      current->SetNotOwned<const std::vector<Scope *>>("local_scopes",
                                                       &local_scopes);
#ifdef PADDLE_WITH_CUDA
      platform::NCCLContextMap *active_ctxs = nullptr;
      if (use_cuda) {
        active_ctxs = nccl_ctxs;
      }
      current->Erase("nccl_ctxs");
      current->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", active_ctxs);
#endif
    }
    result = current->Apply(std::move(result));
  }
  return result;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
// Passes that ParallelExecutorPassBuilder appends by name.
USE_PASS(fuse_elewise_add_act_pass);
USE_PASS(graph_viz_pass);
USE_PASS(multi_devices_pass);
USE_PASS(multi_devices_check_pass);
USE_PASS(multi_devices_print_pass);
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
91756a5a
...
@@ -15,6 +15,17 @@
...
@@ -15,6 +15,17 @@
#pragma once
#pragma once
#include <string>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -57,6 +68,30 @@ struct BuildStrategy {
...
@@ -57,6 +68,30 @@ struct BuildStrategy {
bool
fuse_elewise_add_act_ops_
{
false
};
bool
fuse_elewise_add_act_ops_
{
false
};
bool
enable_data_balance_
{
false
};
bool
enable_data_balance_
{
false
};
// User normally doesn't need to call this API.
// The PassBuilder allows for more customized insert, remove of passes
// from python side.
// A new PassBuilder is created based on configs defined above and
// passes are owned by the PassBuilder.
std
::
shared_ptr
<
ir
::
PassBuilder
>
CreatePassesFromStrategy
()
const
;
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#ifdef PADDLE_WITH_CUDA
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
const
bool
use_cuda
)
const
;
#endif
private:
mutable
std
::
shared_ptr
<
ir
::
PassBuilder
>
pass_builder_
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/details/reference_count_pass.cc
浏览文件 @
91756a5a
...
@@ -80,15 +80,15 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
...
@@ -80,15 +80,15 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
// This is weird but there is really some variables without var_desc
// This is weird but there is really some variables without var_desc
// in computation_op
// in computation_op
if
(
var_desc
==
nullptr
)
{
if
(
var_desc
==
nullptr
)
{
if
(
compute_op
->
Node
()
->
Op
()
->
Block
()
->
FindVar
(
var_name
)
==
nullptr
)
var_desc
=
compute_op
->
Node
()
->
Op
()
->
Block
()
->
FindVar
(
var_name
);
continue
;
if
(
var_desc
==
nullptr
)
continue
;
}
else
{
}
if
(
var_desc
->
Persistable
())
continue
;
auto
var_type
=
var_desc
->
Proto
()
->
type
().
type
()
;
if
(
var_desc
->
Persistable
())
continue
;
if
(
var_type
!=
proto
::
VarType
::
LOD_TENSOR
&&
auto
var_type
=
var_desc
->
Proto
()
->
type
().
type
();
var_type
!=
proto
::
VarType
::
SELECTED_ROWS
)
{
if
(
var_type
!=
proto
::
VarType
::
LOD_TENSOR
&&
continue
;
var_type
!=
proto
::
VarType
::
SELECTED_ROWS
)
{
}
continue
;
}
}
// compute op only runs in one device
// compute op only runs in one device
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
91756a5a
...
@@ -14,7 +14,6 @@ limitations under the License. */
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
...
@@ -76,15 +75,13 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
...
@@ -76,15 +75,13 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
var
->
GetMutable
<
platform
::
PlaceList
>
();
var
->
GetMutable
<
platform
::
PlaceList
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
READER
)
{
}
else
if
(
var_type
==
proto
::
VarType
::
READER
)
{
var
->
GetMutable
<
ReaderHolder
>
();
var
->
GetMutable
<
ReaderHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
CHANNEL
)
{
var
->
GetMutable
<
ChannelHolder
>
();
}
else
if
(
var_type
==
proto
::
VarType
::
RAW
)
{
}
else
if
(
var_type
==
proto
::
VarType
::
RAW
)
{
// GetMutable will be called in operator
// GetMutable will be called in operator
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_THROW
(
"Variable type %d is not in "
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER,
CHANNEL,
RAW]"
,
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW]"
,
var_type
);
var_type
);
}
}
}
}
...
...
paddle/fluid/framework/framework.proto
浏览文件 @
91756a5a
...
@@ -126,7 +126,6 @@ message VarType {
...
@@ -126,7 +126,6 @@ message VarType {
LOD_TENSOR_ARRAY
=
13
;
LOD_TENSOR_ARRAY
=
13
;
PLACE_LIST
=
14
;
PLACE_LIST
=
14
;
READER
=
15
;
READER
=
15
;
CHANNEL
=
16
;
// Any runtime decided variable type is raw
// Any runtime decided variable type is raw
// raw variables should manage their own allocations
// raw variables should manage their own allocations
// in operators like nccl_op
// in operators like nccl_op
...
@@ -158,12 +157,6 @@ message VarType {
...
@@ -158,12 +157,6 @@ message VarType {
message
ReaderDesc
{
repeated
LoDTensorDesc
lod_tensor
=
1
;
}
message
ReaderDesc
{
repeated
LoDTensorDesc
lod_tensor
=
1
;
}
optional
ReaderDesc
reader
=
5
;
optional
ReaderDesc
reader
=
5
;
message
ChannelDesc
{
required
Type
data_type
=
1
;
required
int64
capacity
=
2
;
}
optional
ChannelDesc
channel
=
6
;
message
Tuple
{
repeated
Type
element_type
=
1
;
}
message
Tuple
{
repeated
Type
element_type
=
1
;
}
optional
Tuple
tuple
=
7
;
optional
Tuple
tuple
=
7
;
}
}
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
91756a5a
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
pragma once
\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
...
@@ -28,12 +29,13 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
...
@@ -28,12 +29,13 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_to_program_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
graph_viz_pass base
)
pass_library
(
fc_fuse_pass inference
)
pass_library
(
fc_fuse_pass inference
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
pass_library
(
conv_relu_mkldnn_fuse_pass inference
)
endif
()
endif
()
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
attention_lstm_fuse_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
infer_clean_graph_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
fc_lstm_fuse_pass inference
)
pass_library
(
embedding_fc_lstm_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
...
@@ -41,12 +43,14 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
...
@@ -41,12 +43,14 @@ cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_library
(
pass_builder SRCS pass_builder.cc DEPS pass
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
cc_test
(
test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
cc_test
(
test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass
)
endif
()
endif
()
paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h"
#include <algorithm>
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Detects `lookup_table -> FC (mul [+ elementwise_add]) -> lstm` subgraphs in
// `graph` and replaces the FC and LSTM ops with one `fused_embedding_fc_lstm`
// op.
//
// For every match the embedding table W is pre-multiplied by the FC weight and
// the (FC + LSTM gate) biases are added, producing a new "Embeddings" variable
// in `scope` that the fused op consumes.
//
// Parameters:
//   graph        - graph to rewrite in place.
//   name_scope   - prefix used to name the pattern nodes.
//   scope        - scope holding the parameter tensors; must not be null when
//                  a match is found.
//   with_fc_bias - whether the FC pattern includes the elementwise_add bias.
//
// Returns the number of subgraphs that were fused.
static int BuildFusion(Graph* graph, const std::string& name_scope,
                       Scope* scope, bool with_fc_bias) {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();

  // Build pattern
  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
                  ->assert_is_op_input("lookup_table")
                  ->assert_var_not_persistable();
  patterns::Embedding embedding_pattern(pattern, name_scope);
  // TODO(jczaja): Intermediate can only be for val that are not used anywhere
  //               but lookup table output may go into other LSTM (for reverse
  //               direction)
  auto* embedding_out = embedding_pattern(x);
  patterns::FC fc_pattern(pattern, name_scope);

  // fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
  auto* fc_out = fc_pattern(embedding_out, with_fc_bias)->AsIntermediate();
  patterns::LSTM lstm_pattern(pattern, name_scope);
  lstm_pattern(fc_out);

  // Create New OpDesc
  auto embedding_lstm_creator = [&](Node* embedding, Node* W, Node* lstm,
                                    Node* input, Node* weight_x, Node* weight_h,
                                    Node* bias, Node* hidden, Node* cell,
                                    Node* xx, Node* fc_bias) {
    OpDesc op_desc;
    op_desc.SetType("fused_embedding_fc_lstm");
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
    SET_IN(Ids, input);
    SET_IN(WeightH, weight_h);
    // Need to have this passed as we need Wc data for peephole connections
    SET_IN(Bias, bias);
#undef SET_IN

    // Multiply embeddings with Weights
    PADDLE_ENFORCE(scope);
    const std::string& embeddings = patterns::UniqueKey("Embeddings");
    auto* embeddings_var = scope->Var(embeddings);
    PADDLE_ENFORCE(embeddings_var);
    auto* embeddings_tensor =
        embeddings_var->GetMutable<framework::LoDTensor>();
    // Get WeightX size: [single_embedding, fc_size]
    // and embedding size: [dict_size, single_embedding]
    // and create new size of embeddings eg. [dict_size , hidden_size]
    auto* embedding_var = scope->FindVar(W->Name());
    PADDLE_ENFORCE(embedding_var);
    const auto& embedding_tensor = embedding_var->Get<framework::LoDTensor>();

    // FindVar may return null; check before dereferencing.
    auto* weightx_var = scope->FindVar(weight_x->Name());
    PADDLE_ENFORCE(weightx_var != nullptr,
                   "FC weight variable is not found in the scope.");
    const auto& weightx_tensor = weightx_var->Get<framework::LoDTensor>();
    embeddings_tensor->Resize(
        {embedding_tensor.dims()[0], weightx_tensor.dims()[1]});

    // Multiply embeddings via WeightsX and add bias
    auto embedding_data = embedding_tensor.data<float>();
    auto weightx_data = weightx_tensor.data<float>();
    auto embeddings_data =
        embeddings_tensor->mutable_data<float>(platform::CPUPlace());

    // Adding biases to GEMM result to be
    auto* lstm_bias_var = scope->FindVar(bias->Name());
    PADDLE_ENFORCE(lstm_bias_var);
    const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();

    auto alpha = 1.0f;
    auto beta = 1.0f;
    int m = embedding_tensor.dims()[0];
    int n = weightx_tensor.dims()[1];
    int k = embedding_tensor.dims()[1];

    // Copy only gate biases values (only actual bias data, not peephole
    // weights). The LSTM bias must hold at least n values, otherwise the copy
    // below would read past the end of the tensor.
    PADDLE_ENFORCE(lstm_bias_tensor.numel() >= n,
                   "LSTM bias holds fewer elements than the FC output width.");
    std::vector<float> combined_biases;
    combined_biases.reserve(n);
    std::copy_n(lstm_bias_tensor.data<float>(), n,
                std::back_inserter(combined_biases));

    if (with_fc_bias) {
      // Add FC-bias with LSTM-bias (into GEMM result to be)
      PADDLE_ENFORCE(fc_bias != nullptr,
                     "FC bias node is required when with_fc_bias is set.");
      auto* fc_bias_var = scope->FindVar(fc_bias->Name());
      PADDLE_ENFORCE(fc_bias_var != nullptr,
                     "FC bias variable is not found in the scope.");
      const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
      // combined_biases has exactly n entries; a longer FC bias would write
      // out of bounds.
      PADDLE_ENFORCE(fc_bias_tensor.numel() <= n,
                     "FC bias holds more elements than the FC output width.");
      const float* fc_bias_data = fc_bias_tensor.data<float>();
      for (int i = 0; i < fc_bias_tensor.numel(); i++) {
        combined_biases[i] += fc_bias_data[i];
      }
    }

    // broadcast biases: ones[m x 1] * combined_biases[1 x n] fills every row
    // of the result with the bias vector (beta == 0 overwrites the output).
    std::vector<float> ones(m, 1.0f);
    paddle::operators::math::CBlas<float>::GEMM(
        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, ones.data(),
        1, combined_biases.data(), n, 0.0f, embeddings_data, n);

    // Wx*embeddings + biases (beta == 1 accumulates onto the broadcast biases)
    paddle::operators::math::CBlas<float>::GEMM(
        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
        embedding_data, k, weightx_data, n, beta, embeddings_data, n);
    op_desc.SetInput("Embeddings", {embeddings});

    // Create temp variables.
    const std::string BatchedInput = patterns::UniqueKey("BatchedInput");
    const std::string BatchedCellPreAct =
        patterns::UniqueKey("BatchedCellPreAct");
    const std::string BatchedGate = patterns::UniqueKey("BatchedGate");

    scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
    scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
    scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();

    op_desc.SetInput("H0", {});
    op_desc.SetInput("C0", {});
    op_desc.SetOutput("Hidden", {hidden->Name()});
    op_desc.SetOutput("Cell", {cell->Name()});
    op_desc.SetOutput("XX", {xx->Name()});
    op_desc.SetOutput("BatchedGate", {BatchedGate});
    op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
    op_desc.SetOutput("BatchedInput", {BatchedInput});
    op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
    op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
    // TODO(TJ): get from attr
    op_desc.SetAttr("use_seq", true);

    // The remaining outputs are created in the scope attached to the graph.
    // It gets its own name so it no longer shadows the captured `scope`.
    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
    auto* graph_scope = graph->Get<Scope*>(kParamScopeAttr);
#define OP_SET_OUT(x)                            \
  const std::string x = patterns::UniqueKey(#x); \
  op_desc.SetOutput(#x, {x});                    \
  graph_scope->Var(x)->GetMutable<LoDTensor>()
    OP_SET_OUT(BatchedCell);
    OP_SET_OUT(BatchedHidden);
    OP_SET_OUT(ReorderedH0);
    OP_SET_OUT(ReorderedC0);
#undef OP_SET_OUT

    auto* op = graph->CreateOpNode(&op_desc);
    IR_NODE_LINK_TO(input, op);
    IR_NODE_LINK_TO(weight_x, op);
    IR_NODE_LINK_TO(weight_h, op);
    IR_NODE_LINK_TO(bias, op);
    IR_NODE_LINK_TO(op, hidden);
    return op;
  };

  int fusion_count{0};

  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(lookup_table, lookup_table, embedding_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(W, W, embedding_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);

    // TODO(jczaja): Add support for is_sparse / is_distributed
    auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse"));
    auto is_distributed =
        boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed"));

    // Sparse or distributed lookup tables are left unfused.
    if (is_sparse || is_distributed) {
      return;
    }

    if (with_fc_bias) {
      GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
      GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
      GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
      embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
                             Bias, Hidden, Cell, fc_out, fc_bias);
      // Remove unneeded nodes.
      // TODO(jczaja): Proper removing of lookup table
      std::unordered_set<const Node*> marked_nodes(
          //{lookup_table, mul, lstm, elementwise_add, fc_bias, W});
          {mul, lstm, elementwise_add, fc_bias});
      GraphSafeRemoveNodes(graph, marked_nodes);
    } else {
      GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
      embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
                             Bias, Hidden, Cell, fc_out, nullptr);
      // Remove unneeded nodes.
      // TODO(jczaja): Proper removing of lookup table
      // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
      // lstm});
      std::unordered_set<const Node*> marked_nodes({mul, lstm});
      GraphSafeRemoveNodes(graph, marked_nodes);
    }

    ++fusion_count;
  };

  gpd(graph, handler);

  return fusion_count;
}
// Runs the embedding + FC + LSTM fusion on `graph` (FC-with-bias variant),
// records how many subgraphs were fused, and hands the graph back.
std::unique_ptr<ir::Graph> EmbeddingFCLSTMFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  ir::Graph* raw_graph = graph.get();
  FusePassBase::Init(name_scope_, raw_graph);

  const bool with_fc_bias = true;
  const int fused =
      BuildFusion(raw_graph, name_scope_, param_scope(), with_fc_bias);
  AddStatis(fused);

  return graph;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Registers EmbeddingFCLSTMFusePass under the name
// "embedding_fc_lstm_fuse_pass".
REGISTER_PASS
(
embedding_fc_lstm_fuse_pass
,
paddle
::
framework
::
ir
::
EmbeddingFCLSTMFusePass
);
paddle/fluid/
inference/api/timer
.h
→
paddle/fluid/
framework/ir/embedding_fc_lstm_fuse_pass
.h
浏览文件 @
91756a5a
...
@@ -11,29 +11,30 @@
...
@@ -11,29 +11,30 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <chrono> // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
framework
{
namespace
ir
{
// Fusing of Embedding , FC and LSTM op
//
Timer for timer
//
Just FC without bias
class
Timer
{
class
EmbeddingFCLSTMFusePass
:
public
FusePassBase
{
public:
public:
std
::
chrono
::
high_resolution_clock
::
time_point
start
;
virtual
~
EmbeddingFCLSTMFusePass
()
{}
std
::
chrono
::
high_resolution_clock
::
time_point
startu
;
protected:
void
tic
()
{
start
=
std
::
chrono
::
high_resolution_clock
::
now
();
}
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
double
toc
()
{
startu
=
std
::
chrono
::
high_resolution_clock
::
now
();
const
std
::
string
name_scope_
{
"embedding_fc_lstm_fuse"
};
std
::
chrono
::
duration
<
double
>
time_span
=
std
::
chrono
::
duration_cast
<
std
::
chrono
::
duration
<
double
>>
(
startu
-
start
);
double
used_time_ms
=
static_cast
<
double
>
(
time_span
.
count
())
*
1000.0
;
return
used_time_ms
;
}
};
};
}
// namespace inference
}
// namespace ir
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_helper.cc
浏览文件 @
91756a5a
...
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/ir/graph_helper.h"
#include <algorithm>
#include <algorithm>
#include <deque>
#include <unordered_set>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
...
@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
...
@@ -113,6 +113,74 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
return
adj_list
;
return
adj_list
;
}
}
size_t
GraphNum
(
const
Graph
&
graph
)
{
std
::
unordered_set
<
ir
::
Node
*>
nodes
=
graph
.
Nodes
();
std
::
unordered_set
<
ir
::
Node
*>
visited_nodes
;
visited_nodes
.
reserve
(
nodes
.
size
());
std
::
deque
<
ir
::
Node
*>
q_nodes
;
std
::
vector
<
std
::
unordered_set
<
ir
::
Node
*>>
graph_nodes
;
std
::
unordered_set
<
ir
::
Node
*>
g_nodes
;
size_t
graph_count
=
0
;
auto
traverse_nodes
=
[
&
visited_nodes
,
&
q_nodes
](
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
std
::
copy_if
(
nodes
.
begin
(),
nodes
.
end
(),
std
::
back_inserter
(
q_nodes
),
[
&
visited_nodes
](
Node
*
node
)
{
return
!
visited_nodes
.
count
(
node
);
});
};
while
(
visited_nodes
.
size
()
!=
nodes
.
size
())
{
if
(
!
q_nodes
.
empty
())
{
auto
cur_node
=
q_nodes
.
front
();
q_nodes
.
pop_front
();
visited_nodes
.
insert
(
cur_node
);
g_nodes
.
insert
(
cur_node
);
traverse_nodes
(
cur_node
->
inputs
);
traverse_nodes
(
cur_node
->
outputs
);
}
else
{
++
graph_count
;
if
(
g_nodes
.
size
())
{
graph_nodes
.
emplace_back
(
g_nodes
);
}
g_nodes
.
clear
();
for
(
auto
&
n
:
nodes
)
{
if
(
visited_nodes
.
count
(
n
)
==
0
)
{
q_nodes
.
push_back
(
n
);
break
;
}
}
}
}
if
(
g_nodes
.
size
())
{
graph_nodes
.
emplace_back
(
g_nodes
);
}
if
(
VLOG_IS_ON
(
10
))
{
VLOG
(
10
)
<<
"graph_num: "
<<
graph_nodes
.
size
();
for
(
auto
&
g_n
:
graph_nodes
)
{
VLOG
(
10
)
<<
"graph_nodes: "
<<
g_n
.
size
();
if
(
g_n
.
size
()
<
10
)
{
std
::
stringstream
out
;
for
(
auto
&
node
:
g_n
)
{
out
<<
"
\n
Node: "
<<
node
->
Name
()
<<
" in ["
;
for
(
auto
&
n
:
node
->
inputs
)
{
out
<<
n
->
Name
()
<<
", "
;
}
out
<<
"], out["
;
for
(
auto
&
n
:
node
->
outputs
)
{
out
<<
n
->
Name
()
<<
", "
;
}
out
<<
"]"
;
}
VLOG
(
10
)
<<
out
.
str
();
}
}
}
return
graph_count
;
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_helper.h
浏览文件 @
91756a5a
...
@@ -27,6 +27,8 @@ namespace ir {
...
@@ -27,6 +27,8 @@ namespace ir {
// Test if the graph contains circle.
// Test if the graph contains circle.
bool
HasCircle
(
const
Graph
&
graph
);
bool
HasCircle
(
const
Graph
&
graph
);
size_t
GraphNum
(
const
Graph
&
graph
);
// Topology Sort the operations in the graph from inputs to outputs.
// Topology Sort the operations in the graph from inputs to outputs.
// `graph` cannot contain circle.
// `graph` cannot contain circle.
std
::
vector
<
ir
::
Node
*>
TopologySortOperations
(
const
Graph
&
graph
);
std
::
vector
<
ir
::
Node
*>
TopologySortOperations
(
const
Graph
&
graph
);
...
...
paddle/fluid/framework/ir/graph_helper_test.cc
浏览文件 @
91756a5a
...
@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) {
...
@@ -120,6 +120,97 @@ TEST(GraphHelperTest, Basic) {
ASSERT_EQ
(
node_map
.
at
(
"op2"
),
1UL
);
ASSERT_EQ
(
node_map
.
at
(
"op2"
),
1UL
);
ASSERT_TRUE
(
node_map
.
at
(
"op3"
)
<
node_map
.
at
(
"op5"
));
ASSERT_TRUE
(
node_map
.
at
(
"op3"
)
<
node_map
.
at
(
"op5"
));
}
}
// Test fixture: intentionally adds nothing to `g`, leaving it as constructed.
void
BuildZeroGraph
(
Graph
*
g
)
{}
// Test fixture: fills `g` with five operation nodes and four variable nodes
// that are all linked together into a single connected graph.
void BuildOneGraph(Graph* g) {
  // Records a producer -> consumer edge on both endpoints.
  auto connect = [](ir::Node* from, ir::Node* to) {
    from->outputs.push_back(to);
    to->inputs.push_back(from);
  };

  auto* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
  auto* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
  auto* o3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
  auto* o4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
  auto* o5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
  auto* v1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
  auto* v2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
  auto* v3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
  auto* v4 = g->CreateEmptyNode("var4", Node::Type::kVariable);

  // o1->v1->o2
  connect(o1, v1);
  connect(v1, o2);
  // o2->v2->o3
  // o2->v2->o4
  connect(o2, v2);
  connect(v2, o3);
  connect(v2, o4);
  // o2->v3->o5
  connect(o2, v3);
  connect(v3, o5);
  // o3->v4->o5
  connect(o3, v4);
  connect(v4, o5);
}
// Test fixture: same nodes as the single-graph fixture, but without the
// o2->v3 and v4->o5 edges, so {v3, o5} is cut off from the remaining nodes and
// `g` ends up with two disconnected parts.
void BuildTwoGraphs(Graph* g) {
  // Records a producer -> consumer edge on both endpoints.
  auto connect = [](ir::Node* from, ir::Node* to) {
    from->outputs.push_back(to);
    to->inputs.push_back(from);
  };

  auto* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
  auto* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
  auto* o3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
  auto* o4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
  auto* o5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
  auto* v1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
  auto* v2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
  auto* v3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
  auto* v4 = g->CreateEmptyNode("var4", Node::Type::kVariable);

  // o1->v1->o2
  connect(o1, v1);
  connect(v1, o2);
  // o2->v2->o3
  // o2->v2->o4
  connect(o2, v2);
  connect(v2, o3);
  connect(v2, o4);
  // v3->o5 only; the o2->v3 edge is deliberately left out.
  connect(v3, o5);
  // o3->v4 only; the v4->o5 edge is deliberately left out.
  connect(o3, v4);
}
// Checks that GraphNum reports 0, 1 and 2 for graphs built with zero, one and
// two disconnected parts respectively.
TEST(GraphHelperTest, GraphNum) {
  ProgramDesc prog;

  // Expected values are unsigned literals: GraphNum returns size_t, and the
  // other assertions in this file compare against UL literals as well.
  Graph g(prog);
  BuildZeroGraph(&g);
  ASSERT_EQ(GraphNum(g), 0UL);

  Graph g2(prog);
  BuildOneGraph(&g2);
  ASSERT_EQ(GraphNum(g2), 1UL);

  Graph g3(prog);
  BuildTwoGraphs(&g3);
  ASSERT_EQ(GraphNum(g3), 2UL);
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
91756a5a
...
@@ -692,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
...
@@ -692,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
}
}
}
}
// Builds the pattern for a `lookup_table` op: `x` is constrained to be the
// op's "Ids" input, W is its second input, and the returned node is the op's
// "Out" output.
PDNode *patterns::Embedding::operator()(PDNode *x) {
  x->assert_is_op_input("lookup_table", "Ids");

  auto *lookup_table_op =
      pattern->NewNode(lookup_table_repr())->assert_is_op("lookup_table");

  // Embedding table fed into the op.
  auto *W =
      pattern->NewNode(W_repr())->assert_is_op_input("lookup_table", "W");
  // Looked-up embeddings produced by the op.
  auto *Out =
      pattern->NewNode(Out_repr())->assert_is_op_output("lookup_table", "Out");

  lookup_table_op->LinksFrom({x, W});
  lookup_table_op->LinksTo({Out});
  return Out;
}
PDNode
*
patterns
::
LSTM
::
operator
()(
PDNode
*
x
)
{
PDNode
*
patterns
::
LSTM
::
operator
()(
PDNode
*
x
)
{
x
->
assert_is_op_input
(
"lstm"
,
"Input"
);
x
->
assert_is_op_input
(
"lstm"
,
"Input"
);
auto
*
lstm_op
=
pattern
->
NewNode
(
lstm_repr
())
->
assert_is_op
(
"lstm"
);
auto
*
lstm_op
=
pattern
->
NewNode
(
lstm_repr
())
->
assert_is_op
(
"lstm"
);
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
91756a5a
...
@@ -418,6 +418,23 @@ struct FC : public PatternBase {
...
@@ -418,6 +418,23 @@ struct FC : public PatternBase {
PATTERN_DECL_NODE
(
Out
);
PATTERN_DECL_NODE
(
Out
);
};
};
// Embedding
// Pattern helper for an embedding lookup. It is registered with PatternBase
// under the repr "embedding"; operator() takes a PDNode* and returns a PDNode*.
struct
Embedding
:
public
PatternBase
{
Embedding
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"embedding"
)
{}
PDNode
*
operator
()(
PDNode
*
x
);
// declare operator node's name
PATTERN_DECL_NODE
(
lookup_table
);
// Inputs
//
PATTERN_DECL_NODE
(
Ids
);
PATTERN_DECL_NODE
(
W
);
// embeddings
// Outputs
PATTERN_DECL_NODE
(
Out
);
};
struct
LSTM
:
public
PatternBase
{
struct
LSTM
:
public
PatternBase
{
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
LSTM
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"lstm"
)
{}
:
PatternBase
(
pattern
,
name_scope
,
"lstm"
)
{}
...
...
paddle/fluid/framework/ir/pass.cc
浏览文件 @
91756a5a
...
@@ -19,7 +19,6 @@ namespace paddle {
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
std
::
unique_ptr
<
Graph
>
Pass
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
std
::
unique_ptr
<
Graph
>
Pass
::
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
!
applied_
,
"Pass can only Apply() once."
);
PADDLE_ENFORCE
(
graph
.
get
(),
"graph passed to Pass::Apply() cannot be empty."
);
PADDLE_ENFORCE
(
graph
.
get
(),
"graph passed to Pass::Apply() cannot be empty."
);
for
(
const
std
::
string
&
attr
:
required_pass_attrs_
)
{
for
(
const
std
::
string
&
attr
:
required_pass_attrs_
)
{
PADDLE_ENFORCE
(
attrs_
.
find
(
attr
)
!=
attrs_
.
end
(),
PADDLE_ENFORCE
(
attrs_
.
find
(
attr
)
!=
attrs_
.
end
(),
...
...
paddle/fluid/framework/ir/pass.h
浏览文件 @
91756a5a
...
@@ -42,6 +42,8 @@ class Pass {
...
@@ -42,6 +42,8 @@ class Pass {
attr_dels_
.
clear
();
attr_dels_
.
clear
();
}
}
// Returns a copy of the type string stored in type_.
std
::
string
Type
()
const
{
return
type_
;
}
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
;
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
)
const
;
// Get a reference to the attributed previously set.
// Get a reference to the attributed previously set.
...
@@ -52,6 +54,21 @@ class Pass {
...
@@ -52,6 +54,21 @@ class Pass {
return
*
boost
::
any_cast
<
AttrType
*>
(
attrs_
.
at
(
attr_name
));
return
*
boost
::
any_cast
<
AttrType
*>
(
attrs_
.
at
(
attr_name
));
}
}
// Tells whether an attribute named `attr_name` is currently stored in attrs_.
bool Has(const std::string &attr_name) const {
  return attrs_.count(attr_name) > 0;
}
// Removes the attribute `attr_name`. If a deleter is registered for it in
// attr_dels_, the deleter is invoked and dropped first. Does nothing when the
// attribute is not set.
void Erase(const std::string &attr_name) {
  if (Has(attr_name)) {
    auto deleter = attr_dels_.find(attr_name);
    if (deleter != attr_dels_.end()) {
      deleter->second();
      attr_dels_.erase(deleter);
    }
    attrs_.erase(attr_name);
  }
}
// Set a pointer to the attribute. Pass takes ownership of the attribute.
// Set a pointer to the attribute. Pass takes ownership of the attribute.
template
<
typename
AttrType
>
template
<
typename
AttrType
>
void
Set
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
void
Set
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
...
@@ -68,13 +85,15 @@ class Pass {
...
@@ -68,13 +85,15 @@ class Pass {
// should delete the attribute.
// should delete the attribute.
template
<
typename
AttrType
>
template
<
typename
AttrType
>
void
SetNotOwned
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
void
SetNotOwned
(
const
std
::
string
&
attr_name
,
AttrType
*
attr
)
{
PADDLE_ENFORCE
(
attrs_
.
count
(
attr_name
)
==
0
);
PADDLE_ENFORCE
(
attrs_
.
count
(
attr_name
)
==
0
,
"%s already set in the pass"
,
attr_name
);
attrs_
[
attr_name
]
=
attr
;
attrs_
[
attr_name
]
=
attr
;
}
}
protected:
protected:
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
std
::
unique_ptr
<
Graph
>
graph
)
const
=
0
;
LOG
(
FATAL
)
<<
"Calling virtual Pass not implemented."
;
}
private:
private:
template
<
typename
PassType
>
template
<
typename
PassType
>
...
@@ -89,7 +108,10 @@ class Pass {
...
@@ -89,7 +108,10 @@ class Pass {
required_graph_attrs_
.
insert
(
attrs
.
begin
(),
attrs
.
end
());
required_graph_attrs_
.
insert
(
attrs
.
begin
(),
attrs
.
end
());
}
}
// Stores `type` in type_, overwriting any previous value.
void
RegisterType
(
const
std
::
string
&
type
)
{
type_
=
type
;
}
mutable
bool
applied_
{
false
};
mutable
bool
applied_
{
false
};
std
::
string
type_
;
std
::
unordered_set
<
std
::
string
>
required_pass_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_pass_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_graph_attrs_
;
std
::
unordered_set
<
std
::
string
>
required_graph_attrs_
;
std
::
map
<
std
::
string
,
boost
::
any
>
attrs_
;
std
::
map
<
std
::
string
,
boost
::
any
>
attrs_
;
...
@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar {
...
@@ -143,10 +165,11 @@ struct PassRegistrar : public Registrar {
PADDLE_ENFORCE
(
!
PassRegistry
::
Instance
().
Has
(
pass_type
),
PADDLE_ENFORCE
(
!
PassRegistry
::
Instance
().
Has
(
pass_type
),
"'%s' is registered more than once."
,
pass_type
);
"'%s' is registered more than once."
,
pass_type
);
PassRegistry
::
Instance
().
Insert
(
PassRegistry
::
Instance
().
Insert
(
pass_type
,
[
this
]()
->
std
::
unique_ptr
<
Pass
>
{
pass_type
,
[
this
,
pass_type
]()
->
std
::
unique_ptr
<
Pass
>
{
std
::
unique_ptr
<
Pass
>
pass
(
new
PassType
());
std
::
unique_ptr
<
Pass
>
pass
(
new
PassType
());
pass
->
RegisterRequiredPassAttrs
(
this
->
required_pass_attrs_
);
pass
->
RegisterRequiredPassAttrs
(
this
->
required_pass_attrs_
);
pass
->
RegisterRequiredGraphAttrs
(
this
->
required_graph_attrs_
);
pass
->
RegisterRequiredGraphAttrs
(
this
->
required_graph_attrs_
);
pass
->
RegisterType
(
pass_type
);
return
pass
;
return
pass
;
});
});
}
}
...
...
paddle/fluid/framework/ir/pass_builder.cc
0 → 100644
浏览文件 @
91756a5a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/pass_builder.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
std
::
shared_ptr
<
Pass
>
PassBuilder
::
AppendPass
(
const
std
::
string
&
pass_type
)
{
auto
pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
pass_type
);
passes_
.
emplace_back
(
pass
.
release
());
return
passes_
.
back
();
}
// Removes the pass stored at position `idx`.
// Fails the enforce check when `idx` is not a valid position.
void PassBuilder::RemovePass(size_t idx) {
  PADDLE_ENFORCE(passes_.size() > idx,
                 "Cannot remove pass at index %d: only %d passes exist.", idx,
                 passes_.size());
  passes_.erase(passes_.begin() + idx);
}
// Creates a pass of type `pass_type` through the PassRegistry and inserts it
// at position `idx`, shifting the pass currently there (and all later ones)
// back by one. `idx` may equal the current number of passes, in which case the
// pass is appended. Returns the inserted pass.
std::shared_ptr<Pass> PassBuilder::InsertPass(size_t idx,
                                              const std::string& pass_type) {
  PADDLE_ENFORCE(passes_.size() >= idx,
                 "Cannot insert pass at index %d: only %d passes exist.", idx,
                 passes_.size());
  // The unique_ptr converts directly into a shared_ptr; no raw-pointer
  // round-trip is needed.
  std::shared_ptr<Pass> pass = ir::PassRegistry::Instance().Get(pass_type);
  passes_.insert(passes_.begin() + idx, pass);
  return pass;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/pass_builder.h
0 → 100644
浏览文件 @
91756a5a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Keeps an ordered, editable list of IR passes. Passes are created by their
// registered type name and handed out as shared pointers.
class PassBuilder {
 public:
  PassBuilder() = default;
  virtual ~PassBuilder() = default;

  // Creates a pass of `pass_type` and adds it after all existing passes.
  // Returns the stored pass.
  std::shared_ptr<Pass> AppendPass(const std::string &pass_type);

  // Creates a pass of `pass_type` and places it at position `idx`; the pass
  // previously at `idx` (if any) and all later passes move one slot back.
  // `idx` must not exceed the current number of passes. Returns the stored
  // pass.
  std::shared_ptr<Pass> InsertPass(size_t idx, const std::string &pass_type);

  // Removes the existing pass at position `idx`. `idx` must be smaller than
  // the current number of passes.
  void RemovePass(size_t idx);

  // Returns a copy of the pass list (the pointers are shared, not cloned).
  std::vector<std::shared_ptr<Pass>> AllPasses() const { return passes_; }

 protected:
  // The passes in their current order.
  std::vector<std::shared_ptr<Pass>> passes_;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/pass_test.cc
浏览文件 @
91756a5a
...
@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) {
...
@@ -82,12 +82,10 @@ TEST(PassTest, TestPassAttrCheck) {
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_pass_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_pass_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_graph_attr"
),
2
);
ASSERT_EQ
(
graph
->
Get
<
int
>
(
"copy_test_graph_attr"
),
2
);
try
{
// Allow apply more than once.
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
graph
.
reset
(
new
Graph
(
prog
));
}
catch
(
paddle
::
platform
::
EnforceNotMet
e
)
{
graph
->
Set
<
int
>
(
"test_graph_attr"
,
new
int
);
exception
=
std
::
string
(
e
.
what
());
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
}
ASSERT_TRUE
(
exception
.
find
(
"Pass can only Apply() once"
)
!=
exception
.
npos
);
pass
=
PassRegistry
::
Instance
().
Get
(
"test_pass"
);
pass
=
PassRegistry
::
Instance
().
Get
(
"test_pass"
);
pass
->
SetNotOwned
<
int
>
(
"test_pass_attr"
,
&
val
);
pass
->
SetNotOwned
<
int
>
(
"test_pass_attr"
,
&
val
);
...
...
paddle/fluid/framework/naive_executor.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
framework
{
// This code could be shared with Executor.
//
// Makes `var` hold a default-constructed object of the C++ type that
// corresponds to `var_type`. RAW variables are left untouched because the
// operator that uses them is expected to call GetMutable itself. Any other
// unlisted type raises an error via PADDLE_THROW.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  switch (var_type) {
    case proto::VarType::LOD_TENSOR:
      var->GetMutable<LoDTensor>();
      break;
    case proto::VarType::SELECTED_ROWS:
      var->GetMutable<SelectedRows>();
      break;
    // Feed and fetch variables share the same holder type.
    case proto::VarType::FEED_MINIBATCH:
    case proto::VarType::FETCH_LIST:
      var->GetMutable<FeedFetchList>();
      break;
    case proto::VarType::STEP_SCOPES:
      var->GetMutable<std::vector<framework::Scope>>();
      break;
    case proto::VarType::LOD_RANK_TABLE:
      var->GetMutable<LoDRankTable>();
      break;
    case proto::VarType::LOD_TENSOR_ARRAY:
      var->GetMutable<LoDTensorArray>();
      break;
    case proto::VarType::PLACE_LIST:
      var->GetMutable<platform::PlaceList>();
      break;
    case proto::VarType::READER:
      var->GetMutable<ReaderHolder>();
      break;
    case proto::VarType::RAW:
      // GetMutable will be called in operator
      break;
    default:
      // The list below mirrors exactly the cases handled above. (The former
      // message advertised CHANNEL, which is not handled here, and omitted
      // STEP_SCOPES and LOD_TENSOR_ARRAY, which are.)
      PADDLE_THROW(
          "Variable type %d is not in "
          "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
          "STEP_SCOPES, LOD_RANK_TABLE, LOD_TENSOR_ARRAY, PLACE_LIST, "
          "READER, RAW]",
          var_type);
  }
}
// Sets up everything Run() needs for block `block_id` of `program_desc`:
// picks the working scope, creates the block's variables in it and
// instantiates the block's operators. When `with_feed_fetch_ops` is false the
// feed/fetch operators are left out.
void NaiveExecutor::Prepare(Scope *parent_scope,
                            const ProgramDesc &program_desc, int block_id,
                            bool with_feed_fetch_ops) {
  // With a parent, work in a fresh child scope of it; without one, allocate a
  // standalone scope.
  // NOTE(review): nothing in this file deletes the standalone scope allocated
  // here -- confirm who owns it.
  scope_ = parent_scope ? &parent_scope->NewScope() : new framework::Scope;

  CreateVariables(program_desc, scope_, block_id);
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
}
void
NaiveExecutor
::
Run
()
{
for
(
auto
&
op
:
ops_
)
{
VLOG
(
4
)
<<
"run "
<<
op
->
Type
();
op
->
Run
(
*
scope_
,
place_
);
}
}
void
NaiveExecutor
::
CreateVariables
(
const
ProgramDesc
&
desc
,
Scope
*
scope
,
int
block_id
)
{
PADDLE_ENFORCE
(
scope
);
auto
&
global_block
=
desc
.
Block
(
block_id
);
const
Scope
*
ancestor_scope
=
scope
;
while
(
ancestor_scope
->
parent
())
{
ancestor_scope
=
ancestor_scope
->
parent
();
}
if
(
ancestor_scope
!=
scope
)
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
if
(
var
->
Name
()
==
framework
::
kEmptyVarName
)
{
continue
;
}
// Create persistable vars in ancestor scope.
if
(
var
->
Persistable
())
{
auto
*
ptr
=
const_cast
<
Scope
*>
(
ancestor_scope
)
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create Variable "
<<
var
->
Name
()
<<
" global, which pointer is "
<<
ptr
;
}
else
{
// Create temporary variables in local scope.
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create Variable "
<<
var
->
Name
()
<<
" locally, which pointer is "
<<
ptr
;
}
}
}
else
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
3
)
<<
"Create variable "
<<
var
->
Name
()
<<
", which pointer is "
<<
ptr
;
}
}
}
// Instantiates the operators of block `block_id` of `desc` and appends them to
// ops_ in program order. When `with_feed_fetch_ops` is false, "feed" and
// "fetch" operators are not instantiated; a log line is printed for each one
// skipped.
void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
                              bool with_feed_fetch_ops) {
  for (const auto &op_desc : desc.Block(block_id).AllOps()) {
    // Common case first: the op is wanted, so build it and move on.
    if (with_feed_fetch_ops ||
        (op_desc->Type() != "feed" && op_desc->Type() != "fetch")) {
      ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
      continue;
    }

    // Only a feed/fetch op with feed/fetch handling disabled reaches this
    // point.
    string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
                          op_desc->Input("X")[0], op_desc->Type(),
                          op_desc->Output("Out")[0]);
  }
}
// Looks up the variable called `name` in the executor's scope and returns a
// mutable pointer to the LoDTensor it holds, so callers can read or fill the
// tensor directly. Fails an enforce if no scope has been set up yet or if the
// variable cannot be found.
LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
  PADDLE_ENFORCE(scope_, "Need to init scope first");
  auto *var = scope_->FindVar(name);
  // The format string contains a %s placeholder, so the variable name has to
  // be supplied as its argument.
  PADDLE_ENFORCE(var, "No variable [%s] in the scope", name);
  // Get<>() yields a const reference; callers need write access.
  auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
  return tensor;
}
void
NaiveExecutor
::
CleanFeedFetchOps
()
{
std
::
vector
<
std
::
unique_ptr
<
OperatorBase
>>
ops
;
for
(
auto
&
op
:
ops_
)
{
if
(
op
->
Type
()
!=
"feed"
&&
op
->
Type
()
!=
"fetch"
)
{
ops
.
emplace_back
(
std
::
move
(
op
));
}
}
ops_
.
swap
(
ops
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/naive_executor.h
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
/*
 * Simple, intuitive and effective. Only a single thread is supported, and it
 * is currently designed for inference.
 */
class NaiveExecutor {
 public:
  explicit NaiveExecutor(const platform::Place &place) : place_(place) {}

  // Sets up the working scope (a child of `parent_scope`, or a standalone
  // scope when `parent_scope` is null), creates the variables of block
  // `block_id` and instantiates its operators.
  // @with_feed_fetch_ops: whether to work with the feed and fetch operators.
  void Prepare(Scope *parent_scope, const ProgramDesc &program_desc,
               int block_id, bool with_feed_fetch_ops);

  // Run all the operators.
  void Run();

  // Get a tensor to operate on directly, without the need for feed ops.
  LoDTensor *FindTensor(const std::string &name);

  // Returns the working scope; null until Prepare() has been called.
  Scope *scope() { return scope_; }

  // Drops the feed and fetch operators from the prepared operator list.
  void CleanFeedFetchOps();

 protected:
  void CreateVariables(const ProgramDesc &desc, Scope *scope, int block_id);

  void CreateOps(const ProgramDesc &desc, int block_id,
                 bool with_feed_fetch_ops);

 private:
  const platform::Place place_;
  // Cache the required resources to avoid recreating them.
  std::vector<std::unique_ptr<OperatorBase>> ops_;
  // Starts out null so that the "Need to init scope first" check in
  // FindTensor() is well defined before Prepare() has run.
  Scope *scope_{nullptr};
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/naive_executor_test.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
namespace
paddle
{
namespace
framework
{
// Builds a one-operator program (c = elementwise_add(a, b)), runs it with a
// NaiveExecutor on CPU without feed/fetch operators, and checks the sum.
TEST(NaiveExecutor, Basic) {
  ProgramDesc program;
  auto *block = program.MutableBlock(0);

  // Two inputs (a, b) and one output (c), all LoD tensors.
  block->Var("a")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("b")->SetType(proto::VarType::LOD_TENSOR);
  block->Var("c")->SetType(proto::VarType::LOD_TENSOR);

  auto *add_op = block->AppendOp();
  add_op->SetType("elementwise_add");
  add_op->SetInput("X", {"a"});
  add_op->SetInput("Y", {"b"});
  add_op->SetOutput("Out", {"c"});

  auto place = platform::CPUPlace();
  NaiveExecutor exe(place);
  exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);

  // Access the tensors directly instead of going through feed ops.
  auto *a_tensor = exe.FindTensor("a");
  auto *b_tensor = exe.FindTensor("b");
  auto *c_tensor = exe.FindTensor("c");

  a_tensor->Resize({1, 4});
  b_tensor->Resize({1, 4});
  c_tensor->Resize({1, 4});

  // Allocate the input buffers, then fill them.
  a_tensor->mutable_data<float>(place);
  b_tensor->mutable_data<float>(place);

  float a_values[] = {0, 1, 2, 3};
  float b_values[] = {0.0, .1, .2, .3};
  std::copy_n(a_values, 4, a_tensor->mutable_data<float>(place));
  std::copy_n(b_values, 4, b_tensor->mutable_data<float>(place));

  exe.Run();

  // Element i of the result should be i + 0.1 * i.
  auto *result = c_tensor->mutable_data<float>(place);
  for (int i = 0; i < 4; i++) {
    EXPECT_NEAR(result[i], 1.1 * i, 1e-3);
  }
}
}
// namespace framework
}
// namespace paddle
USE_OP
(
elementwise_add
);
paddle/fluid/framework/op_proto_maker.cc
浏览文件 @
91756a5a
...
@@ -132,9 +132,7 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
...
@@ -132,9 +132,7 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
.
SetDefault
(
""
);
.
SetDefault
(
""
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
OpCreationCallstackAttrName
(),
"Callstack for Op Creatation."
)
.
SetDefault
({});
Validate
();
Validate
();
}
}
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
91756a5a
...
@@ -46,7 +46,6 @@ class OpProtoAndCheckerMaker {
...
@@ -46,7 +46,6 @@ class OpProtoAndCheckerMaker {
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpCreationCallstackAttrName
()
{
return
"op_callstack"
;
}
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
91756a5a
...
@@ -14,17 +14,15 @@ limitations under the License. */
...
@@ -14,17 +14,15 @@ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/operator.h"
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <algorithm>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op
_proto_make
r.h"
#include "paddle/fluid/framework/op
erato
r.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -142,48 +140,19 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
...
@@ -142,48 +140,19 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
try
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
if
(
VLOG_IS_ON
(
4
))
{
if
(
platform
::
is_gpu_place
(
place
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
#else
#else
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
platform
::
SetDeviceId
(
dev_id
);
#endif
#endif
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
catch
(
platform
::
EnforceNotMet
exception
)
{
if
(
Attrs
().
count
(
"sub_block"
)
!=
0
)
{
throw
exception
;
}
auto
&
callstack
=
Attr
<
std
::
vector
<
std
::
string
>>
(
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
());
if
(
callstack
.
empty
())
{
throw
exception
;
}
std
::
ostringstream
sout
;
sout
<<
"Invoke operator "
<<
Type
()
<<
" error.
\n
"
;
sout
<<
"Python Callstacks:
\n
"
;
for
(
auto
&
line
:
callstack
)
{
sout
<<
line
;
}
sout
<<
"C++ Callstacks:
\n
"
;
sout
<<
exception
.
err_str_
;
exception
.
err_str_
=
sout
.
str
();
throw
exception
;
}
catch
(...)
{
std
::
rethrow_exception
(
std
::
current_exception
());
}
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
...
@@ -211,7 +180,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
...
@@ -211,7 +180,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
}
}
bool
OperatorBase
::
HasOutputs
(
const
std
::
string
&
name
)
const
{
bool
OperatorBase
::
HasOutputs
(
const
std
::
string
&
name
)
const
{
if
(
outputs_
.
end
()
!=
outputs_
.
find
(
name
))
{
if
(
outputs_
.
find
(
name
)
!=
outputs_
.
end
(
))
{
return
true
;
return
true
;
}
else
{
}
else
{
return
false
;
return
false
;
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
91756a5a
...
@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and
...
@@ -13,21 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include <string>
#include <string>
#include <tuple>
#include <tuple>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -35,80 +33,6 @@ limitations under the License. */
...
@@ -35,80 +33,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
std
::
unique_ptr
<
ir
::
Graph
>
ApplyParallelExecutorPass
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
bool
use_cuda
,
#ifdef PADDLE_WITH_CUDA
const
BuildStrategy
&
strategy
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
{
#else
const
BuildStrategy
&
strategy
)
{
#endif
// Convert the program to graph.
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
main_program
));
// Apply a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_original_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
graph
=
viz_pass
->
Apply
(
std
::
move
(
graph
));
}
// Apply op fusion.
if
(
strategy
.
fuse_elewise_add_act_ops_
)
{
auto
fuse_elewise_add_act_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"fuse_elewise_add_act_pass"
);
graph
=
fuse_elewise_add_act_pass
->
Apply
(
std
::
move
(
graph
));
// Apply a graph viz pass to record a graph.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
viz_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"graph_viz_pass"
);
const
std
::
string
graph_path
=
string
::
Sprintf
(
"%s%s"
,
strategy
.
debug_graphviz_path_
.
c_str
(),
"_fused_graph"
);
viz_pass
->
Set
<
std
::
string
>
(
"graph_viz_path"
,
new
std
::
string
(
graph_path
));
graph
=
viz_pass
->
Apply
(
std
::
move
(
graph
));
}
}
// Convert graph to run on multi-devices.
auto
multi_devices_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_pass"
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
multi_devices_pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
&
strategy
);
#ifdef PADDLE_WITH_CUDA
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
multi_devices_pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
#endif
graph
=
multi_devices_pass
->
Apply
(
std
::
move
(
graph
));
// Apply a graph print pass to record a graph with device info.
if
(
!
strategy
.
debug_graphviz_path_
.
empty
())
{
auto
multi_devices_print_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_print_pass"
);
multi_devices_print_pass
->
SetNotOwned
<
const
std
::
string
>
(
"debug_graphviz_path"
,
&
strategy
.
debug_graphviz_path_
);
multi_devices_print_pass
->
Set
<
details
::
GraphvizSSAGraphPrinter
>
(
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
graph
=
multi_devices_print_pass
->
Apply
(
std
::
move
(
graph
));
}
// Verify that the graph is correct for multi-device executor.
auto
multi_devices_check_pass
=
ir
::
PassRegistry
::
Instance
().
Get
(
"multi_devices_check_pass"
);
graph
=
multi_devices_check_pass
->
Apply
(
std
::
move
(
graph
));
return
graph
;
}
class
ParallelExecutorPrivate
{
class
ParallelExecutorPrivate
{
public:
public:
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
explicit
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
)
...
@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor(
...
@@ -199,10 +123,9 @@ ParallelExecutor::ParallelExecutor(
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
// ncclOp
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
ApplyParallelExecutorPass
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
build_strategy
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
member_
->
nccl_ctxs_
.
get
());
auto
max_memory_size
=
GetEagerDeletionThreshold
();
auto
max_memory_size
=
GetEagerDeletionThreshold
();
if
(
max_memory_size
>=
0
)
{
if
(
max_memory_size
>=
0
)
{
...
@@ -228,11 +151,19 @@ ParallelExecutor::ParallelExecutor(
...
@@ -228,11 +151,19 @@ ParallelExecutor::ParallelExecutor(
}
}
}
}
#else
#else
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
ApplyParallelExecutorPass
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
build_strategy
);
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
#endif
if
(
VLOG_IS_ON
(
5
))
{
// If the loss_var_name is given, the number of graph should be only one.
if
(
loss_var_name
.
size
())
{
PADDLE_ENFORCE_EQ
(
ir
::
GraphNum
(
*
graph
),
1
,
"The number of graph should be only one"
);
}
}
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
...
@@ -319,6 +250,13 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
...
@@ -319,6 +250,13 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
!
gcs_
.
empty
())
{
if
(
!
gcs_
.
empty
())
{
ResetReferenceCount
();
ResetReferenceCount
();
for
(
auto
&
pair
:
cur_ref_cnts_
)
{
auto
&
name_map
=
*
(
pair
.
second
);
for
(
auto
&
fetch_name
:
fetch_tensors
)
{
name_map
.
erase
(
fetch_name
);
}
name_map
.
erase
(
fetched_var_name
);
}
}
}
#endif
#endif
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
...
@@ -373,12 +311,6 @@ ParallelExecutor::~ParallelExecutor() {
...
@@ -373,12 +311,6 @@ ParallelExecutor::~ParallelExecutor() {
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
fuse_elewise_add_act_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
multi_devices_pass
);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_print_pass
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
USE_PASS
(
reference_count_pass
);
USE_PASS
(
reference_count_pass
);
#endif
#endif
paddle/fluid/framework/parallel_executor.h
浏览文件 @
91756a5a
...
@@ -14,14 +14,14 @@ limitations under the License. */
...
@@ -14,14 +14,14 @@ limitations under the License. */
#pragma once
#pragma once
#include <paddle/fluid/framework/details/build_strategy.h>
#include <atomic>
#include <atomic>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
...
...
paddle/fluid/framework/tuple.h
浏览文件 @
91756a5a
...
@@ -17,7 +17,6 @@ limitations under the License. */
...
@@ -17,7 +17,6 @@ limitations under the License. */
#include <stdexcept>
#include <stdexcept>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/var_desc.h"
...
...
paddle/fluid/framework/var_desc.cc
浏览文件 @
91756a5a
...
@@ -88,13 +88,7 @@ std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
...
@@ -88,13 +88,7 @@ std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
}
}
void
VarDesc
::
SetDataType
(
proto
::
VarType
::
Type
data_type
)
{
void
VarDesc
::
SetDataType
(
proto
::
VarType
::
Type
data_type
)
{
switch
(
desc_
.
type
().
type
())
{
mutable_tensor_desc
()
->
set_data_type
(
data_type
);
case
proto
::
VarType
::
CHANNEL
:
mutable_channel_desc
()
->
set_data_type
(
data_type
);
break
;
default:
mutable_tensor_desc
()
->
set_data_type
(
data_type
);
}
}
}
void
VarDesc
::
SetDataTypes
(
void
VarDesc
::
SetDataTypes
(
...
@@ -115,13 +109,7 @@ void VarDesc::SetDataTypes(
...
@@ -115,13 +109,7 @@ void VarDesc::SetDataTypes(
}
}
proto
::
VarType
::
Type
VarDesc
::
GetDataType
()
const
{
proto
::
VarType
::
Type
VarDesc
::
GetDataType
()
const
{
switch
(
desc_
.
type
().
type
())
{
return
tensor_desc
().
data_type
();
case
proto
::
VarType
::
CHANNEL
:
return
channel_desc
().
data_type
();
break
;
default:
return
tensor_desc
().
data_type
();
}
}
}
std
::
vector
<
proto
::
VarType
::
Type
>
VarDesc
::
GetDataTypes
()
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
VarDesc
::
GetDataTypes
()
const
{
...
@@ -134,17 +122,6 @@ std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
...
@@ -134,17 +122,6 @@ std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
return
res
;
return
res
;
}
}
void
VarDesc
::
SetCapacity
(
int64_t
capacity
)
{
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
desc_
.
mutable_type
()
->
mutable_channel
()
->
set_capacity
(
capacity
);
break
;
default:
PADDLE_THROW
(
"Setting 'capacity' is not supported by the type of var %s."
,
this
->
Name
());
}
}
void
VarDesc
::
SetLoDLevel
(
int32_t
lod_level
)
{
void
VarDesc
::
SetLoDLevel
(
int32_t
lod_level
)
{
switch
(
desc_
.
type
().
type
())
{
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
LOD_TENSOR
:
case
proto
::
VarType
::
LOD_TENSOR
:
...
@@ -214,19 +191,6 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
...
@@ -214,19 +191,6 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
}
}
}
}
const
proto
::
VarType
::
ChannelDesc
&
VarDesc
::
channel_desc
()
const
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var's type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
return
desc_
.
type
().
channel
();
default:
PADDLE_THROW
(
"Getting 'channel_desc' is not supported by the type of var %s."
,
this
->
Name
());
}
}
const
proto
::
VarType
::
TensorDesc
&
VarDesc
::
tensor_desc
()
const
{
const
proto
::
VarType
::
TensorDesc
&
VarDesc
::
tensor_desc
()
const
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var's type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var's type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
...
@@ -262,20 +226,6 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
...
@@ -262,20 +226,6 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
}
}
}
}
proto
::
VarType
::
ChannelDesc
*
VarDesc
::
mutable_channel_desc
()
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
switch
(
desc_
.
type
().
type
())
{
case
proto
::
VarType
::
CHANNEL
:
return
desc_
.
mutable_type
()
->
mutable_channel
();
default:
PADDLE_THROW
(
"Getting 'mutable_channel_desc' is not supported by the type of var "
"%s."
,
this
->
Name
());
}
}
proto
::
VarType
::
TensorDesc
*
VarDesc
::
mutable_tensor_desc
()
{
proto
::
VarType
::
TensorDesc
*
VarDesc
::
mutable_tensor_desc
()
{
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
PADDLE_ENFORCE
(
desc_
.
type
().
has_type
(),
"The var type hasn't been set."
);
...
...
paddle/fluid/framework/var_desc.h
浏览文件 @
91756a5a
...
@@ -87,8 +87,6 @@ class VarDesc {
...
@@ -87,8 +87,6 @@ class VarDesc {
void
SetDataTypes
(
void
SetDataTypes
(
const
std
::
vector
<
proto
::
VarType
::
Type
>
&
multiple_data_type
);
const
std
::
vector
<
proto
::
VarType
::
Type
>
&
multiple_data_type
);
void
SetCapacity
(
int64_t
capacity
);
proto
::
VarType
::
Type
GetDataType
()
const
;
proto
::
VarType
::
Type
GetDataType
()
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetDataTypes
()
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetDataTypes
()
const
;
...
@@ -110,10 +108,8 @@ class VarDesc {
...
@@ -110,10 +108,8 @@ class VarDesc {
void
SetPersistable
(
bool
persistable
)
{
desc_
.
set_persistable
(
persistable
);
}
void
SetPersistable
(
bool
persistable
)
{
desc_
.
set_persistable
(
persistable
);
}
private:
private:
const
proto
::
VarType
::
ChannelDesc
&
channel_desc
()
const
;
const
proto
::
VarType
::
TensorDesc
&
tensor_desc
()
const
;
const
proto
::
VarType
::
TensorDesc
&
tensor_desc
()
const
;
std
::
vector
<
proto
::
VarType
::
TensorDesc
>
tensor_descs
()
const
;
std
::
vector
<
proto
::
VarType
::
TensorDesc
>
tensor_descs
()
const
;
proto
::
VarType
::
ChannelDesc
*
mutable_channel_desc
();
proto
::
VarType
::
TensorDesc
*
mutable_tensor_desc
();
proto
::
VarType
::
TensorDesc
*
mutable_tensor_desc
();
std
::
vector
<
proto
::
VarType
::
TensorDesc
*>
mutable_tensor_descs
();
std
::
vector
<
proto
::
VarType
::
TensorDesc
*>
mutable_tensor_descs
();
...
...
paddle/fluid/framework/var_type.h
浏览文件 @
91756a5a
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
@@ -41,8 +40,6 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
...
@@ -41,8 +40,6 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
return
proto
::
VarType_Type_SELECTED_ROWS
;
return
proto
::
VarType_Type_SELECTED_ROWS
;
}
else
if
(
IsType
<
ReaderHolder
>
(
type
))
{
}
else
if
(
IsType
<
ReaderHolder
>
(
type
))
{
return
proto
::
VarType_Type_READER
;
return
proto
::
VarType_Type_READER
;
}
else
if
(
IsType
<
ChannelHolder
>
(
type
))
{
return
proto
::
VarType_Type_CHANNEL
;
}
else
{
}
else
{
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
}
}
...
@@ -66,9 +63,6 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
...
@@ -66,9 +63,6 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
case
proto
::
VarType_Type_READER
:
case
proto
::
VarType_Type_READER
:
visitor
(
var
.
Get
<
ReaderHolder
>
());
visitor
(
var
.
Get
<
ReaderHolder
>
());
return
;
return
;
case
proto
::
VarType_Type_CHANNEL
:
visitor
(
var
.
Get
<
ChannelHolder
>
());
return
;
default:
default:
PADDLE_THROW
(
"Not supported visit type, %d"
,
ToVarType
(
var
.
Type
()));
PADDLE_THROW
(
"Not supported visit type, %d"
,
ToVarType
(
var
.
Type
()));
}
}
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -20,7 +20,8 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
...
@@ -20,7 +20,8 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api analysis_predictor
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api paddle_inference_api
analysis_predictor zero_copy_tensor
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
...
@@ -31,6 +32,7 @@ endif()
...
@@ -31,6 +32,7 @@ endif()
cc_library
(
paddle_fluid_shared SHARED
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
SRCS io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/details/zero_copy_tensor.cc
DEPS
${
fluid_modules
}
paddle_fluid_api
)
DEPS
${
fluid_modules
}
paddle_fluid_api
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
...
@@ -53,7 +55,7 @@ if(NOT APPLE)
...
@@ -53,7 +55,7 @@ if(NOT APPLE)
endif
()
endif
()
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
# tests/book depends the models that generated by python/paddle/fluid/tests/book
# tests/book depends the models that generated by python/paddle/fluid/tests/book
add_subdirectory
(
tests/book
)
add_subdirectory
(
tests/book
)
if
(
WITH_INFERENCE_API_TEST
)
if
(
WITH_INFERENCE_API_TEST
)
add_subdirectory
(
tests/api
)
add_subdirectory
(
tests/api
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
91756a5a
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
)
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
)
set
(
analysis_deps
set
(
analysis_deps
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log
)
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log
)
cc_library
(
analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
cc_library
(
analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
analyzer.cc
analyzer.cc
...
...
paddle/fluid/inference/analysis/analysis_pass.h
浏览文件 @
91756a5a
...
@@ -41,12 +41,6 @@ class AnalysisPass {
...
@@ -41,12 +41,6 @@ class AnalysisPass {
// all passes have run.
// all passes have run.
virtual
bool
Finalize
()
{
return
false
;
}
virtual
bool
Finalize
()
{
return
false
;
}
// Get a Pass appropriate to print the Node this pass operates on.
virtual
AnalysisPass
*
CreatePrinterPass
(
std
::
ostream
&
os
,
const
std
::
string
&
banner
)
const
{
return
nullptr
;
}
// Create a debugger Pass that draw the DFG by graphviz toolkit.
// Create a debugger Pass that draw the DFG by graphviz toolkit.
virtual
AnalysisPass
*
CreateGraphvizDebugerPass
()
const
{
return
nullptr
;
}
virtual
AnalysisPass
*
CreateGraphvizDebugerPass
()
const
{
return
nullptr
;
}
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
91756a5a
...
@@ -64,14 +64,15 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -64,14 +64,15 @@ class Analyzer : public OrderedRegistry<PassManager> {
// larger fusion.
// larger fusion.
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
// Manual update the passes here.
// Manual update the passes here.
"infer_clean_graph_pass"
,
//
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
#endif
#endif
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -18,10 +18,10 @@ if(APPLE)
...
@@ -18,10 +18,10 @@ if(APPLE)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
${
GLOB_PASS_LIB
}
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager
naive_executor
${
GLOB_PASS_LIB
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine
analysis_predictor
)
endif
()
endif
()
function
(
inference_api_test TARGET_NAME
)
function
(
inference_api_test TARGET_NAME
)
...
@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
...
@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
endif
(
WITH_TESTING
)
endif
(
WITH_TESTING
)
endfunction
(
inference_api_test
)
endfunction
(
inference_api_test
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
SRCS api_tester.cc
DEPS paddle_inference_api
)
DEPS paddle_inference_api
)
...
@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
...
@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
inference_api_test
(
test_api_impl SRC api_impl_tester.cc
inference_api_test
(
test_api_impl SRC api_impl_tester.cc
ARGS test_word2vec test_image_classification
)
ARGS test_word2vec test_image_classification
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/tests
)
cc_test
(
test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor
${
inference_deps
}
paddle_inference_api
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
cc_library
(
paddle_inference_tensorrt_subgraph_engine
cc_library
(
paddle_inference_tensorrt_subgraph_engine
SRCS api_tensorrt_subgraph_engine.cc
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter
)
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter
zero_copy_tensor_dummy
)
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
endif
()
endif
()
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
# compile the libinference_anakin_api.a and anakin.so.
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
scope zero_copy_tensor_dummy
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
scope
)
function
(
anakin_target target_name
)
function
(
anakin_target target_name
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
endfunction
()
endfunction
()
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
91756a5a
...
@@ -16,9 +16,12 @@
...
@@ -16,9 +16,12 @@
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
...
@@ -28,8 +31,11 @@ DECLARE_bool(profile);
...
@@ -28,8 +31,11 @@ DECLARE_bool(profile);
namespace
paddle
{
namespace
paddle
{
using
contrib
::
AnalysisConfig
;
bool
AnalysisPredictor
::
Init
(
bool
AnalysisPredictor
::
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
...
@@ -43,7 +49,8 @@ bool AnalysisPredictor::Init(
...
@@ -43,7 +49,8 @@ bool AnalysisPredictor::Init(
if
(
config_
.
use_gpu
)
{
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently"
;
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently, enable_ir_optim "
"is turned false."
;
config_
.
enable_ir_optim
=
false
;
config_
.
enable_ir_optim
=
false
;
}
else
{
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
...
@@ -56,37 +63,134 @@ bool AnalysisPredictor::Init(
...
@@ -56,37 +63,134 @@ bool AnalysisPredictor::Init(
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
}
}
executor_
.
reset
(
new
paddle
::
framework
::
Executor
(
place_
));
executor_
.
reset
(
new
paddle
::
framework
::
Naive
Executor
(
place_
));
// Initialize the inference program
if
(
!
program
)
{
if
(
!
config_
.
model_dir
.
empty
())
{
if
(
!
LoadProgramDesc
())
return
false
;
// Parameters are saved in separate files sited in
OptimizeInferenceProgram
();
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
}
else
{
LOG
(
ERROR
)
<<
"fail to load inference model from "
<<
config_
.
model_dir
;
inference_program_
=
program
;
}
executor_
->
Prepare
(
scope_
.
get
(),
*
inference_program_
,
0
,
config_
.
use_feed_fetch_ops
);
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch
();
return
true
;
}
bool
AnalysisPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
VLOG
(
3
)
<<
"Predictor::predict"
;
inference
::
Timer
timer
;
timer
.
tic
();
// set feed variable
std
::
vector
<
framework
::
LoDTensor
>
feeds
;
framework
::
Scope
*
scope
=
sub_scope_
?
sub_scope_
:
scope_
.
get
();
if
(
!
SetFeed
(
inputs
,
scope
))
{
LOG
(
ERROR
)
<<
"fail to set feed"
;
return
false
;
return
false
;
}
}
// Run the inference program
// if share variables, we need not create variables
executor_
->
Run
();
OptimizeInferenceProgram
();
// get fetch variable
if
(
config_
.
_use_mkldnn
)
{
if
(
!
GetFetch
(
output_data
,
scope
))
{
executor_
->
EnableMKLDNN
(
*
inference_program_
);
LOG
(
ERROR
)
<<
"fail to get fetches"
;
return
false
;
}
}
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
return
true
;
}
VLOG
(
5
)
<<
"to create variables"
;
bool
AnalysisPredictor
::
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
PADDLE_ENFORCE
(
scope_
.
get
());
framework
::
Scope
*
scope
)
{
executor_
->
CreateVariables
(
*
inference_program_
,
VLOG
(
3
)
<<
"Predictor::set_feed"
;
sub_scope_
?
sub_scope_
:
scope_
.
get
(),
0
);
if
(
inputs
.
size
()
!=
feeds_
.
size
())
{
// Get the feed_target_names and fetch_target_names
LOG
(
ERROR
)
<<
"wrong feed input size, need "
<<
feeds_
.
size
()
<<
" but get "
PrepareFeedFetch
();
<<
inputs
.
size
();
return
false
;
}
// Cache the inputs memory for better concurrency performance.
feed_tensors_
.
resize
(
inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
auto
&
input
=
feed_tensors_
[
i
];
framework
::
DDim
ddim
=
framework
::
make_ddim
(
inputs
[
i
].
shape
);
void
*
input_ptr
;
if
(
inputs
[
i
].
dtype
==
PaddleDType
::
INT64
)
{
input_ptr
=
input
.
mutable_data
<
int64_t
>
(
ddim
,
platform
::
CPUPlace
());
}
else
if
(
inputs
[
i
].
dtype
==
PaddleDType
::
FLOAT32
)
{
input_ptr
=
input
.
mutable_data
<
float
>
(
ddim
,
platform
::
CPUPlace
());
}
else
{
LOG
(
ERROR
)
<<
"unsupported feed type "
<<
inputs
[
i
].
dtype
;
return
false
;
}
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
(),
inputs
[
i
].
data
.
length
());
// TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
framework
::
LoD
lod
;
for
(
auto
&
level
:
inputs
[
i
].
lod
)
{
lod
.
emplace_back
(
level
);
}
input
.
set_lod
(
lod
);
int
idx
=
-
1
;
if
(
config_
.
specify_input_name
)
{
idx
=
feed_names_
[
inputs
[
i
].
name
];
}
else
{
idx
=
boost
::
get
<
int
>
(
feeds_
[
i
]
->
GetAttr
(
"col"
));
}
framework
::
SetFeedVariable
(
scope
,
input
,
"feed"
,
idx
);
}
return
true
;
}
template
<
typename
T
>
void
AnalysisPredictor
::
GetFetchOne
(
const
framework
::
LoDTensor
&
fetch
,
PaddleTensor
*
output
)
{
// set shape.
auto
shape
=
framework
::
vectorize
(
fetch
.
dims
());
output
->
shape
.
assign
(
shape
.
begin
(),
shape
.
end
());
// set data.
const
T
*
data
=
fetch
.
data
<
T
>
();
int
num_elems
=
inference
::
VecReduceToInt
(
shape
);
output
->
data
.
Resize
(
num_elems
*
sizeof
(
T
));
// The fetched tensor output by fetch op, should always in CPU memory, so just
// copy.
memcpy
(
output
->
data
.
data
(),
data
,
num_elems
*
sizeof
(
T
));
// set lod
output
->
lod
.
clear
();
for
(
auto
&
level
:
fetch
.
lod
())
{
output
->
lod
.
emplace_back
(
level
.
begin
(),
level
.
end
());
}
}
bool
AnalysisPredictor
::
GetFetch
(
std
::
vector
<
PaddleTensor
>
*
outputs
,
framework
::
Scope
*
scope
)
{
VLOG
(
3
)
<<
"Predictor::get_fetch"
;
outputs
->
resize
(
fetchs_
.
size
());
for
(
size_t
i
=
0
;
i
<
fetchs_
.
size
();
++
i
)
{
int
idx
=
boost
::
get
<
int
>
(
fetchs_
[
i
]
->
GetAttr
(
"col"
));
PADDLE_ENFORCE
((
size_t
)
idx
==
i
);
framework
::
LoDTensor
&
fetch
=
framework
::
GetFetchVariable
(
*
scope
,
"fetch"
,
idx
);
auto
type
=
fetch
.
type
();
auto
output
=
&
(
outputs
->
at
(
i
));
if
(
type
==
typeid
(
float
))
{
GetFetchOne
<
float
>
(
fetch
,
output
);
output
->
dtype
=
PaddleDType
::
FLOAT32
;
}
else
if
(
type
==
typeid
(
int64_t
))
{
GetFetchOne
<
int64_t
>
(
fetch
,
output
);
output
->
dtype
=
PaddleDType
::
INT64
;
}
else
{
LOG
(
ERROR
)
<<
"unknown type, only support float32 and int64 now."
;
}
}
return
true
;
return
true
;
}
}
...
@@ -107,6 +211,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -107,6 +211,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
new
std
::
string
(
config_
.
prog_file
));
new
std
::
string
(
config_
.
prog_file
));
argument_
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
argument_
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
}
}
argument_
.
origin_program_desc
.
reset
(
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
...
@@ -127,9 +232,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -127,9 +232,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
}
template
<
>
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
const
AnalysisConfig
&
config
)
{
const
contrib
::
AnalysisConfig
&
config
)
{
VLOG
(
3
)
<<
"create AnalysisConfig"
;
VLOG
(
3
)
<<
"create AnalysisConfig"
;
if
(
config
.
use_gpu
)
{
if
(
config
.
use_gpu
)
{
// 1. GPU memeroy
// 1. GPU memeroy
...
@@ -150,15 +254,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
...
@@ -150,15 +254,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
}
}
std
::
unique_ptr
<
PaddlePredictor
>
predictor
(
new
AnalysisPredictor
(
config
));
std
::
unique_ptr
<
PaddlePredictor
>
predictor
(
new
AnalysisPredictor
(
config
));
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
return
nullptr
;
return
nullptr
;
}
}
return
predictor
;
return
predictor
;
}
}
void
AnalysisPredictor
::
PrepareFeedFetch
()
{
for
(
auto
*
op
:
inference_program_
->
Block
(
0
).
AllOps
())
{
if
(
op
->
Type
()
==
"feed"
)
{
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
if
(
feeds_
.
size
()
<=
static_cast
<
size_t
>
(
idx
))
{
feeds_
.
resize
(
idx
+
1
);
}
feeds_
[
idx
]
=
op
;
feed_names_
[
op
->
Output
(
"Out"
)[
0
]]
=
idx
;
}
else
if
(
op
->
Type
()
==
"fetch"
)
{
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
if
(
fetchs_
.
size
()
<=
static_cast
<
size_t
>
(
idx
))
{
fetchs_
.
resize
(
idx
+
1
);
}
fetchs_
[
idx
]
=
op
;
}
}
}
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetInputTensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
std
::
unique_ptr
<
ZeroCopyTensor
>
res
(
new
ZeroCopyTensor
(
static_cast
<
void
*>
(
executor_
->
scope
())));
res
->
input_or_output_
=
true
;
res
->
SetName
(
name
);
return
res
;
}
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetOutputTensor
(
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
std
::
unique_ptr
<
ZeroCopyTensor
>
res
(
new
ZeroCopyTensor
(
static_cast
<
void
*>
(
executor_
->
scope
())));
res
->
input_or_output_
=
false
;
res
->
SetName
(
name
);
return
res
;
}
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
executor_
->
Run
();
return
true
;
}
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
// Initialize the inference program
std
::
unique_ptr
<
framework
::
Executor
>
tmp_exe
(
new
framework
::
Executor
(
platform
::
CPUPlace
()));
if
(
!
config_
.
model_dir
.
empty
())
{
// Parameters are saved in separate files sited in
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"not valid model path '%s' or program path '%s'."
,
config_
.
model_dir
,
config_
.
param_file
);
return
false
;
}
return
true
;
}
std
::
unique_ptr
<
PaddlePredictor
>
AnalysisPredictor
::
Clone
()
{
auto
*
x
=
new
AnalysisPredictor
(
config_
);
x
->
Init
(
scope_
,
inference_program_
);
return
std
::
unique_ptr
<
PaddlePredictor
>
(
x
);
}
template
<
>
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
>
(
const
contrib
::
AnalysisConfig
&
config
)
{
const
contrib
::
AnalysisConfig
&
config
)
{
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
return
CreatePaddlePredictor
<
contrib
::
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
PaddleEngineKind
::
kAnalysis
>
(
config
);
}
}
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
91756a5a
...
@@ -12,42 +12,81 @@
...
@@ -12,42 +12,81 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
paddle
{
using
inference
::
analysis
::
Argument
;
using
inference
::
analysis
::
Argument
;
using
inference
::
analysis
::
Analyzer
;
using
inference
::
analysis
::
Analyzer
;
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
NaiveExecutor
;
using
contrib
::
AnalysisConfig
;
/* This predictor is based on the original native predictor with IR and Analysis
/* This predictor is based on the original native predictor with IR and Analysis
* support. It will optimize IR and Parameters in the runtime.
* support. It will optimize IR and Parameters in the runtime.
* TODO(Superjomn) Replace the Navive predictor?
* TODO(Superjomn) Replace the Navive predictor?
*/
*/
class
AnalysisPredictor
:
public
Native
PaddlePredictor
{
class
AnalysisPredictor
:
public
PaddlePredictor
{
public:
public:
explicit
AnalysisPredictor
(
const
contrib
::
AnalysisConfig
&
config
)
explicit
AnalysisPredictor
(
const
AnalysisConfig
&
config
)
:
config_
(
config
)
{}
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
);
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
=
nullptr
);
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
bool
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
=
-
1
)
override
{
int
batch_size
=
-
1
)
override
;
return
NativePaddlePredictor
::
Run
(
inputs
,
output_data
,
batch_size
);
}
std
::
unique_ptr
<
ZeroCopyTensor
>
GetInputTensor
(
const
std
::
string
&
name
)
override
;
std
::
unique_ptr
<
ZeroCopyTensor
>
GetOutputTensor
(
const
std
::
string
&
name
)
override
;
bool
ZeroCopyRun
()
override
;
void
PrepareFeedFetch
();
void
OptimizeInferenceProgram
();
void
OptimizeInferenceProgram
();
Argument
&
analysis_argument
()
{
return
argument_
;
}
Argument
&
analysis_argument
()
{
return
argument_
;
}
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
framework
::
Scope
*
scope
()
{
return
executor_
->
scope
();
}
framework
::
ProgramDesc
&
program
()
{
return
*
inference_program_
;
}
protected:
bool
LoadProgramDesc
();
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
framework
::
Scope
*
scope
);
bool
GetFetch
(
std
::
vector
<
PaddleTensor
>
*
output_data
,
framework
::
Scope
*
scope
);
template
<
typename
T
>
void
GetFetchOne
(
const
framework
::
LoDTensor
&
fetchs
,
PaddleTensor
*
output_data
);
private:
private:
contrib
::
AnalysisConfig
config_
;
contrib
::
AnalysisConfig
config_
;
Argument
argument_
;
Argument
argument_
;
std
::
unique_ptr
<
NaiveExecutor
>
executor_
;
platform
::
Place
place_
;
std
::
shared_ptr
<
framework
::
Scope
>
scope_
;
framework
::
Scope
*
sub_scope_
{
nullptr
};
std
::
shared_ptr
<
framework
::
ProgramDesc
>
inference_program_
;
std
::
vector
<
framework
::
OpDesc
*>
feeds_
;
std
::
map
<
std
::
string
,
size_t
>
feed_names_
;
std
::
vector
<
framework
::
OpDesc
*>
fetchs_
;
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, so cache them.
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/analysis_predictor_tester.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string
(
dirname
,
""
,
"dirname to tests."
);
namespace
paddle
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
TEST
(
AnalysisPredictor
,
ZeroCopy
)
{
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"/word2vec.inference.model"
;
config
.
use_feed_fetch_ops
=
false
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
auto
w0
=
predictor
->
GetInputTensor
(
"firstw"
);
auto
w1
=
predictor
->
GetInputTensor
(
"secondw"
);
auto
w2
=
predictor
->
GetInputTensor
(
"thirdw"
);
auto
w3
=
predictor
->
GetInputTensor
(
"forthw"
);
w0
->
Reshape
({
4
,
1
});
w1
->
Reshape
({
4
,
1
});
w2
->
Reshape
({
4
,
1
});
w3
->
Reshape
({
4
,
1
});
auto
*
w0_data
=
w0
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w1_data
=
w1
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w2_data
=
w2
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
auto
*
w3_data
=
w3
->
mutable_data
<
int64_t
>
(
PaddlePlace
::
kCPU
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
w0_data
[
i
]
=
i
;
w1_data
[
i
]
=
i
;
w2_data
[
i
]
=
i
;
w3_data
[
i
]
=
i
;
}
predictor
->
ZeroCopyRun
();
auto
out
=
predictor
->
GetOutputTensor
(
"fc_1.tmp_2"
);
PaddlePlace
place
;
int
size
=
0
;
auto
*
out_data
=
out
->
data
<
float
>
(
&
place
,
&
size
);
LOG
(
INFO
)
<<
"output size: "
<<
size
/
sizeof
(
float
);
LOG
(
INFO
)
<<
"output_data: "
<<
out_data
;
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/api/api.cc
浏览文件 @
91756a5a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
//
you may not use this file except in compliance with the License.
// Licensed under the Apache License, Version 2.0 (the "License");
You may obtain a copy of the License at
// you may not use this file except in compliance with the License.
http://www.apache.org/licenses/LICENSE-2.0
// You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
//
distributed under the License is distributed on an "AS IS" BASIS,
// http://www.apache.org/licenses/LICENSE-2.0
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
See the License for the specific language governing permissions and
// Unless required by applicable law or agreed to in writing, software
limitations under the License. */
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle_inference_api.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
...
@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
}
}
}
}
PaddleBuf
::
PaddleBuf
(
PaddleBuf
&&
other
)
PaddleBuf
::
PaddleBuf
(
PaddleBuf
&&
other
)
:
data_
(
other
.
data_
),
:
data_
(
other
.
data_
),
length_
(
other
.
length_
),
length_
(
other
.
length_
),
memory_owned_
(
other
.
memory_owned_
)
{
memory_owned_
(
other
.
memory_owned_
)
{
...
@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
...
@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
other
.
length_
=
0
;
other
.
length_
=
0
;
}
}
PaddleBuf
::
PaddleBuf
(
const
PaddleBuf
&
other
)
{
*
this
=
other
;
}
PaddleBuf
::
PaddleBuf
(
const
PaddleBuf
&
other
)
{
*
this
=
other
;
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
const
PaddleBuf
&
other
)
{
PaddleBuf
&
PaddleBuf
::
operator
=
(
const
PaddleBuf
&
other
)
{
if
(
!
other
.
memory_owned_
)
{
if
(
!
other
.
memory_owned_
)
{
data_
=
other
.
data_
;
data_
=
other
.
data_
;
length_
=
other
.
length_
;
length_
=
other
.
length_
;
...
@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
...
@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
return
*
this
;
return
*
this
;
}
}
PaddleBuf
&
PaddleBuf
::
operator
=
(
PaddleBuf
&&
other
)
{
PaddleBuf
&
PaddleBuf
::
operator
=
(
PaddleBuf
&&
other
)
{
// only the buffer with external memory can be copied
// only the buffer with external memory can be copied
data_
=
other
.
data_
;
data_
=
other
.
data_
;
length_
=
other
.
length_
;
length_
=
other
.
length_
;
...
@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
...
@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
}
}
}
}
void
PaddleBuf
::
Reset
(
void
*
data
,
size_t
length
)
{
void
PaddleBuf
::
Reset
(
void
*
data
,
size_t
length
)
{
Free
();
Free
();
memory_owned_
=
false
;
memory_owned_
=
false
;
data_
=
data
;
data_
=
data
;
...
@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
...
@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void
PaddleBuf
::
Free
()
{
void
PaddleBuf
::
Free
()
{
if
(
memory_owned_
&&
data_
)
{
if
(
memory_owned_
&&
data_
)
{
PADDLE_ENFORCE_GT
(
length_
,
0
);
PADDLE_ENFORCE_GT
(
length_
,
0
);
free
(
static_cast
<
char
*>
(
data_
));
free
(
static_cast
<
char
*>
(
data_
));
data_
=
nullptr
;
data_
=
nullptr
;
length_
=
0
;
length_
=
0
;
}
}
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
91756a5a
...
@@ -23,7 +23,6 @@ limitations under the License. */
...
@@ -23,7 +23,6 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_bool
(
profile
,
false
,
"Turn on profiler for fluid"
);
DEFINE_bool
(
profile
,
false
,
"Turn on profiler for fluid"
);
...
@@ -145,7 +144,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -145,7 +144,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
VLOG
(
4
)
<<
"Run prepared context"
;
VLOG
(
4
)
<<
"Run prepared context"
;
executor_
->
RunPreparedContext
(
ctx_
.
get
(),
scope
,
executor_
->
RunPreparedContext
(
ctx_
.
get
(),
scope
,
false
,
/* don't create local scope each time*/
false
,
/* don't create local scope each time*/
false
/* don't create variable ea
t
ch time */
);
false
/* don't create variable each time */
);
VLOG
(
4
)
<<
"Finish prepared context"
;
VLOG
(
4
)
<<
"Finish prepared context"
;
// get fetch variable
// get fetch variable
if
(
!
GetFetch
(
output_data
,
scope
))
{
if
(
!
GetFetch
(
output_data
,
scope
))
{
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
91756a5a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
...
@@ -30,6 +30,8 @@
...
@@ -30,6 +30,8 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
...
@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
~
NativePaddlePredictor
()
override
;
~
NativePaddlePredictor
()
override
;
// Returns sub_scope_ when it is set, otherwise the scope held by scope_.
framework::Scope *scope() {
  if (sub_scope_) {
    return sub_scope_;
  }
  return scope_.get();
}
protected:
protected:
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
framework
::
Scope
*
scope
);
framework
::
Scope
*
scope
);
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
91756a5a
...
@@ -21,6 +21,12 @@ limitations under the License. */
...
@@ -21,6 +21,12 @@ limitations under the License. */
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#ifdef __clang__
#define ACC_DIFF 4e-3
#else
#define ACC_DIFF 1e-3
#endif
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
namespace
paddle
{
namespace
paddle
{
...
@@ -43,7 +49,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
...
@@ -43,7 +49,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
NativeConfig
GetConfig
()
{
NativeConfig
GetConfig
()
{
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
model_dir
=
FLAGS_dirname
+
"
/
word2vec.inference.model"
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -99,8 +105,8 @@ void MainWord2Vec(bool use_gpu) {
...
@@ -99,8 +105,8 @@ void MainWord2Vec(bool use_gpu) {
float
*
lod_data
=
output1
.
data
<
float
>
();
float
*
lod_data
=
output1
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
output1
.
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
output1
.
numel
();
++
i
)
{
EXPECT_LT
(
lod_data
[
i
]
-
data
[
i
],
1e-3
);
EXPECT_LT
(
lod_data
[
i
]
-
data
[
i
],
ACC_DIFF
);
EXPECT_GT
(
lod_data
[
i
]
-
data
[
i
],
-
1e-3
);
EXPECT_GT
(
lod_data
[
i
]
-
data
[
i
],
-
ACC_DIFF
);
}
}
}
}
...
@@ -110,7 +116,7 @@ void MainImageClassification(bool use_gpu) {
...
@@ -110,7 +116,7 @@ void MainImageClassification(bool use_gpu) {
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
model_dir
=
config
.
model_dir
=
FLAGS_dirname
+
"image_classification_resnet.inference.model"
;
FLAGS_dirname
+
"
/
image_classification_resnet.inference.model"
;
const
bool
is_combined
=
false
;
const
bool
is_combined
=
false
;
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
...
@@ -144,7 +150,7 @@ void MainImageClassification(bool use_gpu) {
...
@@ -144,7 +150,7 @@ void MainImageClassification(bool use_gpu) {
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
float
*
lod_data
=
output1
.
data
<
float
>
();
float
*
lod_data
=
output1
.
data
<
float
>
();
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
EXPECT_NEAR
(
lod_data
[
j
],
data
[
j
],
1e-3
);
EXPECT_NEAR
(
lod_data
[
j
],
data
[
j
],
ACC_DIFF
);
}
}
}
}
...
@@ -199,7 +205,7 @@ void MainThreadsWord2Vec(bool use_gpu) {
...
@@ -199,7 +205,7 @@ void MainThreadsWord2Vec(bool use_gpu) {
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
EXPECT_EQ
(
refs
[
tid
].
numel
(),
static_cast
<
int64_t
>
(
len
/
sizeof
(
float
)));
EXPECT_EQ
(
refs
[
tid
].
numel
(),
static_cast
<
int64_t
>
(
len
/
sizeof
(
float
)));
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
ACC_DIFF
);
}
}
});
});
}
}
...
@@ -214,7 +220,7 @@ void MainThreadsImageClassification(bool use_gpu) {
...
@@ -214,7 +220,7 @@ void MainThreadsImageClassification(bool use_gpu) {
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
model_dir
=
config
.
model_dir
=
FLAGS_dirname
+
"image_classification_resnet.inference.model"
;
FLAGS_dirname
+
"
/
image_classification_resnet.inference.model"
;
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
std
::
vector
<
framework
::
LoDTensor
>
jobs
(
num_jobs
);
std
::
vector
<
framework
::
LoDTensor
>
jobs
(
num_jobs
);
...
@@ -251,7 +257,7 @@ void MainThreadsImageClassification(bool use_gpu) {
...
@@ -251,7 +257,7 @@ void MainThreadsImageClassification(bool use_gpu) {
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
EXPECT_EQ
((
size_t
)
refs
[
tid
].
numel
(),
len
/
sizeof
(
float
));
EXPECT_EQ
((
size_t
)
refs
[
tid
].
numel
(),
len
/
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
ACC_DIFF
);
}
}
});
});
}
}
...
...
paddle/fluid/inference/api/demo_ci/run.sh
浏览文件 @
91756a5a
...
@@ -2,6 +2,9 @@ set -x
...
@@ -2,6 +2,9 @@ set -x
PADDLE_ROOT
=
$1
PADDLE_ROOT
=
$1
TURN_ON_MKL
=
$2
# use MKL or Openblas
TURN_ON_MKL
=
$2
# use MKL or Openblas
TEST_GPU_CPU
=
$3
# test both GPU/CPU mode or only CPU mode
TEST_GPU_CPU
=
$3
# test both GPU/CPU mode or only CPU mode
DATA_DIR
=
$4
# dataset
cd
`
dirname
$0
`
current_dir
=
`
pwd
`
if
[
$2
==
ON
]
;
then
if
[
$2
==
ON
]
;
then
# You can export yourself if move the install path
# You can export yourself if move the install path
MKL_LIB
=
${
PADDLE_ROOT
}
/build/fluid_install_dir/third_party/install/mklml/lib
MKL_LIB
=
${
PADDLE_ROOT
}
/build/fluid_install_dir/third_party/install/mklml/lib
...
@@ -29,15 +32,15 @@ function download() {
...
@@ -29,15 +32,15 @@ function download() {
fi
fi
cd
..
cd
..
}
}
mkdir
-p
data
mkdir
-p
$DATA_DIR
cd
data
cd
$DATA_DIR
vis_demo_list
=
'se_resnext50 ocr mobilenet'
vis_demo_list
=
'se_resnext50 ocr mobilenet'
for
vis_demo_name
in
$vis_demo_list
;
do
for
vis_demo_name
in
$vis_demo_list
;
do
download
$vis_demo_name
download
$vis_demo_name
done
done
cd
..
# compile and test the demo
# compile and test the demo
cd
$current_dir
mkdir
-p
build
mkdir
-p
build
cd
build
cd
build
...
@@ -73,9 +76,9 @@ for WITH_STATIC_LIB in ON OFF; do
...
@@ -73,9 +76,9 @@ for WITH_STATIC_LIB in ON OFF; do
for
use_gpu
in
$use_gpu_list
;
do
for
use_gpu
in
$use_gpu_list
;
do
for
vis_demo_name
in
$vis_demo_list
;
do
for
vis_demo_name
in
$vis_demo_list
;
do
./vis_demo
\
./vis_demo
\
--modeldir
=
../data
/
$vis_demo_name
/model
\
--modeldir
=
$DATA_DIR
/
$vis_demo_name
/model
\
--data
=
../data
/
$vis_demo_name
/data.txt
\
--data
=
$DATA_DIR
/
$vis_demo_name
/data.txt
\
--refer
=
../data
/
$vis_demo_name
/result.txt
\
--refer
=
$DATA_DIR
/
$vis_demo_name
/result.txt
\
--use_gpu
=
$use_gpu
--use_gpu
=
$use_gpu
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
echo
"vis demo
$vis_demo_name
runs fail."
echo
"vis demo
$vis_demo_name
runs fail."
...
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
void
ZeroCopyTensor
::
Reshape
(
const
std
::
vector
<
int
>
&
shape
)
{
PADDLE_ENFORCE
(
!
name_
.
empty
(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved."
);
PADDLE_ENFORCE
(
input_or_output_
,
"Can't reshape the output tensor, it is readonly"
);
PADDLE_ENFORCE
(
scope_
);
auto
*
scope
=
static_cast
<
framework
::
Scope
*>
(
scope_
);
auto
*
var
=
scope
->
FindVar
(
name_
);
PADDLE_ENFORCE
(
var
,
"No tensor called [%s] in the runtime scope"
,
name_
);
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
->
Resize
(
framework
::
make_ddim
(
shape
));
}
// Returns a writable buffer of element type T for the named tensor, obtained
// from the tensor's mutable_data<T>() on the requested place. Throws for any
// place other than kCPU / kGPU.
template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
  auto *lod_tensor = static_cast<framework::LoDTensor *>(FindTensor());
  if (place == PaddlePlace::kCPU) {
    return lod_tensor->mutable_data<T>(platform::CPUPlace());
  }
  if (place == PaddlePlace::kGPU) {
    // Default-constructed CUDAPlace, exactly as in the original switch.
    return lod_tensor->mutable_data<T>(platform::CUDAPlace());
  }
  PADDLE_THROW("Unsupported place: %d", static_cast<int>(place));
  return nullptr;
}
// Returns the existing buffer of the named tensor as T*.
//
// Out-parameters:
//   place - set to kCPU or kGPU according to the tensor's place, kUNK
//           otherwise.
//   size  - set to tensor->numel().
template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
  auto *lod_tensor = static_cast<framework::LoDTensor *>(FindTensor());
  // Fetch the buffer first, before any out-parameter is written.
  auto *buffer = lod_tensor->data<T>();

  PaddlePlace where = PaddlePlace::kUNK;
  if (platform::is_cpu_place(lod_tensor->place())) {
    where = PaddlePlace::kCPU;
  } else if (platform::is_gpu_place(lod_tensor->place())) {
    where = PaddlePlace::kGPU;
  }
  *place = where;

  *size = lod_tensor->numel();
  return buffer;
}
// Explicit instantiations: the templates above are defined in this .cc file,
// so the float and int64_t versions are instantiated here.
template float *ZeroCopyTensor::mutable_data<float>(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data<int64_t>(PaddlePlace place);
template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
void
*
ZeroCopyTensor
::
FindTensor
()
const
{
PADDLE_ENFORCE
(
!
name_
.
empty
(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved."
);
PADDLE_ENFORCE
(
scope_
);
auto
*
scope
=
static_cast
<
framework
::
Scope
*>
(
scope_
);
auto
*
var
=
scope
->
FindVar
(
name_
);
PADDLE_ENFORCE
(
var
,
"No tensor called [%s] in the runtime scope"
,
name_
);
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
return
tensor
;
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
PADDLE_ENFORCE
(
tensor
,
"not found tensor called %s in the scope"
,
name_
);
return
framework
::
vectorize
(
tensor
->
dims
());
}
void
ZeroCopyTensor
::
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
x
)
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
framework
::
LoD
lod
;
for
(
auto
&
level
:
x
)
{
lod
.
emplace_back
(
level
);
}
tensor
->
set_lod
(
lod
);
}
std
::
vector
<
std
::
vector
<
size_t
>>
ZeroCopyTensor
::
lod
()
const
{
std
::
vector
<
std
::
vector
<
size_t
>>
res
;
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
for
(
auto
&
level
:
tensor
->
lod
())
{
res
.
emplace_back
(
level
);
}
return
res
;
}
}
// namespace paddle
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {

// No-op stand-ins for the ZeroCopyTensor members. Every function ignores its
// inputs and returns an empty / null result.
// NOTE(review): presumably linked where the real zero_copy_tensor.cc is not
// available - confirm against the build files.

void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {}

template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
  return nullptr;
}

// Always returns nullptr. The out-parameters are still given defined values
// (unknown place, zero size) so that callers passing uninitialised locals do
// not read indeterminate data afterwards.
template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
  if (place != nullptr) {
    *place = PaddlePlace::kUNK;
  }
  if (size != nullptr) {
    *size = 0;
  }
  return nullptr;
}

template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
template float *ZeroCopyTensor::mutable_data(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data(PaddlePlace place);

void *ZeroCopyTensor::FindTensor() const { return nullptr; }

std::vector<int64_t> ZeroCopyTensor::shape() { return {}; }

void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {}

std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
  return std::vector<std::vector<size_t>>();
}

}  // namespace paddle
paddle/fluid/inference/api/helper.h
浏览文件 @
91756a5a
...
@@ -16,17 +16,34 @@
...
@@ -16,17 +16,34 @@
#include <glog/logging.h>
#include <glog/logging.h>
#include <sys/time.h>
#include <sys/time.h>
#include <
algorithm>
#include <
chrono> // NOLINT
#include <numeric>
#include <numeric>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/
inference/api/paddle_inference_api
.h"
#include "paddle/fluid/
string/printf
.h"
#include "paddle
/fluid/inference/api/timer
.h"
#include "paddle
_inference_api
.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
// Minimal stopwatch built on std::chrono::high_resolution_clock.
// Call tic() to mark the start and toc() to read the elapsed milliseconds.
class Timer {
 public:
  // Time captured by the most recent tic().
  std::chrono::high_resolution_clock::time_point start;
  // Time captured by the most recent toc().
  std::chrono::high_resolution_clock::time_point startu;

  // Marks the beginning of the interval to measure.
  void tic() { start = std::chrono::high_resolution_clock::now(); }

  // Samples the clock into `startu` and returns the time elapsed since the
  // last tic(), in milliseconds.
  double toc() {
    using Seconds = std::chrono::duration<double>;
    startu = std::chrono::high_resolution_clock::now();
    const Seconds elapsed = std::chrono::duration_cast<Seconds>(startu - start);
    // elapsed.count() is in seconds; scale to milliseconds.
    return static_cast<double>(elapsed.count()) * 1000.0;
  }
};
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
std
::
vector
<
std
::
string
>
*
pieces
)
{
std
::
vector
<
std
::
string
>
*
pieces
)
{
pieces
->
clear
();
pieces
->
clear
();
...
@@ -93,6 +110,20 @@ static void TensorAssignData(PaddleTensor *tensor,
...
@@ -93,6 +110,20 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
}
}
// Copies `data`, row after row, into the CPU buffer of `tensor`.
//
// The buffer comes from tensor->mutable_data<T>(PaddlePlace::kCPU); the caller
// is expected to have reshaped the tensor beforehand so that it can hold all
// elements of `data`.
//
// Returns the number of elements written. (The previous version returned a
// local that was initialised to 0 and never updated, so it always returned 0.)
template <typename T>
static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
                                    const std::vector<std::vector<T>> &data) {
  auto *ptr = tensor->mutable_data<T>(PaddlePlace::kCPU);
  int count = 0;
  for (const auto &row : data) {
    for (T v : row) {
      ptr[count++] = v;
    }
  }
  return count;
}
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
91756a5a
...
@@ -101,6 +101,40 @@ struct PaddleTensor {
...
@@ -101,6 +101,40 @@ struct PaddleTensor {
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
// Tensor+LoD equals LoDTensor
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
// Tensor+LoD equals LoDTensor
};
};
// Where a tensor's memory lives: unknown, CPU or GPU.
enum class PaddlePlace {
  kUNK = -1,
  kCPU = 0,
  kGPU = 1,
};
// Tensor without copy, currently only supports AnalysisPredictor.
//
// A handle is identified by a tensor name and an opaque scope pointer; it can
// only be constructed and named by AnalysisPredictor (friend).
class ZeroCopyTensor {
 public:
  // Resize the tensor to `shape`. Only allowed on input tensors.
  void Reshape(const std::vector<int>& shape);

  // Get the memory in CPU or GPU with specific data type, should Reshape first
  // to tell the data size.
  // One can directly write to the returned memory to feed the data.
  // This is for writing the input tensor.
  template <typename T>
  T* mutable_data(PaddlePlace place);

  // Get the memory directly, will return the place and memory size by pointer.
  // This is for reading the output tensor.
  // NOTE(review): zero_copy_tensor.cc stores tensor->numel() in `*size`;
  // confirm whether an element count or a byte count is intended.
  template <typename T>
  T* data(PaddlePlace* place, int* size);

  // Dimensions of the tensor.
  std::vector<int64_t> shape();

  // Set / get the LoD of the tensor.
  void SetLoD(const std::vector<std::vector<size_t>>& x);
  std::vector<std::vector<size_t>> lod() const;

 protected:
  ZeroCopyTensor(void* scope) : scope_{scope} {}
  void SetName(const std::string& name) { name_ = name; }
  // Returns the tensor registered under name_, as an untyped pointer.
  void* FindTensor() const;

 private:
  std::string name_;
  // True for an input tensor. Reshape() refuses to run when this is false.
  // Defaults to false so that a handle whose flag was never set is treated as
  // read-only instead of holding an indeterminate value.
  bool input_or_output_{false};
  friend class AnalysisPredictor;
  // Opaque scope pointer supplied at construction.
  void* scope_{nullptr};
};
/*
/*
* A simple Inference API for Paddle.
* A simple Inference API for Paddle.
*/
*/
...
@@ -120,6 +154,19 @@ class PaddlePredictor {
...
@@ -120,6 +154,19 @@ class PaddlePredictor {
std
::
vector
<
PaddleTensor
>*
output_data
,
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
=
0
;
int
batch_size
=
-
1
)
=
0
;
// Zero copy input and output optimization.
// Get the input or output tensors, and operate on their memory directly,
// without copy.
// Returns a zero-copy handle for the input tensor called `name`.
// This default implementation returns nullptr; predictors that support the
// zero-copy interface are expected to override it.
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetInputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
// Returns a zero-copy handle for the output tensor called `name`.
// This default implementation returns nullptr; predictors that support the
// zero-copy interface are expected to override it.
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetOutputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
// Runs the predictor on the zero-copy input tensors.
// This default implementation does nothing and returns false.
virtual
bool
ZeroCopyRun
()
{
return
false
;
}
// Clone a predictor that share the model weights, the Cloned predictor should
// Clone a predictor that share the model weights, the Cloned predictor should
// be thread-safe.
// be thread-safe.
virtual
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
=
0
;
virtual
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
=
0
;
...
@@ -216,9 +263,13 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -216,9 +263,13 @@ struct AnalysisConfig : public NativeConfig {
bool
enable_ir_optim
=
true
;
bool
enable_ir_optim
=
true
;
// Manually determine the IR passes to run.
// Manually determine the IR passes to run.
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
std
::
vector
<
std
::
string
>
ir_passes
;
std
::
vector
<
std
::
string
>
ir_passes
{
"embedding_fc_lstm_fuse_pass"
};
// NOT stable yet.
bool
use_feed_fetch_ops
{
true
};
// NOTE this is just for internal development, please not use it.
// NOTE this is just for internal development, please not use it.
// NOT stable yet.
bool
_use_mkldnn
{
false
};
bool
_use_mkldnn
{
false
};
};
};
...
...
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
浏览文件 @
91756a5a
...
@@ -22,7 +22,6 @@ limitations under the License. */
...
@@ -22,7 +22,6 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
#include "utils/logger/logger.h"
#include "utils/logger/logger.h"
DEFINE_string
(
model
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
model
,
""
,
"Directory of the inference model."
);
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
91756a5a
...
@@ -18,6 +18,8 @@ namespace paddle {
...
@@ -18,6 +18,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
int64_t
>
data
;
std
::
vector
<
int64_t
>
data
;
std
::
vector
<
size_t
>
lod
;
std
::
vector
<
size_t
>
lod
;
...
@@ -78,6 +80,7 @@ struct DataRecord {
...
@@ -78,6 +80,7 @@ struct DataRecord {
}
}
}
}
}
}
DataRecord
NextBatch
()
{
DataRecord
NextBatch
()
{
DataRecord
data
;
DataRecord
data
;
data
.
data
=
batched_datas
[
batch_iter
];
data
.
data
=
batched_datas
[
batch_iter
];
...
@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
...
@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
91756a5a
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
word_data_all
,
mention_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
word_data_all
,
mention_data_all
;
...
@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
...
@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
91756a5a
...
@@ -14,10 +14,13 @@
...
@@ -14,10 +14,13 @@
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_bool
(
with_precision_check
,
true
,
"turn on test"
);
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
using
namespace
framework
;
// NOLINT
using
namespace
framework
;
// NOLINT
using
namespace
contrib
;
// NOLINT
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
link_step_data_all
;
...
@@ -29,10 +32,12 @@ struct DataRecord {
...
@@ -29,10 +32,12 @@ struct DataRecord {
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
batch_size
{
1
};
DataRecord
()
=
default
;
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
:
batch_size
(
batch_size
)
{
Load
(
path
);
Load
(
path
);
}
}
DataRecord
NextBatch
()
{
DataRecord
NextBatch
()
{
DataRecord
data
;
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
size_t
batch_end
=
batch_iter
+
batch_size
;
...
@@ -101,6 +106,7 @@ struct DataRecord {
...
@@ -101,6 +106,7 @@ struct DataRecord {
num_samples
=
num_lines
;
num_samples
=
num_lines
;
}
}
};
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
int
batch_size
)
{
int
batch_size
)
{
PaddleTensor
lod_attention_tensor
,
init_zero_tensor
,
lod_tensor_tensor
,
PaddleTensor
lod_attention_tensor
,
init_zero_tensor
,
lod_tensor_tensor
,
...
@@ -149,7 +155,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -149,7 +155,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
}
}
void
SetConfig
(
contrib
::
AnalysisConfig
*
cfg
)
{
// Fills the six zero-copy input tensors of the RNN1 model from the next batch
// taken out of `data_record`.
//
// The work happens in two phases: every tensor first receives its shape and
// LoD, and only afterwards is its buffer obtained through
// mutable_data<float>() and written.
//
// NOTE(review): `.front()` is called on rnn_link_data, rnn_week_datas and
// rnn_minute_datas without an emptiness check, so the batch is assumed to be
// non-empty -- confirm against DataRecord::NextBatch().
void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
                           ZeroCopyTensor *cell_init_tensor,
                           ZeroCopyTensor *data_tensor,
                           ZeroCopyTensor *hidden_init_tensor,
                           ZeroCopyTensor *week_tensor,
                           ZeroCopyTensor *minute_tensor,
                           DataRecord *data_record, int batch_size) {
  auto batch = data_record->NextBatch();

  // ---- Phase 1: shapes and LoD ------------------------------------------
  const int link_rows = static_cast<int>(batch.rnn_link_data.size());
  const int link_cols = static_cast<int>(batch.rnn_link_data.front().size());
  std::vector<int> link_shape({link_rows, link_cols});

  lod_attention_tensor->Reshape({1, 2});
  lod_attention_tensor->SetLoD({batch.lod1, batch.lod2});

  cell_init_tensor->Reshape({batch_size, 15});
  cell_init_tensor->SetLoD({batch.lod3});

  hidden_init_tensor->Reshape({batch_size, 15});
  hidden_init_tensor->SetLoD({batch.lod3});

  data_tensor->Reshape(link_shape);
  data_tensor->SetLoD({batch.lod1});

  const int week_rows = static_cast<int>(batch.rnn_week_datas.size());
  const int week_cols = static_cast<int>(batch.rnn_week_datas.front().size());
  week_tensor->Reshape({week_rows, week_cols});
  week_tensor->SetLoD({batch.lod3});

  const int minute_rows = static_cast<int>(batch.rnn_minute_datas.size());
  const int minute_cols =
      static_cast<int>(batch.rnn_minute_datas.front().size());
  minute_tensor->Reshape({minute_rows, minute_cols});
  minute_tensor->SetLoD({batch.lod3});

  // ---- Phase 2: payload --------------------------------------------------
  float zero_pair[] = {0, 0};
  std::vector<float> zero_state(batch_size * 15, 0);

  // Two zeros go into the attention tensor and into the head of the data
  // tensor; the data tensor is written again by ZeroCopyTensorAssignData
  // below.
  std::copy(zero_pair, zero_pair + 2,
            lod_attention_tensor->mutable_data<float>(PaddlePlace::kCPU));
  std::copy(zero_pair, zero_pair + 2,
            data_tensor->mutable_data<float>(PaddlePlace::kCPU));

  // Cell and hidden initial states are zero-filled (batch_size * 15 floats).
  std::copy(zero_state.begin(), zero_state.end(),
            cell_init_tensor->mutable_data<float>(PaddlePlace::kCPU));
  std::copy(zero_state.begin(), zero_state.end(),
            hidden_init_tensor->mutable_data<float>(PaddlePlace::kCPU));

  ZeroCopyTensorAssignData(data_tensor, batch.rnn_link_data);
  ZeroCopyTensorAssignData(week_tensor, batch.rnn_week_datas);
  ZeroCopyTensorAssignData(minute_tensor, batch.rnn_minute_datas);
}
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
use_gpu
=
false
;
cfg
->
use_gpu
=
false
;
...
@@ -187,7 +241,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
...
@@ -187,7 +241,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
...
@@ -214,7 +270,229 @@ TEST(Analyzer_rnn1, multi_thread) {
...
@@ -214,7 +270,229 @@ TEST(Analyzer_rnn1, multi_thread) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* num_threads */
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
// Compares the variables listed in `tensors` between two scopes.
//
// For each name, the variable is looked up in both scopes. Names missing from
// either scope, and variables whose type in `a_scope` is neither LoDTensor
// nor Tensor, are skipped with an INFO log. The remaining pairs are compared
// with inference::CompareTensor.
//
// Returns true only when every compared pair matches. All names are visited
// even after a mismatch so that every difference gets logged. (Previously the
// function returned true unconditionally, which made
// ASSERT_TRUE(CompareTensors(...)) unable to fail.)
bool CompareTensors(framework::Scope &a_scope, framework::Scope &b_scope,
                    const std::vector<std::string> &tensors) {
  bool all_match = true;
  for (const auto &name : tensors) {
    auto *a_var = a_scope.FindVar(name);
    auto *b_var = b_scope.FindVar(name);
    if (!a_var || !b_var) {
      LOG(INFO) << "skip tensor " << name;
      continue;
    }

    const bool is_tensor = a_var->Type() == typeid(framework::LoDTensor) ||
                           a_var->Type() == typeid(framework::Tensor);
    if (!is_tensor) {
      LOG(INFO) << "skip no tensor " << name;
      continue;
    }

    LOG(INFO) << "comparing tensor " << name;
    auto &a_t = a_var->Get<framework::LoDTensor>();
    auto &b_t = b_var->Get<framework::LoDTensor>();
    if (!inference::CompareTensor(a_t, b_t)) {
      LOG(ERROR) << string::Sprintf("tensor %s not match in two scopes", name);
      all_match = false;
    }
  }
  return all_match;
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// on the complex RNN1 model.
//
// Three predictors are built from the same configuration:
//   * `predictor`          - analysis predictor without feed/fetch ops, driven
//                            through ZeroCopyTensor + ZeroCopyRun();
//   * `native_predictor`   - native predictor, driven through Run();
//   * `analysis_predictor` - analysis predictor with feed/fetch ops, driven
//                            through Run().
// Each is run FLAGS_repeat times and timed. Unless
// FLAGS_with_precision_check is off, the zero-copy output is then compared
// with the native output element by element, and "final_output.tmp_1" is
// compared across the predictors' scopes.
TEST(Analyzer_rnn1, ZeroCopy) {
  AnalysisConfig config;
  SetConfig(&config);
  config.use_feed_fetch_ops = false;

  PaddlePlace place;
  int output_size{0};

  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

  // Both reference predictors below are fed through Run(), so they need the
  // feed/fetch ops.
  config.use_feed_fetch_ops = true;
  auto native_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  auto analysis_predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

#define NEW_TENSOR(name__) \
  auto name__##_tensor = predictor->GetInputTensor(#name__);
  NEW_TENSOR(data_lod_attention);
  NEW_TENSOR(cell_init);
  NEW_TENSOR(data);
  NEW_TENSOR(week);
  NEW_TENSOR(minute);
  NEW_TENSOR(hidden_init);
#undef NEW_TENSOR

  // Prepare data for AnalysisPredictor
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  PrepareZeroCopyInputs(data_lod_attention_tensor.get(), cell_init_tensor.get(),
                        data_tensor.get(), hidden_init_tensor.get(),
                        week_tensor.get(), minute_tensor.get(), &data,
                        FLAGS_batch_size);

  // Prepare data for NativePredictor
  std::vector<std::vector<PaddleTensor>> native_inputs;
  SetInput(&native_inputs);
  std::vector<PaddleTensor> native_outputs;
  std::vector<PaddleTensor> analysis_outputs;

  auto output_tensor = predictor->GetOutputTensor("final_output.tmp_1");

  // Check the fusion statistics of the zero-copy predictor.
  int num_ops;
  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_EQ(fuse_statis.at("fc_fuse"), 1);
  ASSERT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
  ASSERT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
  ASSERT_EQ(num_ops,
            13);  // After graph optimization, only 13 operators exist.

  Timer timer;
  double total_time{0};
  double native_total_time{0};
  double analysis_total_time{0.};

  // Run the zero-copy predictor.
  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    predictor->ZeroCopyRun();
    total_time += timer.toc();
  }

  auto *output_data = output_tensor->data<float>(&place, &output_size);
  ASSERT_GT(output_size, 0);  // the output must not be empty

  // Run native predictor.
  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    ASSERT_TRUE(native_predictor->Run(native_inputs.front(), &native_outputs));
    native_total_time += timer.toc();
  }

  // Run the feed/fetch analysis predictor on the same inputs.
  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    ASSERT_TRUE(
        analysis_predictor->Run(native_inputs.front(), &analysis_outputs));
    analysis_total_time += timer.toc();
  }

  if (!FLAGS_with_precision_check) {
    return;
  }

  // The element-wise loop below indexes both reference buffers by
  // `output_size`, so an empty result or a size mismatch must stop the test
  // here instead of reading out of bounds.
  ASSERT_FALSE(native_outputs.empty());
  ASSERT_FALSE(analysis_outputs.empty());
  int native_output_size = VecReduceToInt(native_outputs.front().shape);
  ASSERT_EQ(native_output_size, output_size);
  int analysis_output_size = VecReduceToInt(analysis_outputs.front().shape);
  ASSERT_EQ(analysis_output_size, output_size);

  // Compare tensors between analysis and zerocopy
  auto *p0 = static_cast<AnalysisPredictor *>(predictor.get());
  auto *p1 = static_cast<AnalysisPredictor *>(analysis_predictor.get());
  auto *p2 = static_cast<NativePaddlePredictor *>(native_predictor.get());

  LOG(INFO) << "Comparing tensors";
  ASSERT_TRUE(
      CompareTensors(*p0->scope(), *p1->scope(), {"final_output.tmp_1"}));
  ASSERT_TRUE(
      CompareTensors(*p0->scope(), *p2->scope(), {"final_output.tmp_1"}));

  LOG(INFO) << "output1 "
            << inference::LoDTensorSummary<float>(
                   p0->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());
  LOG(INFO) << "output2 "
            << inference::LoDTensorSummary<float>(
                   p1->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());
  LOG(INFO) << "output3 "
            << inference::LoDTensorSummary<float>(
                   p2->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());

  const float *native_data =
      static_cast<float *>(native_outputs.front().data.data());
  const float *analysis_data =
      static_cast<float *>(analysis_outputs.front().data.data());
  for (int i = 0; i < output_size; i++) {
    LOG(INFO) << output_data[i] << " " << native_data[i] << " "
              << analysis_data[i];
    EXPECT_NEAR(output_data[i], native_data[i], 1e-3);
  }

  LOG(INFO) << "batch_size: " << FLAGS_batch_size;

  LOG(INFO) << "zero average time: "
            << total_time / (FLAGS_repeat * FLAGS_batch_size);
  LOG(INFO) << "analysis average time: "
            << analysis_total_time / (FLAGS_repeat * FLAGS_batch_size);
  LOG(INFO) << "native average time: "
            << native_total_time / (FLAGS_repeat * FLAGS_batch_size);
}
// Runs the zero-copy RNN1 prediction concurrently on FLAGS_num_threads
// threads and logs per-thread and average run time.
//
// Every thread owns its own predictor (all created up front on the main
// thread) and its own DataRecord. Each thread records its elapsed time in a
// private slot of `thread_times`; the slots are summed only after all threads
// have been joined, so no variable is written concurrently. (The previous
// version did `+=` on one shared double from every thread without
// synchronization, which is a data race.)
TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
  AnalysisConfig config;
  SetConfig(&config);
  config.use_feed_fetch_ops = false;

#define NEW_TENSOR(name__) \
  auto name__##_tensor = predictor->GetInputTensor(#name__);

  std::vector<std::thread> threads;
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
  }

  // One timing slot per worker; slot `tid` is written by thread `tid` only.
  std::vector<double> thread_times(predictors.size(), 0.0);

  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    threads.emplace_back([&thread_times, &predictors, tid] {
      auto &predictor = predictors[tid];
      NEW_TENSOR(data_lod_attention);
      NEW_TENSOR(cell_init);
      NEW_TENSOR(data);
      NEW_TENSOR(week);
      NEW_TENSOR(minute);
      NEW_TENSOR(hidden_init);

      // Prepare data for AnalysisPredictor
      DataRecord data(FLAGS_infer_data, FLAGS_batch_size);

      Timer timer;
      double total_time{0};

      for (int i = 0; i < FLAGS_repeat; i++) {
        // Input preparation is deliberately kept outside the timed region.
        PrepareZeroCopyInputs(data_lod_attention_tensor.get(),
                              cell_init_tensor.get(), data_tensor.get(),
                              hidden_init_tensor.get(), week_tensor.get(),
                              minute_tensor.get(), &data, FLAGS_batch_size);

        timer.tic();
        predictor->ZeroCopyRun();
        total_time += timer.toc();
      }

      thread_times[tid] = total_time;
      LOG(INFO) << "thread time: " << total_time / FLAGS_repeat;
    });
  }
#undef NEW_TENSOR

  for (auto &t : threads) {
    t.join();
  }

  double total_time_of_threads{0};
  for (double t : thread_times) {
    total_time_of_threads += t;
  }

  LOG(INFO) << "average time: "
            << total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
}
}
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
91756a5a
...
@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
...
@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
}
}
// Compare result of NativeConfig and AnalysisConfig
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
91756a5a
...
@@ -104,5 +104,18 @@ TEST(Analyzer_Text_Classification, compare) {
...
@@ -104,5 +104,18 @@ TEST(Analyzer_Text_Classification, compare) {
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
}
// Compares native and analysis predictions after removing
// "embedding_fc_lstm_fuse_pass" from cfg.ir_passes (when it is listed there).
// According to the original author's note, removing the entry enables the
// pass, which is disabled by default.
TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  // Enable embedding_fc_lstm_fuse_pass (disabled by default)
  auto &passes = cfg.ir_passes;
  const auto pass_pos =
      std::find(passes.begin(), passes.end(), "embedding_fc_lstm_fuse_pass");
  if (pass_pos != passes.end()) {
    passes.erase(pass_pos);
  }

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
91756a5a
...
@@ -19,6 +19,7 @@ limitations under the License. */
...
@@ -19,6 +19,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
contrib
::
AnalysisConfig
;
struct
Record
{
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
float
>
data
;
...
@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
...
@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
int
num_ops
;
int
num_ops
;
GetFuseStatis
(
cfg
,
&
num_ops
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
GetFuseStatis
(
predictor
.
get
(),
&
num_ops
);
}
}
// Compare result of NativeConfig and AnalysisConfig
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
91756a5a
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#pragma once
#pragma once
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <string>
#include <string>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <vector>
...
@@ -86,11 +87,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
...
@@ -86,11 +87,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
VecReduceToInt
(
out
.
shape
);
}
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
PaddlePredictor
*
predictor
,
int
*
num_ops
)
{
int
*
num_ops
)
{
auto
predictor
=
CreateTestPredictor
(
config
);
auto
*
analysis_predictor
=
static_cast
<
AnalysisPredictor
*>
(
predictor
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
framework
::
ir
::
kFuseStatisAttr
);
...
@@ -184,5 +183,127 @@ void CompareNativeAndAnalysis(
...
@@ -184,5 +183,127 @@ void CompareNativeAndAnalysis(
CompareResult
(
analysis_outputs
,
native_outputs
);
CompareResult
(
analysis_outputs
,
native_outputs
);
}
}
// Builds a human-readable, multi-line summary of `tensor` for logging:
// its LoD levels, its shape, and up to the first 20 elements read as type T.
//
// NOTE(review): the element count is accumulated in an `int` and each
// dimension is narrowed to `int`, so very large tensors are assumed not to
// occur here -- confirm if this helper is reused outside the tests.
template <typename T>
std::string LoDTensorSummary(const framework::LoDTensor &tensor) {
  std::stringstream out;
  out << "\n---- tensor ---" << '\n';

  // LoD: one bracketed list of offsets per level.
  out << "lod: [";
  for (const auto &lod_level : tensor.lod()) {
    out << "[ ";
    for (auto offset : lod_level) {
      out << offset << ", ";
    }
    out << "]";
  }
  out << "]\n";

  // Shape, and the total number of elements as a by-product.
  out << "shape: [";
  int numel = 1;
  const auto &dims = tensor.dims();
  for (int axis = 0; axis < dims.size(); ++axis) {
    int extent = dims[axis];
    out << extent << ", ";
    numel *= extent;
  }
  out << "]\n";

  // Leading elements only; the data pointer is fetched lazily so that an
  // empty tensor never has data<T>() called on it.
  out << "data: ";
  const int shown = std::min(20, numel);
  for (int idx = 0; idx < shown; ++idx) {
    out << tensor.data<T>()[idx] << " ";
  }
  out << "\n";

  return out.str();
}
// Checks that two LoDs have the same structure: the same number of levels and
// the same number of entries in every level. The first difference is logged
// at ERROR level and false is returned.
//
// NOTE(review): only the sizes are compared; the offsets stored inside each
// level are not. Confirm whether that is intended.
static bool CompareLoD(const framework::LoD &a, const framework::LoD &b) {
  const size_t num_levels = a.size();
  if (num_levels != b.size()) {
    LOG(ERROR) << string::Sprintf("lod size not match %d != %d", a.size(),
                                  b.size());
    return false;
  }

  size_t level = 0;
  while (level < num_levels) {
    const size_t a_len = a[level].size();
    const size_t b_len = b[level].size();
    if (a_len != b_len) {
      LOG(ERROR) << string::Sprintf("level size %d != %d", a_len, b_len);
      return false;
    }
    ++level;
  }
  return true;
}
static
bool
CompareShape
(
const
std
::
vector
<
int64_t
>
&
a
,
const
std
::
vector
<
int64_t
>
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape size not match %d != %d"
,
a
.
size
(),
b
.
size
());
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
if
(
a
[
i
]
!=
b
[
i
])
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape %d-th element not match %d != %d"
,
i
,
a
[
i
],
b
[
i
]);
return
false
;
}
}
return
true
;
}
// Compares the payload of two tensors element by element.
//
// Returns false (after logging at ERROR level) when
//   * the element counts derived from the two shapes differ,
//   * the element types differ, or
//   * any pair of elements differs: by more than 1e-3 for float, or at all
//     for int64_t.
// Tensors holding any other element type are not inspected and are reported
// as equal, which matches the previous behavior.
//
// Fixes relative to the previous version:
//   * a size mismatch was only logged and the loop then read `b` for
//     `a_size` elements, possibly past the end of its buffer;
//   * element counts were accumulated in `int`, although the dimensions are
//     int64_t;
//   * the element type was re-tested on every loop iteration;
//   * int64 values were formatted with "%f".
static bool CompareTensorData(const framework::LoDTensor &a,
                              const framework::LoDTensor &b) {
  auto a_shape = framework::vectorize(a.dims());
  auto b_shape = framework::vectorize(b.dims());

  // Accumulate in int64_t: the init value decides the accumulator type.
  const auto multiply = [](int64_t lhs, int64_t rhs) { return lhs * rhs; };
  size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(),
                                  static_cast<int64_t>(1), multiply);
  size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(),
                                  static_cast<int64_t>(1), multiply);
  if (a_size != b_size) {
    LOG(ERROR) << string::Sprintf("tensor data size not match, %d != %d",
                                  a_size, b_size);
    return false;
  }
  if (a_size == 0) {
    return true;  // nothing to compare, and data<T>() is never touched
  }

  if (a.type() != b.type()) {
    LOG(ERROR) << "tensor data type not match";
    return false;
  }

  if (a.type() == typeid(float)) {
    const auto *a_data = a.data<float>();
    const auto *b_data = b.data<float>();
    for (size_t i = 0; i < a_size; i++) {
      if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
        LOG(ERROR) << string::Sprintf(
            "tensor data %d-th element not match, %f != %f", i, a_data[i],
            b_data[i]);
        return false;
      }
    }
  } else if (a.type() == typeid(int64_t)) {
    const auto *a_data = a.data<int64_t>();
    const auto *b_data = b.data<int64_t>();
    for (size_t i = 0; i < a_size; i++) {
      // For integers "differs by more than 1e-3" is simply "differs".
      if (a_data[i] != b_data[i]) {
        LOG(ERROR) << string::Sprintf(
            "tensor data %d-th element not match, %d != %d", i, a_data[i],
            b_data[i]);
        return false;
      }
    }
  }
  // Other element types are intentionally not compared.

  return true;
}
// Returns true when two LoDTensors agree in LoD, shape and data.
// The three checks run in that order and stop at the first failure, so the
// data comparison is only reached for tensors whose LoD and shape already
// passed.
static bool CompareTensor(const framework::LoDTensor &a,
                          const framework::LoDTensor &b) {
  return CompareLoD(a.lod(), b.lod()) &&
         CompareShape(framework::vectorize(a.dims()),
                      framework::vectorize(b.dims())) &&
         CompareTensorData(a, b);
}
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/memory/malloc.cc
浏览文件 @
91756a5a
...
@@ -36,6 +36,8 @@ namespace memory {
...
@@ -36,6 +36,8 @@ namespace memory {
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static
std
::
once_flag
init_flag
;
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
...
@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
...
@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
return
a
;
return
a
;
}
}
// A minimal CPU allocator that forwards directly to malloc/free.
// It was compared against the BuddyAllocator for CPU memory allocation and
// the two showed almost the same overhead.
//
// NOTE(review): `lock_` is declared but not used by any member shown here.
struct NaiveAllocator {
  // Returns the process-wide instance (a function-local static).
  static NaiveAllocator *Instance() {
    static NaiveAllocator instance;
    return &instance;
  }

  // Allocates `size` bytes with malloc and returns the result unchanged.
  void *Alloc(size_t size) {
    void *block = malloc(size);
    return block;
  }

  // Releases a pointer obtained from Alloc; `p` is enforced to be non-null.
  void Free(void *p) {
    PADDLE_ENFORCE(p);
    free(p);
  }

 private:
  std::mutex lock_;
};
template
<
>
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
)
{
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -82,10 +82,11 @@ function(op_library TARGET)
...
@@ -82,10 +82,11 @@ function(op_library TARGET)
if
(
${
cc_srcs_len
}
EQUAL 0
)
if
(
${
cc_srcs_len
}
EQUAL 0
)
message
(
FATAL_ERROR
"The op library
${
TARGET
}
should contains at least one .cc file"
)
message
(
FATAL_ERROR
"The op library
${
TARGET
}
should contains at least one .cc file"
)
endif
()
endif
()
#remove windows unsupported op
if
(
WIN32
)
if
(
WIN32
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"channel_send_op"
"channel_create_op"
"channel_close_op"
"channel_recv_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
return
()
endif
()
endif
()
...
@@ -281,10 +282,12 @@ op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
...
@@ -281,10 +282,12 @@ op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
op_library
(
max_sequence_len_op DEPS lod_rank_table
)
op_library
(
max_sequence_len_op DEPS lod_rank_table
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_conv_op DEPS context_project
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
op_library
(
sequence_pool_op DEPS sequence_pooling
)
if
(
NOT WIN32
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
lstm_op DEPS sequence2batch lstm_compute
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
hierarchical_sigmoid_op DEPS matrix_bit_code
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
lstmp_op DEPS sequence2batch lstm_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
op_library
(
gru_op DEPS sequence2batch gru_compute
)
endif
(
NOT WIN32
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
recurrent_op DEPS executor
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
...
@@ -297,10 +300,10 @@ op_library(sequence_pad_op DEPS sequence_padding)
...
@@ -297,10 +300,10 @@ op_library(sequence_pad_op DEPS sequence_padding)
op_library
(
unstack_op DEPS stack_op
)
op_library
(
unstack_op DEPS stack_op
)
op_library
(
fake_quantize_op DEPS memory
)
op_library
(
fake_quantize_op DEPS memory
)
op_library
(
fusion_lstm_op DEPS cpu_lstm_compute
)
op_library
(
fusion_lstm_op DEPS cpu_lstm_compute
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
op_library
(
conv_op DEPS vol2col depthwise_conv im2col
)
op_library
(
conv_op DEPS vol2col depthwise_conv im2col
)
op_library
(
layer_norm_op DEPS cub
)
op_library
(
layer_norm_op DEPS cub
)
op_library
(
reduce_mean_op DEPS cub
)
else
()
else
()
op_library
(
conv_op DEPS vol2col im2col
)
op_library
(
conv_op DEPS vol2col im2col
)
endif
()
endif
()
...
@@ -313,11 +316,6 @@ op_library(save_combine_op DEPS lod_tensor)
...
@@ -313,11 +316,6 @@ op_library(save_combine_op DEPS lod_tensor)
op_library
(
load_combine_op DEPS lod_tensor
)
op_library
(
load_combine_op DEPS lod_tensor
)
op_library
(
concat_op DEPS concat
)
op_library
(
concat_op DEPS concat
)
# FIXME(thuan): Move CSP operators to paddle/fluid/framework/operators/concurrency
add_subdirectory
(
concurrency
)
op_library
(
channel_send_op DEPS concurrency
)
op_library
(
channel_recv_op DEPS concurrency
)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
foreach
(
src
${
GENERAL_OPS
}
)
foreach
(
src
${
GENERAL_OPS
}
)
...
...
paddle/fluid/operators/channel_close_op.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
pf
=
paddle
::
framework
;
static
constexpr
char
kChannel
[]
=
"Channel"
;
namespace
paddle
{
namespace
operators
{
// Operator that closes the channel held by its "Channel" input variable.
class ChannelCloseOp : public framework::OperatorBase {
 public:
  ChannelCloseOp(const std::string &type,
                 const framework::VariableNameMap &inputs,
                 const framework::VariableNameMap &outputs,
                 const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {}

 private:
  // Looks up the input variable in `scope`, takes its mutable ChannelHolder
  // and closes it. `dev_place` is not used.
  // NOTE(review): the result of FindVar is dereferenced without a null check.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    auto *channel_var = scope.FindVar(Input(kChannel));
    pf::ChannelHolder *holder =
        channel_var->GetMutable<framework::ChannelHolder>();
    holder->close();
  }
};
// Shape inference for the channel_close operator: it only verifies that the
// "Channel" input has been provided.
class ChannelCloseOpOpInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    const bool has_channel = context->HasInput(kChannel);
    PADDLE_ENFORCE(has_channel, "The input of ChannelClose op must be set");
  }
};
// Declares the proto of the channel_close operator: a single "Channel" input
// and the operator's documentation string.
class ChannelCloseOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        kChannel,
        "The Channel Variable that should be closed by the ChannelClose Op.");
    AddComment(R"DOC(
Channel Close Operator.
This operator closes an open channel.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
channel_close
,
paddle
::
operators
::
ChannelCloseOp
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
ChannelCloseOpMaker
);
paddle/fluid/operators/channel_create_op.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
namespace
pf
=
paddle
::
framework
;
static
constexpr
char
kOutput
[]
=
"Out"
;
namespace
paddle
{
namespace
operators
{
// Operator that creates a channel in its "Out" variable. The element type and
// the buffer capacity of the channel come from the "data_type" and "capacity"
// attributes.
class ChannelCreateOp : public framework::OperatorBase {
 public:
  ChannelCreateOp(const std::string &type,
                  const framework::VariableNameMap &inputs,
                  const framework::VariableNameMap &outputs,
                  const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {}

 private:
  // Resets the output ChannelHolder to a channel of the requested element
  // type and capacity; throws for element types outside the supported list.
  // `dev_place` is not used.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    using VarType = framework::proto::VarType;

    auto *out_var = scope.FindVar(Output(kOutput));

    // Determine the datatype and capacity of the channel to be created
    // from the attributes provided.
    const auto dtype = static_cast<VarType::Type>(Attr<int>("data_type"));
    const auto capacity = Attr<int>("capacity");

    // Based on the datatype, create a new channel holder initialized with
    // the given capacity. When capacity is 0, an unbuffered channel is
    // created.
    pf::ChannelHolder *ch = out_var->GetMutable<framework::ChannelHolder>();
    switch (dtype) {
      case VarType::LOD_TENSOR:
        ch->Reset<pf::LoDTensor>(capacity);
        break;
      case VarType::SELECTED_ROWS:
        ch->Reset<pf::SelectedRows>(capacity);
        break;
      case VarType::LOD_RANK_TABLE:
        ch->Reset<pf::LoDRankTable>(capacity);
        break;
      case VarType::LOD_TENSOR_ARRAY:
        ch->Reset<pf::LoDTensorArray>(capacity);
        break;
      case VarType::READER:
        ch->Reset<pf::ReaderHolder>(capacity);
        break;
      case VarType::CHANNEL:
        ch->Reset<pf::ChannelHolder>(capacity);
        break;
      case VarType::BOOL:
        ch->Reset<bool>(capacity);
        break;
      case VarType::INT32:
        ch->Reset<int>(capacity);
        break;
      case VarType::INT64:
        ch->Reset<int64_t>(capacity);
        break;
      case VarType::FP32:
        ch->Reset<float>(capacity);
        break;
      case VarType::FP64:
        ch->Reset<double>(capacity);
        break;
      default:
        PADDLE_THROW(
            "Data type %d is not in "
            "[LOD_TENSOR, SELECTED_ROWS, LOD_RANK_TABLE, LOD_TENSOR_ARRAY, "
            "READER, CHANNEL, BOOL, INT32, INT64, FP32, FP64]",
            dtype);
    }
  }
};
// Shape inference for the channel_create operator: requires the "Out" output
// to be present and sets its dimension to {1}.
class ChannelCreateOpOpInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    const bool has_output = context->HasOutput(kOutput);
    PADDLE_ENFORCE(has_output, "The output of ChannelCreate op must be set");
    context->SetOutputDim(kOutput, {1});
  }
};
// Declares the proto of the channel_create operator: the "Out" output, the
// "capacity" attribute (default 0), the "data_type" attribute, and the
// operator's documentation string.
class ChannelCreateOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Output.
    AddOutput(kOutput,
              "The object of a Channel type created by ChannelCreate Op.");

    // Attributes.
    AddAttr<int>("capacity", "The size of the buffer of Channel.")
        .SetDefault(0);
    AddAttr<int>("data_type", "The data type of elements inside the Channel.");

    AddComment(R"DOC(
Channel Create Operator.
This operator creates an object of the VarType Channel and returns it.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
channel_create
,
paddle
::
operators
::
ChannelCreateOp
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
ChannelCreateOpMaker
);
paddle/fluid/operators/channel_recv_op.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <paddle/fluid/framework/lod_rank_table.h>
#include <paddle/fluid/framework/lod_tensor_array.h>
#include <paddle/fluid/framework/reader.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static
constexpr
char
Channel
[]
=
"Channel"
;
static
constexpr
char
Status
[]
=
"Status"
;
static
constexpr
char
Out
[]
=
"Out"
;
namespace
paddle
{
namespace
operators
{
// Writes `status` into element 0 of the boolean LoDTensor held by
// `status_var`. The tensor buffer is (re)allocated with shape {1} on
// CPUPlace; `dev_place` is accepted but not consulted.
void SetReceiveStatus(const platform::Place &dev_place,
                      framework::Variable *status_var, bool status) {
  auto *tensor = status_var->GetMutable<framework::LoDTensor>();
  bool *flag = tensor->mutable_data<bool>({1}, platform::CPUPlace());
  *flag = status;
}
// Operator that receives one value from a channel.
//
// Inputs:  Channel -- variable holding a framework::ChannelHolder.
// Outputs: Out     -- variable that receives the data;
//          Status  -- boolean tensor of shape {1} holding the value returned
//                     by concurrency::ChannelReceive.
class ChannelRecvOp : public framework::OperatorBase {
 public:
  ChannelRecvOp(const std::string &type,
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {}

  // Checks that every slot is wired up and fixes the Status shape to {1}.
  void InferShape(framework::InferShapeContext *ctx) const {
    PADDLE_ENFORCE(ctx->HasInput(Channel),
                   "Input(Channel) of ChannelRecvOp should not be null.");
    // The message previously named Input(Channel) although the check is on
    // the Out output slot.
    PADDLE_ENFORCE(ctx->HasOutput(Out),
                   "Output(Out) of ChannelRecvOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput(Status),
                   "Output(Status) of ChannelRecvOp should not be null.");
    // Use the shared slot-name constant rather than a duplicated literal.
    ctx->SetOutputDim(Status, {1});
  }

 private:
  // Looks up the channel and output variables in `scope`, performs the
  // receive, and records the boolean result in the Status output.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    // Get the channel holder created by channel_create op, passed as input.
    framework::ChannelHolder *ch =
        scope.FindVar(Input(Channel))->GetMutable<framework::ChannelHolder>();
    auto output_var = scope.FindVar(Output(Out));
    // Receive the data from the channel.
    bool ok = concurrency::ChannelReceive(ch, output_var);
    // Set the status output of the `ChannelReceive` call.
    SetReceiveStatus(dev_place, scope.FindVar(Output(Status)), ok);
  }
};
// Proto/checker maker for the channel_recv operator. Declares the Channel
// input and the Out / Status outputs, all marked duplicable.
class ChannelRecvOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Adjacent string literals are concatenated by the compiler, so each
    // fragment that continues a sentence must end with a space.
    AddInput(Channel,
             "(Channel) A variable which \"receives\" a value sent "
             "to it by a channel_send op.")
        .AsDuplicable();
    AddOutput(Out,
              "(Variable) Output Variable that will hold the data received"
              " from the Channel")
        .AsDuplicable();
    AddOutput(Status,
              "(Tensor) An LoD Tensor that returns a boolean status of the "
              "result of the receive operation.")
        .AsDuplicable();
    AddComment(R"DOC(
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Registers the "channel_recv" operator type, wiring together its operator
// class, EmptyGradOpMaker as the gradient maker, and its proto maker.
REGISTER_OPERATOR(channel_recv, paddle::operators::ChannelRecvOp,
                  paddle::framework::EmptyGradOpMaker,
                  paddle::operators::ChannelRecvOpMaker);
paddle/fluid/operators/channel_send_op.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <paddle/fluid/framework/lod_rank_table.h>
#include <paddle/fluid/framework/lod_tensor_array.h>
#include <paddle/fluid/framework/reader.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static
constexpr
char
Channel
[]
=
"Channel"
;
static
constexpr
char
X
[]
=
"X"
;
namespace
paddle
{
namespace
operators
{
// Operator that pushes one variable into a channel.
//
// Inputs: Channel -- variable holding a framework::ChannelHolder;
//         X       -- variable whose content is sent.
// The operator declares no outputs in this file.
class ChannelSendOp : public framework::OperatorBase {
 public:
  ChannelSendOp(const std::string &type,
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {}

  // Verifies that both input slots are present.
  void InferShape(framework::InferShapeContext *ctx) const {
    PADDLE_ENFORCE(ctx->HasInput(Channel),
                   "Input(Channel) of ChannelSendOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(X),
                   "Input(X) of ChannelSendOp should not be null.");
  }

 private:
  // Resolves the channel holder and the payload variable from `scope` and
  // forwards them to concurrency::ChannelSend.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    // The channel holder was created by the channel_create op.
    auto *channel_var = scope.FindVar(Input(Channel));
    framework::ChannelHolder *holder =
        channel_var->GetMutable<framework::ChannelHolder>();

    // The variable whose content travels through the channel.
    auto *payload = scope.FindVar(Input(X));

    concurrency::ChannelSend(holder, payload);
  }
};
// Proto/checker maker for the channel_send operator. Declares the Channel
// and X inputs, both marked duplicable; the operator comment is empty.
class ChannelSendOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(Channel,
             "(Channel) A variable which \"sends\" the passed in value to "
             "a listening receiver.")
        .AsDuplicable();
    AddInput(X, "(Variable) The value which gets sent by the channel.")
        .AsDuplicable();
    AddComment(R"DOC(
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
// Registers the "channel_send" operator type, wiring together its operator
// class, EmptyGradOpMaker as the gradient maker, and its proto maker.
REGISTER_OPERATOR(channel_send, paddle::operators::ChannelSendOp,
                  paddle::framework::EmptyGradOpMaker,
                  paddle::operators::ChannelSendOpMaker);
paddle/fluid/operators/concurrency/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
c5292b18
# Declares the `concurrency` library target, built from channel_util.cc and
# depending on device_context, framework_proto, boost and eigen3.
cc_library(concurrency SRCS channel_util.cc DEPS device_context framework_proto boost eigen3)
paddle/fluid/operators/concurrency/channel_util.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/framework/var_type.h"
namespace
poc
=
paddle
::
operators
::
concurrency
;
// Sends the content of `var` through `ch`.
//
// The concrete payload type is chosen from the runtime type of `var`
// (var->Type()); the matching typed pointer is handed to
// ChannelHolder::Send. Throws via PADDLE_THROW for any other variable type.
void poc::ChannelSend(framework::ChannelHolder *ch, framework::Variable *var) {
  switch (framework::ToVarType(var->Type())) {
    case framework::proto::VarType_Type_LOD_TENSOR:
      ch->Send(var->GetMutable<framework::LoDTensor>());
      break;
    case framework::proto::VarType_Type_LOD_RANK_TABLE:
      ch->Send(var->GetMutable<framework::LoDRankTable>());
      break;
    case framework::proto::VarType_Type_LOD_TENSOR_ARRAY:
      ch->Send(var->GetMutable<framework::LoDTensorArray>());
      break;
    case framework::proto::VarType_Type_SELECTED_ROWS:
      ch->Send(var->GetMutable<framework::SelectedRows>());
      break;
    case framework::proto::VarType_Type_READER:
      ch->Send(var->GetMutable<framework::ReaderHolder>());
      break;
    case framework::proto::VarType_Type_CHANNEL:
      ch->Send(var->GetMutable<framework::ChannelHolder>());
      break;
    default:
      PADDLE_THROW("ChannelSend:Unsupported type");
  }
}
// Receives one element from `ch` into `var` and returns the result of
// ChannelHolder::Receive.
//
// Unlike ChannelSend, the payload type is taken from the channel itself
// (ch->Type()), and `var` is materialised as that type through GetMutable.
// Throws via PADDLE_THROW when the channel's element type is not supported.
bool poc::ChannelReceive(framework::ChannelHolder *ch,
                         framework::Variable *var) {
  // Get type of channel and use that to call mutable data for Variable
  switch (framework::ToVarType(ch->Type())) {
    case framework::proto::VarType_Type_LOD_TENSOR:
      return ch->Receive(var->GetMutable<framework::LoDTensor>());
    case framework::proto::VarType_Type_LOD_RANK_TABLE:
      return ch->Receive(var->GetMutable<framework::LoDRankTable>());
    case framework::proto::VarType_Type_LOD_TENSOR_ARRAY:
      return ch->Receive(var->GetMutable<framework::LoDTensorArray>());
    case framework::proto::VarType_Type_SELECTED_ROWS:
      return ch->Receive(var->GetMutable<framework::SelectedRows>());
    case framework::proto::VarType_Type_READER:
      return ch->Receive(var->GetMutable<framework::ReaderHolder>());
    case framework::proto::VarType_Type_CHANNEL:
      return ch->Receive(var->GetMutable<framework::ChannelHolder>());
    default:
      PADDLE_THROW("ChannelReceive:Unsupported type");
  }
}
// Forwards `var`, together with `referrer`, `cond` and `cb`, to
// ChannelHolder::AddToSendQ on `ch`.
//
// The typed pointer passed on is selected from the runtime type of `var`
// (var->Type()). Throws via PADDLE_THROW for an unsupported variable type.
void poc::ChannelAddToSendQ(
    framework::ChannelHolder *ch, const void *referrer,
    framework::Variable *var,
    std::shared_ptr<std::condition_variable_any> cond,
    std::function<bool(framework::ChannelAction)> cb) {
  switch (framework::ToVarType(var->Type())) {
    case framework::proto::VarType_Type_LOD_TENSOR:
      ch->AddToSendQ(referrer, var->GetMutable<framework::LoDTensor>(), cond,
                     cb);
      break;
    case framework::proto::VarType_Type_LOD_RANK_TABLE:
      ch->AddToSendQ(referrer, var->GetMutable<framework::LoDRankTable>(),
                     cond, cb);
      break;
    case framework::proto::VarType_Type_LOD_TENSOR_ARRAY:
      ch->AddToSendQ(referrer, var->GetMutable<framework::LoDTensorArray>(),
                     cond, cb);
      break;
    case framework::proto::VarType_Type_SELECTED_ROWS:
      ch->AddToSendQ(referrer, var->GetMutable<framework::SelectedRows>(),
                     cond, cb);
      break;
    case framework::proto::VarType_Type_READER:
      ch->AddToSendQ(referrer, var->GetMutable<framework::ReaderHolder>(),
                     cond, cb);
      break;
    case framework::proto::VarType_Type_CHANNEL:
      ch->AddToSendQ(referrer, var->GetMutable<framework::ChannelHolder>(),
                     cond, cb);
      break;
    default:
      PADDLE_THROW("ChannelAddToSendQ:Unsupported type");
  }
}
// Forwards `var`, together with `referrer`, `cond` and `cb`, to
// ChannelHolder::AddToReceiveQ on `ch`.
//
// The typed pointer passed on is selected from the runtime type of `var`
// (var->Type()). Throws via PADDLE_THROW for an unsupported variable type.
// NOTE(review): ChannelReceive dispatches on ch->Type() instead; confirm that
// using var->Type() here is intended.
void poc::ChannelAddToReceiveQ(
    framework::ChannelHolder *ch, const void *referrer,
    framework::Variable *var,
    std::shared_ptr<std::condition_variable_any> cond,
    std::function<bool(framework::ChannelAction)> cb) {
  switch (framework::ToVarType(var->Type())) {
    case framework::proto::VarType_Type_LOD_TENSOR:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::LoDTensor>(),
                        cond, cb);
      break;
    case framework::proto::VarType_Type_LOD_RANK_TABLE:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::LoDRankTable>(),
                        cond, cb);
      break;
    case framework::proto::VarType_Type_LOD_TENSOR_ARRAY:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::LoDTensorArray>(),
                        cond, cb);
      break;
    case framework::proto::VarType_Type_SELECTED_ROWS:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::SelectedRows>(),
                        cond, cb);
      break;
    case framework::proto::VarType_Type_READER:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::ReaderHolder>(),
                        cond, cb);
      break;
    case framework::proto::VarType_Type_CHANNEL:
      ch->AddToReceiveQ(referrer, var->GetMutable<framework::ChannelHolder>(),
                        cond, cb);
      break;
    default:
      PADDLE_THROW("ChannelAddToReceiveQ:Unsupported type");
  }
}
paddle/fluid/operators/conv_op.h
浏览文件 @
91756a5a
...
@@ -380,7 +380,8 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
...
@@ -380,7 +380,8 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
depthwiseConv
(
dev_ctx
,
*
input
,
filter
,
strides
,
paddings
,
output
);
depthwiseConv
(
dev_ctx
,
*
input
,
filter
,
strides
,
paddings
,
dilations
,
output
);
}
}
};
};
...
@@ -415,14 +416,14 @@ class DepthwiseConvGradKernel : public framework::OpKernel<T> {
...
@@ -415,14 +416,14 @@ class DepthwiseConvGradKernel : public framework::OpKernel<T> {
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
dev_ctx
,
input_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
dev_ctx
,
input_grad
,
static_cast
<
T
>
(
0
));
depthwiseConvInputGrad
(
dev_ctx
,
*
input
,
filter
,
*
output_grad
,
strides
,
depthwiseConvInputGrad
(
dev_ctx
,
*
input
,
filter
,
*
output_grad
,
strides
,
paddings
,
input_grad
);
paddings
,
dilations
,
input_grad
);
}
}
if
(
filter_grad
)
{
if
(
filter_grad
)
{
filter_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
filter_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
dev_ctx
,
filter_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
dev_ctx
,
filter_grad
,
static_cast
<
T
>
(
0
));
depthwiseConvFilterGrad
(
dev_ctx
,
*
input
,
*
output_grad
,
strides
,
paddings
,
depthwiseConvFilterGrad
(
dev_ctx
,
*
input
,
*
output_grad
,
strides
,
paddings
,
filter_grad
);
dilations
,
filter_grad
);
}
}
}
}
};
};
...
...
paddle/fluid/operators/conv_transpose_op.h
浏览文件 @
91756a5a
...
@@ -345,7 +345,7 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -345,7 +345,7 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
math
::
DepthwiseConvInputGradFunctor
<
DeviceContext
,
T
>
math
::
DepthwiseConvInputGradFunctor
<
DeviceContext
,
T
>
depthwiseConvInputGrad
;
depthwiseConvInputGrad
;
depthwiseConvInputGrad
(
dev_ctx
,
*
output
,
filter
,
*
input
,
strides
,
paddings
,
depthwiseConvInputGrad
(
dev_ctx
,
*
output
,
filter
,
*
input
,
strides
,
paddings
,
output
);
dilations
,
output
);
}
}
};
};
...
@@ -367,10 +367,11 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -367,10 +367,11 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
context
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
if
(
input_grad
)
{
if
(
input_grad
)
{
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
math
::
DepthwiseConvFunctor
<
DeviceContext
,
T
>
depthwiseConv
;
depthwiseConv
(
dev_ctx
,
*
output_grad
,
filter
,
strides
,
paddings
,
depthwiseConv
(
dev_ctx
,
*
output_grad
,
filter
,
strides
,
paddings
,
dilations
,
input_grad
);
input_grad
);
}
}
...
@@ -382,7 +383,7 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -382,7 +383,7 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
math
::
DepthwiseConvFilterGradFunctor
<
DeviceContext
,
T
>
math
::
DepthwiseConvFilterGradFunctor
<
DeviceContext
,
T
>
depthwiseConvFilterGrad
;
depthwiseConvFilterGrad
;
depthwiseConvFilterGrad
(
dev_ctx
,
*
output_grad
,
*
input
,
strides
,
paddings
,
depthwiseConvFilterGrad
(
dev_ctx
,
*
output_grad
,
*
input
,
strides
,
paddings
,
filter_grad
);
dilations
,
filter_grad
);
}
}
}
}
};
};
...
...
paddle/fluid/operators/cub_reduce.h
0 → 100644
浏览文件 @
91756a5a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cmath>
#include <numeric>
#include <set>
#include <vector>
#include <cub/cub.cuh> // NOLINT
#include "paddle/fluid/framework/tensor.h"
namespace
paddle
{
namespace
operators
{
namespace
detail
{
// Fixed-size array of ElementCount values of type T, stored inline in a
// plain C array. Accessors are marked HOSTDEVICE; From() is not.
template <typename T, size_t ElementCount>
struct Array {
 public:
  HOSTDEVICE inline Array() {}

  // Unchecked element access.
  HOSTDEVICE inline T& operator[](size_t index) { return data_[index]; }

  HOSTDEVICE inline const T& operator[](size_t index) const {
    return data_[index];
  }

  // Number of elements; always the compile-time ElementCount.
  HOSTDEVICE constexpr inline size_t size() const { return ElementCount; }

  // Builds an Array by copying from any container exposing size() and
  // operator[]. Enforces that the container holds exactly ElementCount items.
  template <typename VectorLikeType>
  static inline Array<T, ElementCount> From(const VectorLikeType& vec) {
    PADDLE_ENFORCE_EQ(vec.size(), ElementCount, "size not match");
    Array<T, ElementCount> result;
    const size_t count = static_cast<size_t>(vec.size());
    size_t pos = 0;
    while (pos < count) {
      result[pos] = vec[pos];
      ++pos;
    }
    return result;
  }

 private:
  T data_[ElementCount];
};
// Reduces the last axis of a 2-D array.
//
// Block `blockIdx.x` reduces the `reduce_num` consecutive elements of `x`
// starting at offset blockIdx.x * reduce_num. Each thread folds the elements
// at threadIdx.x, threadIdx.x + BlockDim, ... into a private accumulator that
// starts from `init`, applying `transformer` to every element before
// `reducer`. The per-thread partials are then combined with cub::BlockReduce
// (using shared temp storage) and thread 0 stores the result in
// y[blockIdx.x].
template <typename Tx, typename Ty, typename ReduceOp, typename TransformOp,
          int BlockDim>
__global__ void ReduceKernel2D(const Tx* x, Ty* y, ReduceOp reducer,
                               TransformOp transformer, Ty init,
                               int reduce_num) {
  __shared__ typename cub::BlockReduce<Ty, BlockDim>::TempStorage temp_storage;
  // Offset of the first element of the row handled by this block.
  int idx_x = blockIdx.x * reduce_num;
  Ty reduce_var = init;
  // The former outer `idx_y` declaration was never read: the loop variable
  // below shadowed it, so it has been dropped.
  for (int idx_y = threadIdx.x; idx_y < reduce_num; idx_y += BlockDim)
    reduce_var = reducer(reduce_var, transformer(x[idx_x + idx_y]));

  reduce_var =
      cub::BlockReduce<Ty, BlockDim>(temp_storage).Reduce(reduce_var, reducer);

  // Only thread 0 publishes the block's result.
  if (threadIdx.x == 0) {
    y[blockIdx.x] = reduce_var;
  }
}
// General N-d reduction kernel.
//
// blockIdx.x is decomposed (via left_strides) into coordinates along the
// axes listed in `left_dim`; a linear index in [0, reduce_num) is decomposed
// (via reduce_strides) into coordinates along the axes listed in
// `reduce_dim`. The combined coordinate is turned into an offset into `x`
// with `x_strides`. Each thread starts from the element at linear index
// threadIdx.x and then visits threadIdx.x + BlockDim, + 2 * BlockDim, ...;
// partial results are combined with cub::BlockReduce and thread 0 writes
// y[blockIdx.x].
//
// `init` is not read here: the accumulator is seeded with the thread's first
// transformed element, which is read unconditionally.
template <typename Tx, typename Ty, typename ReduceOp, typename TransformOp,
          int BlockDim, int Rank, int ReduceRank>
__global__ void ReduceKernel(const Tx* x, Ty* y, ReduceOp reducer,
                             TransformOp transformer, Ty init, int reduce_num,
                             Array<int, Rank> x_strides,
                             Array<int, ReduceRank> reduce_dim,
                             Array<int, ReduceRank> reduce_strides,
                             Array<int, Rank - ReduceRank> left_dim,
                             Array<int, Rank - ReduceRank> left_strides) {
  __shared__ typename cub::BlockReduce<Ty, BlockDim>::TempStorage temp_storage;
  Array<int, Rank> coord;

  // Coordinates along the kept axes are fixed for the whole block.
  int kept_linear = blockIdx.x;
  for (int a = 0; a < Rank - ReduceRank; ++a) {
    coord[left_dim[a]] = kept_linear / left_strides[a];
    kept_linear %= left_strides[a];
  }

  // First element owned by this thread seeds the accumulator.
  int cursor = threadIdx.x;
  int remainder = cursor;
  for (int r = 0; r < ReduceRank; ++r) {
    coord[reduce_dim[r]] = remainder / reduce_strides[r];
    remainder %= reduce_strides[r];
  }
  int offset = 0;
  for (int d = 0; d < Rank; ++d) offset += (coord[d] * x_strides[d]);
  Ty partial = static_cast<Ty>(transformer(x[offset]));

  // Remaining elements, BlockDim apart in the flattened reduce index space.
  cursor += BlockDim;
  while (cursor < reduce_num) {
    remainder = cursor;
    for (int r = 0; r < ReduceRank; ++r) {
      coord[reduce_dim[r]] = remainder / reduce_strides[r];
      remainder %= reduce_strides[r];
    }
    offset = 0;
    for (int d = 0; d < Rank; ++d) offset += (coord[d] * x_strides[d]);
    partial = static_cast<Ty>(reducer(partial, transformer(x[offset])));
    cursor += BlockDim;
  }

  partial =
      cub::BlockReduce<Ty, BlockDim>(temp_storage).Reduce(partial, reducer);

  if (threadIdx.x == 0) {
    y[blockIdx.x] = partial;
  }
}
// Returns the row-major strides for a shape: the last stride is 1 and
// strides[i] = strides[i + 1] * dims[i + 1]. An empty shape yields an empty
// vector.
static inline std::vector<int> GetStrides(const std::vector<int>& dims) {
  const int rank = static_cast<int>(dims.size());
  std::vector<int> result(rank);
  int running = 1;
  for (int axis = rank - 1; axis >= 0; --axis) {
    result[axis] = running;
    // dims[0] never contributes to any stride, so skip the last multiply.
    if (axis > 0) running *= dims[axis];
  }
  return result;
}
// Returns row-major strides over the sub-shape selected by `idx`: the result
// has idx.size() entries, the last one is 1, and
// strides[i] = strides[i + 1] * dims[idx[i + 1]]. An empty `idx` yields an
// empty vector.
static inline std::vector<int> GetStrides(const std::vector<int>& dims,
                                          const std::vector<int>& idx) {
  const int count = static_cast<int>(idx.size());
  std::vector<int> result(count);
  int running = 1;
  for (int k = count - 1; k >= 0; --k) {
    result[k] = running;
    // dims[idx[0]] never contributes to any stride, so skip the last multiply.
    if (k > 0) running *= dims[idx[k]];
  }
  return result;
}
// Upper bound on the block size returned by GetDesiredBlockDim.
constexpr int kMaxBlockDim = 512;

// Returns kMaxBlockDim when `block_dim` is at least that large; otherwise
// returns 2 raised to the truncated base-2 logarithm of `block_dim`, i.e. the
// largest power of two not exceeding it for positive inputs.
static inline int GetDesiredBlockDim(int block_dim) {
  if (block_dim >= kMaxBlockDim) {
    return kMaxBlockDim;
  }
  const int exponent = static_cast<int>(std::log2(block_dim));
  return 1 << exponent;
}
// Launches the reduction for a fixed compile-time block size.
//
// Three paths, chosen from the sizes of `x_strides` (rank) and
// `reduce_strides` (reduce_rank):
//   * rank == reduce_rank: every axis is reduced, so cub::DeviceReduce::Reduce
//     is called twice -- once with a null buffer to query the temp storage
//     size, once with a uint8 tensor allocated on `place` to do the work.
//   * rank == 2, reduce_rank == 1, reduce_dim[0] == 1: ReduceKernel2D.
//   * otherwise: ReduceKernel instantiated for the matching (rank,
//     reduce_rank) pair, for rank 2..9 and reduce_rank 1..rank-1.
// Kernels are launched with `left_num` blocks of BlockDim threads on
// `stream`.
//
// Throws via PADDLE_THROW for a (rank, reduce_rank) pair that has no
// instantiation, instead of silently launching nothing and leaving `y_data`
// unwritten.
template <typename Tx, typename Ty, int BlockDim, typename ReduceOp,
          typename TransformOp>
static void TensorReduceImpl(
    const Tx* x_data, Ty* y_data, const platform::Place& place,
    const ReduceOp& reducer, const TransformOp& transformer, const Ty& init,
    int left_num, int reduce_num, const std::vector<int>& x_strides,
    const std::vector<int>& reduce_dim, const std::vector<int>& reduce_strides,
    const std::vector<int>& left_dim, const std::vector<int>& left_strides,
    cudaStream_t stream) {
#define CUB_RANK_CASE(i, ...)                                              \
  case i: {                                                                \
    constexpr auto kRank = i;                                              \
    switch (reduce_rank) {                                                 \
      __VA_ARGS__;                                                         \
      default:                                                             \
        PADDLE_THROW(                                                      \
            "TensorReduceImpl: unsupported reduce rank %d for rank %d",    \
            reduce_rank, kRank);                                           \
    }                                                                      \
  } break

#define CUB_REDUCE_RANK_CASE(i, ...)                              \
  case i: {                                                       \
    constexpr auto kReduceRank = i;                               \
    ReduceKernel<Tx, Ty, ReduceOp, TransformOp, BlockDim, kRank,  \
                 kReduceRank><<<left_num, BlockDim, 0, stream>>>( \
        x_data, y_data, reducer, transformer, init, reduce_num,   \
        Array<int, kRank>::From(x_strides),                       \
        Array<int, kReduceRank>::From(reduce_dim),                \
        Array<int, kReduceRank>::From(reduce_strides),            \
        Array<int, kRank - kReduceRank>::From(left_dim),          \
        Array<int, kRank - kReduceRank>::From(left_strides));     \
  } break

  int rank = x_strides.size();
  int reduce_rank = reduce_strides.size();
  if (rank == reduce_rank) {
    // Full reduction: let cub reduce the whole buffer to a single value.
    cub::TransformInputIterator<Ty, TransformOp, const Tx*> trans_x(
        x_data, transformer);
    size_t temp_storage_bytes = 0;
    // First call only reports the required temp storage size.
    cub::DeviceReduce::Reduce(nullptr, temp_storage_bytes, trans_x, y_data,
                              reduce_num, reducer, init, stream);
    framework::Tensor tmp;
    auto* temp_storage = tmp.mutable_data<uint8_t>(
        framework::make_ddim({static_cast<int64_t>(temp_storage_bytes)}),
        place);
    cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, trans_x, y_data,
                              reduce_num, reducer, init, stream);
    return;
  }
  if (rank == 2 && reduce_rank == 1 && reduce_dim[0] == 1) {
    // Reducing the last axis of a 2-D tensor has a dedicated kernel.
    ReduceKernel2D<Tx, Ty, ReduceOp, TransformOp,
                   BlockDim><<<left_num, BlockDim, 0, stream>>>(
        x_data, y_data, reducer, transformer, init, reduce_num);
    return;
  }
  /*
  if (rank == 3 && reduce_rank == 1 && reduce_dim[0] == 1) {
    // TODO(liangdun): we can optimize 3d case which the 2nd axis is reduced.
    // Currently, it is handled by code below, but inefficient
    return;
  }
  */

  switch (rank) {
    CUB_RANK_CASE(2, CUB_REDUCE_RANK_CASE(1););

    CUB_RANK_CASE(3, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2););

    CUB_RANK_CASE(4, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3););

    CUB_RANK_CASE(5, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3); CUB_REDUCE_RANK_CASE(4););

    CUB_RANK_CASE(6, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3); CUB_REDUCE_RANK_CASE(4);
                  CUB_REDUCE_RANK_CASE(5););

    CUB_RANK_CASE(7, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3); CUB_REDUCE_RANK_CASE(4);
                  CUB_REDUCE_RANK_CASE(5); CUB_REDUCE_RANK_CASE(6););

    // Reduce rank 7 was missing here, unlike every other rank's list.
    CUB_RANK_CASE(8, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3); CUB_REDUCE_RANK_CASE(4);
                  CUB_REDUCE_RANK_CASE(5); CUB_REDUCE_RANK_CASE(6);
                  CUB_REDUCE_RANK_CASE(7););

    CUB_RANK_CASE(9, CUB_REDUCE_RANK_CASE(1); CUB_REDUCE_RANK_CASE(2);
                  CUB_REDUCE_RANK_CASE(3); CUB_REDUCE_RANK_CASE(4);
                  CUB_REDUCE_RANK_CASE(5); CUB_REDUCE_RANK_CASE(6);
                  CUB_REDUCE_RANK_CASE(7); CUB_REDUCE_RANK_CASE(8););

    default:
      PADDLE_THROW("TensorReduceImpl: unsupported tensor rank %d", rank);
  }

#undef CUB_REDUCE_RANK_CASE
#undef CUB_RANK_CASE
}
}
// namespace detail
// Reduces tensor `x` over the axes in `origin_reduce_dims` and writes the
// result into `y`, using `transformer` on each input element and `reducer`
// to combine them, on CUDA stream `stream`.
//
// Steps:
//   1. Normalise negative axes (axis + rank) and build a bitmask of reduced
//      axes.
//   2. Merge runs of adjacent axes that are all reduced or all kept into a
//      single axis, which shrinks the rank seen by the kernels.
//   3. Compute strides for the merged shape, for the reduced axes and for the
//      kept axes, plus the element counts `reduce_num` and `left_num`.
//   4. Allocate `y` on x.place() and dispatch to detail::TensorReduceImpl
//      with a block size chosen by detail::GetDesiredBlockDim(reduce_num).
//
// Throws (PADDLE_ENFORCE) when no axis ends up being reduced.
// NOTE(review): when reduce_num == 1 the function returns right after
// allocating `y` without writing to it -- confirm callers do not rely on the
// output in that case.
template <typename Tx, typename Ty, typename ReduceOp, typename TransformOp>
void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
                  std::vector<int> origin_reduce_dims, const Ty& init,
                  const ReduceOp& reducer, const TransformOp& transformer,
                  cudaStream_t stream) {
  auto x_dim = framework::vectorize2int(x.dims());
  const int origin_rank = static_cast<int>(x_dim.size());

  // Bit i of is_reduced is set when axis i is reduced. The mask must be built
  // from the normalised position: shifting by the raw (possibly negative)
  // axis is undefined behaviour and marks the wrong axis.
  int is_reduced = 0;
  for (auto e : origin_reduce_dims) {
    int pos = e >= 0 ? e : e + origin_rank;
    is_reduced |= 1 << pos;
  }

  // Merge neighbouring axes that share the same reduced/kept state.
  std::vector<int> new_x_dim, new_reduce_dims;
  for (int i = 0; i < origin_rank; i++) {
    if ((i == 0) || (((is_reduced >> i) ^ (is_reduced >> (i - 1))) & 1)) {
      // State changed (or first axis): start a new merged axis.
      new_x_dim.push_back(x_dim[i]);
      if ((is_reduced >> i) & 1)
        new_reduce_dims.push_back(new_x_dim.size() - 1);
    } else {
      // Same state as the previous axis: fold into the current merged axis.
      new_x_dim[new_x_dim.size() - 1] *= x_dim[i];
    }
  }
  x_dim = new_x_dim;
  origin_reduce_dims = new_reduce_dims;
  int x_rank = static_cast<int>(x_dim.size());

  // Split the merged axes into kept ("left") and reduced sets, in ascending
  // axis order.
  std::set<int> left_set, reduce_set;
  for (int i = 0; i < x_rank; ++i) left_set.insert(i);

  for (auto e : origin_reduce_dims) {
    left_set.erase(e);
    reduce_set.insert(e);
  }

  std::vector<int> reduce_dim(reduce_set.begin(), reduce_set.end());
  std::vector<int> left_dim(left_set.begin(), left_set.end());

  // reduce_strides[0] and reduce_dim[0] are read below; fail loudly rather
  // than index an empty vector.
  PADDLE_ENFORCE(!reduce_dim.empty(),
                 "TensorReduce requires at least one axis to reduce.");

  std::vector<int> x_strides = detail::GetStrides(x_dim);
  std::vector<int> reduce_strides = detail::GetStrides(x_dim, reduce_dim);
  std::vector<int> left_strides = detail::GetStrides(x_dim, left_dim);
  // Total number of elements folded into each output / number of outputs.
  int reduce_num = reduce_strides[0] * x_dim[reduce_dim[0]];
  int left_num = 1;
  if (left_dim.size()) left_num = left_strides[0] * x_dim[left_dim[0]];

  auto x_data = x.data<Tx>();
  auto y_data = y->mutable_data<Ty>(x.place());
  if (reduce_num == 1) return;

#define CUB_BLOCK_DIM_CASE(block_dim)                                    \
  case block_dim: {                                                      \
    detail::TensorReduceImpl<Tx, Ty, block_dim, ReduceOp, TransformOp>(  \
        x_data, y_data, x.place(), reducer, transformer, init, left_num, \
        reduce_num, x_strides, reduce_dim, reduce_strides, left_dim,     \
        left_strides, stream);                                           \
  } break

  switch (detail::GetDesiredBlockDim(reduce_num)) {
    CUB_BLOCK_DIM_CASE(512);
    CUB_BLOCK_DIM_CASE(256);
    CUB_BLOCK_DIM_CASE(128);
    CUB_BLOCK_DIM_CASE(64);
    CUB_BLOCK_DIM_CASE(32);
    CUB_BLOCK_DIM_CASE(16);
    CUB_BLOCK_DIM_CASE(8);
    CUB_BLOCK_DIM_CASE(4);
    CUB_BLOCK_DIM_CASE(2);
  }
#undef CUB_BLOCK_DIM_CASE
}
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
浏览文件 @
91756a5a
...
@@ -104,7 +104,6 @@ bool in_quad(T x, T y, T roi_x[], T roi_y[]) {
...
@@ -104,7 +104,6 @@ bool in_quad(T x, T y, T roi_x[], T roi_y[]) {
* a31 = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / (w - 1)
* a31 = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / (w - 1)
* a32 = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / (h - 1)
* a32 = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / (h - 1)
* a33 = 1
* a33 = 1
*
*/
*/
template
<
typename
T
>
template
<
typename
T
>
void
get_transform_matrix
(
const
int
transformed_width
,
void
get_transform_matrix
(
const
int
transformed_width
,
...
@@ -260,8 +259,8 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
...
@@ -260,8 +259,8 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
roi2image
.
Resize
({
rois_num
});
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
auto
lod
=
rois
->
lod
().
back
();
auto
lod
=
rois
->
lod
().
back
();
for
(
in
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
in
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
for
(
size_
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
roi2image_data
[
j
]
=
i
;
}
}
}
}
...
@@ -393,8 +392,8 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> {
...
@@ -393,8 +392,8 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> {
roi2image
.
Resize
({
rois_num
});
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
auto
lod
=
rois
->
lod
().
back
();
auto
lod
=
rois
->
lod
().
back
();
for
(
in
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
in
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
for
(
size_
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
roi2image_data
[
j
]
=
i
;
}
}
}
}
...
@@ -404,7 +403,7 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> {
...
@@ -404,7 +403,7 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> {
for
(
int
in_h
=
0
;
in_h
<
in_height
;
++
in_h
)
{
for
(
int
in_h
=
0
;
in_h
<
in_height
;
++
in_h
)
{
for
(
int
in_w
=
0
;
in_w
<
in_width
;
++
in_w
)
{
for
(
int
in_w
=
0
;
in_w
<
in_width
;
++
in_w
)
{
T
gradient
=
0.0
;
T
gradient
=
0.0
;
for
(
in
t
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
for
(
size_
t
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
T
roi_x
[
4
];
T
roi_x
[
4
];
T
roi_y
[
4
];
T
roi_y
[
4
];
...
...
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
浏览文件 @
91756a5a
...
@@ -345,8 +345,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
...
@@ -345,8 +345,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
roi2image
.
Resize
({
rois_num
});
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
auto
lod
=
rois
->
lod
().
back
();
auto
lod
=
rois
->
lod
().
back
();
for
(
in
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
in
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
for
(
size_
t
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
roi2image_data
[
j
]
=
i
;
}
}
}
}
...
@@ -432,7 +432,7 @@ __global__ void RoiTransformGradKernel(
...
@@ -432,7 +432,7 @@ __global__ void RoiTransformGradKernel(
T
gradient
=
0.0
;
T
gradient
=
0.0
;
// Accumulate gradient over all RoIs that interpolated this element
// Accumulate gradient over all RoIs that interpolated this element
for
(
in
t
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
for
(
size_
t
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
T
roi_x
[
4
];
T
roi_x
[
4
];
T
roi_y
[
4
];
T
roi_y
[
4
];
...
...
paddle/fluid/operators/distributed/grpc_client.h
浏览文件 @
91756a5a
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <time.h>
#include <time.h>
#include <atomic>
#include <chrono> // NOLINT
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include <condition_variable> // NOLINT
...
...
paddle/fluid/operators/distributed/request_handler.h
浏览文件 @
91756a5a
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#pragma once
#pragma once
#include <time.h>
#include <time.h>
#include <condition_variable> // NOLINT
#include <functional>
#include <functional>
#include <string>
#include <string>
...
...
paddle/fluid/operators/distributed/rpc_server.h
浏览文件 @
91756a5a
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#pragma once
#pragma once
#include <atomic>
#include <set>
#include <set>
#include <string>
#include <string>
#include <thread> // NOLINT
#include <thread> // NOLINT
...
...
paddle/fluid/operators/elementwise_op.h
浏览文件 @
91756a5a
...
@@ -89,7 +89,7 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -89,7 +89,7 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false). Used by MKLDNN."
)
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false). Used by MKLDNN."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddComment
(
string
::
Sprintf
(
R"DOC(
AddComment
(
string
::
Sprintf
(
R"DOC(
Limited
Elementwise %s Operator
Elementwise %s Operator
The equation is:
The equation is:
...
...
paddle/fluid/operators/fused_embedding_fc_lstm_op.cc
0 → 100644
浏览文件 @
91756a5a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fused_embedding_fc_lstm_op.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
operators
{
// Validates the inputs/outputs of the fused_embedding_fc_lstm op and infers
// the shapes of its outputs.
//
// Shapes, with T = number of ids (first dim of Ids) and D = frame size
// (WeightH is D x 4D):
//   Hidden, Cell                 -> T x D  (LoD shared with Ids)
//   XX                           -> T x 4D (LoD shared with Ids)
//   BatchedInput                 -> T x 4D (batch mode only)
//   BatchedHidden, BatchedCell   -> T x D  (batch mode only)
//
// Every rank is validated before the corresponding dims are indexed.
void FusedEmbeddingFCLSTMOp::InferShape(
    framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("Embeddings"),
                 "Assert only one Input(Embeddings) of LSTM.");
  PADDLE_ENFORCE(ctx->HasInput("WeightH"),
                 "Assert only one Input(WeightH) of LSTM.");
  PADDLE_ENFORCE(ctx->HasInput("Bias"), "Assert only one Input(Bias) of LSTM.");
  PADDLE_ENFORCE(ctx->HasOutput("XX"), "Assert only one Output(XX) of LSTM.");
  PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
                 "Assert only one Output(Hidden) of LSTM.");
  PADDLE_ENFORCE(ctx->HasOutput("Cell"),
                 "Assert only one Output(Cell) of LSTM.");
  PADDLE_ENFORCE(ctx->HasInput("Ids"),
                 "Input(Ids) of LookupTableOp should not be null.");

  // Ids: rank 2 with a trailing dimension of 1. The rank is checked first so
  // that indexing the last dimension is always in range.
  auto x_dims = ctx->GetInputDim("Ids");
  PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(Ids)'s rank must be 2.");
  PADDLE_ENFORCE_EQ(x_dims[x_dims.size() - 1], 1,
                    "The last dimension of the 'Ids' tensor must be 1.");

  // H0 requires C0, and both must have the same shape.
  if (ctx->HasInput("H0")) {
    PADDLE_ENFORCE(ctx->HasInput("C0"),
                   "Input(Cell) and Input(Hidden) of LSTM should not "
                   "be null at the same time.");
    auto h_dims = ctx->GetInputDim("H0");
    auto c_dims = ctx->GetInputDim("C0");
    PADDLE_ENFORCE(h_dims == c_dims,
                   "The dimension of Input(H0) and Input(C0) "
                   "should be the same.");
  }

  auto embeddings_dims = ctx->GetInputDim("Embeddings");
  PADDLE_ENFORCE_EQ(embeddings_dims.size(), 2,
                    "The rank of Input(Embeddings) should be 2.");

  // WeightH: D x 4D. Validate the rank before deriving the frame size from
  // the second dimension.
  auto wh_dims = ctx->GetInputDim("WeightH");
  PADDLE_ENFORCE_EQ(wh_dims.size(), 2,
                    "The rank of Input(WeightH) should be 2.");
  int frame_size = wh_dims[1] / 4;
  PADDLE_ENFORCE_EQ(wh_dims[0], frame_size,
                    "The first dimension of Input(WeightH) "
                    "should be %d.",
                    frame_size);
  PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size,
                    "The second dimension of Input(WeightH) "
                    "should be 4 * %d.",
                    frame_size);

  // The kernels copy one full embedding row per id into XX and then step
  // through XX in strides of 4D, so the embedding width has to be 4D.
  PADDLE_ENFORCE_EQ(embeddings_dims[1], 4 * frame_size,
                    "The second dimension of Input(Embeddings) "
                    "should be 4 * %d.",
                    frame_size);

  auto b_dims = ctx->GetInputDim("Bias");
  PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2.");
  PADDLE_ENFORCE_EQ(b_dims[0], 1,
                    "The first dimension of Input(Bias) should be 1.");
  PADDLE_ENFORCE_EQ(
      b_dims[1], (ctx->Attrs().Get<bool>("use_peepholes") ? 7 : 4) * frame_size,
      "The second dimension of Input(Bias) should be "
      "7 * %d if enable peepholes connection or"
      "4 * %d if disable peepholes",
      frame_size, frame_size);

  framework::DDim out_dims({x_dims[0], frame_size});
  ctx->SetOutputDim("Hidden", out_dims);
  ctx->SetOutputDim("Cell", out_dims);
  ctx->ShareLoD("Ids", "Hidden");
  ctx->ShareLoD("Ids", "Cell");

  if (!ctx->Attrs().Get<bool>("use_seq")) {
    // Batch mode needs the additional intermediate outputs.
    PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"),
                   "Assert only one Output(BatchedInput) of LSTM.");
    PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"),
                   "Assert only one Output(BatchedHidden) of LSTM.");
    PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"),
                   "Assert only one Output(BatchedCell) of LSTM.");
    PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"),
                   "Assert only one Output(ReorderedH0) of LSTM");
    PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"),
                   "Assert only one Output(ReorderedC0) of LSTM.");
    ctx->SetOutputDim("BatchedInput", {x_dims[0], wh_dims[1]});
    ctx->SetOutputDim("BatchedHidden", out_dims);
    ctx->SetOutputDim("BatchedCell", out_dims);
  }

  // XX holds the looked-up embedding rows in both modes, so it is always
  // T x 4D. The previous batch-mode width min(x_dims[1], wh_dims[1]) evaluated
  // to 1 (the trailing Ids dimension), which is too small for the rows the
  // kernel writes into it.
  ctx->SetOutputDim("XX", {x_dims[0], wh_dims[1]});
  ctx->ShareLoD("Ids", "XX");
}
// Selects the kernel from the element type of Input(Embeddings) and the
// device context the op is executing on.
framework::OpKernelType FusedEmbeddingFCLSTMOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto* embeddings = ctx.Input<framework::LoDTensor>("Embeddings");
  const auto data_type = framework::ToDataType(embeddings->type());
  return framework::OpKernelType(data_type, ctx.device_context());
}
void
FusedEmbeddingFCLSTMOpMaker
::
Make
()
{
AddInput
(
"Ids"
,
"An input with type int32 or int64 "
"contains the ids to be looked up in W. "
"The last dimension size must be 1."
);
AddInput
(
"Embeddings"
,
"(Tensor) the learnable weights of X."
" - The shape is (M x 4D), where M is the dim size of x, D is the "
"hidden size. "
" - Weight = {W_cx, W_ix, W_fx, W_ox}"
);
AddInput
(
"WeightH"
,
"(Tensor) same as LSTMOp, the learnable hidden-hidden weights."
" - The shape is (D x 4D), where D is the hidden size. "
" - Weight = {W_ch, W_ih, W_fh, W_oh}"
);
AddInput
(
"Bias"
,
"(Tensor) the learnable weights. Almost same as LSTMOp"
"Note: we should add the fc bias into this (1x4D) in bias."
"input-hidden bias weight and peephole connections weight if "
"setting `use_peepholes` True. "
"1. `use_peepholes = False` "
" - The shape is (1 x 4D). "
" - Bias = {b_c, b_i, b_f, b_o}."
"2. `use_peepholes = True` "
" - The shape is (1 x 7D). "
" - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."
);
AddInput
(
"H0"
,
"(Tensor, optional) (same as LSTMOp) the initial hidden state is an "
"optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size and D is the hidden size."
)
.
AsDispensable
();
AddInput
(
"C0"
,
"(Tensor, optional) (same as LSTMOp) (the initial cell state is an "
"optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
AddOutput
(
"Hidden"
,
"(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
"(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
"(LoDTensor) the result after X * WeightX (size is T x 4D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(LoDTensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(LoDTensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(LoDTensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(LoDTensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(LoDTensor) (N x D)."
).
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, defalut: True) "
"whether to enable diagonal/peephole connections."
)
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"is_reverse"
,
"(bool, defalut: False) "
"whether to compute reversed LSTM."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"use_seq"
,
"(bool, defalut: True) "
"whether to use seq mode to compute."
)
.
SetDefault
(
true
);
AddAttr
<
std
::
string
>
(
"gate_activation"
,
"(string, default: sigmoid)"
"The activation for input gate, forget gate and output "
"gate, `sigmoid` by default."
)
.
SetDefault
(
"sigmoid"
)
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
AddAttr
<
std
::
string
>
(
"cell_activation"
,
"(string, default: tanh)"
"The activation for cell output, `tanh` by defalut."
)
.
SetDefault
(
"tanh"
)
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
AddAttr
<
std
::
string
>
(
"candidate_activation"
,
"(string, default: tanh)"
"The activation for candidate hidden state, "
"`tanh` by default."
)
.
SetDefault
(
"tanh"
)
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
AddComment
(
R"DOC(
Fusion Long-Short Term Memory (LSTM) Operator.
This operator fuse the X into LSTM, more details can refer to LSTM op.
)DOC"
);
}
// CPU kernel of fused_embedding_fc_lstm.
//
// For every id the kernel copies the matching row of Input(Embeddings) into
// the intermediate output XX and then runs the LSTM recurrence on those rows,
// either sequence by sequence (SeqCompute) or time-step by time-step over a
// reordered batch (BatchCompute).
//
// Changes relative to the previous revision:
//  * XX is resized to (T x 4D) before it is allocated, so the buffer is large
//    enough for the rows written by the lookup regardless of the shape that
//    was inferred for it (this mirrors the xx->Resize fix in fusion_lstm).
//  * The embedding lookup that was duplicated in SeqCompute and BatchCompute
//    lives in one private helper, LookupEmbeddings.
template <typename T>
class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
 public:
// Picks the gate/cell/candidate activation functors, using the AVX
// implementation when the CPU supports it.
#define INIT_VEC_FUNC                                                          \
  std::function<void(const int, const T *, T *)> act_gate, act_cell, act_cand; \
  auto& act_gate_str = ctx.Attr<std::string>("gate_activation");               \
  auto& act_cell_str = ctx.Attr<std::string>("cell_activation");               \
  auto& act_cand_str = ctx.Attr<std::string>("candidate_activation");          \
  if (platform::jit::MayIUse(platform::jit::avx)) {                            \
    math::VecActivations<T, platform::jit::avx> act_functor;                   \
    act_gate = act_functor(act_gate_str);                                      \
    act_cell = act_functor(act_cell_str);                                      \
    act_cand = act_functor(act_cand_str);                                      \
  } else {                                                                     \
    math::VecActivations<T, platform::jit::isa_any> act_functor;               \
    act_gate = act_functor(act_gate_str);                                      \
    act_cell = act_functor(act_cell_str);                                      \
    act_cand = act_functor(act_cand_str);                                      \
  }

#define INIT_BASE_INPUT_OUTPUT                          \
  auto* ids = ctx.Input<LoDTensor>("Ids");              \
  auto* h0 = ctx.Input<Tensor>("H0");                   \
  auto* c0 = ctx.Input<Tensor>("C0");                   \
  auto* embeddings = ctx.Input<Tensor>("Embeddings");   \
  auto* wh = ctx.Input<Tensor>("WeightH");              \
  auto* bias = ctx.Input<Tensor>("Bias");               \
  auto* xx = ctx.Output<LoDTensor>("XX");               \
  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");   \
  auto* cell_out = ctx.Output<LoDTensor>("Cell");       \
  bool is_reverse = ctx.Attr<bool>("is_reverse");       \
  bool use_peepholes = ctx.Attr<bool>("use_peepholes");

#define INIT_BASE_SIZES                                      \
  auto ids_dims = ids->dims();                   /* T x M*/  \
  auto ids_numel = ids->numel();                 /* T x 1*/  \
  auto wh_dims = wh->dims();                     /* D x 4D*/ \
  const int D = wh_dims[0];                                  \
  const int D2 = D * 2;                                      \
  const int D3 = D * 3;                                      \
  int64_t row_number = embeddings->dims()[0];                \
  int64_t row_width = embeddings->dims()[1];                 \
  const int D4 = wh_dims[1];

#define INIT_BASE_INPUT_DATAS                                        \
  const int64_t* ids_data = ids->data<int64_t>();                    \
  const T* embeddings_data = embeddings->data<T>();                  \
  const T* wh_data = wh->data<T>();                                  \
  /* diagonal weight*/                                               \
  const T* wc_data = bias->data<T>() + D4;                           \
  /* for peephole only*/                                             \
  Tensor checked_cell;                                               \
  T* checked_cell_data = nullptr;                                    \
  auto place = ctx.GetPlace();                                       \
  if (use_peepholes) {                                               \
    /* w_ic * Ct-1, w_fc * Ct-1  ; w_oc * Ct => ih*/                 \
    checked_cell_data = checked_cell.mutable_data<T>({2, D}, place); \
  }

/// Compute LSTM
#define GEMM_WH_ADDON(bs, prev, out)                                           \
  blas.GEMM(CblasNoTrans, CblasNoTrans, bs, D4, D, static_cast<T>(1), prev, D, \
            wh_data, D4, static_cast<T>(1), out, D4)

// gates: W_ch, W_ih, W_fh, W_oh
#define GET_Ct(ct_1, gates, ct)                   \
  /* C_t = C_t-1 * fgated + cand_gated * igated*/ \
  act_cand(D, gates, gates);                      \
  blas.VMUL(D, gates, gates + D, gates + D);      \
  blas.VMUL(D, ct_1, gates + D2, gates + D2);     \
  blas.VADD(D, gates + D, gates + D2, ct)

#define GET_Ht(ct, gates, ht)        \
  /* H_t = act_cell(C_t) * ogated */ \
  act_cell(D, ct, gates + D2);       \
  blas.VMUL(D, gates + D2, gates + D3, ht)

#define GET_Ct_NOH0C0(gates, ct)     \
  /* C_t = igated * cgated*/         \
  act_gate(D, gates + D, gates + D); \
  act_cand(D, gates, gates);         \
  blas.VMUL(D, gates, gates + D, ct)

#define COMPUTE_CtHt_NOH0C0(gates, ct, ht) \
  GET_Ct_NOH0C0(gates, ct);                \
  act_gate(D, gates + D3, gates + D3);     \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt_PEEPHOLE_NOH0C0(gates, ct, ht) \
  GET_Ct_NOH0C0(gates, ct);                         \
  /* get outgated, put W_oc * C_t on igated */      \
  blas.VMUL(D, wc_data + D2, ct, gates + D);        \
  blas.VADD(D, gates + D, gates + D3, gates + D3);  \
  act_gate(D, gates + D3, gates + D3);              \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt(gates, ct_1, ct, ht) \
  act_gate(D3, gates + D, gates + D);     \
  GET_Ct(ct_1, gates, ct);                \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht)        \
  /* get fgated and igated*/                              \
  blas.VMUL(D, wc_data, ct_1, checked_cell_data);         \
  blas.VMUL(D, wc_data + D, ct_1, checked_cell_data + D); \
  blas.VADD(D2, checked_cell_data, gates + D, gates + D); \
  act_gate(D2, gates + D, gates + D);                     \
  GET_Ct(ct_1, gates, ct);                                \
  /* get ogated*/                                         \
  blas.VMUL(D, wc_data + D2, ct, gates + D);              \
  blas.VADD(D, gates + D, gates + D3, gates + D3);        \
  act_gate(D, gates + D3, gates + D3);                    \
  GET_Ht(ct, gates, ht)

  // Runs the LSTM one sequence at a time, in LoD order (or in reverse order
  // when is_reverse is set), writing Hidden and Cell directly.
  void SeqCompute(const framework::ExecutionContext& ctx) const {
    using DeviceContext = paddle::platform::CPUDeviceContext;
    INIT_BASE_INPUT_OUTPUT
    INIT_BASE_SIZES
    INIT_VEC_FUNC
    INIT_BASE_INPUT_DATAS

    auto ids_lod = ids->lod();
    const int total_T = ids_dims[0];
    const int N = ids_lod[0].size() - 1;
    const T* h0_data = h0 ? h0->data<T>() : nullptr;
    const T* c0_data = c0 ? c0->data<T>() : nullptr;
    // XX receives one embedding row per id and is walked in strides of D4
    // below, so make sure it is (T x 4D) before allocating it.
    xx->Resize({ids_dims[0], D4});
    T* xx_data = xx->mutable_data<T>(place);
    T* h_out_data = hidden_out->mutable_data<T>(place);
    T* c_out_data = cell_out->mutable_data<T>(place);
    auto blas = math::GetBlas<DeviceContext, T>(ctx);

    LookupEmbeddings(ids_data, ids_numel, embeddings_data, row_number,
                     row_width, xx_data);

    int xx_offset = D4;
    int gate_offset = D;
    if (is_reverse) {
      // Start at the last time step and walk backwards.
      const int offset = (total_T - 1) * D;
      xx_data = xx_data + offset * 4;
      h_out_data = h_out_data + offset;
      c_out_data = c_out_data + offset;
      xx_offset = -D4;
      gate_offset = -D;
    }

#define MOVE_ONE_STEP                    \
  prev_h_data = h_out_data;              \
  prev_c_data = c_out_data;              \
  xx_data = xx_data + xx_offset;         \
  h_out_data = h_out_data + gate_offset; \
  c_out_data = c_out_data + gate_offset

#define PROCESS_H0C0_DEFINES                           \
  int bid = is_reverse ? N - 1 - i : i;                \
  int seq_len = ids_lod[0][bid + 1] - ids_lod[0][bid]; \
  const T* prev_c_data = nullptr;                      \
  const T* prev_h_data = nullptr;                      \
  int tstart = 0

#define PROCESS_H0C0_PEEPHOLE                                      \
  PROCESS_H0C0_DEFINES;                                            \
  if (h0_data) {                                                   \
    prev_h_data = h0_data + bid * D;                               \
    prev_c_data = c0_data + bid * D;                               \
  } else {                                                         \
    COMPUTE_CtHt_PEEPHOLE_NOH0C0(xx_data, c_out_data, h_out_data); \
    MOVE_ONE_STEP;                                                 \
    tstart = 1;                                                    \
  }

#define PROCESS_H0C0                                      \
  PROCESS_H0C0_DEFINES;                                   \
  if (h0_data) {                                          \
    prev_h_data = h0_data + bid * D;                      \
    prev_c_data = c0_data + bid * D;                      \
  } else {                                                \
    COMPUTE_CtHt_NOH0C0(xx_data, c_out_data, h_out_data); \
    MOVE_ONE_STEP;                                        \
    tstart = 1;                                           \
  }

    if (use_peepholes) {
      for (int i = 0; i < N; ++i) {
        PROCESS_H0C0_PEEPHOLE
        for (int step = tstart; step < seq_len; ++step) {
          GEMM_WH_ADDON(1, prev_h_data, xx_data);
          COMPUTE_CtHt_PEEPHOLE(xx_data, prev_c_data, c_out_data, h_out_data);
          MOVE_ONE_STEP;
        }
      }
    } else {
      for (int i = 0; i < N; ++i) {
        PROCESS_H0C0
        for (int step = tstart; step < seq_len; ++step) {
          GEMM_WH_ADDON(1, prev_h_data, xx_data);
          COMPUTE_CtHt(xx_data, prev_c_data, c_out_data, h_out_data);
          MOVE_ONE_STEP;
        }
      }
    }
#undef PROCESS_H0C0_DEFINES
#undef PROCESS_H0C0_PEEPHOLE
#undef PROCESS_H0C0
#undef MOVE_ONE_STEP
  }

  // Runs the LSTM over a batch reordered with LoDTensor2BatchFunctor and
  // converts the batched results back with Batch2LoDTensorFunctor. When the
  // first LoD level has exactly two offsets, the work is delegated to
  // SeqCompute.
  void BatchCompute(const framework::ExecutionContext& ctx) const {
    using DeviceContext = platform::CPUDeviceContext;
    INIT_BASE_INPUT_OUTPUT
    if (ids->lod()[0].size() == 2) {
      SeqCompute(ctx);
      return;
    }
    INIT_BASE_SIZES
    INIT_VEC_FUNC
    INIT_BASE_INPUT_DATAS

    auto* reordered_h0 = ctx.Output<Tensor>("ReorderedH0");
    auto* reordered_c0 = ctx.Output<Tensor>("ReorderedC0");
    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
    // XX receives one embedding row per id and is then reordered into
    // BatchedInput (T x 4D), so it has to be (T x 4D) as well.
    xx->Resize({ids_dims[0], D4});
    T* xx_data = xx->mutable_data<T>(place);
    T* batched_input_data = batched_input->mutable_data<T>(place);
    T* batched_c_out_data = batched_c_out->mutable_data<T>(place);
    T* batched_h_out_data = batched_h_out->mutable_data<T>(place);
    hidden_out->mutable_data<T>(place);
    cell_out->mutable_data<T>(place);

    math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);

    LookupEmbeddings(ids_data, ids_numel, embeddings_data, row_number,
                     row_width, xx_data);

    to_batch(dev_ctx, *xx, batched_input, true, is_reverse);

    auto batched_lod = batched_input->lod();
    const auto& seq_order = batched_lod[2];
    const int max_bs = seq_order.size();
    reordered_h0->Resize({max_bs, D});
    reordered_c0->Resize({max_bs, D});

    int tstart = 0;
    T* prev_h_data = nullptr;
    T* prev_c_data = nullptr;
    if (h0) {
      // reorder h0, c0
      T* reordered_h0_data = reordered_h0->mutable_data<T>(place);
      T* reordered_c0_data = reordered_c0->mutable_data<T>(place);
      const T* h0_data = h0->data<T>();
      const T* c0_data = c0->data<T>();
      prev_h_data = reordered_h0_data;
      prev_c_data = reordered_c0_data;
      size_t sz = sizeof(T) * D;
      for (int i = 0; i < max_bs; ++i) {
        std::memcpy(reordered_h0_data, h0_data + seq_order[i] * D, sz);
        std::memcpy(reordered_c0_data, c0_data + seq_order[i] * D, sz);
        reordered_h0_data += D;
        reordered_c0_data += D;
      }
    } else {
      // compute without h0, c0
      T* cur_in_data = batched_input_data;
      T* cur_h_out_data = batched_h_out_data;
      T* cur_c_out_data = batched_c_out_data;
      for (int i = 0; i < max_bs; ++i) {
        GET_Ct_NOH0C0(cur_in_data, cur_c_out_data);
        if (use_peepholes) {
          blas.VMUL(D, wc_data + D2, cur_c_out_data, cur_in_data + D);
          blas.VADD(D, cur_in_data + D, cur_in_data + D3, cur_in_data + D3);
        }
        act_gate(D, cur_in_data + D3, cur_in_data + D3);
        GET_Ht(cur_c_out_data, cur_in_data, cur_h_out_data);
        cur_in_data += D4;
        cur_c_out_data += D;
        cur_h_out_data += D;
      }
      tstart = 1;
      prev_h_data = batched_h_out_data;
      prev_c_data = batched_c_out_data;
    }

    const auto& batch_starts = batched_lod[0];
    const int max_seq_len = batch_starts.size() - 1;
    // Skip the first time step if it was already computed above.
    const int offset = tstart * max_bs * D;
    batched_input_data = batched_input_data + offset * 4;
    batched_h_out_data = batched_h_out_data + offset;
    batched_c_out_data = batched_c_out_data + offset;

#define DEFINE_CUR                        \
  T* cur_in_data = batched_input_data;    \
  T* cur_prev_c_data = prev_c_data;       \
  T* cur_c_out_data = batched_c_out_data; \
  T* cur_h_out_data = batched_h_out_data

#define MOVE_ONE_BATCH  \
  cur_in_data += D4;    \
  cur_prev_c_data += D; \
  cur_c_out_data += D;  \
  cur_h_out_data += D

#define MOVE_ONE_STEP                  \
  prev_c_data = batched_c_out_data;    \
  prev_h_data = batched_h_out_data;    \
  batched_c_out_data = cur_c_out_data; \
  batched_h_out_data = cur_h_out_data; \
  batched_input_data = cur_in_data

    if (use_peepholes) {
      for (int step = tstart; step < max_seq_len; ++step) {
        const int cur_bs = batch_starts[step + 1] - batch_starts[step];
        GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data);
        DEFINE_CUR;
        for (int i = 0; i < cur_bs; ++i) {
          COMPUTE_CtHt_PEEPHOLE(cur_in_data, cur_prev_c_data, cur_c_out_data,
                                cur_h_out_data);
          MOVE_ONE_BATCH;
        }
        MOVE_ONE_STEP;
      }
    } else {
      for (int step = tstart; step < max_seq_len; ++step) {
        const int cur_bs = batch_starts[step + 1] - batch_starts[step];
        GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data);
        DEFINE_CUR;
        for (int i = 0; i < cur_bs; ++i) {
          COMPUTE_CtHt(cur_in_data, cur_prev_c_data, cur_c_out_data,
                       cur_h_out_data);
          MOVE_ONE_BATCH;
        }
        MOVE_ONE_STEP;
      }
    }
#undef MOVE_ONE_STEP
#undef MOVE_ONE_BATCH
#undef DEFINE_CUR

    math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
    batched_h_out->set_lod(batched_lod);
    to_seq(dev_ctx, *batched_h_out, hidden_out);
    batched_c_out->set_lod(batched_lod);
    to_seq(dev_ctx, *batched_c_out, cell_out);
  }

  // Entry point: dispatches on the `use_seq` attribute.
  void Compute(const framework::ExecutionContext& ctx) const override {
    if (ctx.Attr<bool>("use_seq")) {
      SeqCompute(ctx);
    } else {
      BatchCompute(ctx);
    }
  }

 private:
  // Copies, for each of the `ids_numel` ids, row ids_data[i] of the embedding
  // table (row_number rows of row_width elements) into row i of xx_data.
  // Every id is checked to lie in [0, row_number) before its row is read.
  static void LookupEmbeddings(const int64_t* ids_data, int64_t ids_numel,
                               const T* embeddings_data, int64_t row_number,
                               int64_t row_width, T* xx_data) {
    for (int64_t i = 0; i < ids_numel; ++i) {
      PADDLE_ENFORCE_LT(ids_data[i], row_number);
      PADDLE_ENFORCE_GE(ids_data[i], 0, "ids %d", i);
      std::memcpy(xx_data + i * row_width,
                  embeddings_data + ids_data[i] * row_width,
                  row_width * sizeof(T));
    }
  }

#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt
#undef GET_Ct_NOH0C0
#undef COMPUTE_CtHt_NOH0C0
#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0
#undef GET_Ht
#undef GET_Ct
#undef GEMM_WH_ADDON
#undef INIT_BASE_INPUT_DATAS
#undef INIT_BASE_SIZES
#undef INIT_BASE_INPUT_OUTPUT
#undef INIT_VEC_FUNC
};
}
// namespace operators
}
// namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;

// Registers the operator together with its proto maker and the default
// grad-op-desc maker.
REGISTER_OPERATOR(fused_embedding_fc_lstm, ops::FusedEmbeddingFCLSTMOp,
                  ops::FusedEmbeddingFCLSTMOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);

// Registers the CPU kernels for float and double.
REGISTER_OP_CPU_KERNEL(fused_embedding_fc_lstm,
                       ops::FusedEmbeddingFCLSTMKernel<float>,
                       ops::FusedEmbeddingFCLSTMKernel<double>);
paddle/fluid/operators/
concurrency/channel_util
.h
→
paddle/fluid/operators/
fused_embedding_fc_lstm_op
.h
浏览文件 @
91756a5a
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -13,26 +13,29 @@ See the License for the specific language governing permissions and
...
@@ -13,26 +13,29 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/variable.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
concurrency
{
void
ChannelSend
(
framework
::
ChannelHolder
*
ch
,
framework
::
Variable
*
var
);
using
LoDTensor
=
framework
::
LoDTensor
;
bool
ChannelReceive
(
framework
::
ChannelHolder
*
ch
,
framework
::
Variable
*
var
);
using
Tensor
=
framework
::
Tensor
;
class
FusedEmbeddingFCLSTMOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
;
};
void
ChannelAddToSendQ
(
framework
::
ChannelHolder
*
ch
,
const
void
*
referrer
,
class
FusedEmbeddingFCLSTMOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
framework
::
Variable
*
var
,
public:
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
void
Make
()
override
;
std
::
function
<
bool
(
framework
::
ChannelAction
)
>
cb
);
};
void
ChannelAddToReceiveQ
(
framework
::
ChannelHolder
*
ch
,
const
void
*
referrer
,
framework
::
Variable
*
var
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
,
std
::
function
<
bool
(
framework
::
ChannelAction
)
>
cb
);
}
// namespace concurrency
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/fusion_gru_op.cc
浏览文件 @
91756a5a
...
@@ -290,12 +290,13 @@ class FusionGRUKernel : public framework::OpKernel<T> {
...
@@ -290,12 +290,13 @@ class FusionGRUKernel : public framework::OpKernel<T> {
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
paddle
::
platform
::
CPUDeviceContext
;
using
DeviceContext
=
paddle
::
platform
::
CPUDeviceContext
;
auto
*
x
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
INIT_BASE_INPUT_OUTPUT
INIT_BASE_SIZES
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
xx
->
Resize
({
total_T
,
D3
});
SeqCompute
(
ctx
);
SeqCompute
(
ctx
);
return
;
return
;
}
}
INIT_BASE_INPUT_OUTPUT
INIT_BASE_SIZES
INIT_VEC_FUNC
INIT_VEC_FUNC
auto
*
reordered_h0
=
ctx
.
Output
<
Tensor
>
(
"ReorderedH0"
);
auto
*
reordered_h0
=
ctx
.
Output
<
Tensor
>
(
"ReorderedH0"
);
...
...
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
91756a5a
...
@@ -432,11 +432,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -432,11 +432,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
BatchCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
using
DeviceContext
=
platform
::
CPUDeviceContext
;
using
DeviceContext
=
platform
::
CPUDeviceContext
;
INIT_BASE_INPUT_OUTPUT
INIT_BASE_INPUT_OUTPUT
INIT_BASE_SIZES
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
if
(
x
->
lod
()[
0
].
size
()
==
2
)
{
xx
->
Resize
({
x_dims
[
0
],
D4
});
SeqCompute
(
ctx
);
SeqCompute
(
ctx
);
return
;
return
;
}
}
INIT_BASE_SIZES
INIT_VEC_FUNC
INIT_VEC_FUNC
INIT_BASE_INPUT_DATAS
INIT_BASE_INPUT_DATAS
...
...
paddle/fluid/operators/math/cpu_lstm_compute.cc
浏览文件 @
91756a5a
...
@@ -13,6 +13,31 @@ limitations under the License. */
...
@@ -13,6 +13,31 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{}
// namespace math
namespace
math
{
#ifdef __AVX__
// AVX specialization of one LSTM cell update for float.
//
// Reads four 8-float groups from `gates` (offsets 0, 8, 16 and 24; order
// W_ch, W_ih, W_fh, W_oh) and 8 floats from `ct_1`, then writes 8 floats to
// `ct` and 8 floats to `ht`:
//   C_t = C_t-1 * sigmoid(f) + tanh(c) * sigmoid(i)
//   H_t = tanh(C_t) * sigmoid(o)
template <>
void lstm_compute_ctht<float>(float* gates, const float* ct_1, float* ct,
                              float* ht) {
  namespace act = detail::forward::avx;

  // All gate groups are loaded up front, before anything is stored.
  const __m256 cand = _mm256_loadu_ps(gates);
  const __m256 in_gate = _mm256_loadu_ps(gates + 8);
  const __m256 forget_gate = _mm256_loadu_ps(gates + 16);
  const __m256 out_gate = _mm256_loadu_ps(gates + 24);

  // C_t = C_t-1 * fgated + cand_gated * igated
  const __m256 cand_gated =
      _mm256_mul_ps(act::Tanh(cand), act::Sigmoid(in_gate));
  const __m256 prev_cell = _mm256_loadu_ps(ct_1);
  const __m256 kept_cell = _mm256_mul_ps(prev_cell, act::Sigmoid(forget_gate));
  const __m256 cell = _mm256_add_ps(cand_gated, kept_cell);
  _mm256_storeu_ps(ct, cell);

  // H_t = act_cell(C_t) * ogated
  const __m256 hidden = _mm256_mul_ps(act::Tanh(cell), act::Sigmoid(out_gate));
  _mm256_storeu_ps(ht, hidden);
}
#endif
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/math/cpu_lstm_compute.h
浏览文件 @
91756a5a
...
@@ -48,32 +48,15 @@ namespace forward {
...
@@ -48,32 +48,15 @@ namespace forward {
namespace
avx
{
namespace
avx
{
__m256
Sigmoid
(
const
__m256
a
);
__m256
Sigmoid
(
const
__m256
a
);
__m256
Tanh
(
const
__m256
a
);
__m256
Tanh
(
const
__m256
a
);
}
// namespace avx
}
// namespace avx
}
// namespace forward
}
// namespace forward
}
// namespace detail
}
// namespace detail
template
<
>
template
<
>
void
lstm_compute_ctht
<
float
>
(
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
void
lstm_compute_ctht
<
float
>
(
float
*
gates
,
const
float
*
ct_1
,
float
*
ct
,
float
*
ht
)
{
float
*
ht
);
namespace
act
=
detail
::
forward
::
avx
;
// gates: W_ch, W_ih, W_fh, W_oh
__m256
c
,
i
,
f
,
o
;
c
=
_mm256_loadu_ps
(
gates
);
i
=
_mm256_loadu_ps
(
gates
+
8
);
f
=
_mm256_loadu_ps
(
gates
+
16
);
o
=
_mm256_loadu_ps
(
gates
+
24
);
/* C_t = C_t-1 * fgated + cand_gated * igated*/
c
=
_mm256_mul_ps
(
act
::
Tanh
(
c
),
act
::
Sigmoid
(
i
));
i
=
_mm256_loadu_ps
(
ct_1
);
f
=
_mm256_mul_ps
(
i
,
act
::
Sigmoid
(
f
));
f
=
_mm256_add_ps
(
c
,
f
);
_mm256_storeu_ps
(
ct
,
f
);
/* H_t = act_cell(C_t) * ogated */
o
=
_mm256_mul_ps
(
act
::
Tanh
(
f
),
act
::
Sigmoid
(
o
));
_mm256_storeu_ps
(
ht
,
o
);
}
#endif
#endif
}
// namespace math
}
// namespace math
...
...
paddle/fluid/operators/math/depthwise_conv.cu
浏览文件 @
91756a5a
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <algorithm>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
...
@@ -20,149 +21,268 @@ namespace paddle {
...
@@ -20,149 +21,268 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
// Adds `val` to the values shuffled down from lanes at offsets 16, 8, 4, 2
// and 1, and returns the accumulated value. Uses __shfl_down before CUDA 9
// and __shfl_down_sync with a full mask from CUDA 9 on.
template <typename T>
__inline__ __device__ T warpReduceSum(T val) {
#if CUDA_VERSION >= 9000
#define FULL_MASK 0xffffffff
#endif
  int offset = 16;
  while (offset > 0) {
#if CUDA_VERSION < 9000
    val += __shfl_down(val, offset);
#else
    val += __shfl_down_sync(FULL_MASK, val, offset);
#endif
    offset /= 2;
  }
  return val;
}
// Returns the value of the PTX special register %laneid for the calling
// thread.
__forceinline__ __device__ unsigned lane_id() {
  unsigned lane;
  asm volatile("mov.u32 %0, %laneid;" : "=r"(lane));
  return lane;
}
// Returns the value of the PTX special register %warpid for the calling
// thread.
__forceinline__ __device__ unsigned warp_id() {
  unsigned warp;
  asm volatile("mov.u32 %0, %warpid;" : "=r"(warp));
  return warp;
}
// A Cuda kernel to compute the depthwise convolution forward pass
// A Cuda kernel to compute the depthwise convolution forward pass
// in NCHW format.
// in NCHW format.
template
<
typename
T
>
template
<
typename
T
>
__
global
__
void
KernelDepthwiseConv
(
__
device__
__inline
__
void
KernelDepthwiseConv
(
const
int
nthreads
,
const
T
*
const
input_data
,
const
T
*
const
filter_data
,
const
T
*
const
input_data
,
const
T
*
const
filter_data
,
const
int
batch_size
,
const
int
batch_size
,
const
int
output_channels
,
const
int
output_height
,
const
int
output_channels
,
const
int
output_height
,
const
int
output_width
,
const
int
output_width
,
const
int
input_channels
,
const
int
input_height
,
const
int
input_channels
,
const
int
input_height
,
const
int
input_width
,
const
int
input_width
,
const
int
filter_multiplier
,
const
int
filter_height
,
const
int
filter_multiplier
,
const
int
filter_height
,
const
int
filter_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
filter_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
T
*
const
output_data
)
{
const
int
padding_height
,
const
int
padding_width
,
const
int
dilate_height
,
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
const
int
dilate_width
,
T
*
const
output_data
)
{
for
(
int
w_out
=
threadIdx
.
x
;
w_out
<
output_width
;
w_out
+=
blockDim
.
x
)
{
if
(
index
<
nthreads
)
{
for
(
int
h_out
=
threadIdx
.
y
;
h_out
<
output_height
;
h_out
+=
blockDim
.
y
)
{
const
int
batch
=
index
/
output_channels
/
output_height
/
output_width
;
const
int
batch
=
blockIdx
.
y
;
const
int
c_out
=
(
index
/
output_height
/
output_width
)
%
output_channels
;
const
int
c_out
=
blockIdx
.
x
;
const
int
h_out
=
(
index
/
output_width
)
%
output_height
;
const
int
w_out
=
index
%
output_width
;
const
int
c_in
=
c_out
/
filter_multiplier
;
const
T
*
weight
=
filter_data
+
c_out
*
filter_height
*
filter_width
;
const
int
c_in
=
c_out
/
filter_multiplier
;
T
value
=
0
;
const
T
*
weight
=
filter_data
+
c_out
*
filter_height
*
filter_width
;
const
int
h_in_start
=
-
padding_height
+
h_out
*
stride_height
;
T
value
=
0
;
const
int
w_in_start
=
-
padding_width
+
w_out
*
stride_width
;
const
int
h_in_start
=
-
padding_height
+
h_out
*
stride_height
;
const
int
h_in_end
=
h_in_start
+
filter_height
*
dilate_height
;
const
int
w_in_start
=
-
padding_width
+
w_out
*
stride_width
;
const
int
w_in_end
=
w_in_start
+
filter_width
*
dilate_width
;
const
int
h_in_end
=
h_in_start
+
filter_height
;
const
int
w_in_end
=
w_in_start
+
filter_width
;
const
int
in_offset
=
((
batch
*
input_channels
+
c_in
)
*
input_height
)
*
input_width
;
const
int
in_offset
=
((
batch
*
input_channels
+
c_in
)
*
input_height
)
*
input_width
;
const
int
h_end
=
h_in_end
<
input_height
?
h_in_end
:
input_height
;
const
int
w_end
=
w_in_end
<
input_width
?
w_in_end
:
input_width
;
const
int
h_end
=
h_in_end
<
input_height
?
h_in_end
:
input_height
;
const
int
h_start
=
h_in_start
>
0
?
h_in_start
:
0
;
const
int
w_end
=
w_in_end
<
input_width
?
w_in_end
:
input_width
;
const
int
w_start
=
w_in_start
>
0
?
w_in_start
:
0
;
const
int
h_start
=
h_in_start
>
0
?
h_in_start
:
0
;
int
weight_offset
=
0
;
const
int
w_start
=
w_in_start
>
0
?
w_in_start
:
0
;
for
(
int
h_in
=
h_in_start
;
h_in
<
h_in_end
;
h_in
+=
dilate_height
)
{
for
(
int
h_in
=
h_start
;
h_in
<
h_end
;
h_in
++
)
{
for
(
int
w_in
=
w_in_start
;
w_in
<
w_in_end
;
w_in
+=
dilate_width
)
{
for
(
int
w_in
=
w_start
;
w_in
<
w_end
;
w_in
++
)
{
if
(
h_in
>=
h_start
&&
h_in
<
h_end
&&
w_in
>=
w_start
&&
const
int
offset
=
in_offset
+
h_in
*
input_width
+
w_in
;
w_in
<
w_end
)
{
value
+=
const
int
offset
=
in_offset
+
h_in
*
input_width
+
w_in
;
weight
[(
h_in
-
h_in_start
)
*
filter_width
+
(
w_in
-
w_in_start
)]
*
value
+=
weight
[
weight_offset
]
*
input_data
[
offset
];
input_data
[
offset
];
}
weight_offset
++
;
}
}
}
int
index
=
((
batch
*
gridDim
.
x
+
c_out
)
*
output_height
+
h_out
)
*
output_width
+
w_out
;
output_data
[
index
]
=
value
;
}
}
output_data
[
index
]
=
value
;
}
}
}
}
// Launchable (__global__) wrapper around the KernelDepthwiseConv device
// routine that allows the filter multiplier and the stride to be fixed at
// compile time.
//
// Template parameters:
//   c_filter_multiplier  0 selects the generic path, which forwards the
//                        runtime filter_multiplier, stride_height and
//                        stride_width unchanged. Any other value is forwarded
//                        in place of the runtime filter_multiplier.
//   c_stride             Forwarded as both stride_height and stride_width
//                        whenever c_filter_multiplier != 0.
//
// Every other argument is passed through untouched; the convolution result is
// written to output_data by KernelDepthwiseConv.
// NOTE(review): the specialized path ignores the runtime filter_multiplier and
// strides, so callers are expected to pick a non-zero specialization only when
// the runtime values equal the template constants -- confirm at launch sites.
template <typename T, int c_filter_multiplier, int c_stride>
__global__ void KernelDepthwiseConvSp(
    const T* const input_data, const T* const filter_data,
    const int batch_size, const int output_channels, const int output_height,
    const int output_width, const int input_channels, const int input_height,
    const int input_width, const int filter_multiplier,
    const int filter_height, const int filter_width, const int stride_height,
    const int stride_width, const int padding_height, const int padding_width,
    const int dilate_height, const int dilate_width, T* const output_data) {
  if (c_filter_multiplier == 0) {
    KernelDepthwiseConv<T>(input_data, filter_data, batch_size,
                           output_channels, output_height, output_width,
                           input_channels, input_height, input_width,
                           filter_multiplier, filter_height, filter_width,
                           stride_height, stride_width, padding_height,
                           padding_width, dilate_height, dilate_width,
                           output_data);
  } else {
    // Pass filter_height and filter_width as distinct arguments so that
    // non-square filters are sized correctly on the specialized path too.
    KernelDepthwiseConv<T>(input_data, filter_data, batch_size,
                           output_channels, output_height, output_width,
                           input_channels, input_height, input_width,
                           c_filter_multiplier, filter_height, filter_width,
                           c_stride, c_stride, padding_height, padding_width,
                           dilate_height, dilate_width, output_data);
  }
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelDepthwiseConvInputGrad
(
__device__
__inline__
void
KernelDepthwiseConvInputGrad
(
const
int
nthreads
,
const
T
*
const
output_grad_data
,
const
T
*
const
output_grad_data
,
const
T
*
const
filter_data
,
const
T
*
const
filter_data
,
const
int
batch_size
,
const
int
output_channels
,
const
int
batch_size
,
const
int
output_channels
,
const
int
output_height
,
const
int
output_height
,
const
int
output_width
,
const
int
input_channels
,
const
int
output_width
,
const
int
input_channels
,
const
int
input_height
,
const
int
input_height
,
const
int
input_width
,
const
int
filter_multiplier
,
const
int
input_width
,
const
int
filter_multiplier
,
const
int
filter_height
,
const
int
filter_height
,
const
int
filter_width
,
const
int
stride_height
,
const
int
filter_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
dilate_height
,
T
*
const
input_grad_data
)
{
const
int
dilate_width
,
T
*
const
input_grad_data
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
int
w_in
=
threadIdx
.
x
;
w_in
<
input_width
;
w_in
+=
blockDim
.
x
)
{
if
(
index
<
nthreads
)
{
for
(
int
h_in
=
threadIdx
.
y
;
h_in
<
input_height
;
h_in
+=
blockDim
.
y
)
{
const
int
batch
=
index
/
input_channels
/
input_height
/
input_width
;
const
int
batch
=
blockIdx
.
y
;
const
int
c_in
=
(
index
/
input_height
/
input_width
)
%
input_channels
;
const
int
c_in
=
blockIdx
.
x
;
const
int
h_in
=
(
index
/
input_width
)
%
input_height
;
const
int
w_in
=
index
%
input_width
;
const
int
c_out_start
=
c_in
*
filter_multiplier
;
const
int
c_out_start
=
c_in
*
filter_multiplier
;
int
h_out_start
=
h_in
-
(
filter_height
-
1
)
*
dilate_height
+
padding_height
;
int
h_out_start
=
(
h_in
-
filter_height
+
padding_height
+
stride_height
)
/
stride_height
;
int
h_out_end
=
h_in
+
padding_height
;
h_out_start
=
0
>
h_out_start
?
0
:
h_out_start
;
int
w_out_start
=
int
h_out_end
=
(
h_in
+
padding_height
)
/
stride_height
;
w_in
-
(
filter_width
-
1
)
*
dilate_width
+
padding_width
;
h_out_end
=
output_height
-
1
<
h_out_end
?
output_height
-
1
:
h_out_end
;
int
w_out_end
=
w_in
+
padding_width
;
int
w_out_start
=
(
w_in
-
filter_width
+
padding_width
+
stride_width
)
/
stride_width
;
T
value
=
0
;
w_out_start
=
0
>
w_out_start
?
0
:
w_out_start
;
for
(
int
c_out
=
c_out_start
;
c_out
<
c_out_start
+
filter_multiplier
;
int
w_out_end
=
(
w_in
+
padding_width
)
/
stride_width
;
c_out
++
)
{
w_out_end
=
output_width
-
1
<
w_out_end
?
output_width
-
1
:
w_out_end
;
int
filter_offset
=
(
c_out
+
1
)
*
filter_height
*
filter_width
;
for
(
int
h_out
=
h_out_start
;
h_out
<=
h_out_end
;
T
value
=
0
;
h_out
+=
dilate_height
)
{
for
(
int
w_out
=
w_out_start
;
w_out
<=
w_out_end
;
for
(
int
c_out
=
c_out_start
;
c_out
<
c_out_start
+
filter_multiplier
;
w_out
+=
dilate_width
)
{
c_out
++
)
{
filter_offset
--
;
for
(
int
h_out
=
h_out_start
;
h_out
<=
h_out_end
;
++
h_out
)
{
int
s_h_out
=
h_out
/
stride_height
;
const
int
filter_h
=
h_in
+
padding_height
-
h_out
*
stride_height
;
int
s_w_out
=
w_out
/
stride_width
;
for
(
int
w_out
=
w_out_start
;
w_out
<=
w_out_end
;
++
w_out
)
{
if
(
h_out
%
stride_height
==
0
&&
w_out
%
stride_width
==
0
&&
const
int
filter_w
=
w_in
+
padding_width
-
w_out
*
stride_width
;
s_h_out
>=
0
&&
s_h_out
<
output_height
&&
s_w_out
>=
0
&&
const
int
filter_offset
=
c_out
*
filter_height
*
filter_width
+
s_w_out
<
output_width
)
{
filter_h
*
filter_width
+
filter_w
;
const
int
output_grad_offset
=
const
int
output_grad_offset
=
((
batch
*
output_channels
+
c_out
)
*
output_height
+
((
batch
*
output_channels
+
c_out
)
*
output_height
+
h_out
)
*
s_h_out
)
*
output_width
+
output_width
+
w_out
;
s_w_out
;
value
+=
value
+=
output_grad_data
[
output_grad_offset
]
*
output_grad_data
[
output_grad_offset
]
*
filter_data
[
filter_offset
];
filter_data
[
filter_offset
];
}
}
}
}
}
}
int
index
=
((
batch
*
gridDim
.
x
+
c_in
)
*
input_height
+
h_in
)
*
input_width
+
w_in
;
input_grad_data
[
index
]
=
value
;
}
}
input_grad_data
[
index
]
+=
value
;
}
}
}
}
// Launchable (__global__) wrapper around the KernelDepthwiseConvInputGrad
// device routine, optionally specialized on the filter multiplier and stride.
//
// When c_filter_multiplier is 0 the runtime filter_multiplier, stride_height
// and stride_width are forwarded as given. Otherwise c_filter_multiplier
// replaces the runtime multiplier and c_stride is used for both stride
// arguments. All other arguments are forwarded unchanged; the gradient is
// written to input_grad_data by the device routine.
template <typename T, int c_filter_multiplier, int c_stride>
__global__ void KernelDepthwiseConvInputGradSp(
    const T* const output_grad_data, const T* const filter_data,
    const int batch_size, const int output_channels, const int output_height,
    const int output_width, const int input_channels, const int input_height,
    const int input_width, const int filter_multiplier,
    const int filter_height, const int filter_width, const int stride_height,
    const int stride_width, const int padding_height, const int padding_width,
    const int dilate_height, const int dilate_width,
    T* const input_grad_data) {
  // The selector is a template constant, so each instantiation resolves the
  // three choices below at compile time.
  const bool generic = (c_filter_multiplier == 0);
  const int multiplier = generic ? filter_multiplier : c_filter_multiplier;
  const int step_h = generic ? stride_height : c_stride;
  const int step_w = generic ? stride_width : c_stride;

  KernelDepthwiseConvInputGrad<T>(
      output_grad_data, filter_data, batch_size, output_channels,
      output_height, output_width, input_channels, input_height, input_width,
      multiplier, filter_height, filter_width, step_h, step_w, padding_height,
      padding_width, dilate_height, dilate_width, input_grad_data);
}
// Cuda kernel to compute the depthwise convolution backprop w.r.t. filter.
// Cuda kernel to compute the depthwise convolution backprop w.r.t. filter.
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelDepthwiseConvFilterGrad
(
__device__
__inline__
void
KernelDepthwiseConvFilterGrad
(
const
int
nthreads
,
const
T
*
const
output_grad_data
,
const
T
*
output_grad_data
,
const
T
*
input_data
,
const
int
num
,
const
T
*
const
input_data
,
const
int
num
,
const
int
output_channels
,
const
int
output_channels
,
const
int
output_height
,
const
int
output_width
,
const
int
output_height
,
const
int
output_width
,
const
int
input_channels
,
const
int
input_channels
,
const
int
input_height
,
const
int
input_width
,
const
int
input_height
,
const
int
input_width
,
const
int
filter_multiplier
,
const
int
filter_multiplier
,
const
int
filter_height
,
const
int
filter_height
,
const
int
filter_width
,
const
int
stride_height
,
const
int
filter_width
,
const
int
stride_height
,
const
int
stride_width
,
const
int
stride_width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
padding_height
,
const
int
padding_width
,
const
int
dilate_height
,
T
*
const
filter_grad_data
)
{
const
int
dilate_width
,
T
*
filter_grad_data
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
T
s
=
0
;
if
(
index
<
nthreads
)
{
const
int
w_out
=
index
%
output_width
;
int
gbid
=
((
blockIdx
.
z
*
gridDim
.
y
)
+
blockIdx
.
y
)
*
gridDim
.
x
+
blockIdx
.
x
;
const
int
h_out
=
(
index
/
output_width
)
%
output_height
;
int
lid
=
lane_id
();
const
int
c_out
=
(
index
/
output_width
/
output_height
)
%
output_channels
;
const
int
batch
=
(
index
/
output_width
/
output_height
/
output_channels
);
for
(
int
image_w
=
threadIdx
.
x
;
image_w
<
output_width
;
const
int
c_in
=
c_out
/
filter_multiplier
;
image_w
+=
blockDim
.
x
)
{
const
int
h_in_start
=
-
padding_height
+
h_out
*
stride_height
;
for
(
int
bid
=
0
;
bid
<
num
;
bid
++
)
{
const
int
w_in_start
=
-
padding_width
+
w_out
*
stride_width
;
for
(
int
image_h
=
threadIdx
.
y
;
image_h
<
output_height
;
const
int
h_in_end
=
image_h
+=
blockDim
.
y
)
{
-
padding_height
+
h_out
*
stride_height
+
filter_height
;
int
kernel_id
=
blockIdx
.
z
;
const
int
w_in_end
=
-
padding_width
+
w_out
*
stride_width
+
filter_width
;
int
kernel_h
=
blockIdx
.
y
*
dilate_height
-
padding_height
;
const
int
in_offset
=
int
kernel_w
=
blockIdx
.
x
*
dilate_width
-
padding_width
;
(
batch
*
input_channels
+
c_in
)
*
input_height
*
input_width
;
int
image_hk
=
image_h
*
stride_height
+
kernel_h
;
T
*
addr_offset
=
filter_grad_data
+
c_out
*
filter_height
*
filter_width
;
int
image_wk
=
image_w
*
stride_width
+
kernel_w
;
const
int
h_end
=
h_in_end
<
input_height
?
h_in_end
:
input_height
;
if
(
image_hk
<
0
||
image_hk
>=
input_height
)
continue
;
const
int
w_end
=
w_in_end
<
input_width
?
w_in_end
:
input_width
;
if
(
image_wk
<
0
||
image_wk
>=
input_width
)
continue
;
const
int
h_start
=
h_in_start
>
0
?
h_in_start
:
0
;
#define gaid(N, C, H, W) \
const
int
w_start
=
w_in_start
>
0
?
w_in_start
:
0
;
((((N)*gridDim.z + (C)) * output_height + (H)) * output_width + (W))
for
(
int
h_in
=
h_start
;
h_in
<
h_end
;
h_in
++
)
{
s
+=
output_grad_data
[
gaid
(
bid
,
kernel_id
,
image_h
,
image_w
)]
*
for
(
int
w_in
=
w_start
;
w_in
<
w_end
;
w_in
++
)
{
input_data
[((
bid
*
(
gridDim
.
z
/
filter_multiplier
)
+
const
int
offset
=
in_offset
+
h_in
*
input_width
+
w_in
;
kernel_id
/
filter_multiplier
)
*
const
T
diff_temp
=
output_grad_data
[
index
]
*
input_data
[
offset
];
input_height
+
T
*
addr
=
addr_offset
+
(
h_in
-
h_in_start
)
*
filter_width
+
image_hk
)
*
(
w_in
-
w_in_start
);
input_width
+
paddle
::
platform
::
CudaAtomicAdd
(
addr
,
diff_temp
);
image_wk
];
#undef gaid
}
}
}
}
}
}
#if __CUDA_ARCH__ >= 530
s
=
warpReduceSum
<
T
>
(
s
);
if
(
lid
==
0
)
paddle
::
platform
::
CudaAtomicAdd
(
&
filter_grad_data
[
gbid
],
s
);
#else
paddle
::
platform
::
CudaAtomicAdd
(
&
filter_grad_data
[
gbid
],
s
);
#endif
}
// Launchable (__global__) wrapper around the KernelDepthwiseConvFilterGrad
// device routine, optionally specialized on the filter multiplier.
//
// When c_filter_multiplier is 0 the runtime filter_multiplier is forwarded;
// otherwise the template constant is forwarded in its place. Strides,
// paddings, dilations and all tensor pointers are passed through unchanged.
template <typename T, int c_filter_multiplier>
__global__ void KernelDepthwiseConvFilterGradSp(
    const T* output_grad_data, const T* input_data, const int num,
    const int output_channels, const int output_height,
    const int output_width, const int input_channels, const int input_height,
    const int input_width, const int filter_multiplier,
    const int filter_height, const int filter_width, const int stride_height,
    const int stride_width, const int padding_height, const int padding_width,
    const int dilate_height, const int dilate_width, T* filter_grad_data) {
  // Resolved at compile time per instantiation: the template constant wins
  // unless it is the 0 "generic" marker.
  const int multiplier =
      (c_filter_multiplier == 0) ? filter_multiplier : c_filter_multiplier;

  KernelDepthwiseConvFilterGrad<T>(
      output_grad_data, input_data, num, output_channels, output_height,
      output_width, input_channels, input_height, input_width, multiplier,
      filter_height, filter_width, stride_height, stride_width,
      padding_height, padding_width, dilate_height, dilate_width,
      filter_grad_data);
}
}
/*
/*
...
@@ -177,7 +297,9 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
...
@@ -177,7 +297,9 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
filter
,
const
framework
::
Tensor
&
filter
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
)
{
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
output
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_height
=
input
.
dims
()[
2
];
const
int
input_height
=
input
.
dims
()[
2
];
...
@@ -191,22 +313,37 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
...
@@ -191,22 +313,37 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T> {
const
int
stride_width
=
strides
[
1
];
const
int
stride_width
=
strides
[
1
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
int
dilate_height
=
dilations
[
0
];
const
int
dilate_width
=
dilations
[
1
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
filter_data
=
filter
.
data
<
T
>
();
const
T
*
filter_data
=
filter
.
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
thread
=
512
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
std
::
min
(
std
::
max
(
thread
/
output_width
,
1
),
output_height
);
dim3
threads
(
1024
,
1
);
dim3
threads
(
std
::
min
(
output_width
,
thread
),
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
dim3
grid
(
output_channels
,
batch_size
,
1
);
int
filter_multiplier
=
output_channels
/
input_channels
;
KernelDepthwiseConv
<
T
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
#define check_case(c_filter_multiplier, c_stride) \
nthreads
,
input_data
,
filter_data
,
batch_size
,
output_channels
,
if (c_filter_multiplier == 0 || \
output_height
,
output_width
,
input_channels
,
input_height
,
input_width
,
filter_multiplier == c_filter_multiplier && \
output_channels
/
input_channels
,
ksize_height
,
ksize_width
,
stride_height == stride_width && stride_height == c_stride) { \
stride_height
,
stride_width
,
padding_height
,
padding_width
,
KernelDepthwiseConvSp<T, c_filter_multiplier, \
output_data
);
c_stride><<<grid, threads, 0, context.stream()>>>( \
input_data, filter_data, batch_size, output_channels, output_height, \
output_width, input_channels, input_height, input_width, \
filter_multiplier, ksize_height, ksize_width, stride_height, \
stride_width, padding_height, padding_width, dilate_height, \
dilate_width, output_data); \
return; \
}
check_case
(
1
,
1
);
check_case
(
1
,
2
);
// NOTE(liangdun): 0,0 for other case
// add other case if needed, e.g. check_case(2^n,1)
check_case
(
0
,
0
);
#undef check_case
}
}
};
};
...
@@ -219,6 +356,7 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -219,6 +356,7 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, T> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
input_grad
)
{
framework
::
Tensor
*
input_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
...
@@ -233,22 +371,39 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -233,22 +371,39 @@ class DepthwiseConvInputGradFunctor<platform::CUDADeviceContext, T> {
const
int
stride_width
=
strides
[
1
];
const
int
stride_width
=
strides
[
1
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
int
dilate_height
=
dilations
[
0
];
const
int
dilate_width
=
dilations
[
1
];
const
T
*
filter_data
=
filter
.
data
<
T
>
();
const
T
*
filter_data
=
filter
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
input_channels
*
input_height
*
input_width
;
int
thread
=
512
;
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
int
blocks
=
std
::
min
(
std
::
max
(
thread
/
input_width
,
1
),
input_height
);
dim3
threads
(
1024
,
1
);
dim3
threads
(
std
::
min
(
input_width
,
thread
),
blocks
,
1
);
dim3
grid
(
blocks
,
1
);
dim3
grid
(
input_channels
,
batch_size
,
1
);
int
filter_multiplier
=
output_channels
/
input_channels
;
KernelDepthwiseConvInputGrad
<
T
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
filter_data
,
batch_size
,
output_channels
,
#define check_case(c_filter_multiplier, c_stride) \
output_height
,
output_width
,
input_channels
,
input_height
,
input_width
,
if (c_filter_multiplier == 0 || \
output_channels
/
input_channels
,
ksize_height
,
ksize_width
,
filter_multiplier == c_filter_multiplier && \
stride_height
,
stride_width
,
padding_height
,
padding_width
,
stride_height == stride_width && stride_height == c_stride) { \
input_grad_data
);
KernelDepthwiseConvInputGradSp< \
T, c_filter_multiplier, \
c_stride><<<grid, threads, 0, context.stream()>>>( \
output_grad_data, filter_data, batch_size, output_channels, \
output_height, output_width, input_channels, input_height, \
input_width, filter_multiplier, ksize_height, ksize_width, \
stride_height, stride_width, padding_height, padding_width, \
dilate_height, dilate_width, input_grad_data); \
return; \
}
check_case
(
1
,
1
);
check_case
(
1
,
2
);
// NOTE(liangdun): 0,0 for other case
// add other case if needed, e.g. check_case(2^n,1)
check_case
(
0
,
0
);
#undef check_case
}
}
};
};
...
@@ -260,6 +415,7 @@ class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -260,6 +415,7 @@ class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext, T> {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
filter_grad
)
{
framework
::
Tensor
*
filter_grad
)
{
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
batch_size
=
input
.
dims
()[
0
];
const
int
input_channels
=
input
.
dims
()[
1
];
const
int
input_channels
=
input
.
dims
()[
1
];
...
@@ -274,23 +430,34 @@ class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -274,23 +430,34 @@ class DepthwiseConvFilterGradFunctor<platform::CUDADeviceContext, T> {
const
int
stride_width
=
strides
[
1
];
const
int
stride_width
=
strides
[
1
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_height
=
paddings
[
0
];
const
int
padding_width
=
paddings
[
1
];
const
int
padding_width
=
paddings
[
1
];
const
int
dilate_height
=
dilations
[
0
];
const
int
dilate_width
=
dilations
[
1
];
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
input_data
=
input
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
const
T
*
output_grad_data
=
output_grad
.
data
<
T
>
();
T
*
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
nthreads
=
batch_size
*
output_channels
*
output_height
*
output_width
;
int
block_size
=
512
;
int
crop_output_height
=
int
blocks
=
(
nthreads
+
1024
-
1
)
/
1024
;
std
::
min
(
std
::
max
(
block_size
/
output_width
,
1
),
output_height
);
dim3
threads
(
1024
,
1
);
dim3
grid
(
ksize_width
,
ksize_height
,
output_channels
);
dim3
grid
(
blocks
,
1
);
dim3
threads
(
std
::
min
(
output_width
,
block_size
),
crop_output_height
,
1
);
int
filter_multiplier
=
output_channels
/
input_channels
;
KernelDepthwiseConvFilterGrad
<
T
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
nthreads
,
output_grad_data
,
input_data
,
batch_size
,
output_channels
,
#define check_case(c_filter_multiplier) \
output_height
,
output_width
,
input_channels
,
input_height
,
input_width
,
if (c_filter_multiplier == 0 || c_filter_multiplier == filter_multiplier) { \
output_channels
/
input_channels
,
ksize_height
,
ksize_width
,
KernelDepthwiseConvFilterGradSp< \
stride_height
,
stride_width
,
padding_height
,
padding_width
,
T, c_filter_multiplier><<<grid, threads, 0, context.stream()>>>( \
filter_grad_data
);
output_grad_data, input_data, batch_size, output_channels, \
output_height, output_width, input_channels, input_height, \
input_width, filter_multiplier, ksize_height, ksize_width, \
stride_height, stride_width, padding_height, padding_width, \
dilate_height, dilate_width, filter_grad_data); \
return; \
}
check_case
(
1
);
check_case
(
0
);
#undef check_case
}
}
};
};
...
...
paddle/fluid/operators/math/depthwise_conv.h
浏览文件 @
91756a5a
...
@@ -32,7 +32,8 @@ class DepthwiseConvFunctor {
...
@@ -32,7 +32,8 @@ class DepthwiseConvFunctor {
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
filter
,
const
framework
::
Tensor
&
filter
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
framework
::
Tensor
*
output
);
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
output
);
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
...
@@ -43,6 +44,7 @@ class DepthwiseConvInputGradFunctor {
...
@@ -43,6 +44,7 @@ class DepthwiseConvInputGradFunctor {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
input_grad
);
framework
::
Tensor
*
input_grad
);
};
};
...
@@ -53,6 +55,7 @@ class DepthwiseConvFilterGradFunctor {
...
@@ -53,6 +55,7 @@ class DepthwiseConvFilterGradFunctor {
const
framework
::
Tensor
&
output_grad
,
const
framework
::
Tensor
&
output_grad
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
framework
::
Tensor
*
filter_grad
);
framework
::
Tensor
*
filter_grad
);
};
};
...
...
paddle/fluid/operators/math/math_function.cc
浏览文件 @
91756a5a
...
@@ -13,6 +13,15 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#endif
#include <vector>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/math/math_function_impl.h"
#include "paddle/fluid/operators/math/math_function_impl.h"
...
...
paddle/fluid/operators/math/math_function.h
浏览文件 @
91756a5a
...
@@ -13,18 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,18 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
// remove typedef in openblas
#undef FLOAT
#undef INT
#undef SIZE
#endif
#include <cmath>
#include <cmath>
#include <vector>
#include <vector>
...
...
paddle/fluid/operators/reduce_mean_op.cu
浏览文件 @
91756a5a
...
@@ -12,17 +12,64 @@
...
@@ -12,17 +12,64 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <vector>
#include "paddle/fluid/operators/cub_reduce.h"
#include "paddle/fluid/operators/reduce_mean_op.h"
#include "paddle/fluid/operators/reduce_mean_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_mean
,
namespace
paddle
{
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
namespace
operators
{
float
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
template
<
typename
T
>
double
,
ops
::
MeanFunctor
>
,
struct
DivideFunctor
{
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
HOSTDEVICE
explicit
inline
DivideFunctor
(
int
n
)
:
n_inv
((
T
)(
1.0
/
n
))
{}
int
,
ops
::
MeanFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
*
n_inv
;
}
int64_t
,
ops
::
MeanFunctor
>
);
private:
T
n_inv
;
};
// CUDA op kernel for reduce_mean. It sums input "X" over the selected axes
// with TensorReduce / cub::Sum and applies DivideFunctor<T>(reduce_num) as the
// transform, where reduce_num is the product of the reduced extents.
// NOTE(review): DivideFunctor multiplies by (T)(1.0 / n); for integral T that
// factor truncates to 0 once n > 1 -- verify the int / int64_t registrations.
template <typename T>
class ReduceMeanKernel : public framework::OpKernel<T> {
 public:
  // Reads the "reduce_all", "dim" and "keep_dim" attributes, resolves the list
  // of axes to reduce, and runs the reduction into "Out" on the CUDA stream of
  // the execution context.
  void Compute(const framework::ExecutionContext& context) const override {
    bool reduce_all = context.Attr<bool>("reduce_all");
    auto* input = context.Input<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    auto dims = context.Attr<std::vector<int>>("dim");
    // Fetched for parity with the op's attributes; not consulted below.
    bool keep_dim = context.Attr<bool>("keep_dim");

    std::vector<int> reduce_dims;
    if (!reduce_all) {
      // Negative axes are offset by the input rank.
      for (auto axis : dims) {
        reduce_dims.push_back(axis < 0 ? axis + input->dims().size() : axis);
      }
    } else {
      // Reduce over every axis: 0, 1, ..., rank - 1.
      reduce_dims.resize(input->dims().size());
      for (int axis = 0; axis < reduce_dims.size(); ++axis) {
        reduce_dims[axis] = axis;
      }
    }

    // Number of elements folded into each output value.
    int reduce_num = 1;
    for (int axis : reduce_dims) {
      reduce_num *= input->dims()[axis];
    }

    auto stream = context.cuda_device_context().stream();
    const T init = static_cast<T>(0);
    TensorReduce<T, T, cub::Sum, DivideFunctor<T>>(
        *input, output, reduce_dims, init, cub::Sum(),
        DivideFunctor<T>(reduce_num), stream);
  }
};
}
// namespace operators
}
// namespace paddle
// Registers ReduceMeanKernel as the CUDA kernel of the reduce_mean op for the
// element types float, double, int and int64_t.
REGISTER_OP_CUDA_KERNEL
(
reduce_mean
,
ops
::
ReduceMeanKernel
<
float
>
,
ops
::
ReduceMeanKernel
<
double
>
,
ops
::
ReduceMeanKernel
<
int
>
,
ops
::
ReduceMeanKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
reduce_mean_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
reduce_mean_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
MeanGradFunctor
>
,
float
,
ops
::
MeanGradFunctor
>
,
...
...
paddle/fluid/operators/reduce_sum_op.cu
浏览文件 @
91756a5a
...
@@ -12,17 +12,59 @@
...
@@ -12,17 +12,59 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/operators/cub_reduce.h"
#include "paddle/fluid/operators/reduce_sum_op.h"
#include "paddle/fluid/operators/reduce_sum_op.h"
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
namespace
paddle
{
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
namespace
operators
{
float
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
template
<
typename
T
>
double
,
ops
::
SumFunctor
>
,
struct
IdentityFunctor
{
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
HOSTDEVICE
explicit
inline
IdentityFunctor
()
{}
int
,
ops
::
SumFunctor
>
,
ops
::
ReduceKernel
<
paddle
::
platform
::
CUDADeviceContext
,
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
;
}
int64_t
,
ops
::
SumFunctor
>
);
};
// CUDA kernel for `reduce_sum`.
//
// Sums input "X" over the requested axes with cub::Sum via TensorReduce; the
// per-element transform is IdentityFunctor<T>, i.e. values are summed as-is.
//
// Attributes read:
//   reduce_all - when true every axis of "X" is reduced and "dim" is ignored.
//   dim        - axes to reduce; a negative entry e is mapped to e + rank.
//
// The "keep_dim" attribute is not read here: nothing in this kernel depends
// on it (presumably the shape of "Out" is decided before Compute runs --
// TODO confirm against the op's shape inference).
template <typename T>
class ReduceSumKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const bool reduce_all = context.Attr<bool>("reduce_all");
    auto* input = context.Input<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    const auto& dims = context.Attr<std::vector<int>>("dim");

    // Number of axes of the input; also used to normalise negative axes.
    const int rank = input->dims().size();

    std::vector<int> reduce_dims;
    if (reduce_all) {
      reduce_dims.resize(rank);
      for (int i = 0; i < rank; ++i) reduce_dims[i] = i;
    } else {
      reduce_dims.reserve(dims.size());
      for (auto e : dims) {
        reduce_dims.push_back(e >= 0 ? e : e + rank);
      }
    }

    // Unlike the mean kernel, a plain sum needs no element count, so the
    // product of the reduced extents is deliberately not computed here.
    auto stream = context.cuda_device_context().stream();
    TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
        *input, output, reduce_dims, static_cast<T>(0), cub::Sum(),
        IdentityFunctor<T>(), stream);
  }
};
}
// namespace operators
}
// namespace paddle
// Registers ReduceSumKernel as the CUDA kernel of the `reduce_sum` op for
// float, double, int and int64_t.
REGISTER_OP_CUDA_KERNEL
(
reduce_sum
,
ops
::
ReduceSumKernel
<
float
>
,
ops
::
ReduceSumKernel
<
double
>
,
ops
::
ReduceSumKernel
<
int
>
,
ops
::
ReduceSumKernel
<
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
reduce_sum_grad
,
ops
::
ReduceGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
,
ops
::
SumGradFunctor
>
,
float
,
ops
::
SumGradFunctor
>
,
...
...
paddle/fluid/operators/select_op.cc
已删除
100644 → 0
浏览文件 @
c5292b18
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include <boost/tokenizer.hpp>
namespace
paddle
{
namespace
operators
{
// Names of the select op's input/output slots and attributes.

// Input slot holding the variables required by operators inside the cases.
static
constexpr
char
kX
[]
=
"X"
;
// Input slot naming the variable that receives the index of the chosen case.
static
constexpr
char
kCaseToExecute
[]
=
"case_to_execute"
;
// Output slot for variables assigned by operators inside the cases.
static
constexpr
char
kOutputs
[]
=
"Out"
;
// Attribute: serialized case list, one "<index>,<type>,<channel>,<value>"
// string per case.
static
constexpr
char
kCases
[]
=
"cases"
;
// Attribute: the block that contains the bodies of all cases.
static
constexpr
char
kCasesBlock
[]
=
"sub_block"
;
// Operator that selects one of several channel cases to execute.
//
// The "cases" attribute lists SEND / RECEIVE cases (each bound to a channel
// variable and a data variable) plus at most one DEFAULT case. RunImpl polls
// the cases in random order, performs the first send/receive that can proceed
// (or falls back to the default case), writes the index of the chosen case
// into the `case_to_execute` variable and then runs the cases sub-block.
// If no case is ready and there is no default, the calling thread enqueues
// itself on every involved channel and sleeps until a channel wakes it up.
class SelectOp : public framework::OperatorBase {
 public:
  SelectOp(const std::string &type, const framework::VariableNameMap &inputs,
           const framework::VariableNameMap &outputs,
           const framework::AttributeMap &attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {}

 private:
  // Numeric values match the <type> field of a serialized case.
  enum class SelectOpCaseType {
    DEFAULT = 0,
    SEND = 1,
    RECEIVE = 2,
  };

  // One parsed entry of the "cases" attribute.
  struct SelectOpCase {
    int caseIndex;              // index written to case_to_execute
    SelectOpCaseType caseType;  // DEFAULT, SEND or RECEIVE
    std::string channelName;    // channel variable; empty for DEFAULT
    std::string varName;        // data variable; empty for DEFAULT

    SelectOpCase() {}

    SelectOpCase(int caseIndex, SelectOpCaseType caseType,
                 std::string channelName, std::string varName)
        : caseIndex(caseIndex),
          caseType(caseType),
          channelName(channelName),
          varName(varName) {}
  };

  // Looks up the channel variable `channelName` in `scope` and returns its
  // ChannelHolder. Raises (PADDLE_ENFORCE) when the variable does not exist
  // instead of dereferencing a null pointer.
  static framework::ChannelHolder *GetChannel(const framework::Scope *scope,
                                              const std::string &channelName) {
    auto chVar = scope->FindVar(channelName);
    PADDLE_ENFORCE(chVar != nullptr, "Cannot find channel variable %s",
                   channelName);
    return chVar->GetMutable<framework::ChannelHolder>();
  }

  // Parses the cases, picks the case to run, stores its index in the
  // case_to_execute tensor and executes the cases block in a child scope.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &dev_place) const override {
    std::vector<std::string> casesConfigs =
        Attr<std::vector<std::string>>(kCases);

    framework::BlockDesc *casesBlock =
        Attr<framework::BlockDesc *>(kCasesBlock);

    // NOTE(review): the child scope created here is not explicitly released
    // anywhere in this operator -- confirm its lifetime is managed by the
    // parent scope.
    framework::Scope &casesBlockScope = scope.NewScope();

    std::string caseToExecuteVarName = Input(kCaseToExecute);
    framework::Variable *caseToExecuteVar =
        casesBlockScope.FindVar(caseToExecuteVarName);
    PADDLE_ENFORCE(caseToExecuteVar != nullptr,
                   "Cannot find case_to_execute variable %s",
                   caseToExecuteVarName);

    // Construct cases from "conditional_block_op"(s) in the casesBlock
    std::vector<std::shared_ptr<SelectOpCase>> cases =
        ParseAndShuffleCases(&casesConfigs);

    // Get all unique channels involved in select (the set de-duplicates).
    std::set<framework::ChannelHolder *> channelsSet;
    for (const auto &c : cases) {
      if (!c->channelName.empty()) {
        channelsSet.insert(GetChannel(&scope, c->channelName));
      }
    }

    // Order all channels by their pointer address so that every select
    // acquires the channel locks in the same order.
    std::vector<framework::ChannelHolder *> channels(channelsSet.begin(),
                                                     channelsSet.end());
    std::sort(channels.begin(), channels.end());

    // Poll all cases
    int32_t caseToExecute = pollCases(&scope, &cases, channels);

    // At this point, the case to execute has already been determined,
    // so we can proceed with executing the cases block
    framework::LoDTensor *caseToExecuteTensor =
        caseToExecuteVar->GetMutable<framework::LoDTensor>();
    caseToExecuteTensor->data<int32_t>()[0] = caseToExecute;

    // Execute the cases block, only one case will be executed since we set the
    // case_to_execute value to the index of the case we want to execute
    framework::Executor executor(dev_place);
    framework::ProgramDesc *program = casesBlock->Program();
    executor.Run(*program, &casesBlockScope, casesBlock->ID(),
                 false /*create_local_scope*/);
  }

  /**
   * Parses every serialized case ("<index>,<type>[,<channel>,<value>]") in
   * casesConfigs into a SelectOpCase. The non-default cases are returned in
   * random order; the default case (if any exists) is always the last one.
   * Raises when a field is missing, when the type is not 0, 1 or 2, or when
   * more than one default case is present.
   * @param casesConfigs serialized cases; may be nullptr (yields no cases)
   * @return the parsed cases
   */
  std::vector<std::shared_ptr<SelectOpCase>> ParseAndShuffleCases(
      std::vector<std::string> *casesConfigs) const {
    std::vector<std::shared_ptr<SelectOpCase>> cases;
    std::shared_ptr<SelectOpCase> defaultCase;

    if (casesConfigs != nullptr) {
      boost::char_delimiters_separator<char> sep(false, ",", "");
      for (const std::string &caseConfig : *casesConfigs) {
        boost::tokenizer<> tokens(caseConfig, sep);

        boost::tokenizer<>::iterator tok_iter = tokens.begin();
        PADDLE_ENFORCE(tok_iter != tokens.end(), "Cannot get case index");
        std::string caseIndexString = *tok_iter;
        int caseIndex = std::stoi(caseIndexString);

        ++tok_iter;
        PADDLE_ENFORCE(tok_iter != tokens.end(), "Cannot get case type");
        std::string caseTypeString = *tok_iter;
        const int caseTypeValue = std::stoi(caseTypeString);
        // Reject values outside the enum: such a case could never become
        // ready and would leave the select waiting forever.
        PADDLE_ENFORCE(
            caseTypeValue >= static_cast<int>(SelectOpCaseType::DEFAULT) &&
                caseTypeValue <= static_cast<int>(SelectOpCaseType::RECEIVE),
            "Unknown select case type");
        SelectOpCaseType caseType =
            static_cast<SelectOpCaseType>(caseTypeValue);

        std::string caseChannel;
        std::string caseChannelVar;

        ++tok_iter;
        if (caseType != SelectOpCaseType::DEFAULT) {
          PADDLE_ENFORCE(tok_iter != tokens.end(), "Cannot get case channel");
          caseChannel = *tok_iter;

          ++tok_iter;
          PADDLE_ENFORCE(tok_iter != tokens.end(),
                         "Cannot get case channel variable");
          caseChannelVar = *tok_iter;
        }

        auto c = std::make_shared<SelectOpCase>(caseIndex, caseType,
                                                caseChannel, caseChannelVar);

        if (caseType == SelectOpCaseType::DEFAULT) {
          PADDLE_ENFORCE(defaultCase == nullptr,
                         "Select can only contain one default case.");
          defaultCase = c;
        } else {
          cases.push_back(c);
        }
      }
    }

    // Randomly sort cases, with default case being last.
    // NOTE: std::random_shuffle is deprecated in C++14 and removed in C++17;
    // switch to std::shuffle when the toolchain moves past C++11.
    std::random_shuffle(cases.begin(), cases.end());
    if (defaultCase != nullptr) {
      cases.push_back(defaultCase);
    }

    return cases;
  }

  // Walks the cases in order and performs the first send/receive that can
  // proceed right now; a DEFAULT case is always eligible. Returns the index
  // of the chosen case, or -1 when no case is ready. The caller must hold the
  // locks of all involved channels.
  int executeFirstReadyCase(
      const framework::Scope *scope,
      const std::vector<std::shared_ptr<SelectOpCase>> &cases) const {
    int chosen = -1;
    for (const auto &c : cases) {
      switch (c->caseType) {
        case SelectOpCaseType::SEND: {
          framework::ChannelHolder *ch = GetChannel(scope, c->channelName);
          PADDLE_ENFORCE(!ch->IsClosed(), "Cannot send to a closed channel");
          if (ch->CanSend()) {
            // We can send to channel directly, send the data to channel
            // and execute case
            auto dataVar = scope->FindVar(c->varName);
            concurrency::ChannelSend(ch, dataVar);
            chosen = c->caseIndex;
          }
          break;
        }
        case SelectOpCaseType::RECEIVE: {
          framework::ChannelHolder *ch = GetChannel(scope, c->channelName);
          if (ch->CanReceive()) {
            // We can receive from channel directly, receive the data from
            // the channel and execute case
            auto dataVar = scope->FindVar(c->varName);
            concurrency::ChannelReceive(ch, dataVar);
            chosen = c->caseIndex;
          }
          break;
        }
        case SelectOpCaseType::DEFAULT:
          // Default cases have no channel (their channel name is empty), so
          // no channel lookup is attempted for them.
          chosen = c->caseIndex;
          break;
      }

      if (chosen != -1) {
        // We found a case to execute, stop looking at other case statements
        break;
      }
    }
    return chosen;
  }

  /**
   * This method will recursively poll the cases and determines if any case
   * condition is true.
   * If none of the cases conditions are true (and there is no default case),
   * then block the thread. The thread may be woken up by a channel operation,
   * at which point we execute the case.
   * @param scope    scope used to resolve channel and data variables
   * @param cases    parsed cases, in polling order
   * @param channels all involved channels, ordered by address
   * @return index of the case to execute
   */
  int32_t pollCases(const framework::Scope *scope,
                    std::vector<std::shared_ptr<SelectOpCase>> *cases,
                    std::vector<framework::ChannelHolder *> channels) const {
    // Lock all involved channels
    lockChannels(channels);

    std::atomic<int> caseToExecute(-1);
    try {
      caseToExecute = executeFirstReadyCase(scope, *cases);
    } catch (...) {
      // Do not leave the channels locked when an enforce fails.
      unlockChannels(channels);
      throw;
    }

    if (caseToExecute == -1) {
      // None of the cases are eligible to execute, enqueue current thread
      // into all the sending/receiving queue of each involved channel
      std::atomic<bool> completed(false);
      std::recursive_mutex mutex;
      std::unique_lock<std::recursive_mutex> lock{mutex};
      // std::condition_variable_any selectCond;
      auto selectCond = std::make_shared<std::condition_variable_any>();

      std::recursive_mutex callbackMutex;
      pushThreadOnChannelQueues(scope, cases, selectCond, &caseToExecute,
                                &completed, &callbackMutex);

      // TODO(thuan): Atomically unlock all channels and sleep current thread
      // NOTE(review): `completed` is set under callbackMutex while this wait
      // uses a different, local mutex -- confirm a notification cannot be
      // lost between the predicate check and the sleep.
      unlockChannels(channels);
      selectCond->wait(lock, [&completed]() { return completed.load(); });

      // Select has been woken up by case operation
      lockChannels(channels);
      removeThreadOnChannelQueues(scope, cases);

      if (caseToExecute == -1) {
        // Recursively poll cases, since we were woken up by a channel close
        // TODO(thuan): Need to test if this is a valid case
        unlockChannels(channels);
        return pollCases(scope, cases, channels);
      }
    }

    // At this point, caseToExecute != -1, and we can proceed with executing
    // the case block
    unlockChannels(channels);

    return caseToExecute;
  }

  // Locks every channel, in the order given.
  void lockChannels(const std::vector<framework::ChannelHolder *> &chs) const {
    for (auto it = chs.begin(); it != chs.end(); ++it) {
      (*it)->Lock();
    }
  }

  // Unlocks every channel, in the reverse of the order given.
  void unlockChannels(
      const std::vector<framework::ChannelHolder *> &chs) const {
    for (auto it = chs.rbegin(); it != chs.rend(); ++it) {
      (*it)->Unlock();
    }
  }

  // Registers this operator on the send/receive queue of the channel of every
  // SEND/RECEIVE case, together with a callback that records which case was
  // triggered. `caseToExecute`, `completed` and `callbackMutex` must outlive
  // the registration (they are removed again by removeThreadOnChannelQueues).
  void pushThreadOnChannelQueues(
      const framework::Scope *scope,
      std::vector<std::shared_ptr<SelectOpCase>> *cases,
      std::shared_ptr<std::condition_variable_any> rCond,
      std::atomic<int> *caseToExecute, std::atomic<bool> *completed,
      std::recursive_mutex *callbackMutex) const {
    for (const auto &c : *cases) {
      // The pointers are captured BY VALUE: the callback is invoked after this
      // function has returned, so references to these parameters would dangle.
      std::function<bool(framework::ChannelAction channelAction)> cb =
          [caseToExecute, completed, callbackMutex,
           c](framework::ChannelAction channelAction) {
            std::lock_guard<std::recursive_mutex> lock{*callbackMutex};

            bool canProcess = false;
            if (!(*completed)) {
              // If the channel wasn't closed, we set the caseToExecute index
              // as this current case
              if (channelAction != framework::ChannelAction::CLOSE) {
                *caseToExecute = c->caseIndex;
              }
              // This will allow our conditional variable to break out of wait
              *completed = true;
              canProcess = true;
            }

            return canProcess;
          };

      switch (c->caseType) {
        case SelectOpCaseType::SEND: {
          framework::ChannelHolder *ch = GetChannel(scope, c->channelName);
          auto chOutputVar = scope->FindVar(c->varName);
          concurrency::ChannelAddToSendQ(ch, this, chOutputVar, rCond, cb);
          break;
        }
        case SelectOpCaseType::RECEIVE: {
          framework::ChannelHolder *ch = GetChannel(scope, c->channelName);
          auto chOutputVar = scope->FindVar(c->varName);
          concurrency::ChannelAddToReceiveQ(ch, this, chOutputVar, rCond, cb);
          break;
        }
        default:
          // Cases without a channel have nothing to wait on.
          break;
      }
    }
  }

  // Removes this operator from the send/receive queue of the channel of every
  // SEND/RECEIVE case.
  void removeThreadOnChannelQueues(
      const framework::Scope *scope,
      std::vector<std::shared_ptr<SelectOpCase>> *cases) const {
    for (const auto &c : *cases) {
      switch (c->caseType) {
        case SelectOpCaseType::SEND: {
          GetChannel(scope, c->channelName)->RemoveFromSendQ(this);
          break;
        }
        case SelectOpCaseType::RECEIVE: {
          GetChannel(scope, c->channelName)->RemoveFromReceiveQ(this);
          break;
        }
        default:
          break;
      }
    }
  }
};
// Declares the inputs, outputs and attributes of the select op.
class SelectOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(kX,
             "A set of variables, which are required by operators inside the "
             "cases of Select Op")
        .AsDuplicable();
    AddInput(kCaseToExecute,
             "(Int) The variable that sets the index of the case to execute, "
             "after evaluating the channels being sent to and received from")
        .AsDuplicable();
    AddOutput(kOutputs,
              "A set of variables, which will be assigned with values "
              "generated by the operators inside the cases of Select Op.")
        .AsDuplicable();
    // Adjacent string literals are concatenated verbatim, so every fragment
    // but the last ends with an explicit space.
    AddAttr<std::vector<std::string>>(kCases,
                                      "(String vector) Serialized list of "
                                      "all cases in the select op. Each "
                                      "case is serialized as: "
                                      "'<index>,<type>,<channel>,<value>' "
                                      "where type is 0 for default, 1 for "
                                      "send, and 2 for receive. "
                                      "No channel and values are needed for "
                                      "default cases.");
    AddAttr<framework::BlockDesc *>(kCasesBlock,
                                    "The cases block inside select_op");
    AddComment(R"DOC(
)DOC");
  }
};
// TODO(thuan): Implement Gradient Operator for SELECT_OP
}
// namespace operators
}
// namespace paddle
// Registers the `select` operator with SelectOp as its implementation,
// SelectOpMaker as its proto maker and EmptyGradOpMaker as its gradient
// maker (no gradient operator is implemented yet, see the TODO above).
REGISTER_OPERATOR
(
select
,
paddle
::
operators
::
SelectOp
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
SelectOpMaker
);
paddle/fluid/operators/sequence_erase_op.cc
浏览文件 @
91756a5a
...
@@ -24,9 +24,9 @@ class SequenceEraseOp : public framework::OperatorWithKernel {
...
@@ -24,9 +24,9 @@ class SequenceEraseOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of SequenceErase
Op
should not be null."
);
"Input(X) of SequenceErase
operator
should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of SequenceErase
Op
should not be null."
);
"Output(Out) of SequenceErase
operator
should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
x_dims
.
size
()
==
2
&&
x_dims
[
1
]
==
1
,
PADDLE_ENFORCE
(
x_dims
.
size
()
==
2
&&
x_dims
[
1
]
==
1
,
"Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
"Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
91756a5a
...
@@ -34,7 +34,7 @@ namespace operators {
...
@@ -34,7 +34,7 @@ namespace operators {
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
FluidDT
=
framework
::
proto
::
VarType_Type
;
using
TRT_DT
=
nvinfer1
::
DataType
;
using
TRT_DT
=
nvinfer1
::
DataType
;
namespace
{
// NOLINT
namespace
{
TRT_DT
FluidDataType2TRT
(
FluidDT
type
)
{
TRT_DT
FluidDataType2TRT
(
FluidDT
type
)
{
switch
(
type
)
{
switch
(
type
)
{
...
...
paddle/fluid/operators/top_k_op.cc
浏览文件 @
91756a5a
...
@@ -30,8 +30,6 @@ class TopkOp : public framework::OperatorWithKernel {
...
@@ -30,8 +30,6 @@ class TopkOp : public framework::OperatorWithKernel {
"Output(Indices) of TopkOp should not be null."
);
"Output(Indices) of TopkOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
input_dims
.
size
(),
2
,
"Rank of TopK op's input must be 2."
);
const
int
k
=
static_cast
<
int
>
(
ctx
->
Attrs
().
Get
<
int
>
(
"k"
));
const
int
k
=
static_cast
<
int
>
(
ctx
->
Attrs
().
Get
<
int
>
(
"k"
));
PADDLE_ENFORCE_GE
(
k
,
1
,
"k must >= 1"
);
PADDLE_ENFORCE_GE
(
k
,
1
,
"k must >= 1"
);
...
...
paddle/fluid/platform/dynload/cublas.h
浏览文件 @
91756a5a
...
@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
...
@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
inline cublasStatus_t operator()(Args... args) { \
inline cublasStatus_t operator()(Args... args) { \
return
__name(args...);
\
return
::__name(args...);
\
} \
} \
}; \
}; \
extern DynLoad__##__name __name
extern DynLoad__##__name __name
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
91756a5a
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <cudnn.h>
#include <cudnn.h>
#include <mutex> // NOLINT
#include <mutex> // NOLINT
...
@@ -47,13 +50,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
...
@@ -47,13 +50,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#else
#else
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)
\
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name {
\
struct DynLoad__##__name { \
template <typename... Args>
\
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)
) { \
inline cudnnStatus_t operator()(Args... args
) { \
return
__name(args...);
\
return
::__name(args...);
\
}
\
} \
};
\
}; \
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#endif
#endif
...
...
paddle/fluid/platform/dynload/curand.h
浏览文件 @
91756a5a
...
@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
...
@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
curandStatus_t operator()(Args... args) { \
curandStatus_t operator()(Args... args) { \
return
__name(args...);
\
return
::__name(args...);
\
} \
} \
}; \
}; \
extern DynLoad__##__name __name
extern DynLoad__##__name __name
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
91756a5a
...
@@ -107,7 +107,11 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
...
@@ -107,7 +107,11 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
static
inline
void
*
GetDsoHandleFromSearchPath
(
const
std
::
string
&
search_root
,
static
inline
void
*
GetDsoHandleFromSearchPath
(
const
std
::
string
&
search_root
,
const
std
::
string
&
dso_name
,
const
std
::
string
&
dso_name
,
bool
throw_on_error
=
true
)
{
bool
throw_on_error
=
true
)
{
#if !defined(_WIN32)
int
dynload_flags
=
RTLD_LAZY
|
RTLD_LOCAL
;
int
dynload_flags
=
RTLD_LAZY
|
RTLD_LOCAL
;
#else
int
dynload_flags
=
0
;
#endif // !_WIN32
void
*
dso_handle
=
nullptr
;
void
*
dso_handle
=
nullptr
;
std
::
string
dlPath
=
dso_name
;
std
::
string
dlPath
=
dso_name
;
...
@@ -117,10 +121,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
...
@@ -117,10 +121,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
// search xxx.so from custom path
// search xxx.so from custom path
dlPath
=
join
(
search_root
,
dso_name
);
dlPath
=
join
(
search_root
,
dso_name
);
dso_handle
=
dlopen
(
dlPath
.
c_str
(),
dynload_flags
);
dso_handle
=
dlopen
(
dlPath
.
c_str
(),
dynload_flags
);
#if !defined(_WIN32)
auto
errorno
=
dlerror
();
#else
auto
errorno
=
GetLastError
();
#endif // !_WIN32
// if not found, search from default path
// if not found, search from default path
if
(
nullptr
==
dso_handle
)
{
if
(
nullptr
==
dso_handle
)
{
LOG
(
WARNING
)
<<
"Failed to find dynamic library: "
<<
dlPath
<<
" ("
LOG
(
WARNING
)
<<
"Failed to find dynamic library: "
<<
dlPath
<<
" ("
<<
dlerror
()
<<
")"
;
<<
errorno
<<
")"
;
if
(
dlPath
.
find
(
"nccl"
)
!=
std
::
string
::
npos
)
{
if
(
dlPath
.
find
(
"nccl"
)
!=
std
::
string
::
npos
)
{
std
::
cout
std
::
cout
<<
"You may need to install 'nccl2' from NVIDIA official website: "
<<
"You may need to install 'nccl2' from NVIDIA official website: "
...
@@ -139,10 +148,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
...
@@ -139,10 +148,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
"export LD_LIBRARY_PATH=...
\n
Note: After Mac OS 10.11, "
"export LD_LIBRARY_PATH=...
\n
Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled."
;
"Integrity Protection (SIP) is disabled."
;
#if !defined(_WIN32)
auto
errorno
=
dlerror
();
#else
auto
errorno
=
GetLastError
();
#endif // !_WIN32
if
(
throw_on_error
)
{
if
(
throw_on_error
)
{
PADDLE_ENFORCE
(
nullptr
!=
dso_handle
,
error_msg
,
dlPath
,
dlerror
()
);
PADDLE_ENFORCE
(
nullptr
!=
dso_handle
,
error_msg
,
dlPath
,
errorno
);
}
else
if
(
nullptr
==
dso_handle
)
{
}
else
if
(
nullptr
==
dso_handle
)
{
LOG
(
WARNING
)
<<
string
::
Sprintf
(
error_msg
,
dlPath
,
dlerror
()
);
LOG
(
WARNING
)
<<
string
::
Sprintf
(
error_msg
,
dlPath
,
errorno
);
}
}
return
dso_handle
;
return
dso_handle
;
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
91756a5a
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method
)
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method
pass_builder
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
list
(
APPEND PYBIND_DEPS parallel_executor profiler
)
list
(
APPEND PYBIND_DEPS parallel_executor profiler
)
...
...
paddle/fluid/pybind/const_value.cc
浏览文件 @
91756a5a
...
@@ -48,9 +48,6 @@ void BindConstValue(pybind11::module* m) {
...
@@ -48,9 +48,6 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker
.
def
(
op_proto_and_checker_maker
.
def
(
"kOpNameScopeAttrName"
,
"kOpNameScopeAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
op_proto_and_checker_maker
.
def
(
"kOpCreationCallstackAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
);
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
91756a5a
...
@@ -214,7 +214,6 @@ void BindVarDsec(pybind11::module *m) {
...
@@ -214,7 +214,6 @@ void BindVarDsec(pybind11::module *m) {
.
def
(
"set_shapes"
,
&
pd
::
VarDesc
::
SetShapes
)
.
def
(
"set_shapes"
,
&
pd
::
VarDesc
::
SetShapes
)
.
def
(
"set_dtype"
,
&
pd
::
VarDesc
::
SetDataType
)
.
def
(
"set_dtype"
,
&
pd
::
VarDesc
::
SetDataType
)
.
def
(
"set_dtypes"
,
&
pd
::
VarDesc
::
SetDataTypes
)
.
def
(
"set_dtypes"
,
&
pd
::
VarDesc
::
SetDataTypes
)
.
def
(
"set_capacity"
,
&
pd
::
VarDesc
::
SetCapacity
)
.
def
(
"shape"
,
&
pd
::
VarDesc
::
GetShape
,
.
def
(
"shape"
,
&
pd
::
VarDesc
::
GetShape
,
pybind11
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"shapes"
,
&
pd
::
VarDesc
::
GetShapes
,
.
def
(
"shapes"
,
&
pd
::
VarDesc
::
GetShapes
,
...
@@ -251,7 +250,6 @@ void BindVarDsec(pybind11::module *m) {
...
@@ -251,7 +250,6 @@ void BindVarDsec(pybind11::module *m) {
.
value
(
"STEP_SCOPES"
,
pd
::
proto
::
VarType
::
STEP_SCOPES
)
.
value
(
"STEP_SCOPES"
,
pd
::
proto
::
VarType
::
STEP_SCOPES
)
.
value
(
"LOD_RANK_TABLE"
,
pd
::
proto
::
VarType
::
LOD_RANK_TABLE
)
.
value
(
"LOD_RANK_TABLE"
,
pd
::
proto
::
VarType
::
LOD_RANK_TABLE
)
.
value
(
"LOD_TENSOR_ARRAY"
,
pd
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
.
value
(
"LOD_TENSOR_ARRAY"
,
pd
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
.
value
(
"CHANNEL"
,
pd
::
proto
::
VarType
::
CHANNEL
)
.
value
(
"PLACE_LIST"
,
pd
::
proto
::
VarType
::
PLACE_LIST
)
.
value
(
"PLACE_LIST"
,
pd
::
proto
::
VarType
::
PLACE_LIST
)
.
value
(
"READER"
,
pd
::
proto
::
VarType
::
READER
)
.
value
(
"READER"
,
pd
::
proto
::
VarType
::
READER
)
.
value
(
"RAW"
,
pd
::
proto
::
VarType
::
RAW
);
.
value
(
"RAW"
,
pd
::
proto
::
VarType
::
RAW
);
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
91756a5a
...
@@ -21,10 +21,10 @@ limitations under the License. */
...
@@ -21,10 +21,10 @@ limitations under the License. */
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
...
@@ -595,6 +595,29 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -595,6 +595,29 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
.
def
(
"set_str"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
const
std
::
string
&
attr
)
{
self
.
Set
<
std
::
string
>
(
name
,
new
std
::
string
(
attr
));
});
py
::
class_
<
ir
::
PassBuilder
,
std
::
shared_ptr
<
ir
::
PassBuilder
>>
pb
(
m
,
"PassBuilder"
);
pb
.
def
(
py
::
init
())
.
def
(
"append_pass"
,
[](
ir
::
PassBuilder
&
self
,
const
std
::
string
&
pass_type
)
->
std
::
shared_ptr
<
ir
::
Pass
>
{
return
self
.
AppendPass
(
pass_type
);
})
.
def
(
"all_passes"
,
[](
ir
::
PassBuilder
&
self
)
{
return
self
.
AllPasses
();
})
.
def
(
"insert_pass"
,
[](
ir
::
PassBuilder
&
self
,
size_t
idx
,
const
std
::
string
&
pass_type
)
{
return
self
.
InsertPass
(
idx
,
pass_type
);
})
.
def
(
"remove_pass"
,
[](
ir
::
PassBuilder
&
self
,
size_t
idx
)
{
self
.
RemovePass
(
idx
);
});
// -- python binds for parallel executor.
// -- python binds for parallel executor.
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ExecutionStrategy
>
exec_strategy
(
pe
,
"ExecutionStrategy"
);
py
::
class_
<
ExecutionStrategy
>
exec_strategy
(
pe
,
"ExecutionStrategy"
);
...
@@ -677,7 +700,11 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -677,7 +700,11 @@ All parameter, weight, gradient are variables in Paddle.
},
},
[](
BuildStrategy
&
self
,
bool
b
)
{
[](
BuildStrategy
&
self
,
bool
b
)
{
self
.
fuse_elewise_add_act_ops_
=
b
;
self
.
fuse_elewise_add_act_ops_
=
b
;
});
})
.
def
(
"_create_passes_from_strategy"
,
[](
BuildStrategy
&
self
)
->
std
::
shared_ptr
<
ir
::
PassBuilder
>
{
return
self
.
CreatePassesFromStrategy
();
});
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
...
...
paddle/fluid/string/pretty_log.h
浏览文件 @
91756a5a
...
@@ -56,13 +56,13 @@ struct Style {
...
@@ -56,13 +56,13 @@ struct Style {
};
};
template
<
typename
...
Args
>
template
<
typename
...
Args
>
static
void
PrettyLogEndl
(
const
std
::
string
&
style
,
const
char
*
fmt
,
static
void
PrettyLogEndl
(
const
std
::
string
&
style
,
const
char
*
fmt
,
const
Args
&
...
args
)
{
const
Args
&
...
args
)
{
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
()
<<
std
::
endl
;
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
()
<<
std
::
endl
;
}
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
static
void
PrettyLog
(
const
std
::
string
&
style
,
const
char
*
fmt
,
static
void
PrettyLog
(
const
std
::
string
&
style
,
const
char
*
fmt
,
const
Args
&
...
args
)
{
const
Args
&
...
args
)
{
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
();
std
::
cerr
<<
style
<<
Sprintf
(
fmt
,
args
...)
<<
reset
();
}
}
...
...
paddle/legacy/trainer/tests/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -16,7 +16,11 @@ endfunction()
...
@@ -16,7 +16,11 @@ endfunction()
trainer_test
(
test_Compare
)
trainer_test
(
test_Compare
)
trainer_test
(
test_PyDataProviderWrapper
)
trainer_test
(
test_PyDataProviderWrapper
)
trainer_test
(
test_recurrent_machine_generation
)
trainer_test
(
test_recurrent_machine_generation
)
trainer_test
(
test_Trainer
)
if
(
NOT APPLE
)
trainer_test
(
test_Trainer
)
else
()
message
(
WARNING
"These tests has been disabled in OSX for random fail:
\n
test_Trainer"
)
endif
()
############### test_TrainerOnePass ##########################
############### test_TrainerOnePass ##########################
if
(
WITH_PYTHON
)
if
(
WITH_PYTHON
)
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
91756a5a
...
@@ -395,10 +395,11 @@ EOF
...
@@ -395,10 +395,11 @@ EOF
ctest
--output-on-failure
-j
$1
ctest
--output-on-failure
-j
$1
# make install should also be test when unittest
# make install should also be test when unittest
make
install
-j
8
make
install
-j
8
pip
install
/usr/local
/opt/paddle/share/wheels/
*
.whl
pip
install
--user
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
paddle version
paddle version
fi
fi
pip uninstall
-y
paddlepaddle
fi
fi
}
}
...
@@ -654,11 +655,21 @@ function gen_fluid_inference_lib() {
...
@@ -654,11 +655,21 @@ function gen_fluid_inference_lib() {
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
cat
<<
EOF
cat
<<
EOF
========================================
========================================
Deploy
ing fluid inference library ...
Generat
ing fluid inference library ...
========================================
========================================
EOF
EOF
cmake ..
-DWITH_DISTRIBUTE
=
OFF
cmake ..
-DWITH_DISTRIBUTE
=
OFF
make
-j
`
nproc
`
inference_lib_dist
make
-j
`
nproc
`
inference_lib_dist
fi
}
function
tar_fluid_inference_lib
()
{
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
cat
<<
EOF
========================================
Taring fluid inference library ...
========================================
EOF
cd
${
PADDLE_ROOT
}
/build
cd
${
PADDLE_ROOT
}
/build
cp
-r
fluid_install_dir fluid
cp
-r
fluid_install_dir fluid
tar
-czf
fluid.tgz fluid
tar
-czf
fluid.tgz fluid
...
@@ -673,7 +684,7 @@ function test_fluid_inference_lib() {
...
@@ -673,7 +684,7 @@ function test_fluid_inference_lib() {
========================================
========================================
EOF
EOF
cd
${
PADDLE_ROOT
}
/paddle/fluid/inference/api/demo_ci
cd
${
PADDLE_ROOT
}
/paddle/fluid/inference/api/demo_ci
./run.sh
${
PADDLE_ROOT
}
${
WITH_MKL
:-
ON
}
${
WITH_GPU
:-
OFF
}
./run.sh
${
PADDLE_ROOT
}
${
WITH_MKL
:-
ON
}
${
WITH_GPU
:-
OFF
}
${
INFERENCE_DEMO_INSTALL_DIR
}
./clean.sh
./clean.sh
fi
fi
}
}
...
@@ -722,6 +733,7 @@ function main() {
...
@@ -722,6 +733,7 @@ function main() {
fluid_inference_lib
)
fluid_inference_lib
)
cmake_gen
${
PYTHON_ABI
:-
""
}
cmake_gen
${
PYTHON_ABI
:-
""
}
gen_fluid_inference_lib
gen_fluid_inference_lib
tar_fluid_inference_lib
test_fluid_inference_lib
test_fluid_inference_lib
;;
;;
check_style
)
check_style
)
...
@@ -742,11 +754,15 @@ function main() {
...
@@ -742,11 +754,15 @@ function main() {
build_mac
build_mac
run_mac_test
${
PROC_RUN
:-
1
}
run_mac_test
${
PROC_RUN
:-
1
}
;;
;;
macbuild
)
cmake_gen
${
PYTHON_ABI
:-
""
}
build_mac
;;
cicheck_py35
)
cicheck_py35
)
cmake_gen
${
PYTHON_ABI
:-
""
}
cmake_gen
${
PYTHON_ABI
:-
""
}
build
build
run_test
run_test
assert_api_not_changed
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
;;
;;
*
)
*
)
print_usage
print_usage
...
...
python/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -87,6 +87,7 @@ if (WITH_TESTING)
...
@@ -87,6 +87,7 @@ if (WITH_TESTING)
endif
()
endif
()
endif
()
endif
()
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/tests
)
add_subdirectory
(
paddle/fluid/contrib/tests
)
endif
()
endif
()
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
install
(
DIRECTORY
${
PADDLE_PYTHON_PACKAGE_DIR
}
DESTINATION opt/paddle/share/wheels
DESTINATION opt/paddle/share/wheels
...
...
python/paddle/fluid/clip.py
浏览文件 @
91756a5a
...
@@ -271,7 +271,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
...
@@ -271,7 +271,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
"All parameters' 'clip_norm' of a same group should be the same"
"All parameters' 'clip_norm' of a same group should be the same"
)
)
local_norm_var
=
layers
.
reduce_sum
(
input
=
layers
.
pow
(
x
=
grad
,
factor
=
2.0
))
square
=
grad
*
grad
local_norm_var
=
layers
.
cast
(
layers
.
reduce_sum
(
input
=
square
),
'float64'
)
context
[
self
.
group_name
].
append
(
local_norm_var
)
context
[
self
.
group_name
].
append
(
local_norm_var
)
self
.
context
=
context
self
.
context
=
context
...
@@ -281,6 +282,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
...
@@ -281,6 +282,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
if
group_scale_name
not
in
self
.
context
:
if
group_scale_name
not
in
self
.
context
:
group_norm_var
=
layers
.
sums
(
input
=
self
.
context
[
self
.
group_name
])
group_norm_var
=
layers
.
sums
(
input
=
self
.
context
[
self
.
group_name
])
group_norm_var
=
layers
.
sqrt
(
x
=
group_norm_var
)
group_norm_var
=
layers
.
sqrt
(
x
=
group_norm_var
)
group_norm_var
=
layers
.
cast
(
group_norm_var
,
'float32'
)
clip_var
=
self
.
context
[
self
.
group_name
+
"_clip"
]
clip_var
=
self
.
context
[
self
.
group_name
+
"_clip"
]
group_scale_var
=
layers
.
elementwise_div
(
group_scale_var
=
layers
.
elementwise_div
(
x
=
clip_var
,
x
=
clip_var
,
...
...
python/paddle/fluid/concurrency.py
已删除
100644 → 0
浏览文件 @
c5292b18
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
.layers.control_flow
import
BlockGuard
,
equal
from
.framework
import
Operator
from
.layer_helper
import
LayerHelper
,
unique_name
from
.layers
import
fill_constant
from
.
import
core
# Names exported by `from ... import *` for this module.
# NOTE(review): the `Go` class defined right below is not listed here even
# though the docstrings in this module refer to it -- confirm that is intended.
__all__ = [
    'make_channel', 'channel_send', 'channel_recv', 'channel_close', 'Select'
]
class Go(BlockGuard):
    """Block guard that packs the ops built inside its `with` body into a
    single `go` operator appended to the parent block.

    Args:
        name (str|None): forwarded to the underlying LayerHelper.
    """

    def __init__(self, name=None):
        self.helper = LayerHelper("go", name=name)
        super(Go, self).__init__(self.helper.main_program)

    def __enter__(self):
        # Nothing is returned, so `with Go() as g:` binds None to `g`.
        super(Go, self).__enter__()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self._construct_go_op()
            return super(Go, self).__exit__(exc_type, exc_val, exc_tb)
        # An exception escaped the body: do not build the op, let it propagate.
        return False

    def _construct_go_op(self):
        """Append the `go` op for the current (sub) block to its parent."""
        program = self.helper.main_program
        go_block = program.current_block()
        parent_block = program.block(program.current_block().parent_idx)

        produced_names = set()
        consumed_names = set()

        for op in go_block.ops:
            # Every input that was not produced by an earlier op of this
            # block has to be fed to the Go operator.
            for in_slot in op.input_names:
                for arg_name in op.input(in_slot):
                    if arg_name in produced_names:
                        continue
                    consumed_names.add(arg_name)

            for out_slot in op.output_names:
                for arg_name in op.output(out_slot):
                    produced_names.add(arg_name)

        # Outputs of the sub block that also exist in the parent block.
        # The list is collected but not passed to the op below (`outputs`
        # is an empty dict).
        out_vars = [
            parent_block.var(arg_name) for arg_name in produced_names
            if arg_name in parent_block.vars
        ]

        go_inputs = [
            parent_block._var_recursive(arg_name)
            for arg_name in consumed_names
        ]
        parent_block.append_op(
            type='go',
            inputs={'X': go_inputs},
            outputs={},
            attrs={'sub_block': go_block})
class SelectCase(object):
    """One case of a `Select` statement.

    Used as a context manager: entering creates a new block in the main
    program for the case body, leaving rolls the program back to the previous
    block. `construct_op` later wraps the recorded block in a
    `conditional_block` op and returns the serialized case description.
    """

    # Kinds of action a case can perform (stored in `self.action`).
    DEFAULT = 0
    SEND = 1
    RECEIVE = 2

    def __init__(self,
                 select,
                 case_idx,
                 case_to_execute,
                 channel_action_fn=None,
                 channel=None,
                 value=None,
                 is_copy=False):
        """
        Args:
            select: the owning `Select`; its `parent_block` receives the copy
                variable and `assign` op when `is_copy` is set.
            case_idx: index of this case; compared with `case_to_execute`
                in `construct_op`.
            case_to_execute: variable compared against `case_idx`.
            channel_action_fn: function whose `__name__` selects the action
                ('channel_send' -> SEND, anything else -> RECEIVE). A falsy
                value makes this the DEFAULT case.
            channel: channel variable; only its `name` is read here.
            value: variable that is sent or received.
            is_copy (bool): for SEND cases, send a copy of `value` instead
                of `value` itself.
        """
        self.select = select
        self.helper = LayerHelper('conditional_block')
        self.main_program = self.helper.main_program
        self.is_scalar_condition = True

        self.case_to_execute = case_to_execute
        self.idx = case_idx

        # Since we aren't going to use the `channel_send` or `channel_recv`
        # functions directly, we just need to capture the name.
        self.action = (self.SEND
                       if channel_action_fn.__name__ == ('channel_send') else
                       self.RECEIVE) if channel_action_fn else self.DEFAULT

        X = value
        if self.action == self.SEND and is_copy:
            # Create a copy of the data we want to send. The copy variable
            # and the `assign` op go into the Select's parent block.
            copied_X = self.select.parent_block.create_var(
                name=unique_name.generate(value.name + '_copy'),
                type=value.type,
                dtype=value.dtype,
                shape=value.shape,
                lod_level=value.lod_level,
                capacity=value.capacity
                if hasattr(value, 'capacity') else None, )

            self.select.parent_block.append_op(
                type="assign", inputs={"X": value}, outputs={"Out": copied_X})
            X = copied_X

        self.value = X
        self.channel = channel

    def __enter__(self):
        # Ops built inside the `with` body are recorded into this new block.
        # Nothing is returned, so `with ... as c:` binds None.
        self.block = self.main_program._create_block()

    def construct_op(self):
        """Append the `conditional_block` op for this case to the current
        block and return the case serialized as 'idx,action,channel,value'.
        """
        main_program = self.helper.main_program
        cases_block = main_program.current_block()

        inner_outputs = set()
        input_set = set()
        # NOTE(review): nothing is ever added to `params`, so `param_list`
        # below is always empty -- confirm whether that is intended.
        params = set()

        for op in self.block.ops:
            # Iterate over all operators, get all the inputs
            # and add as input to the SelectCase operator.
            for iname in op.input_names:
                for in_var_name in op.input(iname):
                    if in_var_name not in inner_outputs:
                        input_set.add(in_var_name)

            for oname in op.output_names:
                for out_var_name in op.output(oname):
                    inner_outputs.add(out_var_name)

        param_list = [
            cases_block.var(each_name) for each_name in params
            if each_name not in input_set
        ]

        # Iterate over all operators, get all the outputs
        # add to the output list of SelectCase operator only if
        # they exist in the parent block.
        out_vars = []
        for inner_out_name in inner_outputs:
            if inner_out_name in cases_block.vars:
                out_vars.append(cases_block.var(inner_out_name))

        # First, create an op that will determine whether or not this is the
        # conditional variable to execute.
        should_execute_block = equal(
            fill_constant(
                shape=[1], dtype=core.VarDesc.VarType.INT32, value=self.idx),
            self.case_to_execute)

        step_scope = cases_block.create_var(
            type=core.VarDesc.VarType.STEP_SCOPES)

        cases_block.append_op(
            type='conditional_block',
            inputs={'X': [should_execute_block],
                    'Params': param_list},
            outputs={'Out': out_vars,
                     'Scope': [step_scope]},
            attrs={
                'sub_block': self.block,
                'is_scalar_condition': self.is_scalar_condition
            })

        # Channel and value names are left empty when the attribute is falsy
        # (e.g. None for a default case).
        return '%s,%s,%s,%s' % (self.idx, self.action, self.channel.name
                                if self.channel else '', self.value.name
                                if self.value else '')

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always leave the case block, even when the body raised.
        self.main_program._rollback()
        if exc_type is not None:
            return False  # re-raise exception
        return True
class Select(BlockGuard):
    """Block guard that collects `SelectCase` objects and, on exit, emits one
    `select` operator wrapping all of them.

    Args:
        name (str|None): forwarded to the underlying LayerHelper.
    """

    def __init__(self, name=None):
        self.helper = LayerHelper('select', name=name)
        self.parent_block = self.helper.main_program.current_block()
        self.cases = []

        super(Select, self).__init__(self.helper.main_program)
        # Initialised to -1; each case compares its own index with this
        # variable (see SelectCase.construct_op).
        self.case_to_execute = fill_constant(
            shape=[1], dtype=core.VarDesc.VarType.INT32, value=-1)

    def __enter__(self):
        super(Select, self).__enter__()
        return self

    def case(self, channel_action_fn, channel, value, is_copy=False):
        """Create, register and return a new case for a channel action."""
        new_case = SelectCase(self,
                              len(self.cases), self.case_to_execute,
                              channel_action_fn, channel, value, is_copy)
        self.cases.append(new_case)
        return new_case

    def default(self):
        """Create, register and return a default case (no channel action)."""
        fallback_case = SelectCase(self,
                                   len(self.cases), self.case_to_execute)
        self.cases.append(fallback_case)
        return fallback_case

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            return False

        # The block currently being built holds the case ops; its parent
        # receives the `select` op itself.
        select_block = self.helper.main_program.current_block()
        parent_block = self.helper.main_program.block(select_block.parent_idx)

        # Build every case op inside the select block, in registration order.
        serialized_cases = [each.construct_op() for each in self.cases]

        produced = set()
        consumed = set()

        for wrapper_op in select_block.ops:
            if not (wrapper_op.attrs and 'sub_block' in wrapper_op.attrs):
                continue
            for inner_op in wrapper_op.attrs['sub_block'].ops:
                assert isinstance(inner_op, Operator)
                for in_slot in inner_op.input_names:
                    for arg_name in inner_op.input(in_slot):
                        if arg_name not in produced:
                            consumed.add(arg_name)

                for out_slot in inner_op.output_names:
                    for arg_name in inner_op.output(out_slot):
                        produced.add(arg_name)

        out_list = [
            parent_block.var(var_name) for var_name in parent_block.vars
            if var_name in produced
        ]

        X = [select_block._var_recursive(arg_name) for arg_name in consumed]

        # Needs to be used by `equal` inside the cases block.
        X.append(self.case_to_execute)

        # Construct the select op.
        parent_block.append_op(
            type='select',
            inputs={'X': X,
                    'case_to_execute': self.case_to_execute},
            attrs={'sub_block': select_block,
                   'cases': serialized_cases},
            outputs={'Out': out_list})

        return super(Select, self).__exit__(exc_type, exc_val, exc_tb)
def make_channel(dtype, capacity=0):
    """
    Create a channel variable for passing data between concurrent parts of a
    program.

    A persistable variable of type CHANNEL is created and a `channel_create`
    op producing it is appended to the current block of the main program.
    Use together with `channel_send`, `channel_recv`, `channel_close` and
    `Go`.

    Args:
        dtype (ParamAttr|string): Data type of the data sent in the channel;
            stored in the op's `data_type` attribute.
        capacity (ParamAttr|int): Size of the channel; stored in the op's
            `capacity` attribute. Defaults to 0 (unbuffered channel).

    Returns:
        Variable: The channel variable that can be used to send and receive
        data of the given dtype.

    Examples:
        .. code-block:: python

          ch = fluid.make_channel(dtype='int32', capacity=10)
          ...
          # Code to execute in a Go block, which receives the channel data.
          fluid.channel_send(ch, 100)
          fluid.channel_close(ch)
    """
    # Must stay the first statement: `locals()` has to contain only the
    # function arguments.
    helper = LayerHelper('channel_create', **locals())
    current_block = helper.main_program.current_block()

    # The channel variable is persistable so that it lives in the global
    # scope.
    channel_var = helper.create_variable(
        name=unique_name.generate('channel'),
        type=core.VarDesc.VarType.CHANNEL,
        persistable=True)

    current_block.append_op(
        type="channel_create",
        outputs={"Out": channel_var},
        attrs={"data_type": dtype,
               "capacity": capacity})

    return channel_var
def channel_send(channel, value, is_copy=False):
    """
    Append a `channel_send` op that sends `value` through `channel`.

    When `is_copy` is set, a new variable mirroring `value` (type, dtype,
    shape, lod_level and, if present, capacity) is created, filled through an
    `assign` op, and the copy is sent instead of `value`.

    Args:
        channel (Variable|Channel): Channel variable created using
            `make_channel`.
        value (Variable): Value to send to channel.
        is_copy (bool): Copy data while channel send. If False, then data
            is moved. The input cannot be used after move. (default False)

    Returns:
        None. The op is appended to the current block as a side effect.

    Examples:
        .. code-block:: python

          ch = fluid.make_channel(dtype='int32', capacity=10)
          ...
          # Code to execute in a Go block, which receives the channel data.
          fluid.channel_send(ch, 100)
    """
    # Must stay the first statement: `locals()` has to contain only the
    # function arguments.
    helper = LayerHelper('channel_send', **locals())
    current_block = helper.main_program.current_block()

    payload = value
    if is_copy:
        payload = helper.create_variable(
            name=unique_name.generate(value.name + '_copy'),
            type=value.type,
            dtype=value.dtype,
            shape=value.shape,
            lod_level=value.lod_level,
            capacity=value.capacity if hasattr(value, 'capacity') else None)

        current_block.append_op(
            type="assign", inputs={"X": value}, outputs={"Out": payload})

    current_block.append_op(
        type="channel_send", inputs={
            "Channel": channel,
            "X": payload,
        })
def channel_recv(channel, return_value):
    """
    Append a `channel_recv` op that receives a value from `channel`.

    The op writes the received data into `return_value` and a status flag
    into a newly created BOOL LoD tensor variable.

    Args:
        channel (Variable|Channel): Channel variable created using
            `make_channel`.
        return_value (Variable): Variable to set as a result of running
            channel_recv_op.

    Returns:
        Variable: `return_value`, the variable holding the received value.
        Variable: The boolean status on whether or not the channel
                  successfully received the passed value.

    Examples:
        .. code-block:: python

          ch = fluid.make_channel(dtype='int32', capacity=10)
          with fluid.Go():
            returned_value, return_status = fluid.channel_recv(ch, 'int32')

          # Code to send data through the channel.
    """
    # Must stay the first statement: `locals()` has to contain only the
    # function arguments.
    helper = LayerHelper('channel_recv', **locals())
    current_block = helper.main_program.current_block()

    recv_status = helper.create_variable(
        name=unique_name.generate('status'),
        type=core.VarDesc.VarType.LOD_TENSOR,
        dtype=core.VarDesc.VarType.BOOL)

    current_block.append_op(
        type="channel_recv",
        inputs={"Channel": channel},
        outputs={"Out": return_value,
                 "Status": recv_status})

    return return_value, recv_status
def channel_close(channel):
    """
    Append a `channel_close` op for a channel created using `make_channel`.

    Args:
        channel (Variable|Channel): Channel variable created using
            `make_channel`.

    Examples:
        .. code-block:: python

          ch = fluid.make_channel(dtype='int32', capacity=10)
          ...
          # Code to receive and send data through a channel
          ...
          fluid.channel_close(ch)
    """
    # Must stay the first statement: `locals()` has to contain only the
    # function arguments.
    helper = LayerHelper('channel_close', **locals())
    current_block = helper.main_program.current_block()

    current_block.append_op(
        type="channel_close", inputs={"Channel": channel})
python/paddle/fluid/contrib/__init__.py
浏览文件 @
91756a5a
...
@@ -20,8 +20,11 @@ from . import memory_usage_calc
...
@@ -20,8 +20,11 @@ from . import memory_usage_calc
from
.memory_usage_calc
import
*
from
.memory_usage_calc
import
*
from
.
import
op_frequence
from
.
import
op_frequence
from
.op_frequence
import
*
from
.op_frequence
import
*
from
.
import
quantize
from
.quantize
import
*
__all__
=
[]
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
python/paddle/fluid/
tests/notest_concurrency
.py
→
python/paddle/fluid/
contrib/quantize/__init__
.py
浏览文件 @
91756a5a
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve
d
.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -14,28 +14,7 @@
...
@@ -14,28 +14,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
from
.
import
quantize_transpiler
import
paddle.fluid
as
fluid
from
.quantize_transpiler
import
*
import
paddle.fluid.core
as
core
from
paddle.fluid.executor
import
Executor
__all__
=
quantize_transpiler
.
__all__
class
TestRoutineOp
(
unittest
.
TestCase
):
def
test_simple_routine
(
self
):
ch
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
BOOL
,
name
=
"CreateChannel"
)
with
fluid
.
Go
():
fluid
.
channel_send
(
ch
,
True
)
result
=
fluid
.
channel_recv
(
ch
)
fluid
.
channel_close
(
ch
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
outs
=
exe
.
run
(
fetch_list
=
[
result
])
self
.
assertEqual
(
outs
[
0
],
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
0 → 100644
浏览文件 @
91756a5a
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
collections
import
numpy
as
np
from
paddle.fluid.framework
import
default_main_program
,
default_startup_program
,
program_guard
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid
import
unique_name
from
paddle.fluid
import
core
from
paddle.fluid.initializer
import
Constant
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.layer_helper
import
LayerHelper
from
paddle.fluid.layers.nn
import
autoincreased_step_counter
from
paddle.fluid.framework
import
Variable
from
paddle.fluid.executor
import
global_scope
from
paddle.fluid.transpiler.inference_transpiler
import
InferenceTranspiler
# Names exported by `from ... import *` for this module.
__all__ = ['QuantizeTranspiler']

# Forward op types around which fake quantize / dequantize ops are inserted.
# The matching backward ops are addressed as '<type>_grad'.
_QUANTIZABLE_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul']
def _quantized_var_name(var_name):
    """
    Build the name of the quantized counterpart of `var_name` by appending
    the '.quantized' suffix.
    """
    quantized_name = "%s.quantized" % (var_name)
    return quantized_name
def _dequantized_var_name(var_name):
    """
    Build the name of the dequantized counterpart of `var_name` by appending
    the '.dequantized' suffix.
    """
    dequantized_name = "%s.dequantized" % (var_name)
    return dequantized_name
def _quantized_scale_name(var_name):
    """
    Build the name of the scale variable that belongs to `var_name` by
    appending the '.scale' suffix.
    """
    scale_name = "%s.scale" % (var_name)
    return scale_name
def _original_var_name(var_name):
    """
    Return the original variable name.

    At most one known suffix is stripped; the suffixes are tried in the
    order listed below, so '.quantized.dequantized' wins over '.dequantized'.
    Names without a known suffix are returned unchanged.
    """
    known_suffixes = ('.quantized.dequantized', '.quantized', '.dequantized',
                      '.scale')
    for suffix in known_suffixes:
        if var_name.endswith(suffix):
            return var_name[:-len(suffix)]
    return var_name
def _is_float(v):
    """Tell whether `v` is a Python float or a numpy float32 scalar."""
    return isinstance(v, (float, np.float32))
def quant(x, scale, num_bits):
    """
    Quantize `x` with a symmetric `num_bits` range.

    Computes round(x / scale * (2 ** (num_bits - 1) - 1)) with numpy
    rounding. The result is not cast to an integer dtype.

    Args:
        x: scalar or numpy array to quantize.
        scale: value `x` is divided by before scaling to the integer range.
        num_bits (int): bit width that determines the range bound.

    Returns:
        The rounded value(s), as returned by `np.round`.
    """
    bound = (1 << (num_bits - 1)) - 1
    scaled = x / scale * bound
    return np.round(scaled)
class
QuantizeTranspiler
(
object
):
def
__init__
(
self
,
weight_bits
=
8
,
activation_bits
=
8
,
activation_quantize_type
=
'abs_max'
,
weight_quantize_type
=
'abs_max'
,
window_size
=
10000
):
"""
Convert and rewrite the fluid Program according to weight and
activation quantization type.
Args:
weight_bits (int): quantization bit number for weights,
the bias is not quantized.
activation_bits (int): quantization bit number for activation.
activation_quantize_type (str): quantization type for activation,
now support 'abs_max', 'range_abs_max'. If use 'abs_max' mode,
the quantization scale will be calculated dynamically each step
in both training and testing period. If use 'range_abs_max',
a static quantization scale will be calculated during training
and used in inference.
weight_quantize_type (str): quantization type for weights,
support 'abs_max'. The 'range_abs_max' usually is not used for
weight, since weights are fixed once the model is well trained.
window_size (int): the window size for 'range_abs_max' quantization.
Examples:
.. code-block:: python
# the original program will be rewrite, if you don't want to
# change it, please clone at first.
# quantize_program = program.clone()
t = fluid.QuantizeTranspiler()
t.transpile(quantize_program)
"""
self
.
weight_bits
=
weight_bits
self
.
activation_bits
=
activation_bits
quant_type
=
[
'abs_max'
,
'range_abs_max'
]
if
weight_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown weight_quantize_type: '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
weight_quantize_type
))
if
activation_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown activation_quantize_type : '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
activation_quantize_type
))
self
.
weight_quantize_type
=
weight_quantize_type
self
.
activation_quantize_type
=
activation_quantize_type
self
.
window_size
=
window_size
self
.
helper
=
LayerHelper
(
self
.
__class__
.
__name__
)
self
.
fake_quant_op_types
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
fake_dequant_op_types
=
[
'fake_dequantize_max_abs'
]
self
.
is_test
=
None
self
.
global_step
=
None
def training_transpile(self, program=None, startup_program=None):
    """Rewrites a training input program in place for simulated
    quantization. Insert fake quantization and de-quantization ops into
    program to simulate the error introduced by quantization. And change
    the gradient ops' input by using the faked quantization weights and
    activation. Since the program is transformed in place, the graph
    connection will change.

    Args:
        program (Program): the input program to be transpiled. Defaults to
            `default_main_program()` when None.
        startup_program (Program): startup program used for the program
            guard while rewriting. Defaults to `default_startup_program()`
            when None.

    Raises:
        ValueError: if a gradient op of a quantizable type has no input that
            was quantized/dequantized in its block.
    """
    self.is_test = False
    program = default_main_program() if program is None else program
    startup_program = default_startup_program() if startup_program is \
        None else startup_program

    # Per block: original input name -> its dequantized variable, for every
    # variable that has already been quantized and dequantized.
    dequanted_vars = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]
    grad_op_types = ['%s_grad' % (op_type) for op_type in _QUANTIZABLE_OP_TYPES]

    # A set, because membership is tested for every input of every
    # quantizable op.
    param_names = set(
        p.name for p in program.global_block().iter_parameters())

    def _transpile_forward(block, op):
        idx = block.ops.index(op)
        block_id = block.idx
        # insert quant op and dequant op
        for name in op.input_arg_names:
            if name in dequanted_vars[block_id]:
                dequant_var = dequanted_vars[block_id][name]
            else:
                var = block.var(name)
                # Parameters use the weight settings, everything else the
                # activation settings.
                is_param = var.name in param_names
                quant_bits = self.weight_bits if is_param \
                    else self.activation_bits
                quant_type = self.weight_quantize_type if is_param \
                    else self.activation_quantize_type

                quant_var, scale_var = self._insert_quant_op(
                    block, idx, var, quant_bits, quant_type)
                dequant_var = self._insert_dequant_op(
                    block, idx + 1, quant_var, scale_var, quant_bits)
                dequanted_vars[block_id][name] = dequant_var
            # rename the forward op inputs
            op._rename_input(name, dequant_var.name)

    def _transpile_backward(block, op):
        block_id = block.idx
        no_dequanted_input_vars = True
        for name in op.input_arg_names:
            if name in dequanted_vars[block_id]:
                dequant_var = dequanted_vars[block_id][name]
                op._rename_input(name, dequant_var.name)
                no_dequanted_input_vars = False
        if no_dequanted_input_vars:
            raise ValueError("There is no dequanted inputs for op %s." %
                             (op.type))

    with program_guard(program, startup_program):
        self._create_global_step()
        for block in program.blocks:
            # Iterate over a snapshot: ops are inserted into the block while
            # it is being processed.
            ops = list(block.ops)
            for op in ops:
                # rewrite the forward ProgramDesc
                if op.type in _QUANTIZABLE_OP_TYPES:
                    _transpile_forward(block, op)
                # rename the backward op inputs
                if op.type in grad_op_types:
                    _transpile_backward(block, op)
def
_create_global_step
(
self
):
if
self
.
weight_quantize_type
==
'range_abs_max'
or
\
self
.
activation_quantize_type
==
'range_abs_max'
:
self
.
global_step
=
autoincreased_step_counter
()
def freeze_program(self, program, place, fuse_bn=False, scope=None):
    """Freeze input training program for inference.

    The program is rewritten in place: fake quantize/dequantize ops are
    removed or bypassed, weights found in `scope` are overwritten with their
    quantized values, and a `fake_dequantize_max_abs` op is inserted after
    every quantizable op.

    Args:
        program (Program): the input program to be transpile. Falls back to
            `default_main_program()` when None.
        place: device place used when writing tensors back into the scope
            (and passed to the BN fuse transpiler).
        fuse_bn (bool): run `BNFuseTranspiler` on the program first.
        scope: scope holding the parameter tensors. Falls back to
            `global_scope()` when None.

    Raises:
        ValueError: if a quantizable op does not have exactly one output.
    """

    self.is_test = True
    scope = global_scope() if scope is None else scope
    program = default_main_program() if program is None else program

    if fuse_bn:
        # NOTE(review): `BNFuseTranspiler` is not among the imports visible
        # at the top of this file -- presumably defined further down; confirm.
        bn_fuse_transpiler = BNFuseTranspiler()
        bn_fuse_transpiler.transpile(program, place)

    # Names of all persistable variables of the program.
    persistable_vars = [
        v.name
        for v in filter(lambda var: var.persistable, program.list_vars())
    ]
    # Per block: output name of a removed fake op -> the name ops should read
    # instead.
    op_in_rename_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]
    # Per block: output name of a quantizable op -> name of the dequantized
    # variable inserted after it.
    op_out_rename_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]
    # Per block: input name of a fake quant op -> its scale (a number loaded
    # from the scope for persistable inputs, a block variable otherwise).
    var_scale_map = [
        collections.OrderedDict() for _ in range(len(program.blocks))
    ]

    def _remove_fake_quant_and_dequant_op(block, op):
        # Remove `op` from the block and remember that readers of its 'Out'
        # must read its 'X' instead (following an earlier redirection of 'X'
        # if one is recorded).
        idx = block.ops.index(op)
        block_id = block.idx
        k = op.output('Out')[0]
        v = op.input('X')[0]
        if v not in op_in_rename_map[block_id]:
            op_in_rename_map[block_id][k] = v
        else:
            op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v]
        block._remove_op(idx)

    def _insert_post_dequant_op(block, op):
        # Rewire the inputs of a quantizable op to the recorded replacements
        # and insert a `fake_dequantize_max_abs` op right after it.
        idx = block.ops.index(op)
        block_id = block.idx
        max_range = None
        scale_var = None
        for name in op.input_arg_names:
            if name in op_in_rename_map[block_id]:
                op._rename_input(name, op_in_rename_map[block_id][name])

            scale_v = var_scale_map[block_id][_original_var_name(name)]
            if _original_var_name(name) in persistable_vars:
                # Persistable input: its scale must be a plain float.
                param_range = (1 << (self.weight_bits - 1)) - 1
                act_range = (1 << (self.activation_bits - 1)) - 1
                assert _is_float(scale_v)
                max_range = param_range * act_range / scale_v
            else:
                # Non-persistable input: its scale is a block variable.
                assert isinstance(scale_v, Variable)
                scale_var = var_scale_map[block_id][_original_var_name(
                    name)]

        if len(op.output_arg_names) != 1:
            raise ValueError("Only support one output, but op %s has"
                             " more than one output." % (op.type))
        out_var = block.var(op.output_arg_names[0])
        dequant_var = block.create_var(
            name=_dequantized_var_name(out_var.name),
            type=out_var.type,
            shape=out_var.shape,
            dtype=out_var.dtype)
        # insert fake_dequantize_op
        # NOTE(review): `max_range` is still None here when no input of the
        # op is persistable, in which case float(max_range) raises
        # TypeError -- confirm callers guarantee a persistable input.
        dequant_op = block._insert_op(
            idx + 1,
            type="fake_dequantize_max_abs",
            attrs={'max_range': float(max_range)},
            inputs={"X": out_var,
                    'Scale': scale_var},
            outputs={"Out": dequant_var})
        op_out_rename_map[block_id][out_var.name] = dequant_var.name
        return dequant_var

    def _load_var(name):
        # Copy the tensor stored under `name` in the scope into a numpy array.
        return np.array(scope.find_var(name).get_tensor())

    def _restore_var(name, arr):
        # Overwrite the tensor stored under `name` in the scope with `arr`.
        t = scope.find_var(name).get_tensor()
        t.set(arr, place)

    for block in program.blocks:
        # Iterate over a snapshot: ops are removed from and inserted into the
        # block while it is being processed.
        ops = list(block.ops)
        block_id = block.idx
        for op in ops:
            op_type = op.type

            # insert dequant_op after fc/conv, need to rename
            # input of the followed ops
            for name in op.input_arg_names:
                if name in op_out_rename_map[block_id]:
                    op._rename_input(name,
                                     op_out_rename_map[block_id][name])

            if op_type in self.fake_quant_op_types:
                in_arg_name = op.input('X')[0]
                if in_arg_name in persistable_vars:
                    if self.weight_quantize_type == 'abs_max':
                        # Scale is the largest absolute weight value.
                        param = _load_var(in_arg_name)
                        scale_v = np.max(np.abs(param))
                    else:
                        # Otherwise take the scale the op wrote to 'OutScale'.
                        scale_v = _load_var(op.output('OutScale')[0])
                    var_scale_map[block_id][in_arg_name] = scale_v
                else:
                    scale_v = block.var(op.output('OutScale')[0])
                    var_scale_map[block_id][in_arg_name] = scale_v

                if in_arg_name in persistable_vars:
                    _remove_fake_quant_and_dequant_op(block, op)
                    # quantize weight and restore
                    param_t = _load_var(in_arg_name)
                    param_q_t = quant(param_t, scale_v, self.weight_bits)
                    _restore_var(in_arg_name, param_q_t)

            if op_type in self.fake_dequant_op_types:
                _remove_fake_quant_and_dequant_op(block, op)

            if op_type in _QUANTIZABLE_OP_TYPES:
                dequant_var = _insert_post_dequant_op(block, op)

    # remove the unused var in ProgramDesc
    self._remove_unused_var(program)
    #program = program.clone()
def convert_to_int8(self, program, place, scope=None):
    """Point quantizable ops at int8 copies of their persistable inputs.

    For every op whose type is in ``_QUANTIZABLE_OP_TYPES``, each
    persistable input gets a sibling parameter named ``<name>.int8`` in the
    global block. The tensor stored in ``scope`` under the original name is
    cast with ``astype(np.int8)`` and written to the new scope variable on
    ``place``, and the op input is renamed to the new variable. Variables
    no op references any more are removed afterwards.

    Args:
        program: program to rewrite in place; ``default_main_program()``
            is used when None.
        place: device place handed to ``tensor.set``.
        scope: scope holding the parameter tensors; ``global_scope()`` is
            used when None.
    """
    if scope is None:
        scope = global_scope()
    if program is None:
        program = default_main_program()

    root_block = program.global_block()

    def _make_int8_copy(src_var):
        # Declare the int8 parameter first, then fill its tensor in scope.
        int8_name = src_var.name + ".int8"
        int8_param = root_block.create_parameter(
            name=int8_name.encode('ascii'),
            type=src_var.type,
            dtype=core.VarDesc.VarType.INT8,
            shape=src_var.shape)

        values = np.array(scope.find_var(src_var.name).get_tensor())

        scope.var(int8_name)
        int8_tensor = scope.find_var(int8_name).get_tensor()
        int8_tensor.set(values.astype(np.int8), place)
        return int8_param

    # original input name -> name of its int8 copy; shared across blocks so
    # a parameter used by several ops is converted only once.
    renamed = {}
    for block in program.blocks:
        for op in list(block.ops):
            if op.type not in _QUANTIZABLE_OP_TYPES:
                continue
            for arg_name in op.input_arg_names:
                src_var = block.var(arg_name)
                if not src_var.persistable:
                    continue
                if arg_name not in renamed:
                    renamed[arg_name] = _make_int8_copy(src_var).name
                op._rename_input(arg_name, renamed[arg_name])

    self._remove_unused_var(program)
def
_remove_unused_var
(
self
,
program
):
all_remove_vars
=
[]
for
block
in
program
.
blocks
:
args
=
[]
for
op
in
block
.
ops
:
args
+=
op
.
input_arg_names
args
+=
op
.
output_arg_names
args
=
list
(
set
(
args
))
var_names
=
block
.
vars
.
keys
()
sub_block_remove_vars
=
[]
for
var
in
var_names
:
if
var
not
in
args
:
sub_block_remove_vars
.
append
(
var
)
all_remove_vars
.
append
(
sub_block_remove_vars
)
remove_vars
=
[
list
(
set
(
v
))
for
v
in
all_remove_vars
]
for
i
,
block
in
enumerate
(
program
.
blocks
):
for
v
in
remove_vars
[
i
]:
block
.
_remove_var
(
v
)
def _insert_quant_abs_max_op(self, block, idx, var, quant_bits):
    """Insert a ``fake_quantize_abs_max`` op at position ``idx``.

    Two variables are created in ``block``, both copying the type, shape
    and dtype of ``var``: the quantized output and the output scale.

    Args:
        block: block that receives the new variables and the op.
        idx: op index passed to ``block._insert_op``.
        var: variable fed to the op as ``X``.
        quant_bits: value stored in the op's ``bit_length`` attribute.

    Returns:
        tuple: ``(quantized_variable, scale_variable)``.
    """
    # Both outputs mirror the metadata of the variable being quantized.
    mirrored = dict(type=var.type, shape=var.shape, dtype=var.dtype)
    quantized = block.create_var(
        name=_quantized_var_name(var.name), **mirrored)
    out_scale = block.create_var(
        name=_quantized_scale_name(var.name), **mirrored)

    block._insert_op(
        idx,
        type='fake_quantize_abs_max',
        attrs={'bit_length': quant_bits},
        inputs={'X': var},
        outputs={'Out': quantized,
                 'OutScale': out_scale})
    return quantized, out_scale
def _insert_quant_range_abs_max_op(self, block, idx, var, quant_bits):
    """Insert a ``fake_quantize_range_abs_max`` op at position ``idx``.

    The scale is a non-trainable parameter of shape ``[1]`` initialised to
    0.001; it is wired in as both ``InScale`` and ``OutScale``. Outside
    test mode the op additionally receives ``self.global_step`` as ``Iter``
    and a persistable, zero-initialised variable of ``self.window_size``
    elements as ``OutScales``.

    Args:
        block: block that receives the quantized variable and the op.
        idx: op index passed to ``block._insert_op``.
        var: variable fed to the op as ``X``.
        quant_bits: value stored in the op's ``bit_length`` attribute.

    Returns:
        tuple: ``(quantized_variable, scale_parameter)``.
    """
    quantized = block.create_var(
        name=_quantized_var_name(var.name),
        type=var.type,
        shape=var.shape,
        dtype=var.dtype)

    scale_attr = ParamAttr(
        name=_quantized_scale_name(var.name),
        initializer=Constant(0.001),
        trainable=False)
    scale_param = self.helper.create_parameter(
        attr=scale_attr, shape=[1], dtype=var.dtype)
    scale_param.stop_gradient = True

    op_inputs = {'X': var, 'InScale': scale_param}
    op_outputs = {'Out': quantized, 'OutScale': scale_param}

    if not self.is_test:
        # Persistable buffer with one slot per window step, started at 0.
        scale_window = self.helper.create_global_variable(
            name=unique_name.generate('scales'),
            persistable=True,
            dtype=var.dtype,
            shape=[self.window_size])
        self.helper.set_variable_initializer(
            scale_window, initializer=Constant(value=0))

        op_inputs['Iter'] = self.global_step
        op_outputs['OutScales'] = scale_window

    block._insert_op(
        idx,
        type='fake_quantize_range_abs_max',
        attrs={
            'window_size': self.window_size,
            'bit_length': quant_bits,
            'is_test': self.is_test
        },
        inputs=op_inputs,
        outputs=op_outputs)

    return quantized, scale_param
def
_insert_quant_op
(
self
,
block
,
idx
,
var
,
quant_bits
,
quant_type
):
"""
Insert fake_quantize_op
"""
if
quant_type
==
'abs_max'
:
return
self
.
_insert_quant_abs_max_op
(
block
,
idx
,
var
,
quant_bits
)
elif
quant_type
==
'range_abs_max'
:
return
self
.
_insert_quant_range_abs_max_op
(
block
,
idx
,
var
,
quant_bits
)
def _insert_dequant_op(self, block, idx, var, scale, quant_bits):
    """Insert a ``fake_dequantize_max_abs`` op at position ``idx``.

    Args:
        block: block that receives the dequantized variable and the op.
        idx: op index passed to ``block._insert_op``.
        var: variable fed to the op as ``X``; the output copies its type,
            shape and dtype.
        scale: variable fed to the op as ``Scale``.
        quant_bits: bit length; the op's ``max_range`` attribute is
            ``2 ** (quant_bits - 1) - 1`` as a float.

    Returns:
        The newly created dequantized variable.
    """
    restored = block.create_var(
        name=_dequantized_var_name(var.name),
        type=var.type,
        shape=var.shape,
        dtype=var.dtype)

    # Largest magnitude of a signed integer with quant_bits bits.
    largest_level = (1 << (quant_bits - 1)) - 1
    op_attrs = {'max_range': float(largest_level)}
    op_inputs = {"X": var, 'Scale': scale}

    block._insert_op(
        idx,
        type="fake_dequantize_max_abs",
        attrs=op_attrs,
        inputs=op_inputs,
        outputs={"Out": restored})
    return restored
class BNFuseTranspiler(InferenceTranspiler):
    """Inference transpiler variant that folds batch_norm into quantized ops.

    Weight names on the ops are mapped back through ``_original_var_name``
    before the tensors are looked up in the scope.
    """

    def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
        """Fold the batch_norm parameters into the weight and bias tensors.

        With ``tmp = scale / sqrt(variance + 1e-5)`` the tensors stored in
        ``self.scope`` are rewritten as::

            weight <- weight * tmp          (row-wise, rows = tmp.shape[0])
            bias   <- (bias - mean) * tmp + bn_bias

        and the batch_norm output name is mapped to the bias op output in
        ``self.input_map``.

        Args:
            current_op: a ``conv2d``, ``depthwise_conv2d`` or ``mul`` op
                whose weight is rescaled.
            bn_op: the batch_norm op providing Bias/Scale/Mean/Variance.
            bias_op: op whose ``Y`` input is the bias tensor to overwrite
                and whose ``Out`` replaces the batch_norm output.
            with_bias: when true the existing bias is read from
                ``bias_op``; otherwise a zero bias is assumed.
        """

        def _update_param(op, param_name, new_param):
            # Lookup kept from the original code: raises KeyError when the
            # parameter is not declared in the block.
            _ = self.block.vars[param_name]
            tensor = self.scope.find_var(param_name).get_tensor()
            tensor.set(np.array(new_param), self.place)

        def _load_param(param_name):
            return np.array(self.scope.find_var(param_name).get_tensor())

        bias_bn = _load_param(bn_op.input("Bias")[0])  # Bias
        scale_bn = _load_param(bn_op.input("Scale")[0])  # Scale
        mean_bn = _load_param(bn_op.input("Mean")[0])  # Mean
        var_bn = _load_param(bn_op.input("Variance")[0])  # Variance

        if current_op.type in ['conv2d', 'depthwise_conv2d']:
            weight_name = _original_var_name(current_op.input("Filter")[0])
            current_param = _load_param(weight_name)
        elif current_op.type == 'mul':
            weight_name = _original_var_name(current_op.input("Y")[0])
            current_param = _load_param(weight_name)

        std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
        tmp = np.float32(np.divide(scale_bn, std_bn))

        # add bias of batch_norm_op to conv2d
        if with_bias:
            # input() returns a list of argument names; the scope lookup
            # needs the single name, as in every other call in this method.
            bias = _load_param(bias_op.input("Y")[0])
        else:
            bias = np.zeros(bias_bn.shape)
        bias = np.float32(
            np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))

        # re-compute weight of conv2d/fc
        # NOTE(review): the reshape treats axis 0 of the weight as the axis
        # matching the batch_norm scale; confirm this also holds for the
        # weight layout of 'mul' ops.
        tmp = tmp.reshape(tmp.shape[0], -1)
        dst_param = current_param.reshape((tmp.shape[0], -1))
        dst_param = np.float32(np.multiply(dst_param, tmp))
        dst_param = dst_param.reshape(current_param.shape)

        # update parameters
        if current_op.type in ['conv2d', 'depthwise_conv2d']:
            _update_param(current_op, weight_name, dst_param)
        elif current_op.type == 'mul':
            _update_param(current_op, weight_name, dst_param)
        _update_param(bias_op, bias_op.input("Y")[0], bias)

        # collect the renamed input
        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
python/paddle/fluid/contrib/tests/CMakeLists.txt
0 → 100644
浏览文件 @
91756a5a
# Collect every test_*.py in this directory, as paths relative to it.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
# Strip the ".py" suffix so each entry is a bare test name.
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

# Call py_test once per discovered script, using the bare name as the
# target and the matching .py file as its source.
foreach(src ${TEST_OPS})
    py_test(${src} SRCS ${src}.py)
endforeach()
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
0 → 100644
浏览文件 @
91756a5a
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import
numpy
as
np
import
six
import
unittest
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.contrib.quantize.quantize_transpiler
import
_original_var_name
from
paddle.fluid.contrib.quantize.quantize_transpiler
import
QuantizeTranspiler
def linear_fc(num):
    """Build ``num`` stacked 128-unit relu fc layers and return the mean loss.

    The network reads an ``image`` input of shape [1, 32, 32] (float32) and
    a ``label`` input of shape [1] (int64); the result is the mean of the
    cross entropy between the last hidden layer and the label.
    """
    image = fluid.layers.data(
        name='image', shape=[1, 32, 32], dtype='float32')
    target = fluid.layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in six.moves.xrange(num):
        features = fluid.layers.fc(features, size=128, act='relu')

    per_sample = fluid.layers.cross_entropy(input=features, label=target)
    return fluid.layers.mean(per_sample)
def residual_block(num):
    """Build ``num`` residual units followed by a 10-way fc; return mean loss.

    Each unit adds a 3x3 conv+batch_norm branch (with conv bias) to a 1x1
    conv+batch_norm shortcut (without conv bias) and applies relu to the
    sum. Inputs are ``image`` ([1, 32, 32], float32) and ``label`` ([1],
    int64).
    """

    def conv_bn_layer(x,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        # The conv itself has no activation; it is applied by batch_norm.
        conv_out = fluid.layers.conv2d(
            input=x,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=conv_out, act=act)

    image = fluid.layers.data(
        name='image', shape=[1, 32, 32], dtype='float32')
    target = fluid.layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in six.moves.xrange(num):
        main_branch = conv_bn_layer(
            features, 16, 3, 1, 1, act=None, bias_attr=True)
        shortcut = conv_bn_layer(features, 16, 1, 1, 0, act=None)
        features = fluid.layers.elementwise_add(
            x=main_branch, y=shortcut, act='relu')

    logits = fluid.layers.fc(input=features, size=10)
    per_sample = fluid.layers.cross_entropy(input=logits, label=target)
    return fluid.layers.mean(per_sample)
def conv_net(img, label):
    """Two conv-pool stages with a batch_norm in between, then softmax fc.

    Args:
        img: input image variable.
        label: label variable used for the cross-entropy loss.

    Returns:
        The mean cross-entropy loss variable.
    """
    # Settings shared by both conv-pool stages; only the input and the
    # number of filters differ.
    stage_kwargs = dict(
        filter_size=5, pool_size=2, pool_stride=2, act="relu")

    stage_one = fluid.nets.simple_img_conv_pool(
        input=img, num_filters=20, **stage_kwargs)
    stage_one = fluid.layers.batch_norm(stage_one)
    stage_two = fluid.nets.simple_img_conv_pool(
        input=stage_one, num_filters=50, **stage_kwargs)

    probs = fluid.layers.fc(input=stage_two, size=10, act='softmax')
    per_sample = fluid.layers.cross_entropy(input=probs, label=label)
    return fluid.layers.mean(per_sample)
class TestQuantizeTranspiler(unittest.TestCase):
    """Structural and end-to-end checks for ``QuantizeTranspiler``."""

    def setUp(self):
        # Op types the transpiled program is expected to contain around
        # every quantizable op.
        self.weight_quant_op_type = 'fake_quantize_abs_max'
        self.dequant_op_type = 'fake_dequantize_max_abs'
        # Forward op type -> input slots (used for the membership test).
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        # Grad op type -> input slots that must read quantized variables.
        self.quantizable_op_grad_and_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, program):
        """Assert that quantizable ops read quantized+dequantized inputs.

        Reads ``self.act_quant_op_type``, which each test method sets
        before calling in.
        """
        # arg name -> the quant op first seen producing it.
        quantized_ops = {}

        persistable_vars = [
            v.name
            for v in filter(lambda var: var.persistable, program.list_vars())
        ]

        for block in program.blocks:
            for idx, op in enumerate(block.ops):
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for i, arg_name in enumerate(op.input_arg_names):
                        # Persistable inputs (weights) use the weight quant
                        # op type, everything else the activation type.
                        quant_op_type = self.weight_quant_op_type if \
                            _original_var_name(arg_name) \
                            in persistable_vars else self.act_quant_op_type
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        if arg_name not in quantized_ops:
                            # The i-th input expects its dequant/quant pair
                            # at offsets 2*i+1 and 2*i+2 before this op.
                            self.assertEqual(block.ops[idx - 2 * i - 1].type,
                                             self.dequant_op_type)
                            self.assertEqual(block.ops[idx - 2 * i - 2].type,
                                             quant_op_type)
                            quantized_ops[arg_name] = block.ops[idx - 2 * i -
                                                                2]
                        else:
                            # Already quantized for an earlier consumer: the
                            # recorded quant op must precede this op.
                            op_idx = block.ops.index(quantized_ops[arg_name])
                            self.assertLess(op_idx, idx)

                # check backward
                if op.type in self.quantizable_op_grad_and_inputs:
                    for pname in self.quantizable_op_grad_and_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self, quant_type):
        """Transpile a 3-layer fc network for training and check it."""
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
            t = QuantizeTranspiler(activation_quantize_type=quant_type)
            t.training_transpile(main)
            self.check_program(main)

    def test_linear_fc_quant_abs_max(self):
        self.act_quant_op_type = 'fake_quantize_abs_max'
        self.linear_fc_quant('abs_max')

    def test_linear_fc_quant_range_abs_max(self):
        self.act_quant_op_type = 'fake_quantize_range_abs_max'
        self.linear_fc_quant('range_abs_max')

    def residual_block_quant(self, quant_type):
        """Transpile a 2-unit residual network for training and check it."""
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
            t = QuantizeTranspiler(activation_quantize_type=quant_type)
            t.training_transpile(main)
            self.check_program(main)

    def test_residual_block_abs_max(self):
        self.act_quant_op_type = 'fake_quantize_abs_max'
        self.residual_block_quant('abs_max')

    def test_residual_block_range_abs_max(self):
        self.act_quant_op_type = 'fake_quantize_range_abs_max'
        self.residual_block_quant('range_abs_max')

    def freeze_program(self, use_cuda, seed):
        """Train briefly, then freeze, convert to int8, save and reload.

        Args:
            use_cuda: run on ``CUDAPlace(0)`` when true, else on CPU.
            seed: random seed assigned to the programs being built.
        """

        def build_program(main, startup, is_test):
            # Builds conv_net into `main`; the optimizer is only attached
            # for the training program.
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()

        # Fix the Python and NumPy RNGs as well as the program seeds.
        import random
        random.seed(0)
        np.random.seed(0)

        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)

        # The same transpiler instance rewrites both programs.
        quant_transpiler = QuantizeTranspiler()
        quant_transpiler.training_transpile(main)
        quant_transpiler.training_transpile(test_program)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        iters = 5
        batch_size = 8
        class_num = 10
        exe.run(startup)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)

        with fluid.program_guard(main):
            for _ in range(iters):
                # A fresh reader is created on every iteration and only its
                # first batch is consumed.
                data = next(train_reader())
                loss_v = exe.run(program=main,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])

        with fluid.program_guard(test_program):
            test_data = next(test_reader())
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             test_program)
            # Testing during training
            test_loss1, w_quant = exe.run(program=test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

            # Freeze program for inference, but the weight of fc/conv is still float type.
            quant_transpiler.freeze_program(test_program, place)
            test_loss2, = exe.run(program=test_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
            # Freezing must leave the loss on the same batch within 5e-3.
            self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
            w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
                                .get_tensor())
            # fail: -432.0 != -433.0, this is due to the calculation precision
            #self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))

            # Convert parameter to 8-bit.
            quant_transpiler.convert_to_int8(test_program, place)
            # Save the 8-bit parameter and model file.
            fluid.io.save_inference_model('model_8bit', ['image', 'label'],
                                          [loss], exe, test_program)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
                                                                 exe)
            # Check the loaded 8-bit weight.
            w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8')
                              .get_tensor())

            self.assertEqual(w_8bit.dtype, np.int8)
            self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))

    # The two methods below do not start with "test", so unittest does not
    # collect them.
    def not_test_freeze_program_cuda(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_program(True, seed=1)

    def not_test_freeze_program_cpu(self):
        with fluid.unique_name.guard():
            self.freeze_program(False, seed=2)
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/framework.py
浏览文件 @
91756a5a
...
@@ -18,7 +18,6 @@ import collections
...
@@ -18,7 +18,6 @@ import collections
import
contextlib
import
contextlib
import
re
import
re
import
six
import
six
import
traceback
import
numpy
as
np
import
numpy
as
np
...
@@ -35,8 +34,6 @@ except ImportError as e:
...
@@ -35,8 +34,6 @@ except ImportError as e:
except
Exception
as
e
:
except
Exception
as
e
:
raise
e
raise
e
from
.
import
unique_name
from
.
import
unique_name
import
os
PADDLE_ON_MODEL_CE
=
os
.
environ
.
get
(
'PADDLE_ON_MODEL_CE'
,
None
)
is
not
None
__all__
=
[
__all__
=
[
'Program'
,
'Program'
,
...
@@ -490,8 +487,7 @@ class OpProtoHolder(object):
...
@@ -490,8 +487,7 @@ class OpProtoHolder(object):
return
{
return
{
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
()
core
.
op_proto_and_checker_maker
.
kOpCreationCallstackAttrName
()
}
}
...
@@ -541,8 +537,7 @@ class Operator(object):
...
@@ -541,8 +537,7 @@ class Operator(object):
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'listen_and_serv'
,
'parallel_do'
,
'save_combine'
,
'load_combine'
,
'ncclInit'
,
'channel_create'
,
'channel_close'
,
'channel_send'
,
'ncclInit'
,
'select'
,
'checkpoint_notify'
,
'gen_nccl_id'
'channel_recv'
,
'select'
,
'checkpoint_notify'
,
'gen_nccl_id'
}
}
def
__init__
(
self
,
def
__init__
(
self
,
...
@@ -574,11 +569,6 @@ class Operator(object):
...
@@ -574,11 +569,6 @@ class Operator(object):
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
del
op_attrs
[
role_var_name
]
del
op_attrs
[
role_var_name
]
if
not
PADDLE_ON_MODEL_CE
:
callstack_var_name
=
op_maker
.
kOpCreationCallstackAttrName
()
op_attrs
[
callstack_var_name
]
=
list
(
reversed
(
traceback
.
format_stack
()))[
1
:]
if
len
(
self
.
desc
.
type
())
!=
0
:
if
len
(
self
.
desc
.
type
())
!=
0
:
return
return
if
type
is
None
:
if
type
is
None
:
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
91756a5a
...
@@ -21,7 +21,7 @@ from .. import core
...
@@ -21,7 +21,7 @@ from .. import core
from
..framework
import
Program
,
Variable
,
Operator
from
..framework
import
Program
,
Variable
,
Operator
from
..layer_helper
import
LayerHelper
,
unique_name
from
..layer_helper
import
LayerHelper
,
unique_name
from
..initializer
import
force_init_on_cpu
from
..initializer
import
force_init_on_cpu
from
.
ops
import
logical_and
,
logical_not
,
logical_or
from
.
nn
import
logical_and
,
logical_not
,
logical_or
import
numpy
import
numpy
import
warnings
import
warnings
import
six
import
six
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
91756a5a
...
@@ -42,19 +42,11 @@ __all__ = [
...
@@ -42,19 +42,11 @@ __all__ = [
'roi_perspective_transform'
,
'roi_perspective_transform'
,
'generate_proposal_labels'
,
'generate_proposal_labels'
,
'generate_proposals'
,
'generate_proposals'
,
]
__auto__
=
[
'iou_similarity'
,
'iou_similarity'
,
'box_coder'
,
'box_coder'
,
'polygon_box_transform'
,
'polygon_box_transform'
,
]
]
__all__
+=
__auto__
for
_OP
in
set
(
__auto__
):
globals
()[
_OP
]
=
generate_layer_fn
(
_OP
)
def
rpn_target_assign
(
bbox_pred
,
def
rpn_target_assign
(
bbox_pred
,
cls_logits
,
cls_logits
,
...
@@ -308,6 +300,101 @@ def detection_output(loc,
...
@@ -308,6 +300,101 @@ def detection_output(loc,
return
nmsed_outs
return
nmsed_outs
@templatedoc()
def iou_similarity(x, y, name=None):
    """
    ${comment}

    Args:
        x(${x_type}): ${x_comment}
        y(${y_type}): ${y_comment}

    Returns:
        out(${out_type}): ${out_comment}
    """
    # locals() must be taken before any other local is bound: at this point
    # it holds exactly the call arguments (x, y, name).
    helper = LayerHelper("iou_similarity", **locals())
    # Unnamed outputs get a temporary variable; a named output is created
    # as a non-persistable variable with the caller's name.
    if name is None:
        out = helper.create_tmp_variable(dtype=x.dtype)
    else:
        out = helper.create_variable(
            name=name, dtype=x.dtype, persistable=False)

    helper.append_op(
        type="iou_similarity",
        inputs={"X": x,
                "Y": y},
        attrs={},
        outputs={"Out": out})
    return out
@templatedoc()
def box_coder(prior_box,
              prior_box_var,
              target_box,
              code_type="encode_center_size",
              box_normalized=True,
              name=None):
    """
    ${comment}

    Args:
        prior_box(${prior_box_type}): ${prior_box_comment}
        prior_box_var(${prior_box_var_type}): ${prior_box_var_comment}
        target_box(${target_box_type}): ${target_box_comment}
        code_type(${code_type_type}): ${code_type_comment}
        box_normalized(${box_normalized_type}): ${box_normalized_comment}

    Returns:
        output_box(${output_box_type}): ${output_box_comment}
    """
    # locals() must be taken before any other local is bound: at this point
    # it holds exactly the call arguments.
    helper = LayerHelper("box_coder", **locals())

    # Unnamed outputs get a temporary variable; a named output is created
    # as a non-persistable variable with the caller's name.
    if name is None:
        output_box = helper.create_tmp_variable(dtype=prior_box.dtype)
    else:
        output_box = helper.create_variable(
            name=name, dtype=prior_box.dtype, persistable=False)

    helper.append_op(
        type="box_coder",
        inputs={
            "PriorBox": prior_box,
            "PriorBoxVar": prior_box_var,
            "TargetBox": target_box
        },
        attrs={"code_type": code_type,
               "box_normalized": box_normalized},
        outputs={"OutputBox": output_box})
    return output_box
@templatedoc()
def polygon_box_transform(input, name=None):
    """
    ${comment}

    Args:
        input(${input_type}): ${input_comment}

    Returns:
        output(${output_type}): ${output_comment}
    """
    # locals() must be taken before any other local is bound: at this point
    # it holds exactly the call arguments (input, name).
    helper = LayerHelper("polygon_box_transform", **locals())
    # Unnamed outputs get a temporary variable; a named output is created
    # as a non-persistable variable with the caller's name.
    if name is None:
        output = helper.create_tmp_variable(dtype=input.dtype)
    else:
        # The output takes the dtype of `input`, as in the unnamed branch.
        # The previous `prior_box.input` referenced a name that does not
        # exist in this function and raised NameError.
        output = helper.create_variable(
            name=name, dtype=input.dtype, persistable=False)

    helper.append_op(
        type="polygon_box_transform",
        inputs={"Input": input},
        attrs={},
        outputs={"Output": output})
    return output
@
templatedoc
()
@
templatedoc
()
def
detection_map
(
detect_res
,
def
detection_map
(
detect_res
,
label
,
label
,
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
91756a5a
...
@@ -660,7 +660,6 @@ def py_reader(capacity,
...
@@ -660,7 +660,6 @@ def py_reader(capacity,
1. The basic usage of :code:`py_reader` is as follows:
1. The basic usage of :code:`py_reader` is as follows:
>>> import paddle.v2
>>> import paddle.fluid as fluid
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle.dataset.mnist as mnist
>>>
>>>
...
@@ -668,7 +667,7 @@ def py_reader(capacity,
...
@@ -668,7 +667,7 @@ def py_reader(capacity,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> dtypes=['float32', 'int64'])
>>> dtypes=['float32', 'int64'])
>>> reader.decorate_paddle_reader(
>>> reader.decorate_paddle_reader(
>>> paddle.
v2.
reader.shuffle(paddle.batch(mnist.train())
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>>
>>> img, label = fluid.layers.read_file(reader)
>>> img, label = fluid.layers.read_file(reader)
>>> loss = network(img, label) # some network definition
>>> loss = network(img, label) # some network definition
...
@@ -687,7 +686,6 @@ def py_reader(capacity,
...
@@ -687,7 +686,6 @@ def py_reader(capacity,
2. When training and testing are both performed, two different
2. When training and testing are both performed, two different
:code:`py_reader` should be created with different names, e.g.:
:code:`py_reader` should be created with different names, e.g.:
>>> import paddle.v2
>>> import paddle.fluid as fluid
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle.dataset.mnist as mnist
>>>
>>>
...
@@ -701,7 +699,7 @@ def py_reader(capacity,
...
@@ -701,7 +699,7 @@ def py_reader(capacity,
>>> dtypes=['float32', 'int64'],
>>> dtypes=['float32', 'int64'],
>>> name='train_reader')
>>> name='train_reader')
>>> train_reader.decorate_paddle_reader(
>>> train_reader.decorate_paddle_reader(
>>> paddle.
v2.
reader.shuffle(paddle.batch(mnist.train())
>>> paddle.reader.shuffle(paddle.batch(mnist.train())
>>>
>>>
>>> test_reader = fluid.layers.py_reader(capacity=32,
>>> test_reader = fluid.layers.py_reader(capacity=32,
>>> shapes=[(-1,3,224,224), (-1,1)],
>>> shapes=[(-1,3,224,224), (-1,1)],
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
91756a5a
...
@@ -29,29 +29,127 @@ from .. import unique_name
...
@@ -29,29 +29,127 @@ from .. import unique_name
from
functools
import
reduce
from
functools
import
reduce
__all__
=
[
__all__
=
[
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'fc'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'embedding'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'conv3d'
,
'dynamic_lstm'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'pool3d'
,
'dynamic_lstmp'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'conv3d_transpose'
,
'dynamic_gru'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'gru_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'linear_chain_crf'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'crf_decoding'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'cos_sim'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'cross_entropy'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'square_error_cost'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'chunk_eval'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'sequence_conv'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'conv2d'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'conv3d'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'sequence_pool'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'sequence_softmax'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'softmax'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'pool2d'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'pool3d'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'batch_norm'
,
'elementwise_pow'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'beam_search_decode'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
'conv2d_transpose'
,
'conv3d_transpose'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
,
'logical_and'
,
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'clip'
,
'clip_by_norm'
,
'mean'
,
'mul'
,
'sigmoid_cross_entropy_with_logits'
,
'maxout'
,
]
]
...
@@ -60,7 +158,6 @@ def fc(input,
...
@@ -60,7 +158,6 @@ def fc(input,
num_flatten_dims
=
1
,
num_flatten_dims
=
1
,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
use_mkldnn
=
False
,
act
=
None
,
act
=
None
,
is_test
=
False
,
is_test
=
False
,
name
=
None
):
name
=
None
):
...
@@ -112,8 +209,6 @@ def fc(input,
...
@@ -112,8 +209,6 @@ def fc(input,
If it is set to None, the bias is initialized zero. Default: None.
If it is set to None, the bias is initialized zero. Default: None.
act (str, default None): Activation to be applied to the output of this layer.
act (str, default None): Activation to be applied to the output of this layer.
is_test(bool): A flag indicating whether execution is in test phase.
is_test(bool): A flag indicating whether execution is in test phase.
use_mkldnn(bool): Use mkldnn kernel or not, it is valid only when the mkldnn
library is installed. Default: False
name (str, default None): The name of this layer.
name (str, default None): The name of this layer.
Returns:
Returns:
...
@@ -160,7 +255,7 @@ def fc(input,
...
@@ -160,7 +255,7 @@ def fc(input,
type
=
"sum"
,
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
inputs
=
{
"X"
:
mul_results
},
outputs
=
{
"Out"
:
pre_bias
},
outputs
=
{
"Out"
:
pre_bias
},
attrs
=
{
"use_mkldnn"
:
use_mkldnn
})
attrs
=
{
"use_mkldnn"
:
False
})
# add bias
# add bias
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
# add activation
# add activation
...
@@ -953,8 +1048,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
...
@@ -953,8 +1048,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
soft_label (bool): a flag indicating whether to
soft_label (bool): a flag indicating whether to
interpretate the given labels as soft
interpretate the given labels as soft
labels. Default: `False`.
labels. Default: `False`.
ignore_index (int): Specifies a target value that is ignored and does
ignore_index (int): Specifies a target value that is ignored and does
not contribute to the input gradient. Only valid
not contribute to the input gradient. Only valid
if soft_label is set to False. Default: -100
if soft_label is set to False. Default: -100
Returns:
Returns:
...
@@ -1324,7 +1419,6 @@ def conv2d(input,
...
@@ -1324,7 +1419,6 @@ def conv2d(input,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
use_cudnn
=
True
,
use_cudnn
=
True
,
use_mkldnn
=
False
,
act
=
None
,
act
=
None
,
name
=
None
):
name
=
None
):
"""
"""
...
@@ -1402,8 +1496,6 @@ def conv2d(input,
...
@@ -1402,8 +1496,6 @@ def conv2d(input,
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
act (str): Activation type. Default: None
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
...
@@ -1476,7 +1568,7 @@ def conv2d(input,
...
@@ -1476,7 +1568,7 @@ def conv2d(input,
'dilations'
:
dilation
,
'dilations'
:
dilation
,
'groups'
:
groups
,
'groups'
:
groups
,
'use_cudnn'
:
use_cudnn
,
'use_cudnn'
:
use_cudnn
,
'use_mkldnn'
:
use_mkldnn
'use_mkldnn'
:
False
})
})
pre_act
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
1
,
dim_end
=
2
)
pre_act
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
1
,
dim_end
=
2
)
...
@@ -1494,7 +1586,6 @@ def conv3d(input,
...
@@ -1494,7 +1586,6 @@ def conv3d(input,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
use_cudnn
=
True
,
use_cudnn
=
True
,
use_mkldnn
=
False
,
act
=
None
,
act
=
None
,
name
=
None
):
name
=
None
):
"""
"""
...
@@ -1568,7 +1659,6 @@ def conv3d(input,
...
@@ -1568,7 +1659,6 @@ def conv3d(input,
bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not.
act (str): Activation type. Default: None
act (str): Activation type. Default: None
name (str|None): A name for this layer(optional). If set None, the layer
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
...
@@ -1638,7 +1728,7 @@ def conv3d(input,
...
@@ -1638,7 +1728,7 @@ def conv3d(input,
'dilations'
:
dilation
,
'dilations'
:
dilation
,
'groups'
:
groups
,
'groups'
:
groups
,
'use_cudnn'
:
use_cudnn
,
'use_cudnn'
:
use_cudnn
,
'use_mkldnn'
:
use_mkldnn
'use_mkldnn'
:
False
})
})
pre_act
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
1
,
dim_end
=
2
)
pre_act
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
1
,
dim_end
=
2
)
...
@@ -1820,7 +1910,6 @@ def pool2d(input,
...
@@ -1820,7 +1910,6 @@ def pool2d(input,
global_pooling
=
False
,
global_pooling
=
False
,
use_cudnn
=
True
,
use_cudnn
=
True
,
ceil_mode
=
False
,
ceil_mode
=
False
,
use_mkldnn
=
False
,
name
=
None
):
name
=
None
):
"""
"""
${comment}
${comment}
...
@@ -1838,7 +1927,6 @@ def pool2d(input,
...
@@ -1838,7 +1927,6 @@ def pool2d(input,
global_pooling: ${global_pooling_comment}
global_pooling: ${global_pooling_comment}
use_cudnn: ${use_cudnn_comment}
use_cudnn: ${use_cudnn_comment}
ceil_mode: ${ceil_mode_comment}
ceil_mode: ${ceil_mode_comment}
use_mkldnn: ${use_mkldnn_comment}
name (str|None): A name for this layer(optional). If set None, the
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
layer will be named automatically.
...
@@ -1898,7 +1986,7 @@ def pool2d(input,
...
@@ -1898,7 +1986,7 @@ def pool2d(input,
"paddings"
:
pool_padding
,
"paddings"
:
pool_padding
,
"use_cudnn"
:
use_cudnn
,
"use_cudnn"
:
use_cudnn
,
"ceil_mode"
:
ceil_mode
,
"ceil_mode"
:
ceil_mode
,
"use_mkldnn"
:
use_mkldnn
"use_mkldnn"
:
False
})
})
return
pool_out
return
pool_out
...
@@ -1912,7 +2000,6 @@ def pool3d(input,
...
@@ -1912,7 +2000,6 @@ def pool3d(input,
global_pooling
=
False
,
global_pooling
=
False
,
use_cudnn
=
True
,
use_cudnn
=
True
,
ceil_mode
=
False
,
ceil_mode
=
False
,
use_mkldnn
=
False
,
name
=
None
):
name
=
None
):
"""
"""
This function adds the operator for pooling in 3-dimensions, using the
This function adds the operator for pooling in 3-dimensions, using the
...
@@ -1927,7 +2014,6 @@ def pool3d(input,
...
@@ -1927,7 +2014,6 @@ def pool3d(input,
global_pooling (bool): ${global_pooling_comment}
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
ceil_mode (bool): ${ceil_mode_comment}
use_mkldnn (bool): ${use_mkldnn_comment}
name (str): A name for this layer(optional). If set None, the layer
name (str): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
...
@@ -1968,7 +2054,7 @@ def pool3d(input,
...
@@ -1968,7 +2054,7 @@ def pool3d(input,
"paddings"
:
pool_padding
,
"paddings"
:
pool_padding
,
"use_cudnn"
:
use_cudnn
,
"use_cudnn"
:
use_cudnn
,
"ceil_mode"
:
ceil_mode
,
"ceil_mode"
:
ceil_mode
,
"use_mkldnn"
:
use_mkldnn
"use_mkldnn"
:
False
})
})
return
pool_out
return
pool_out
...
@@ -1983,7 +2069,6 @@ def batch_norm(input,
...
@@ -1983,7 +2069,6 @@ def batch_norm(input,
bias_attr
=
None
,
bias_attr
=
None
,
data_layout
=
'NCHW'
,
data_layout
=
'NCHW'
,
in_place
=
False
,
in_place
=
False
,
use_mkldnn
=
False
,
name
=
None
,
name
=
None
,
moving_mean_name
=
None
,
moving_mean_name
=
None
,
moving_variance_name
=
None
,
moving_variance_name
=
None
,
...
@@ -2025,7 +2110,6 @@ def batch_norm(input,
...
@@ -2025,7 +2110,6 @@ def batch_norm(input,
bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
data_layout(string, default NCHW): NCHW|NHWC
data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
use_mkldnn(bool, Default false): ${use_mkldnn_comment}
name(string, Default None): A name for this layer(optional). If set None, the layer
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
...
@@ -2117,7 +2201,7 @@ def batch_norm(input,
...
@@ -2117,7 +2201,7 @@ def batch_norm(input,
"momentum"
:
momentum
,
"momentum"
:
momentum
,
"epsilon"
:
epsilon
,
"epsilon"
:
epsilon
,
"is_test"
:
is_test
,
"is_test"
:
is_test
,
"use_mkldnn"
:
use_mkldnn
,
"use_mkldnn"
:
False
,
"fuse_with_relu"
:
fuse_with_relu
"fuse_with_relu"
:
fuse_with_relu
})
})
...
@@ -2714,20 +2798,20 @@ def sequence_pad(x, pad_value, maxlen=None):
...
@@ -2714,20 +2798,20 @@ def sequence_pad(x, pad_value, maxlen=None):
Args:
Args:
x(Variable): Input variable which should contain lod information.
x(Variable): Input variable which should contain lod information.
pad_value(Variable): The Variable that holds values that will be fill
pad_value(Variable): The Variable that holds values that will be fill
into padded steps. It can be a scalar or a tensor whose shape
into padded steps. It can be a scalar or a tensor whose shape
equals to time steps in sequences. If it's a scalar, it will be
equals to time steps in sequences. If it's a scalar, it will be
automatically broadcasted to the shape of time step.
automatically broadcasted to the shape of time step.
maxlen(int, default None): The length of padded sequences. It can be
maxlen(int, default None): The length of padded sequences. It can be
None or any positive int. When it is None, all sequences will be
None or any positive int. When it is None, all sequences will be
padded up to the length of the longest one among them; when it a
padded up to the length of the longest one among them; when it a
certain positive value, it must be greater than the length of the
certain positive value, it must be greater than the length of the
longest original sequence."
longest original sequence."
Returns:
Returns:
Variable: The padded sequence batch and the original lengths before
Variable: The padded sequence batch and the original lengths before
padding. All sequences has the same length.
padding. All sequences has the same length.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -4343,8 +4427,8 @@ def softmax_with_cross_entropy(logits,
...
@@ -4343,8 +4427,8 @@ def softmax_with_cross_entropy(logits,
soft_label is set to true, Label is a Tensor<float/double> with
soft_label is set to true, Label is a Tensor<float/double> with
soft_label (bool): A flag to indicate whether to interpretate the given
soft_label (bool): A flag to indicate whether to interpretate the given
labels as soft labels. By default, `soft_label` is set to False.
labels as soft labels. By default, `soft_label` is set to False.
ignore_index (int): Specifies a target value that is ignored and does
ignore_index (int): Specifies a target value that is ignored and does
not contribute to the input gradient. Only valid
not contribute to the input gradient. Only valid
if soft_label is set to False. Default: -100
if soft_label is set to False. Default: -100
Returns:
Returns:
...
@@ -4601,14 +4685,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
...
@@ -4601,14 +4685,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def
squeeze
(
input
,
axes
,
name
=
None
):
def
squeeze
(
input
,
axes
,
name
=
None
):
"""
"""
Remove single-dimensional entries from the shape of a tensor. Takes a
Remove single-dimensional entries from the shape of a tensor. Takes a
parameter axes with a list of axes to squeeze. If axes is not provided, all
parameter axes with a list of axes to squeeze. If axes is not provided, all
the single dimensions will be removed from the shape. If an axis is
the single dimensions will be removed from the shape. If an axis is
selected with shape entry not equal to one, an error is raised.
selected with shape entry not equal to one, an error is raised.
Examples:
Examples:
Case 1:
Case 1:
Given
Given
X.shape = (1, 3, 1, 5)
X.shape = (1, 3, 1, 5)
and
and
axes = [0]
axes = [0]
...
@@ -4617,11 +4701,11 @@ def squeeze(input, axes, name=None):
...
@@ -4617,11 +4701,11 @@ def squeeze(input, axes, name=None):
Case 2:
Case 2:
Given
Given
X.shape = (1, 3, 1, 5)
X.shape = (1, 3, 1, 5)
and
and
axes = []
axes = []
we get:
we get:
Out.shape = (3, 5)
Out.shape = (3, 5)
Args:
Args:
input (Variable): The input variable to be squeezed.
input (Variable): The input variable to be squeezed.
axes (list): List of integers, indicating the dimensions to be squeezed.
axes (list): List of integers, indicating the dimensions to be squeezed.
...
@@ -4651,14 +4735,14 @@ def squeeze(input, axes, name=None):
...
@@ -4651,14 +4735,14 @@ def squeeze(input, axes, name=None):
def
unsqueeze
(
input
,
axes
,
name
=
None
):
def
unsqueeze
(
input
,
axes
,
name
=
None
):
"""
"""
Insert single-dimensional entries to the shape of a tensor. Takes one
Insert single-dimensional entries to the shape of a tensor. Takes one
required argument axes, a list of dimensions that will be inserted.
required argument axes, a list of dimensions that will be inserted.
Dimension indices in axes are as seen in the output tensor.
Dimension indices in axes are as seen in the output tensor.
For example:
For example:
Given a tensor such that tensor with shape [3, 4, 5],
Given a tensor such that tensor with shape [3, 4, 5],
then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1].
then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1].
Args:
Args:
input (Variable): The input variable to be unsqueezed.
input (Variable): The input variable to be unsqueezed.
axes (list): List of integers, indicating the dimensions to be inserted.
axes (list): List of integers, indicating the dimensions to be inserted.
...
@@ -5757,39 +5841,39 @@ def pad2d(input,
...
@@ -5757,39 +5841,39 @@ def pad2d(input,
Example:
Example:
Given that X is a channel of image from input:
Given that X is a channel of image from input:
X = [[1, 2, 3],
X = [[1, 2, 3],
[4, 5, 6]]
[4, 5, 6]]
Case 0:
Case 0:
paddings = [0, 1, 2, 3],
paddings = [0, 1, 2, 3],
mode = 'constant'
mode = 'constant'
pad_value = 0
pad_value = 0
Out = [[0, 0, 1, 2, 3, 0, 0, 0]
Out = [[0, 0, 1, 2, 3, 0, 0, 0]
[0, 0, 4, 5, 6, 0, 0, 0]
[0, 0, 4, 5, 6, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]]
[0, 0, 0, 0, 0, 0, 0, 0]]
Case 1:
Case 1:
paddings = [0, 1, 2, 1],
paddings = [0, 1, 2, 1],
mode = 'reflect'
mode = 'reflect'
Out = [[3, 2, 1, 2, 3, 2]
Out = [[3, 2, 1, 2, 3, 2]
[6, 5, 4, 5, 6, 5]
[6, 5, 4, 5, 6, 5]
[3, 2, 1, 2, 3, 2]]
[3, 2, 1, 2, 3, 2]]
Case 2:
Case 2:
paddings = [0, 1, 2, 1],
paddings = [0, 1, 2, 1],
mode = 'edge'
mode = 'edge'
Out = [[1, 1, 1, 2, 3, 3]
Out = [[1, 1, 1, 2, 3, 3]
[4, 4, 4, 5, 6, 6]
[4, 4, 4, 5, 6, 6]
[4, 4, 4, 5, 6, 6]]
[4, 4, 4, 5, 6, 6]]
Args:
Args:
input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format.
input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format.
paddings (tuple|list): The padding size. If padding is a tuple, it must
paddings (tuple|list): The padding size. If padding is a tuple, it must
...
@@ -5988,7 +6072,7 @@ def prelu(x, mode, param_attr=None, name=None):
...
@@ -5988,7 +6072,7 @@ def prelu(x, mode, param_attr=None, name=None):
channel:elements in a channel share same weight
channel:elements in a channel share same weight
element:each element has a weight
element:each element has a weight
name(str|None): A name for this layer(optional). If set None, the layer
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
Returns:
Returns:
Variable: The output tensor with the same shape as input.
Variable: The output tensor with the same shape as input.
...
@@ -6166,10 +6250,10 @@ def flatten(x, axis=1, name=None):
...
@@ -6166,10 +6250,10 @@ def flatten(x, axis=1, name=None):
def
sequence_enumerate
(
input
,
win_size
,
pad_value
=
0
,
name
=
None
):
def
sequence_enumerate
(
input
,
win_size
,
pad_value
=
0
,
name
=
None
):
"""
"""
Generate a new sequence for the input index sequence, which enumerates all the
Generate a new sequence for the input index sequence, which enumerates all the
sub-sequences with length `win_size` of the input.
sub-sequences with length `win_size` of the input.
The enumerated sequence has the same 1st dimension with variable `input`, and
The enumerated sequence has the same 1st dimension with variable `input`, and
the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation.
the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation.
Examples:
Examples:
Case 1:
Case 1:
Input:
Input:
...
@@ -6296,20 +6380,20 @@ def unstack(x, axis=0, num=None):
...
@@ -6296,20 +6380,20 @@ def unstack(x, axis=0, num=None):
**UnStack Layer**
**UnStack Layer**
This layer unstacks input :code:`x` into several tensors along axis.
This layer unstacks input :code:`x` into several tensors along axis.
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`,
If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`,
and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is
and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is
raised.
raised.
Args:
Args:
x (Variable): Input variable.
x (Variable): Input variable.
axis (int): The axis along which the input is unstacked.
axis (int): The axis along which the input is unstacked.
num (int|None): The number of output variables.
num (int|None): The number of output variables.
Returns:
Returns:
list(Variable): The unstacked variables.
list(Variable): The unstacked variables.
"""
"""
helper
=
LayerHelper
(
'unstack'
,
**
locals
())
helper
=
LayerHelper
(
'unstack'
,
**
locals
())
...
@@ -6342,21 +6426,21 @@ def expand(x, expand_times, name=None):
...
@@ -6342,21 +6426,21 @@ def expand(x, expand_times, name=None):
.. code-block:: text
.. code-block:: text
Input(X) is a 3-D tensor with shape [2, 3, 1]:
Input(X) is a 3-D tensor with shape [2, 3, 1]:
[
[
[[1], [2], [3]],
[[1], [2], [3]],
[[4], [5], [6]]
[[4], [5], [6]]
]
]
Attr(expand_times): [1, 2, 2]
Attr(expand_times): [1, 2, 2]
Output(Out) is a 3-D tensor with shape [2, 6, 2]:
Output(Out) is a 3-D tensor with shape [2, 6, 2]:
[
[
[[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
[[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
[[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
[[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
]
]
Args:
Args:
x (Variable): A tensor with rank in [1, 6].
x (Variable): A tensor with rank in [1, 6].
expand_times (list|tuple): Expand times number for each dimension.
expand_times (list|tuple): Expand times number for each dimension.
...
@@ -6432,12 +6516,7 @@ def uniform_random_batch_size_like(input,
...
@@ -6432,12 +6516,7 @@ def uniform_random_batch_size_like(input,
@
templatedoc
()
@
templatedoc
()
def
gaussian_random
(
shape
,
def
gaussian_random
(
shape
,
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
):
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
,
use_mkldnn
=
False
):
"""
"""
${comment}
${comment}
...
@@ -6447,7 +6526,6 @@ def gaussian_random(shape,
...
@@ -6447,7 +6526,6 @@ def gaussian_random(shape,
std (Float): ${std_comment}
std (Float): ${std_comment}
seed (Int): ${seed_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): Output data type.
dtype(np.dtype|core.VarDesc.VarType|str): Output data type.
use_mkldnn (Bool): Only used in mkldnn kernel.
Returns:
Returns:
out (Variable): ${out_comment}
out (Variable): ${out_comment}
...
@@ -6466,7 +6544,7 @@ def gaussian_random(shape,
...
@@ -6466,7 +6544,7 @@ def gaussian_random(shape,
'std'
:
std
,
'std'
:
std
,
'seed'
:
seed
,
'seed'
:
seed
,
'dtype'
:
c_dtype
,
'dtype'
:
c_dtype
,
'use_mkldnn'
:
use_mkldnn
'use_mkldnn'
:
False
})
})
return
out
return
out
...
@@ -6549,13 +6627,12 @@ def gaussian_random_batch_size_like(input,
...
@@ -6549,13 +6627,12 @@ def gaussian_random_batch_size_like(input,
@
templatedoc
()
@
templatedoc
()
def
sum
(
x
,
use_mkldnn
=
False
):
def
sum
(
x
):
"""
"""
${comment}
${comment}
Args:
Args:
x (Variable): ${x_comment}
x (Variable): ${x_comment}
use_mkldnn (Bool): ${use_mkldnn_comment}
Returns:
Returns:
out (Variable): ${out_comment}
out (Variable): ${out_comment}
...
@@ -6567,7 +6644,7 @@ def sum(x, use_mkldnn=False):
...
@@ -6567,7 +6644,7 @@ def sum(x, use_mkldnn=False):
type
=
'sum'
,
type
=
'sum'
,
inputs
=
{
'X'
:
x
},
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
out
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'use_mkldnn'
:
use_mkldnn
})
attrs
=
{
'use_mkldnn'
:
False
})
return
out
return
out
...
@@ -6630,14 +6707,12 @@ def _elementwise_op(helper):
...
@@ -6630,14 +6707,12 @@ def _elementwise_op(helper):
assert
y
is
not
None
,
'y cannot be None in {}'
.
format
(
op_type
)
assert
y
is
not
None
,
'y cannot be None in {}'
.
format
(
op_type
)
axis
=
helper
.
kwargs
.
get
(
'axis'
,
-
1
)
axis
=
helper
.
kwargs
.
get
(
'axis'
,
-
1
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
use_mkldnn
=
helper
.
kwargs
.
get
(
'use_mkldnn'
,
False
)
out
=
helper
.
kwargs
.
get
(
'out'
,
None
)
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
if
out
is
None
:
if
name
is
None
:
name
=
helper
.
kwargs
.
get
(
'name'
,
None
)
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
if
name
is
None
:
else
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_variable
(
else
:
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
helper
.
append_op
(
type
=
op_type
,
type
=
op_type
,
...
@@ -6650,13 +6725,7 @@ def _elementwise_op(helper):
...
@@ -6650,13 +6725,7 @@ def _elementwise_op(helper):
@
templatedoc
()
@
templatedoc
()
def
scale
(
x
,
def
scale
(
x
,
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
act
=
None
,
name
=
None
):
scale
=
1.0
,
bias
=
0.0
,
bias_after_scale
=
True
,
out
=
None
,
act
=
None
,
name
=
None
):
"""
"""
${comment}
${comment}
...
@@ -6665,21 +6734,19 @@ def scale(x,
...
@@ -6665,21 +6734,19 @@ def scale(x,
scale(${scale_type}): ${scale_comment}
scale(${scale_type}): ${scale_comment}
bias(${bias_type}): ${bias_comment}
bias(${bias_type}): ${bias_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
out(Tensor): Output tensor.
act(basestring|None): Activation applied to the output.
act(basestring|None): Activation applied to the output.
name(basestring|None): Name of the output.
name(basestring|None): Name of the output.
Returns:
Returns:
out(${out_type}): ${out_comment}
out(${out_type}): ${out_comment}
"""
"""
helper
=
LayerHelper
(
'scale'
,
**
locals
())
helper
=
LayerHelper
(
'scale'
,
**
locals
())
if
out
is
None
:
if
name
is
None
:
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
else
:
out
=
helper
.
create_variable
(
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'scale'
,
type
=
'scale'
,
...
@@ -6693,73 +6760,31 @@ def scale(x,
...
@@ -6693,73 +6760,31 @@ def scale(x,
return
helper
.
append_activation
(
out
)
return
helper
.
append_activation
(
out
)
def
elementwise_add
(
x
,
def
elementwise_add
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_add'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_add'
,
**
locals
()))
def
elementwise_div
(
x
,
def
elementwise_div
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_div'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_div'
,
**
locals
()))
def
elementwise_sub
(
x
,
def
elementwise_sub
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_sub'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_sub'
,
**
locals
()))
def
elementwise_mul
(
x
,
def
elementwise_mul
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_mul'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_mul'
,
**
locals
()))
def
elementwise_max
(
x
,
def
elementwise_max
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_max'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_max'
,
**
locals
()))
def
elementwise_min
(
x
,
def
elementwise_min
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_min'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_min'
,
**
locals
()))
def
elementwise_pow
(
x
,
def
elementwise_pow
(
x
,
y
,
axis
=-
1
,
act
=
None
,
name
=
None
):
y
,
out
=
None
,
axis
=-
1
,
use_mkldnn
=
False
,
act
=
None
,
name
=
None
):
return
_elementwise_op
(
LayerHelper
(
'elementwise_pow'
,
**
locals
()))
return
_elementwise_op
(
LayerHelper
(
'elementwise_pow'
,
**
locals
()))
...
@@ -6771,7 +6796,291 @@ for func in [
...
@@ -6771,7 +6796,291 @@ for func in [
func
.
__doc__
=
_generate_doc_string_
(
func
.
__doc__
=
_generate_doc_string_
(
op_proto
,
op_proto
,
additional_args_lines
=
[
additional_args_lines
=
[
"out (Tensor): The output tensor of elementwise op."
,
"act (basestring|None): Activation applied to the output."
,
"act (basestring|None): Activation applied to the output."
,
"name (basestring|None): Name of the output."
"name (basestring|None): Name of the output."
])
])
def
_logical_op
(
op_name
,
x
,
y
,
out
=
None
,
name
=
None
,
binary_op
=
True
):
helper
=
LayerHelper
(
op_name
,
**
locals
())
if
binary_op
:
assert
x
.
dtype
==
y
.
dtype
if
out
is
None
:
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
if
binary_op
:
helper
.
append_op
(
type
=
op_name
,
inputs
=
{
"X"
:
x
,
"Y"
:
y
},
outputs
=
{
"Out"
:
out
})
else
:
helper
.
append_op
(
type
=
op_name
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
logical_and
(
x
,
y
,
out
=
None
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
y(${y_type}): ${y_comment}
out(Tensor): Output tensor of logical operation.
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
return
_logical_op
(
op_name
=
"logical_and"
,
x
=
x
,
y
=
y
,
name
=
name
,
out
=
out
,
binary_op
=
True
)
@
templatedoc
()
def
logical_or
(
x
,
y
,
out
=
None
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
y(${y_type}): ${y_comment}
out(Tensor): Output tensor of logical operation.
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
return
_logical_op
(
op_name
=
"logical_or"
,
x
=
x
,
y
=
y
,
name
=
name
,
out
=
out
,
binary_op
=
True
)
@
templatedoc
()
def
logical_xor
(
x
,
y
,
out
=
None
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
y(${y_type}): ${y_comment}
out(Tensor): Output tensor of logical operation.
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
return
_logical_op
(
op_name
=
"logical_xor"
,
x
=
x
,
y
=
y
,
name
=
name
,
out
=
out
,
binary_op
=
True
)
@
templatedoc
()
def
logical_not
(
x
,
out
=
None
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
out(Tensor): Output tensor of logical operation.
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
return
_logical_op
(
op_name
=
"logical_not"
,
x
=
x
,
y
=
None
,
name
=
name
,
out
=
out
,
binary_op
=
False
)
@
templatedoc
()
def
clip
(
x
,
min
,
max
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
min(${min_type}): ${min_comment}
max(${max_type}): ${max_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"clip"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"clip"
,
inputs
=
{
"X"
:
x
},
attrs
=
{
"min"
:
min
,
"max"
:
max
},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
clip_by_norm
(
x
,
max_norm
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
max_norm(${max_norm_type}): ${max_norm_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"clip_by_norm"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"clip_by_norm"
,
inputs
=
{
"X"
:
x
},
attrs
=
{
"max_norm"
:
max_norm
},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
mean
(
x
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"mean"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"mean"
,
inputs
=
{
"X"
:
x
},
attrs
=
{},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
mul
(
x
,
y
,
x_num_col_dims
=
1
,
y_num_col_dims
=
1
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
y(${y_type}): ${y_comment}
x_num_col_dims(${x_num_col_dims_type}): ${x_num_col_dims_comment}
y_num_col_dims(${y_num_col_dims_type}): ${y_num_col_dims_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"mul"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"mul"
,
inputs
=
{
"X"
:
x
,
"Y"
:
y
},
attrs
=
{
"x_num_col_dims"
:
x_num_col_dims
,
"y_num_col_dims"
:
y_num_col_dims
},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
sigmoid_cross_entropy_with_logits
(
x
,
label
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
label(${label_type}): ${label_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"sigmoid_cross_entropy_with_logits"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"sigmoid_cross_entropy_with_logits"
,
inputs
=
{
"X"
:
x
,
"Label"
:
label
},
attrs
=
{},
outputs
=
{
"Out"
:
out
})
return
out
@
templatedoc
()
def
maxout
(
x
,
groups
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
groups(${groups_type}): ${groups_comment}
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper
=
LayerHelper
(
"maxout"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"maxout"
,
inputs
=
{
"X"
:
x
},
attrs
=
{
"groups"
:
groups
},
outputs
=
{
"Out"
:
out
})
return
out
python/paddle/fluid/layers/ops.py
浏览文件 @
91756a5a
...
@@ -35,18 +35,7 @@ __activations_noattr__ = [
...
@@ -35,18 +35,7 @@ __activations_noattr__ = [
'softsign'
,
'softsign'
,
]
]
__all__
=
[
__all__
=
[]
'mean'
,
'mul'
,
'sigmoid_cross_entropy_with_logits'
,
'clip'
,
'clip_by_norm'
,
'logical_and'
,
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'maxout'
,
]
for
_OP
in
set
(
__all__
):
for
_OP
in
set
(
__all__
):
globals
()[
_OP
]
=
generate_layer_fn
(
_OP
)
globals
()[
_OP
]
=
generate_layer_fn
(
_OP
)
...
...
python/paddle/fluid/nets.py
浏览文件 @
91756a5a
...
@@ -40,8 +40,7 @@ def simple_img_conv_pool(input,
...
@@ -40,8 +40,7 @@ def simple_img_conv_pool(input,
param_attr
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
bias_attr
=
None
,
act
=
None
,
act
=
None
,
use_cudnn
=
True
,
use_cudnn
=
True
):
use_mkldnn
=
False
):
"""
"""
The simple_img_conv_pool is composed with one Convolution2d and one Pool2d.
The simple_img_conv_pool is composed with one Convolution2d and one Pool2d.
...
@@ -84,8 +83,6 @@ def simple_img_conv_pool(input,
...
@@ -84,8 +83,6 @@ def simple_img_conv_pool(input,
act (str): Activation type for Conv2d. Default: None
act (str): Activation type for Conv2d. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Return:
Variable: The result of input after Convolution2d and Pool2d.
Variable: The result of input after Convolution2d and Pool2d.
...
@@ -112,8 +109,7 @@ def simple_img_conv_pool(input,
...
@@ -112,8 +109,7 @@ def simple_img_conv_pool(input,
param_attr
=
param_attr
,
param_attr
=
param_attr
,
bias_attr
=
bias_attr
,
bias_attr
=
bias_attr
,
act
=
act
,
act
=
act
,
use_cudnn
=
use_cudnn
,
use_cudnn
=
use_cudnn
)
use_mkldnn
=
use_mkldnn
)
pool_out
=
layers
.
pool2d
(
pool_out
=
layers
.
pool2d
(
input
=
conv_out
,
input
=
conv_out
,
...
@@ -122,8 +118,7 @@ def simple_img_conv_pool(input,
...
@@ -122,8 +118,7 @@ def simple_img_conv_pool(input,
pool_stride
=
pool_stride
,
pool_stride
=
pool_stride
,
pool_padding
=
pool_padding
,
pool_padding
=
pool_padding
,
global_pooling
=
global_pooling
,
global_pooling
=
global_pooling
,
use_cudnn
=
use_cudnn
,
use_cudnn
=
use_cudnn
)
use_mkldnn
=
use_mkldnn
)
return
pool_out
return
pool_out
...
@@ -138,8 +133,7 @@ def img_conv_group(input,
...
@@ -138,8 +133,7 @@ def img_conv_group(input,
conv_batchnorm_drop_rate
=
0.0
,
conv_batchnorm_drop_rate
=
0.0
,
pool_stride
=
1
,
pool_stride
=
1
,
pool_type
=
"max"
,
pool_type
=
"max"
,
use_cudnn
=
True
,
use_cudnn
=
True
):
use_mkldnn
=
False
):
"""
"""
The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut,
The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut,
and Pool2d. According to the input arguments, img_conv_group will do serials of
and Pool2d. According to the input arguments, img_conv_group will do serials of
...
@@ -177,8 +171,6 @@ def img_conv_group(input,
...
@@ -177,8 +171,6 @@ def img_conv_group(input,
average-pooling. Default :math:`max`.
average-pooling. Default :math:`max`.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Return:
Variable: The final result after serial computation using Convolution2d,
Variable: The final result after serial computation using Convolution2d,
...
@@ -226,8 +218,7 @@ def img_conv_group(input,
...
@@ -226,8 +218,7 @@ def img_conv_group(input,
padding
=
conv_padding
[
i
],
padding
=
conv_padding
[
i
],
param_attr
=
param_attr
[
i
],
param_attr
=
param_attr
[
i
],
act
=
local_conv_act
,
act
=
local_conv_act
,
use_cudnn
=
use_cudnn
,
use_cudnn
=
use_cudnn
)
use_mkldnn
=
use_mkldnn
)
if
conv_with_batchnorm
[
i
]:
if
conv_with_batchnorm
[
i
]:
tmp
=
layers
.
batch_norm
(
input
=
tmp
,
act
=
conv_act
,
in_place
=
True
)
tmp
=
layers
.
batch_norm
(
input
=
tmp
,
act
=
conv_act
,
in_place
=
True
)
...
@@ -240,8 +231,7 @@ def img_conv_group(input,
...
@@ -240,8 +231,7 @@ def img_conv_group(input,
pool_size
=
pool_size
,
pool_size
=
pool_size
,
pool_type
=
pool_type
,
pool_type
=
pool_type
,
pool_stride
=
pool_stride
,
pool_stride
=
pool_stride
,
use_cudnn
=
use_cudnn
,
use_cudnn
=
use_cudnn
)
use_mkldnn
=
use_mkldnn
)
return
pool_out
return
pool_out
...
...
python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -2,6 +2,16 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
...
@@ -2,6 +2,16 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
string
(
REPLACE
".py"
""
TEST_OPS
"
${
TEST_OPS
}
"
)
# default test
# default test
foreach
(
src
${
TEST_OPS
}
)
if
(
NOT APPLE
)
py_test
(
${
src
}
SRCS
${
src
}
.py
)
foreach
(
src
${
TEST_OPS
}
)
endforeach
()
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endforeach
()
else
()
foreach
(
src
${
TEST_OPS
}
)
if
(
${
src
}
STREQUAL
"test_recognize_digits_conv"
)
message
(
WARNING
"These tests has been disabled in OSX for random fail:
\n
"
${
src
}
)
else
()
py_test
(
${
src
}
SRCS
${
src
}
.py
)
endif
()
endforeach
()
endif
()
python/paddle/fluid/tests/no_test_concurrency.py
已删除
100644 → 0
浏览文件 @
c5292b18
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid
import
framework
,
unique_name
,
layer_helper
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.layers
import
fill_constant
,
assign
,
While
,
elementwise_add
,
Print
class
TestRoutineOp
(
unittest
.
TestCase
):
def
test_simple_routine
(
self
):
ch
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
# Create LOD_TENSOR<INT64> and put it into the scope. This placeholder
# variable will be filled in and returned by fluid.channel_recv
result
=
self
.
_create_tensor
(
'return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
INT64
)
with
fluid
.
Go
():
input_value
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
FP64
,
value
=
1234
)
fluid
.
channel_send
(
ch
,
input_value
)
result
,
status
=
fluid
.
channel_recv
(
ch
,
result
)
fluid
.
channel_close
(
ch
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
outs
=
exe
.
run
(
fetch_list
=
[
result
])
self
.
assertEqual
(
outs
[
0
],
1234
)
def
test_daisy_chain
(
self
):
'''
Mimics classic Daisy-chain test: https://talks.golang.org/2012/concurrency.slide#39
'''
n
=
100
leftmost
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
left
=
leftmost
# TODO(thuan): Use fluid.While() after scope capture is implemented.
# https://github.com/PaddlePaddle/Paddle/issues/8502
for
i
in
range
(
n
):
right
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
with
fluid
.
Go
():
one_tensor
=
self
.
_create_one_dim_tensor
(
1
)
result
=
self
.
_create_tensor
(
'return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
INT64
)
result
,
status
=
fluid
.
channel_recv
(
right
,
result
)
one_added
=
fluid
.
layers
.
elementwise_add
(
x
=
one_tensor
,
y
=
result
)
fluid
.
channel_send
(
left
,
one_added
)
left
=
right
# Trigger the channel propagation by sending a "1" to rightmost channel
with
fluid
.
Go
():
one_tensor
=
self
.
_create_one_dim_tensor
(
1
)
fluid
.
channel_send
(
right
,
one_tensor
)
leftmost_result
=
self
.
_create_tensor
(
'return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
INT64
)
leftmost_result
,
status
=
fluid
.
channel_recv
(
leftmost
,
leftmost_result
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
leftmost_data
=
exe
.
run
(
fetch_list
=
[
leftmost_result
])
# The leftmost_data should be equal to the number of channels + 1
self
.
assertEqual
(
leftmost_data
[
0
][
0
],
n
+
1
)
def
_create_one_dim_tensor
(
self
,
value
):
one_dim_tensor
=
fill_constant
(
shape
=
[
1
],
dtype
=
'int'
,
value
=
value
)
one_dim_tensor
.
stop_gradient
=
True
return
one_dim_tensor
def
_create_tensor
(
self
,
name
,
type
,
dtype
):
return
framework
.
default_main_program
().
current_block
().
create_var
(
name
=
unique_name
.
generate
(
name
),
type
=
type
,
dtype
=
dtype
)
def
_create_persistable_tensor
(
self
,
name
,
type
,
dtype
):
return
framework
.
default_main_program
().
current_block
().
create_var
(
name
=
unique_name
.
generate
(
name
),
type
=
type
,
dtype
=
dtype
,
persistable
=
True
)
def
test_select
(
self
):
with
framework
.
program_guard
(
framework
.
Program
()):
ch1
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
capacity
=
1
)
result1
=
self
.
_create_tensor
(
'return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
FP64
)
input_value
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
FP64
,
value
=
10
)
with
fluid
.
Select
()
as
select
:
with
select
.
case
(
fluid
.
channel_send
,
ch1
,
input_value
):
# Execute something.
pass
with
select
.
default
():
pass
# This should not block because we are using a buffered channel.
result1
,
status
=
fluid
.
channel_recv
(
ch1
,
result1
)
fluid
.
channel_close
(
ch1
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
result
=
exe
.
run
(
fetch_list
=
[
result1
])
self
.
assertEqual
(
result
[
0
][
0
],
10
)
def
test_fibonacci
(
self
):
"""
Mimics Fibonacci Go example: https://tour.golang.org/concurrency/5
"""
with
framework
.
program_guard
(
framework
.
Program
()):
quit_ch_input_var
=
self
.
_create_persistable_tensor
(
'quit_ch_input'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
INT32
)
quit_ch_input
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
0
,
out
=
quit_ch_input_var
)
result
=
self
.
_create_persistable_tensor
(
'result'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
INT32
)
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
0
,
out
=
result
)
x
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
0
)
y
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
1
)
while_cond
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
BOOL
,
value
=
True
)
while_false
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
BOOL
,
value
=
False
)
x_tmp
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
0
)
def
fibonacci
(
channel
,
quit_channel
):
while_op
=
While
(
cond
=
while_cond
)
with
while_op
.
block
():
result2
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
INT32
,
value
=
0
)
with
fluid
.
Select
()
as
select
:
with
select
.
case
(
fluid
.
channel_send
,
channel
,
x
,
is_copy
=
True
):
assign
(
input
=
x
,
output
=
x_tmp
)
assign
(
input
=
y
,
output
=
x
)
assign
(
elementwise_add
(
x
=
x_tmp
,
y
=
y
),
output
=
y
)
with
select
.
case
(
fluid
.
channel_recv
,
quit_channel
,
result2
):
# Quit
helper
=
layer_helper
.
LayerHelper
(
'assign'
)
helper
.
append_op
(
type
=
'assign'
,
inputs
=
{
'X'
:
[
while_false
]},
outputs
=
{
'Out'
:
[
while_cond
]})
ch1
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
quit_ch
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
)
with
fluid
.
Go
():
for
i
in
range
(
10
):
fluid
.
channel_recv
(
ch1
,
result
)
Print
(
result
)
fluid
.
channel_send
(
quit_ch
,
quit_ch_input
)
fibonacci
(
ch1
,
quit_ch
)
fluid
.
channel_close
(
ch1
)
fluid
.
channel_close
(
quit_ch
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
exe_result
=
exe
.
run
(
fetch_list
=
[
result
])
self
.
assertEqual
(
exe_result
[
0
][
0
],
34
)
def
test_ping_pong
(
self
):
"""
Mimics Ping Pong example: https://gobyexample.com/channel-directions
"""
with
framework
.
program_guard
(
framework
.
Program
()):
result
=
self
.
_create_tensor
(
'return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
FP64
)
ping_result
=
self
.
_create_tensor
(
'ping_return_value'
,
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
core
.
VarDesc
.
VarType
.
FP64
)
def
ping
(
ch
,
message
):
fluid
.
channel_send
(
ch
,
message
,
is_copy
=
True
)
def
pong
(
ch1
,
ch2
):
fluid
.
channel_recv
(
ch1
,
ping_result
)
fluid
.
channel_send
(
ch2
,
ping_result
,
is_copy
=
True
)
pings
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
capacity
=
1
)
pongs
=
fluid
.
make_channel
(
dtype
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
capacity
=
1
)
msg
=
fill_constant
(
shape
=
[
1
],
dtype
=
core
.
VarDesc
.
VarType
.
FP64
,
value
=
9
)
ping
(
pings
,
msg
)
pong
(
pings
,
pongs
)
fluid
.
channel_recv
(
pongs
,
result
)
fluid
.
channel_close
(
pings
)
fluid
.
channel_close
(
pongs
)
cpu
=
core
.
CPUPlace
()
exe
=
Executor
(
cpu
)
exe_result
=
exe
.
run
(
fetch_list
=
[
result
])
self
.
assertEqual
(
exe_result
[
0
][
0
],
9
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
91756a5a
...
@@ -28,7 +28,6 @@ list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/Paddl
...
@@ -28,7 +28,6 @@ list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/Paddl
list
(
REMOVE_ITEM TEST_OPS op_test
)
# op_test is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS op_test
)
# op_test is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS decorators
)
# decorators is a helper python file, not a test
list
(
REMOVE_ITEM TEST_OPS decorators
)
# decorators is a helper python file, not a test
if
(
APPLE
)
if
(
APPLE
)
if
(
NOT WITH_DISTRIBUTE
)
if
(
NOT WITH_DISTRIBUTE
)
list
(
REMOVE_ITEM TEST_OPS test_desc_clone
)
list
(
REMOVE_ITEM TEST_OPS test_desc_clone
)
...
@@ -77,11 +76,13 @@ if(WITH_DISTRIBUTE)
...
@@ -77,11 +76,13 @@ if(WITH_DISTRIBUTE)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
set_tests_properties
(
test_dist_mnist PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_dist_mnist PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_dist_word2vec PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_dist_word2vec PROPERTIES TIMEOUT 200
)
py_test_modules
(
test_dist_se_resnext MODULES test_dist_se_resnext SERIAL
)
py_test_modules
(
test_dist_se_resnext MODULES test_dist_se_resnext
)
set_tests_properties
(
test_dist_se_resnext PROPERTIES TIMEOUT 1000
)
# TODO: fix this test
#py_test_modules(test_dist_transformer MODULES test_dist_transformer)
#set_tests_properties(test_dist_transformer PROPERTIES TIMEOUT 1000)
endif
(
NOT APPLE
)
endif
(
NOT APPLE
)
py_test_modules
(
test_dist_transpiler MODULES test_dist_transpiler
)
py_test_modules
(
test_dist_transpiler MODULES test_dist_transpiler
)
#FIXME(gongwb): random fails.
#py_test_modules(test_dist_transformer MODULES test_dist_transformer SERIAL)
endif
()
endif
()
py_test_modules
(
test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL
)
py_test_modules
(
test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
...
...
python/paddle/fluid/tests/unittests/dist_se_resnext.py
浏览文件 @
91756a5a
...
@@ -247,7 +247,7 @@ class DistSeResneXt2x2(TestDistRunnerBase):
...
@@ -247,7 +247,7 @@ class DistSeResneXt2x2(TestDistRunnerBase):
# Reader
# Reader
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
flowers
.
t
rain
(
),
batch_size
=
batch_size
)
paddle
.
dataset
.
flowers
.
t
est
(
use_xmap
=
False
),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
flowers
.
test
(
use_xmap
=
False
),
batch_size
=
batch_size
)
paddle
.
dataset
.
flowers
.
test
(
use_xmap
=
False
),
batch_size
=
batch_size
)
...
...
python/paddle/fluid/tests/unittests/test_conv2d_op.py
浏览文件 @
91756a5a
...
@@ -67,6 +67,7 @@ class TestConv2dOp(OpTest):
...
@@ -67,6 +67,7 @@ class TestConv2dOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"conv2d"
self
.
op_type
=
"conv2d"
self
.
use_cudnn
=
False
self
.
use_cudnn
=
False
self
.
use_cuda
=
False
self
.
use_mkldnn
=
False
self
.
use_mkldnn
=
False
self
.
data_format
=
"AnyLayout"
self
.
data_format
=
"AnyLayout"
self
.
dtype
=
np
.
float32
self
.
dtype
=
np
.
float32
...
@@ -101,24 +102,25 @@ class TestConv2dOp(OpTest):
...
@@ -101,24 +102,25 @@ class TestConv2dOp(OpTest):
}
}
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
testcudnn
(
self
):
def
testcuda
(
self
):
return
core
.
is_compiled_with_cuda
()
and
self
.
use_cudnn
return
core
.
is_compiled_with_cuda
()
and
(
self
.
use_cudnn
or
self
.
use_cuda
)
def
test_check_output
(
self
):
def
test_check_output
(
self
):
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
nn
()
else
core
.
CPUPlace
()
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
a
()
else
core
.
CPUPlace
()
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
nn
()
else
core
.
CPUPlace
()
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
a
()
else
core
.
CPUPlace
()
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
place
,
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.02
)
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
nn
()
else
core
.
CPUPlace
()
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
a
()
else
core
.
CPUPlace
()
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
[
'Input'
],
place
,
[
'Input'
],
'Output'
,
'Output'
,
...
@@ -128,7 +130,7 @@ class TestConv2dOp(OpTest):
...
@@ -128,7 +130,7 @@ class TestConv2dOp(OpTest):
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
nn
()
else
core
.
CPUPlace
()
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcud
a
()
else
core
.
CPUPlace
()
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
[
'Filter'
],
place
,
[
'Filter'
],
'Output'
,
'Output'
,
...
@@ -325,18 +327,33 @@ class TestFP16CUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
...
@@ -325,18 +327,33 @@ class TestFP16CUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
class
TestDepthwiseConv
(
TestConv2dOp
):
class
TestDepthwiseConv
(
TestConv2dOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
use_cuda
=
True
self
.
pad
=
[
1
,
1
]
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
stride
=
[
2
,
2
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
groups
=
3
self
.
groups
=
3
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
self
.
filter_size
=
[
3
,
f_c
,
3
,
3
]
self
.
op_type
=
"depthwise_conv2d"
self
.
op_type
=
"depthwise_conv2d"
class
TestDepthwiseConv2
(
TestConv2dOp
):
class
TestDepthwiseConv2
(
TestConv2dOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
use_cuda
=
True
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
groups
=
3
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
self
.
filter_size
=
[
3
,
f_c
,
3
,
3
]
self
.
op_type
=
"depthwise_conv2d"
class
TestDepthwiseConv3
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
use_cuda
=
True
self
.
pad
=
[
1
,
1
]
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
...
@@ -347,6 +364,34 @@ class TestDepthwiseConv2(TestConv2dOp):
...
@@ -347,6 +364,34 @@ class TestDepthwiseConv2(TestConv2dOp):
self
.
op_type
=
"depthwise_conv2d"
self
.
op_type
=
"depthwise_conv2d"
class
TestDepthwiseConvWithDilation
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
use_cuda
=
True
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
2
,
2
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
groups
=
3
self
.
dilations
=
[
2
,
2
]
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
self
.
op_type
=
"depthwise_conv2d"
class
TestDepthwiseConvWithDilation2
(
TestConv2dOp
):
def
init_test_case
(
self
):
self
.
use_cuda
=
True
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
self
.
groups
=
3
self
.
dilations
=
[
2
,
2
]
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
self
.
filter_size
=
[
6
,
f_c
,
3
,
3
]
self
.
op_type
=
"depthwise_conv2d"
# Please Don't remove the following code.
# Please Don't remove the following code.
# Currently, CI use cudnn V5.0 which not support dilation conv.
# Currently, CI use cudnn V5.0 which not support dilation conv.
# class TestCUDNNWithDilation(TestWithDilation):
# class TestCUDNNWithDilation(TestWithDilation):
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
91756a5a
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
...
@@ -50,9 +50,7 @@ class TestDistRunnerBase(object):
def
run_pserver
(
self
,
args
):
def
run_pserver
(
self
,
args
):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
# NOTE: pserver should not call memory optimize
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
args
.
endpoints
,
fluid
.
default_main_program
(),
args
.
endpoints
,
args
.
trainers
,
args
.
sync_mode
)
args
.
trainers
,
args
.
sync_mode
)
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
...
@@ -70,7 +68,7 @@ class TestDistRunnerBase(object):
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
fluid
.
memory_optimize
(
fluid
.
default_main_program
()
,
skip_grads
=
True
)
if
args
.
is_dist
:
if
args
.
is_dist
:
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
fluid
.
default_main_program
(),
...
@@ -166,6 +164,17 @@ class TestDistBase(unittest.TestCase):
...
@@ -166,6 +164,17 @@ class TestDistBase(unittest.TestCase):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
raise
NotImplementedError
(
"tests should have _setup_config implemented"
)
raise
NotImplementedError
(
"tests should have _setup_config implemented"
)
def
_after_setup_config
(
self
):
if
self
.
_enforce_place
==
"CPU"
:
self
.
__use_cuda
=
False
elif
self
.
_enforce_place
==
"GPU"
:
self
.
__use_cuda
=
True
else
:
if
fluid
.
core
.
is_compiled_with_cuda
():
self
.
__use_cuda
=
True
else
:
self
.
__use_cuda
=
False
def
setUp
(
self
):
def
setUp
(
self
):
self
.
_trainers
=
2
self
.
_trainers
=
2
self
.
_pservers
=
2
self
.
_pservers
=
2
...
@@ -173,11 +182,12 @@ class TestDistBase(unittest.TestCase):
...
@@ -173,11 +182,12 @@ class TestDistBase(unittest.TestCase):
self
.
_find_free_port
(),
self
.
_find_free_port
())
self
.
_find_free_port
(),
self
.
_find_free_port
())
self
.
_python_interp
=
"python"
self
.
_python_interp
=
"python"
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_
use_cuda
=
Tru
e
self
.
_
enforce_place
=
Non
e
self
.
_mem_opt
=
False
self
.
_mem_opt
=
False
self
.
_use_reduce
=
False
self
.
_use_reduce
=
False
self
.
_use_reader_alloc
=
True
self
.
_use_reader_alloc
=
True
self
.
_setup_config
()
self
.
_setup_config
()
self
.
_after_setup_config
()
def
_find_free_port
(
self
):
def
_find_free_port
(
self
):
with
closing
(
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
))
as
s
:
with
closing
(
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
))
as
s
:
...
@@ -201,13 +211,10 @@ class TestDistBase(unittest.TestCase):
...
@@ -201,13 +211,10 @@ class TestDistBase(unittest.TestCase):
ps0_cmd
+=
" --mem_opt"
ps0_cmd
+=
" --mem_opt"
ps1_cmd
+=
" --mem_opt"
ps1_cmd
+=
" --mem_opt"
ps0_pipe
=
subprocess
.
PIPE
print
(
ps0_cmd
)
ps1_pipe
=
subprocess
.
PIPE
print
(
ps1_cmd
)
if
check_error_log
:
ps0_pipe
=
open
(
"/tmp/ps0_err.log"
,
"wb"
)
print
(
ps0_cmd
)
ps1_pipe
=
open
(
"/tmp/ps1_err.log"
,
"wb"
)
print
(
ps1_cmd
)
ps0_pipe
=
open
(
"/tmp/ps0_err.log"
,
"wb"
)
ps1_pipe
=
open
(
"/tmp/ps1_err.log"
,
"wb"
)
ps0_proc
=
subprocess
.
Popen
(
ps0_proc
=
subprocess
.
Popen
(
ps0_cmd
.
strip
().
split
(
" "
),
ps0_cmd
.
strip
().
split
(
" "
),
...
@@ -220,10 +227,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -220,10 +227,7 @@ class TestDistBase(unittest.TestCase):
stderr
=
ps1_pipe
,
stderr
=
ps1_pipe
,
env
=
required_envs
)
env
=
required_envs
)
if
not
check_error_log
:
return
ps0_proc
,
ps1_proc
,
ps0_pipe
,
ps1_pipe
return
ps0_proc
,
ps1_proc
,
None
,
None
else
:
return
ps0_proc
,
ps1_proc
,
ps0_pipe
,
ps1_pipe
def
_wait_ps_ready
(
self
,
pid
):
def
_wait_ps_ready
(
self
,
pid
):
retry_times
=
50
retry_times
=
50
...
@@ -244,7 +248,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -244,7 +248,7 @@ class TestDistBase(unittest.TestCase):
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
if
self
.
_use_cuda
:
if
self
.
_
_
use_cuda
:
cmd
+=
" --use_cuda"
cmd
+=
" --use_cuda"
env_local
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env_local
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
else
:
else
:
...
@@ -252,7 +256,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -252,7 +256,7 @@ class TestDistBase(unittest.TestCase):
envs
.
update
(
env_local
)
envs
.
update
(
env_local
)
if
not
check_error_log
:
if
check_error_log
:
err_log
=
open
(
"/tmp/trainer.err.log"
,
"wb"
)
err_log
=
open
(
"/tmp/trainer.err.log"
,
"wb"
)
local_proc
=
subprocess
.
Popen
(
local_proc
=
subprocess
.
Popen
(
cmd
.
split
(
" "
),
cmd
.
split
(
" "
),
...
@@ -266,7 +270,6 @@ class TestDistBase(unittest.TestCase):
...
@@ -266,7 +270,6 @@ class TestDistBase(unittest.TestCase):
stderr
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
env
=
envs
)
env
=
envs
)
local_proc
.
wait
()
local_out
,
local_err
=
local_proc
.
communicate
()
local_out
,
local_err
=
local_proc
.
communicate
()
local_ret
=
cpt
.
to_text
(
local_out
)
local_ret
=
cpt
.
to_text
(
local_out
)
...
@@ -307,7 +310,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -307,7 +310,7 @@ class TestDistBase(unittest.TestCase):
if
self
.
_use_reader_alloc
:
if
self
.
_use_reader_alloc
:
tr0_cmd
+=
" --use_reader_alloc"
tr0_cmd
+=
" --use_reader_alloc"
tr1_cmd
+=
" --use_reader_alloc"
tr1_cmd
+=
" --use_reader_alloc"
if
self
.
_use_cuda
:
if
self
.
_
_
use_cuda
:
tr0_cmd
+=
" --use_cuda"
tr0_cmd
+=
" --use_cuda"
tr1_cmd
+=
" --use_cuda"
tr1_cmd
+=
" --use_cuda"
env0
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env0
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
...
@@ -319,15 +322,10 @@ class TestDistBase(unittest.TestCase):
...
@@ -319,15 +322,10 @@ class TestDistBase(unittest.TestCase):
env0
.
update
(
envs
)
env0
.
update
(
envs
)
env1
.
update
(
envs
)
env1
.
update
(
envs
)
FNULL
=
open
(
os
.
devnull
,
'w'
)
print
(
"tr0_cmd:{}, env0: {}"
.
format
(
tr0_cmd
,
env0
))
print
(
"tr1_cmd:{}, env1: {}"
.
format
(
tr1_cmd
,
env1
))
tr0_pipe
=
subprocess
.
PIPE
tr0_pipe
=
open
(
"/tmp/tr0_err.log"
,
"wb"
)
tr1_pipe
=
subprocess
.
PIPE
tr1_pipe
=
open
(
"/tmp/tr1_err.log"
,
"wb"
)
if
check_error_log
:
print
(
"tr0_cmd:{}, env0: {}"
.
format
(
tr0_cmd
,
env0
))
print
(
"tr1_cmd:{}, env1: {}"
.
format
(
tr1_cmd
,
env1
))
tr0_pipe
=
open
(
"/tmp/tr0_err.log"
,
"wb"
)
tr1_pipe
=
open
(
"/tmp/tr1_err.log"
,
"wb"
)
tr0_proc
=
subprocess
.
Popen
(
tr0_proc
=
subprocess
.
Popen
(
tr0_cmd
.
strip
().
split
(
" "
),
tr0_cmd
.
strip
().
split
(
" "
),
...
@@ -340,29 +338,22 @@ class TestDistBase(unittest.TestCase):
...
@@ -340,29 +338,22 @@ class TestDistBase(unittest.TestCase):
stderr
=
tr1_pipe
,
stderr
=
tr1_pipe
,
env
=
env1
)
env
=
env1
)
tr0_proc
.
wait
()
tr1_proc
.
wait
()
tr0_out
,
tr0_err
=
tr0_proc
.
communicate
()
tr0_out
,
tr0_err
=
tr0_proc
.
communicate
()
tr0_loss_text
=
cpt
.
to_text
(
tr0_out
)
tr0_loss_text
=
cpt
.
to_text
(
tr0_out
)
tr1_out
,
tr1_err
=
tr1_proc
.
communicate
()
tr1_out
,
tr1_err
=
tr1_proc
.
communicate
()
tr1_loss_text
=
cpt
.
to_text
(
tr1_out
)
tr1_loss_text
=
cpt
.
to_text
(
tr1_out
)
# close trainer file
# close trainer file
if
check_error_log
:
tr0_pipe
.
close
()
tr0_pipe
.
close
()
tr1_pipe
.
close
()
tr1_pipe
.
close
()
ps0_pipe
.
close
()
ps0_pipe
.
close
()
ps1_pipe
.
close
()
ps1_pipe
.
close
()
# FIXME: use terminate() instead of sigkill.
# FIXME: use terminate() instead of sigkill.
os
.
kill
(
ps0
.
pid
,
signal
.
SIGKILL
)
os
.
kill
(
ps0
.
pid
,
signal
.
SIGKILL
)
os
.
kill
(
ps1
.
pid
,
signal
.
SIGKILL
)
os
.
kill
(
ps1
.
pid
,
signal
.
SIGKILL
)
ps0
.
terminate
()
ps0
.
terminate
()
ps1
.
terminate
()
ps1
.
terminate
()
ps0
.
wait
()
ps1
.
wait
()
FNULL
.
close
()
# print log
# print log
sys
.
stderr
.
write
(
'trainer 0 stdout:
\n
%s
\n
'
%
tr0_loss_text
)
sys
.
stderr
.
write
(
'trainer 0 stdout:
\n
%s
\n
'
%
tr0_loss_text
)
...
@@ -387,6 +378,7 @@ class TestDistBase(unittest.TestCase):
...
@@ -387,6 +378,7 @@ class TestDistBase(unittest.TestCase):
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
,
"FLAGS_cudnn_deterministic"
:
"1"
,
"http_proxy"
:
""
}
}
required_envs
.
update
(
need_envs
)
required_envs
.
update
(
need_envs
)
...
...
python/paddle/fluid/tests/unittests/test_dist_ctr.py
浏览文件 @
91756a5a
...
@@ -21,10 +21,11 @@ from test_dist_base import TestDistBase
...
@@ -21,10 +21,11 @@ from test_dist_base import TestDistBase
class
TestDistCTR2x2
(
TestDistBase
):
class
TestDistCTR2x2
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_dist_ctr
(
self
):
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
def
test_dist_ctr
(
self
):
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
91756a5a
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
...
@@ -26,14 +26,13 @@ class TestDistSeResneXt2x2(TestDistBase):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# TODO(typhoonzero): fix this test
class
TestDistseResnXt2x2WithMemopt
(
TestDistBase
):
# class TestDistseResnXt2x2WithMemopt(TestDistBase):
def
_setup_config
(
self
):
# def _setup_config(self):
self
.
_sync_mode
=
True
# self._sync_mode = True
self
.
_mem_opt
=
True
# self._mem_opt = True
def
test_dist_train
(
self
):
# def test_dist_train(self):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
# self.check_with_place("dist_se_resnext.py", delta=1e-7)
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
...
...
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
浏览文件 @
91756a5a
...
@@ -22,7 +22,7 @@ from test_dist_base import TestDistBase
...
@@ -22,7 +22,7 @@ from test_dist_base import TestDistBase
class
TestDistSimnetBowDense2x2
(
TestDistBase
):
class
TestDistSimnetBowDense2x2
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_simnet_bow
(
self
):
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
...
@@ -36,7 +36,7 @@ class TestDistSimnetBowDense2x2(TestDistBase):
...
@@ -36,7 +36,7 @@ class TestDistSimnetBowDense2x2(TestDistBase):
class
TestDistSimnetBow2x2DenseAsync
(
TestDistBase
):
class
TestDistSimnetBow2x2DenseAsync
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_simnet_bow
(
self
):
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
...
@@ -50,7 +50,7 @@ class TestDistSimnetBow2x2DenseAsync(TestDistBase):
...
@@ -50,7 +50,7 @@ class TestDistSimnetBow2x2DenseAsync(TestDistBase):
class
TestDistSimnetBowSparse2x2
(
TestDistBase
):
class
TestDistSimnetBowSparse2x2
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_simnet_bow
(
self
):
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
...
@@ -64,7 +64,7 @@ class TestDistSimnetBowSparse2x2(TestDistBase):
...
@@ -64,7 +64,7 @@ class TestDistSimnetBowSparse2x2(TestDistBase):
class
TestDistSimnetBow2x2SparseAsync
(
TestDistBase
):
class
TestDistSimnetBow2x2SparseAsync
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_simnet_bow
(
self
):
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
...
...
python/paddle/fluid/tests/unittests/test_dist_text_classification.py
浏览文件 @
91756a5a
...
@@ -21,7 +21,7 @@ from test_dist_base import TestDistBase
...
@@ -21,7 +21,7 @@ from test_dist_base import TestDistBase
class
TestDistTextClassification2x2
(
TestDistBase
):
class
TestDistTextClassification2x2
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_text_classification
(
self
):
def
test_text_classification
(
self
):
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
1e-6
)
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
1e-6
)
...
@@ -30,7 +30,7 @@ class TestDistTextClassification2x2(TestDistBase):
...
@@ -30,7 +30,7 @@ class TestDistTextClassification2x2(TestDistBase):
class
TestDistTextClassification2x2Async
(
TestDistBase
):
class
TestDistTextClassification2x2Async
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_
use_cuda
=
False
self
.
_
enforce_place
=
"CPU"
def
test_se_resnext
(
self
):
def
test_se_resnext
(
self
):
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
100
)
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
91756a5a
...
@@ -825,6 +825,15 @@ class TestBook(unittest.TestCase):
...
@@ -825,6 +825,15 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
out
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
print
(
str
(
program
))
def iou_similarity(self):
    # Builds a throwaway program holding a single iou_similarity layer and
    # checks that the layer call yields an output variable.
    # NOTE(review): the name has no ``test_`` prefix, so unittest's default
    # loader presumably never collects this method -- confirm whether it was
    # meant to be ``test_iou_similarity``.
    prog = Program()
    with program_guard(prog):
        lhs = layers.data(name="x", shape=[16], dtype="float32")
        rhs = layers.data(name="y", shape=[16], dtype="float32")
        similarity = layers.iou_similarity(lhs, rhs, name='iou_similarity')
        self.assertIsNotNone(similarity)
    # Dump the generated program description for manual inspection.
    print(str(prog))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_operator_desc.py
浏览文件 @
91756a5a
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set
(
mul_op
.
attr_names
),
set
(
mul_op
.
attr_names
),
set
([
set
([
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"op_namescope"
,
"op_callstack"
"op_namescope"
]))
]))
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
...
...
python/paddle/fluid/tests/unittests/test_pass_builder.py
0 → 100644
浏览文件 @
91756a5a
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
numpy
as
np
import
unittest
import
os
import
sys
import
math
def simple_fc_net():
    """Build a small fully connected classifier and return its mean loss.

    The network reads a float32 ``image`` input of shape [784] and an int64
    ``label`` input of shape [1], stacks four tanh FC layers of width 200
    (biases initialised to the constant 1.0), and ends with a 10-way softmax
    FC layer trained with cross entropy.

    Returns:
        The variable holding the mean cross-entropy loss.
    """
    layers = fluid.layers
    image = layers.data(name='image', shape=[784], dtype='float32')
    target = layers.data(name='label', shape=[1], dtype='int64')

    features = image
    for _ in range(4):
        # A fresh ParamAttr is created for every layer, as in the original.
        bias = fluid.ParamAttr(
            initializer=fluid.initializer.Constant(value=1.0))
        features = layers.fc(features, size=200, act='tanh', bias_attr=bias)

    probabilities = layers.fc(features, size=10, act='softmax')
    per_sample_loss = layers.cross_entropy(input=probabilities, label=target)
    return layers.mean(per_sample_loss)
class TestPassBuilder(unittest.TestCase):
    """Exercise the pass-builder object obtained from ``fluid.BuildStrategy``."""

    def check_network_convergence(self, use_cuda, build_strategy=None):
        """Run a train and a test ParallelExecutor on one random batch.

        Args:
            use_cuda: run on ``fluid.CUDAPlace(0)`` when true, otherwise on
                ``fluid.CPUPlace()``.
            build_strategy: forwarded unchanged to both ``ParallelExecutor``
                instances.

        The test fails when a loss is NaN, or when the train and test losses
        of an iteration are not close according to
        ``np.allclose(..., atol=1e-8)``.
        """
        os.environ['CPU_NUM'] = str(4)
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = simple_fc_net()
            # Clone before the optimizer is attached so the test program
            # contains no optimization ops.
            test_program = main.clone(for_test=True)

            opt = fluid.optimizer.SGD(learning_rate=0.001)
            opt.minimize(loss)

            batch_size = 32
            image = np.random.normal(size=(batch_size, 784)).astype('float32')
            label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup)
            feed_dict = {'image': image, 'label': label}

            train_exe = fluid.ParallelExecutor(
                use_cuda=use_cuda,
                loss_name=loss.name,
                main_program=main,
                build_strategy=build_strategy)

            test_exe = fluid.ParallelExecutor(
                use_cuda=use_cuda,
                main_program=test_program,
                share_vars_from=train_exe,
                build_strategy=build_strategy)

            for _ in range(5):
                # The test pass runs first, i.e. before this iteration's
                # training step is executed on the same batch.
                test_loss, = test_exe.run([loss.name], feed=feed_dict)
                train_loss, = train_exe.run([loss.name], feed=feed_dict)

                # Report NaN as a regular test failure instead of aborting
                # the interpreter through sys.exit().
                avg_test_loss_val = np.array(test_loss).mean()
                self.assertFalse(
                    math.isnan(float(avg_test_loss_val)),
                    "got NaN loss, testing failed.")

                avg_train_loss_val = np.array(train_loss).mean()
                self.assertFalse(
                    math.isnan(float(avg_train_loss_val)),
                    "got NaN loss, training failed.")

                self.assertTrue(
                    np.allclose(
                        train_loss, test_loss, atol=1e-8),
                    "Train loss: " + str(train_loss) + "\n Test loss:" +
                    str(test_loss))

    def test_parallel_testing_with_new_strategy(self):
        """Append, insert and remove passes, then run with a graph_viz_pass."""
        viz_path = "/tmp/test_viz_pass"
        # A file left behind by an earlier run would make the existence check
        # at the end of this test pass vacuously.
        if os.path.isfile(viz_path):
            os.remove(viz_path)

        build_strategy = fluid.BuildStrategy()
        pass_builder = build_strategy._create_passes_from_strategy()
        origin_len = len(pass_builder.all_passes())

        viz_pass = pass_builder.append_pass("graph_viz_pass")
        self.assertEqual(origin_len + 1, len(pass_builder.all_passes()))

        pass_builder.insert_pass(
            len(pass_builder.all_passes()), "graph_viz_pass")
        self.assertEqual(origin_len + 2, len(pass_builder.all_passes()))

        pass_builder.remove_pass(len(pass_builder.all_passes()) - 1)
        self.assertEqual(origin_len + 1, len(pass_builder.all_passes()))
        viz_pass.set_str("graph_viz_path", viz_path)

        self.check_network_convergence(
            use_cuda=core.is_compiled_with_cuda(),
            build_strategy=build_strategy)

        self.assertTrue(
            os.path.exists(viz_path),
            "graph_viz_pass did not produce " + viz_path)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/transformer_model.py
浏览文件 @
91756a5a
...
@@ -246,6 +246,7 @@ def prepare_encoder(src_word,
...
@@ -246,6 +246,7 @@ def prepare_encoder(src_word,
padding_idx
=
pos_pad_idx
,
padding_idx
=
pos_pad_idx
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
pos_enc_param_name
,
trainable
=
False
))
name
=
pos_enc_param_name
,
trainable
=
False
))
src_pos_enc
.
stop_gradient
=
True
enc_input
=
src_word_emb
+
src_pos_enc
enc_input
=
src_word_emb
+
src_pos_enc
# FIXME(guosheng): Decouple the program desc with batch_size.
# FIXME(guosheng): Decouple the program desc with batch_size.
...
...
python/paddle/fluid/transpiler/__init__.py
浏览文件 @
91756a5a
...
@@ -20,6 +20,10 @@ from .memory_optimization_transpiler import memory_optimize, release_memory
...
@@ -20,6 +20,10 @@ from .memory_optimization_transpiler import memory_optimize, release_memory
from
.ps_dispatcher
import
HashName
,
RoundRobin
from
.ps_dispatcher
import
HashName
,
RoundRobin
__all__
=
[
__all__
=
[
"DistributeTranspiler"
,
"memory_optimize"
,
"release_memory"
,
"HashName"
,
"DistributeTranspiler"
,
"RoundRobin"
,
"DistributeTranspilerConfig"
"memory_optimize"
,
"release_memory"
,
"HashName"
,
"RoundRobin"
,
"DistributeTranspilerConfig"
,
]
]
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
91756a5a
...
@@ -14,10 +14,10 @@
...
@@ -14,10 +14,10 @@
from
__future__
import
print_function
from
__future__
import
print_function
from
collections
import
defaultdict
,
OrderedDict
,
Callable
from
collections
import
defaultdict
,
MutableSet
from
..
import
core
from
..
import
core
from
...
import
compat
as
cpt
from
...
import
compat
as
cpt
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
,
core
from
..backward
import
_rename_arg_
from
..backward
import
_rename_arg_
from
functools
import
reduce
from
functools
import
reduce
from
six.moves
import
range
from
six.moves
import
range
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
...
@@ -44,17 +44,82 @@ SUB_BLOCK_PAIR = [("while", "while_grad"), ("parallel_do", "parallel_do_grad"),
PRINT_LOG
=
False
PRINT_LOG
=
False
class OrderedSet(MutableSet):
    """A mutable set that iterates its keys in insertion order.

    Keys live in ``self.map`` (key -> ``[key, prev, next]`` node) and the
    nodes form a circular doubly linked list anchored at the sentinel
    ``self.end``, so ``add``, ``discard`` and membership tests are dict
    operations plus constant-time pointer updates.
    """

    # Instances are mutable and define __eq__, so they are explicitly
    # unhashable.
    __hash__ = None

    def __init__(self, iterable=None):
        """Create the set, optionally filled from ``iterable`` in order."""
        self.end = end = []
        end += [None, end, end]  # sentinel node for doubly linked list
        self.map = {}  # key --> [key, prev, next]
        if iterable is not None:
            self.update(iterable)

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """Append ``key`` at the end; a key already present keeps its place."""
        if key not in self.map:
            end = self.end
            last = end[1]
            # Link the new node between the current last node and the sentinel.
            last[2] = end[1] = self.map[key] = [key, last, end]

    def update(self, other):
        """Add every element of the iterable ``other`` in iteration order."""
        for element in other:
            self.add(element)

    def discard(self, key):
        """Remove ``key`` if present; do nothing otherwise."""
        if key in self.map:
            _, prev_node, next_node = self.map.pop(key)
            prev_node[2] = next_node
            next_node[1] = prev_node

    def remove(self, key):
        """Remove ``key``; unlike ``set.remove`` a missing key is ignored."""
        self.discard(key)

    def __iter__(self):
        end = self.end
        node = end[2]
        while node is not end:
            yield node[0]
            node = node[2]

    def __reversed__(self):
        end = self.end
        node = end[1]
        while node is not end:
            yield node[0]
            node = node[1]

    def pop(self, last=True):
        """Remove and return the last key, or the first when ``last`` is false.

        Raises:
            KeyError: if the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = self.end[1][0] if last else self.end[2][0]
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__, )
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """Compare order-sensitively with OrderedSet, as a plain set otherwise."""
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        try:
            return set(self) == set(other)
        except TypeError:
            # ``other`` is not an iterable of hashable items (e.g. None or an
            # int): let Python fall back to its default comparison instead of
            # propagating the TypeError out of ``==``.
            return NotImplemented
class
ControlFlowGraph
(
object
):
class
ControlFlowGraph
(
object
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
def
__init__
(
self
,
program
,
ops
,
forward_num
,
skip_opt
):
self
.
_program
=
program
self
.
_program
=
program
self
.
_ops
=
ops
self
.
_ops
=
ops
self
.
_forward_num
=
forward_num
self
.
_forward_num
=
forward_num
self
.
_successors
=
defaultdict
(
s
et
)
self
.
_successors
=
defaultdict
(
OrderedS
et
)
self
.
_presuccessors
=
defaultdict
(
s
et
)
self
.
_presuccessors
=
defaultdict
(
OrderedS
et
)
self
.
_uses
=
defaultdict
(
s
et
)
self
.
_uses
=
defaultdict
(
OrderedS
et
)
self
.
_defs
=
defaultdict
(
s
et
)
self
.
_defs
=
defaultdict
(
OrderedS
et
)
self
.
_live_in
=
defaultdict
(
s
et
)
self
.
_live_in
=
defaultdict
(
OrderedS
et
)
self
.
_live_out
=
defaultdict
(
s
et
)
self
.
_live_out
=
defaultdict
(
OrderedS
et
)
self
.
_skip_opt
=
skip_opt
self
.
_skip_opt
=
skip_opt
self
.
pool
=
[]
self
.
pool
=
[]
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
...
@@ -116,7 +181,7 @@ class ControlFlowGraph(object):
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
# FIXME(typhoonzero): maybe use a "sorted set" is better than this.
can_optimize
=
[
can_optimize
=
[
x
for
x
in
sorted
(
list
(
in_diff
))
x
for
x
in
in_diff
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
if
can_optimize
:
if
can_optimize
:
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
...
@@ -224,7 +289,7 @@ class ControlFlowGraph(object):
if
self
.
pool
:
if
self
.
pool
:
# NOTE: must sort the in_diff set for cases that get different cache var.
# NOTE: must sort the in_diff set for cases that get different cache var.
defs_can_optimize
=
[
defs_can_optimize
=
[
x
for
x
in
s
orted
(
list
(
self
.
_defs
[
i
]))
x
for
x
in
s
elf
.
_defs
[
i
]
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
if
self
.
_check_var_validity
(
block_desc
,
x
,
is_forward
)
]
]
out_pair
=
[
out_pair
=
[
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
...
@@ -381,7 +446,19 @@ def _get_cfgs(input_program):
return
cfgs
return
cfgs
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
):
def _is_opt_role_op(op):
    """Return whether ``op`` is tagged with the ``Optimize`` op role.

    Args:
        op: an operator exposing ``attr_names`` and ``all_attrs()``.

    Returns:
        True when the op has the op-role attribute and its integer value
        equals ``OpRole.Optimize``; False otherwise (the previous version
        fell off the end and returned None in that case).
    """
    op_maker = core.op_proto_and_checker_maker
    # Look the attribute name up once instead of once per use.
    role_attr_name = op_maker.kOpRoleAttrName()
    if role_attr_name not in op.attr_names:
        return False
    return int(op.all_attrs()[role_attr_name]) == int(
        op_maker.OpRole.Optimize)
def
memory_optimize
(
input_program
,
skip_opt_set
=
None
,
print_log
=
False
,
level
=
0
,
skip_grads
=
False
):
"""Optimize memory by reusing var memory.
"""Optimize memory by reusing var memory.
Note: it does not support subblocks nested in subblocks.
Note: it does not support subblocks nested in subblocks.
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
...
@@ -398,6 +475,19 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
raise
ValueError
(
"only support opt_level 0 or 1."
)
raise
ValueError
(
"only support opt_level 0 or 1."
)
global
PRINT_LOG
global
PRINT_LOG
PRINT_LOG
=
print_log
PRINT_LOG
=
print_log
if
skip_grads
:
grad_set
=
set
()
OP_ROLE_VAR
=
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
()
for
op
in
input_program
.
global_block
().
ops
:
if
_is_opt_role_op
(
op
):
if
op
.
attr
(
OP_ROLE_VAR
):
grad_name
=
op
.
attr
(
OP_ROLE_VAR
)[
1
]
grad_set
.
add
(
grad_name
)
if
not
skip_opt_set
:
skip_opt_set
=
grad_set
else
:
skip_opt_set
.
update
(
grad_set
)
cfgs
=
_get_cfgs
(
input_program
)
cfgs
=
_get_cfgs
(
input_program
)
for
cfg
in
cfgs
:
for
cfg
in
cfgs
:
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
cfg
.
memory_optimize
(
skip_opt_set
=
skip_opt_set
,
level
=
level
)
...
...
python/setup.py.in
浏览文件 @
91756a5a
...
@@ -106,6 +106,7 @@ packages=['paddle',
...
@@ -106,6 +106,7 @@ packages=['paddle',
'paddle.fluid.layers',
'paddle.fluid.layers',
'paddle.fluid.contrib',
'paddle.fluid.contrib',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details']
'paddle.fluid.transpiler.details']
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录