Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
4743c9cd
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4743c9cd
编写于
12月 27, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into parallel_graph_mode
上级
8cad371a
4048cfa9
变更
101
展开全部
显示空白变更内容
内联
并排
Showing
101 changed file
with
2266 addition
and
1057 deletion
+2266
-1057
Dockerfile
Dockerfile
+38
-38
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+2
-2
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-1
cmake/simd.cmake
cmake/simd.cmake
+35
-38
paddle/contrib/float16/float16_transpiler.py
paddle/contrib/float16/float16_transpiler.py
+1
-1
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-5
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+21
-25
paddle/fluid/framework/data_device_transform_test.cu
paddle/fluid/framework/data_device_transform_test.cu
+1
-0
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+1
-1
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
...uid/framework/details/fast_threaded_ssa_graph_executor.cc
+1
-0
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+199
-208
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+11
-8
paddle/fluid/framework/details/variable_visitor.cc
paddle/fluid/framework/details/variable_visitor.cc
+2
-2
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
+222
-0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
+49
-0
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
...fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
+14
-11
paddle/fluid/framework/ir/graph.cc
paddle/fluid/framework/ir/graph.cc
+0
-58
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+77
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+32
-0
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+5
-5
paddle/fluid/framework/op_proto_maker.cc
paddle/fluid/framework/op_proto_maker.cc
+0
-4
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+0
-1
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+2
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+25
-66
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+9
-3
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+2
-3
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-3
paddle/fluid/framework/scope_pool.cc
paddle/fluid/framework/scope_pool.cc
+54
-0
paddle/fluid/framework/scope_pool.h
paddle/fluid/framework/scope_pool.h
+46
-0
paddle/fluid/framework/var_type.h
paddle/fluid/framework/var_type.h
+20
-22
paddle/fluid/framework/var_type_inference_test.cc
paddle/fluid/framework/var_type_inference_test.cc
+1
-1
paddle/fluid/framework/var_type_traits.cc
paddle/fluid/framework/var_type_traits.cc
+119
-0
paddle/fluid/framework/var_type_traits.h
paddle/fluid/framework/var_type_traits.h
+195
-0
paddle/fluid/framework/var_type_traits_test.cc
paddle/fluid/framework/var_type_traits_test.cc
+120
-0
paddle/fluid/framework/variable.h
paddle/fluid/framework/variable.h
+35
-31
paddle/fluid/framework/variable_test.cc
paddle/fluid/framework/variable_test.cc
+12
-11
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+4
-4
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+24
-4
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+7
-3
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/details/reset_tensor_array.cc
paddle/fluid/inference/api/details/reset_tensor_array.cc
+1
-1
paddle/fluid/inference/api/details/reset_tensor_array.h
paddle/fluid/inference/api/details/reset_tensor_array.h
+6
-3
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+10
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+12
-1
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+7
-5
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+4
-0
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+1
-3
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
+9
-30
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+11
-27
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+15
-61
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+117
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+39
-20
paddle/fluid/inference/tests/test.cmake
paddle/fluid/inference/tests/test.cmake
+5
-3
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+3
-4
paddle/fluid/operators/conv_fusion_op.cu.cc
paddle/fluid/operators/conv_fusion_op.cu.cc
+3
-5
paddle/fluid/operators/cudnn_lstm_op.cu.cc
paddle/fluid/operators/cudnn_lstm_op.cu.cc
+4
-237
paddle/fluid/operators/cudnn_rnn_cache.h
paddle/fluid/operators/cudnn_rnn_cache.h
+255
-0
paddle/fluid/operators/cum_op.h
paddle/fluid/operators/cum_op.h
+2
-0
paddle/fluid/operators/detail/safe_ref.h
paddle/fluid/operators/detail/safe_ref.h
+1
-1
paddle/fluid/operators/distributed/proto_encoder_helper.h
paddle/fluid/operators/distributed/proto_encoder_helper.h
+3
-1
paddle/fluid/operators/distributed_ops/CMakeLists.txt
paddle/fluid/operators/distributed_ops/CMakeLists.txt
+1
-1
paddle/fluid/operators/distributed_ops/split_ids_op.h
paddle/fluid/operators/distributed_ops/split_ids_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_mul_op.h
paddle/fluid/operators/elementwise/elementwise_mul_op.h
+1
-1
paddle/fluid/operators/lrn_mkldnn_op.cc
paddle/fluid/operators/lrn_mkldnn_op.cc
+4
-4
paddle/fluid/operators/optimizers/adadelta_op.h
paddle/fluid/operators/optimizers/adadelta_op.h
+4
-2
paddle/fluid/operators/optimizers/adagrad_op.h
paddle/fluid/operators/optimizers/adagrad_op.h
+2
-1
paddle/fluid/operators/optimizers/adam_op.h
paddle/fluid/operators/optimizers/adam_op.h
+2
-1
paddle/fluid/operators/optimizers/adamax_op.h
paddle/fluid/operators/optimizers/adamax_op.h
+4
-2
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+4
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+4
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+1
-1
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+2
-1
paddle/fluid/operators/split_lod_tensor_op.cc
paddle/fluid/operators/split_lod_tensor_op.cc
+1
-1
paddle/fluid/operators/sum_mkldnn_op.cc
paddle/fluid/operators/sum_mkldnn_op.cc
+1
-1
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+1
-1
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+1
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+60
-28
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+19
-0
paddle/fluid/platform/float16_test.cc
paddle/fluid/platform/float16_test.cc
+1
-0
paddle/fluid/platform/float16_test.cu
paddle/fluid/platform/float16_test.cu
+1
-0
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+0
-3
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+15
-2
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+1
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+10
-8
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+1
-1
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+0
-5
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-2
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
.../image_classification/test_image_classification_resnet.py
+7
-5
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+15
-0
python/paddle/fluid/tests/unittests/test_operator_desc.py
python/paddle/fluid/tests/unittests/test_operator_desc.py
+1
-1
python/paddle/fluid/tests/unittests/test_py_func_op.py
python/paddle/fluid/tests/unittests/test_py_func_op.py
+3
-3
python/paddle/fluid/tests/unittests/test_weight_decay.py
python/paddle/fluid/tests/unittests/test_weight_decay.py
+188
-0
python/paddle/fluid/transpiler/inference_transpiler.py
python/paddle/fluid/transpiler/inference_transpiler.py
+1
-1
未找到文件。
Dockerfile
浏览文件 @
4743c9cd
...
@@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
...
@@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed.
# version(1.7.1 for now), which causes building documentation failed.
RUN
pip3
install
-U
wheel
&&
\
RUN
pip3
--no-cache-dir
install
-U
wheel
&&
\
pip3
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.6
install
-U
wheel
&&
\
pip3.6
--no-cache-dir
install
-U
wheel
&&
\
pip3.6
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.6
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.6
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.6
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.7
install
-U
wheel
&&
\
pip3.7
--no-cache-dir
install
-U
wheel
&&
\
pip3.7
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.7
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.7
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.7
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
easy_install
-U
pip
&&
\
easy_install
-U
pip
&&
\
pip
install
-U
pip setuptools wheel
&&
\
pip
--no-cache-dir
install
-U
pip setuptools wheel
&&
\
pip
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
install
sphinx-rtd-theme
==
0.1.9 recommonmark
pip
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
RUN
pip3
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
RUN
pip3
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3
install
opencv-python
&&
\
pip3
--no-cache-dir
install
opencv-python
&&
\
pip3.6
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.6
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.6
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.6
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.6
install
opencv-python
&&
\
pip3.6
--no-cache-dir
install
opencv-python
&&
\
pip3.7
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.7
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.7
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.7
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.7
install
opencv-python
&&
\
pip3.7
--no-cache-dir
install
opencv-python
&&
\
pip
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
install
opencv-python
pip
--no-cache-dir
install
opencv-python
#For docstring checker
#For docstring checker
RUN
pip3
install
pylint pytest astroid isort
RUN
pip3
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip3.6
install
pylint pytest astroid isort
RUN
pip3.6
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip3.7
install
pylint pytest astroid isort
RUN
pip3.7
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip
install
pylint pytest astroid isort LinkChecker
RUN
pip
--no-cache-dir
install
pylint pytest astroid isort LinkChecker
COPY
./python/requirements.txt /root/
COPY
./python/requirements.txt /root/
RUN
pip3
install
-r
/root/requirements.txt
RUN
pip3
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip3.6
install
-r
/root/requirements.txt
RUN
pip3.6
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip3.7
install
-r
/root/requirements.txt
RUN
pip3.7
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip
install
-r
/root/requirements.txt
RUN
pip
--no-cache-dir
install
-r
/root/requirements.txt
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN
apt-get
install
-y
libssl-dev libffi-dev
RUN
apt-get
install
-y
libssl-dev libffi-dev
&&
apt-get clean
-y
RUN
pip3
install
certifi urllib3[secure]
RUN
pip3
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip3.6
install
certifi urllib3[secure]
RUN
pip3.6
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip3.7
install
certifi urllib3[secure]
RUN
pip3.7
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip
install
certifi urllib3[secure]
RUN
pip
--no-cache-dir
install
certifi urllib3[secure]
# Install woboq_codebrowser to /woboq
# Install woboq_codebrowser to /woboq
...
...
cmake/external/mkldnn.cmake
浏览文件 @
4743c9cd
...
@@ -106,10 +106,10 @@ else(WIN32)
...
@@ -106,10 +106,10 @@ else(WIN32)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
COMMAND
${
CMAKE_COMMAND
}
-E copy
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
COMMAND
${
CMAKE_COMMAND
}
-E copy
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
DEPENDS mkldnn
)
DEPENDS mkldnn
shared_mkldnn
)
endif
(
WIN32
)
endif
(
WIN32
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn_shared_lib
${
MKLDNN_PROJECT
}
mkldnn
)
IF
(
WITH_C_API
)
IF
(
WITH_C_API
)
INSTALL
(
FILES
${
MKLDNN_SHARED_LIB
}
DESTINATION lib
)
INSTALL
(
FILES
${
MKLDNN_SHARED_LIB
}
DESTINATION lib
)
ENDIF
()
ENDIF
()
...
...
cmake/inference_lib.cmake
浏览文件 @
4743c9cd
...
@@ -136,7 +136,7 @@ if (WITH_MKLDNN)
...
@@ -136,7 +136,7 @@ if (WITH_MKLDNN)
copy
(
mkldnn_lib
copy
(
mkldnn_lib
SRCS
${
MKLDNN_INC_DIR
}
${
MKLDNN_SHARED_LIB
}
SRCS
${
MKLDNN_INC_DIR
}
${
MKLDNN_SHARED_LIB
}
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DEPS mkldnn
DEPS mkldnn
_shared_lib
)
)
endif
()
endif
()
...
...
cmake/simd.cmake
浏览文件 @
4743c9cd
...
@@ -57,46 +57,43 @@ int main()
...
@@ -57,46 +57,43 @@ int main()
return 0;
return 0;
}"
SSE3_FOUND
)
}"
SSE3_FOUND
)
# disable AVX by default on windows
# Check AVX
if
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
# Check AVX
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
CHECK_CXX_SOURCE_RUNS
(
"
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
#include <immintrin.h>
CHECK_CXX_SOURCE_RUNS
(
"
int main()
#include <immintrin.h>
{
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
__m256 result = _mm256_add_ps (a, b);
return 0;
return 0;
}"
AVX_FOUND
)
}"
AVX_FOUND
)
# Check AVX 2
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
#include <immintrin.h>
int main()
int main()
{
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
__m256i result = _mm256_abs_epi32 (a);
return 0;
return 0;
}"
AVX2_FOUND
)
}"
AVX2_FOUND
)
# Check AVX512F
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
#include <immintrin.h>
int main()
int main()
{
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
__m512i result = _mm512_abs_epi32 (a);
return 0;
return 0;
}"
AVX512F_FOUND
)
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/contrib/float16/float16_transpiler.py
浏览文件 @
4743c9cd
...
@@ -60,7 +60,7 @@ class Float16Transpiler:
...
@@ -60,7 +60,7 @@ class Float16Transpiler:
raise
TypeError
(
"place should be as CPUPlace/CUDAPlace type"
)
raise
TypeError
(
"place should be as CPUPlace/CUDAPlace type"
)
if
scope
is
None
:
if
scope
is
None
:
scope
=
global_scope
()
scope
=
global_scope
()
if
not
isinstance
(
scope
,
core
.
Scope
):
if
not
isinstance
(
scope
,
core
.
_
Scope
):
raise
TypeError
(
"scope should be as Scope type or None"
)
raise
TypeError
(
"scope should be as Scope type or None"
)
self
.
scope
=
scope
self
.
scope
=
scope
...
...
paddle/fluid/API.spec
浏览文件 @
4743c9cd
...
@@ -464,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
...
@@ -464,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
4743c9cd
...
@@ -8,25 +8,15 @@ function(windows_symbolic TARGET)
...
@@ -8,25 +8,15 @@ function(windows_symbolic TARGET)
set
(
final_path
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
windows_symbolic_PATH
}
)
set
(
final_path
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
windows_symbolic_PATH
}
)
foreach
(
src
${
windows_symbolic_SRCS
}
)
foreach
(
src
${
windows_symbolic_SRCS
}
)
get_filename_component
(
src
${
src
}
NAME_WE
)
get_filename_component
(
src
${
src
}
NAME_WE
)
if
(
NOT EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc OR NOT EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cu
)
if
(
NOT EXISTS
${
final_path
}
/
${
src
}
.cc OR NOT EXISTS
${
final_path
}
/
${
src
}
.cu
)
message
(
FATAL
"
${
src
}
.cc and
${
src
}
.cu must exsits, and
${
src
}
.cu must be symbolic file."
)
message
(
FATAL
"
${
src
}
.cc and
${
src
}
.cu must exsits, and
${
src
}
.cu must be symbolic file."
)
endif
()
endif
()
#only copy the xx.cu to.xx.cu when the content are modified
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
set
(
copy_flag 1
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu
)
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
file
(
READ
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc SOURCE_STR
)
COMMAND
${
CMAKE_COMMAND
}
-E copy_if_different
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
file
(
READ
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu TARGET_STR
)
if
(
SOURCE_STR STREQUAL TARGET_STR
)
set
(
copy_flag 0
)
endif
()
endif
()
if
(
copy_flag
)
add_custom_command
(
OUTPUT .
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E remove
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc"
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
COMMENT
"create hidden file of
${
src
}
.cu"
)
endif
(
copy_flag
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
endforeach
()
endforeach
()
endfunction
()
endfunction
()
...
@@ -78,17 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
...
@@ -78,17 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
cc_test
(
reader_test SRCS reader_test.cc DEPS reader
)
cc_test
(
reader_test SRCS reader_test.cc DEPS reader
)
cc_test
(
variable_test SRCS variable_test.cc
)
cc_library
(
threadpool SRCS threadpool.cc DEPS enforce
)
cc_library
(
threadpool SRCS threadpool.cc DEPS enforce
)
cc_test
(
threadpool_test SRCS threadpool_test.cc DEPS threadpool
)
cc_test
(
threadpool_test SRCS threadpool_test.cc DEPS threadpool
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
)
cc_library
(
var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto
)
if
(
WITH_GPU
)
target_link_libraries
(
var_type_traits dynload_cuda
)
endif
()
cc_test
(
var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool var_type_traits
)
cc_library
(
scope_pool SRCS scope_pool.cc DEPS scope
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_test
(
variable_test SRCS variable_test.cc DEPS tensor var_type_traits
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
nv_test
(
data_device_transform_test SRCS data_device_transform_test.cu
nv_test
(
data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function
)
DEPS operator op_registry device_context math_function
scope
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
if
(
WIN32
)
if
(
WIN32
)
...
...
paddle/fluid/framework/data_device_transform_test.cu
浏览文件 @
4743c9cd
...
@@ -17,6 +17,7 @@ limitations under the License. */
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
4743c9cd
...
@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
...
@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
}
}
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
浏览文件 @
4743c9cd
...
@@ -120,6 +120,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
...
@@ -120,6 +120,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
ClearFetchOp
(
graph_
.
get
(),
&
fetch_ops
);
ClearFetchOp
(
graph_
.
get
(),
&
fetch_ops
);
return
fetches
;
return
fetches
;
}
}
void
FastThreadedSSAGraphExecutor
::
RunOpAsync
(
void
FastThreadedSSAGraphExecutor
::
RunOpAsync
(
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
*
op_deps
,
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
*
op_deps
,
OpHandleBase
*
op
,
OpHandleBase
*
op
,
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
4743c9cd
此差异已折叠。
点击以展开。
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
4743c9cd
...
@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
#endif
#endif
int
GetVarDeviceID
(
int
GetVarDeviceID
(
const
ir
::
Graph
&
graph
,
const
std
::
string
&
varname
,
const
std
::
string
&
varname
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
bool
IsScaleLossOp
(
ir
::
Node
*
node
)
const
;
bool
IsScaleLossOp
(
ir
::
Node
*
node
)
const
;
...
@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
std
::
unordered_map
<
std
::
string
,
int
>
*
sharded_var_device
)
const
;
std
::
unordered_map
<
std
::
string
,
int
>
*
sharded_var_device
)
const
;
std
::
vector
<
std
::
string
>
FindDistTrainSendVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
size_t
num_places
)
const
;
size_t
num_places
)
const
;
...
@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
int
dev_id
)
const
;
int
dev_id
)
const
;
int
GetOpDeviceID
(
int
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
,
ir
::
Node
*
node
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
...
@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
void
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
;
const
platform
::
Place
&
p
)
const
;
std
::
vector
<
ir
::
Node
*>
SortForReduceMode
(
const
std
::
vector
<
ir
::
Node
*>
&
)
const
;
int
GetOpDeviceID
(
ir
::
Node
*
node
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
shared_var_device
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
*
delay_ops
)
const
;
mutable
std
::
string
loss_var_name_
;
mutable
std
::
string
loss_var_name_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
...
...
paddle/fluid/framework/details/variable_visitor.cc
浏览文件 @
4743c9cd
...
@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
...
@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
->
GetMutable
<
SelectedRows
>
());
(
*
func
)(
var
->
GetMutable
<
SelectedRows
>
());
}
else
{
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
->
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
->
Type
()
));
}
}
}
}
...
@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
...
@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
}
else
if
(
var
.
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
.
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
.
Get
<
SelectedRows
>
());
(
*
func
)(
var
.
Get
<
SelectedRows
>
());
}
else
{
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
.
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
.
Type
()
));
}
}
}
}
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
4743c9cd
...
@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
...
@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
}
}
}
}
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
4743c9cd
...
@@ -45,6 +45,7 @@ pass_library(is_test_pass base)
...
@@ -45,6 +45,7 @@ pass_library(is_test_pass base)
pass_library
(
conv_elementwise_add_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add2_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add2_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add_fuse_pass inference
)
pass_library
(
conv_elementwise_add_fuse_pass inference
)
pass_library
(
conv_affine_channel_fuse_pass inference
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
pass_library
(
mkldnn_placement_pass base
)
pass_library
(
mkldnn_placement_pass base
)
pass_library
(
depthwise_conv_mkldnn_pass base
)
pass_library
(
depthwise_conv_mkldnn_pass base
)
...
...
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
#include <functional>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
#define GET_CONV_BN_NODES(pattern_name) \
/* OPERATORS */
\
GET_IR_NODE_FROM_SUBGRAPH(conv, conv, pattern_name); \
GET_IR_NODE_FROM_SUBGRAPH(affine_channel, affine_channel, pattern_name); \
/* CONV inputs */
\
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, pattern_name); \
/* CONV outputs */
\
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, pattern_name); \
/* Affine Channel inputs */
\
GET_IR_NODE_FROM_SUBGRAPH(ac_scale, ac_scale, pattern_name); \
GET_IR_NODE_FROM_SUBGRAPH(ac_bias, ac_bias, pattern_name); \
/* Affine channel outputs */
\
GET_IR_NODE_FROM_SUBGRAPH(ac_out, ac_out, pattern_name);
/* Out */
void
recompute_bias_and_weights
(
const
Scope
*
scope
,
ir
::
Node
*
conv_weight
,
const
ir
::
Node
&
ac_scale
,
const
LoDTensor
&
ac_bias_tensor
,
LoDTensor
*
eltwise_y_in_tensor
)
{
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
EigenMatrixArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>>
;
// Re-compute bias of conv2d from AffineChannel
PADDLE_ENFORCE_EQ
(
eltwise_y_in_tensor
->
dims
(),
ac_bias_tensor
.
dims
());
auto
*
scale_tensor
=
scope
->
FindVar
(
ac_scale
.
Name
())
->
GetMutable
<
LoDTensor
>
();
ConstEigenVectorArrayMap
scale_array
(
scale_tensor
->
data
<
float
>
(),
scale_tensor
->
numel
(),
1
);
ConstEigenVectorArrayMap
ac_bias_array
(
ac_bias_tensor
.
data
<
float
>
(),
ac_bias_tensor
.
numel
(),
1
);
EigenVectorArrayMap
eltwise_y_in_array
(
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
eltwise_y_in_tensor
->
numel
(),
1
);
eltwise_y_in_array
=
(
eltwise_y_in_array
*
scale_array
)
+
ac_bias_array
;
// Re-compute weight of conv2d from AffineChannel
auto
*
weights
=
scope
->
FindVar
(
conv_weight
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
weights_shape
=
weights
->
dims
();
auto
weights_shape_2d
=
flatten_to_2d
(
weights_shape
,
1
);
EigenMatrixArrayMap
weights_array_2d
(
weights
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weights_shape_2d
[
0
],
weights_shape_2d
[
1
]);
weights_array_2d
.
colwise
()
*=
scale_array
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvAffineChannelFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
auto
*
scope
=
param_scope
();
PADDLE_ENFORCE
(
scope
);
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
patterns
::
PDNodeName
(
name_scope_
,
"conv_input"
))
->
AsInput
()
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
patterns
::
ConvAffineChannel
conv_ac_pattern
(
gpd
.
mutable_pattern
(),
name_scope_
);
conv_ac_pattern
(
conv_input
,
false
/*with_eltwise_add*/
);
int
found_conv_ac_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvAffineChannel fuse"
;
GET_CONV_BN_NODES
(
conv_ac_pattern
);
// check if fuse can be done and if MKL-DNN should be used
FuseOptions
fuse_option
=
FindFuseOption
(
*
conv
,
*
affine_channel
);
if
(
fuse_option
==
DO_NOT_FUSE
)
{
VLOG
(
3
)
<<
"do not perform conv+affinechannel fuse"
;
return
;
}
// Create eltwise_y (conv bias) variable
VarDesc
eltwise_y_in_desc
(
patterns
::
PDNodeName
(
name_scope_
,
"eltwise_y_in"
));
eltwise_y_in_desc
.
SetPersistable
(
true
);
auto
*
eltwise_y_in_node
=
g
->
CreateVarNode
(
&
eltwise_y_in_desc
);
auto
*
eltwise_y_in_tensor
=
scope
->
Var
(
eltwise_y_in_node
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Get affine_channel bias
auto
*
ac_bias_tensor
=
scope
->
FindVar
(
ac_bias
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Initialize eltwise_y
eltwise_y_in_tensor
->
Resize
(
ac_bias_tensor
->
dims
());
std
::
fill_n
(
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
eltwise_y_in_tensor
->
numel
(),
0.0
f
);
// update weights and biases
recompute_bias_and_weights
(
scope
,
conv_weight
,
*
ac_scale
,
*
ac_bias_tensor
,
eltwise_y_in_tensor
);
// create an elementwise add node.
OpDesc
desc
;
desc
.
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
conv_out
->
Name
()}));
desc
.
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
eltwise_y_in_node
->
Name
()}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
ac_out
->
Name
()}));
desc
.
SetType
(
"elementwise_add"
);
desc
.
SetAttr
(
"axis"
,
1
);
auto
eltwise_op
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
ac_scale
,
ac_bias
,
affine_channel
});
IR_NODE_LINK_TO
(
conv_out
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_y_in_node
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_op
,
ac_out
);
found_conv_ac_count
++
;
};
gpd
(
graph
.
get
(),
handler
);
AddStatis
(
found_conv_ac_count
);
return
graph
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvEltwiseAddAffineChannelFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
auto
*
scope
=
param_scope
();
PADDLE_ENFORCE
(
scope
);
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
patterns
::
PDNodeName
(
name_scope_
,
"conv_input"
))
->
AsInput
()
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
patterns
::
ConvAffineChannel
conv_ac_pattern
(
gpd
.
mutable_pattern
(),
name_scope_
);
conv_ac_pattern
(
conv_input
,
true
/*with_eltwise_add*/
);
int
found_conv_ac_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvBN fuse"
;
GET_CONV_BN_NODES
(
conv_ac_pattern
);
// OPERATORS
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise
,
eltwise
,
conv_ac_pattern
);
// BIAS inputs
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise_y_in
,
eltwise_y_in
,
conv_ac_pattern
);
// BIAS outputs
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise_out
,
eltwise_out
,
conv_ac_pattern
);
// Get eltwise_y (conv bias) variable
auto
*
eltwise_y_in_tensor
=
scope
->
FindVar
(
eltwise_y_in
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Get batch norm bias
auto
*
ac_bias_tensor
=
scope
->
FindVar
(
ac_bias
->
Name
())
->
GetMutable
<
LoDTensor
>
();
recompute_bias_and_weights
(
scope
,
conv_weight
,
*
ac_scale
,
*
ac_bias_tensor
,
eltwise_y_in_tensor
);
// Update the elementwise_add node
eltwise
->
Op
()
->
SetAttr
(
"axis"
,
1
);
eltwise
->
Op
()
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
ac_out
->
Name
()}));
GraphSafeRemoveNodes
(
graph
.
get
(),
{
ac_scale
,
ac_bias
,
affine_channel
,
eltwise_out
});
IR_NODE_LINK_TO
(
eltwise
,
ac_out
);
found_conv_ac_count
++
;
};
gpd
(
graph
.
get
(),
handler
);
AddStatis
(
found_conv_ac_count
);
return
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
conv_affine_channel_fuse_pass
,
paddle
::
framework
::
ir
::
ConvAffineChannelFusePass
);
REGISTER_PASS
(
conv_eltwiseadd_affine_channel_fuse_pass
,
paddle
::
framework
::
ir
::
ConvEltwiseAddAffineChannelFusePass
);
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
/*
* Fuse the Conv and ConvAffineChannel.
*/
class
ConvAffineChannelFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvAffineChannelFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"conv_affine_channel_fuse"
};
};
class
ConvEltwiseAddAffineChannelFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvEltwiseAddAffineChannelFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"conv_eltwiseadd_affine_channel_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
浏览文件 @
4743c9cd
...
@@ -40,18 +40,20 @@ framework::proto::OpDesc PrepareOpDesc(
...
@@ -40,18 +40,20 @@ framework::proto::OpDesc PrepareOpDesc(
const
std
::
string
&
output
)
{
const
std
::
string
&
output
)
{
auto
proto
=
base_desc
;
auto
proto
=
base_desc
;
framework
::
OpDesc
desc
(
proto
,
nullptr
);
framework
::
OpDesc
desc
(
proto
,
nullptr
);
desc
.
SetType
(
"conv2d_fusion"
);
desc
.
SetInput
(
"Bias"
,
{
bias
});
desc
.
SetInput
(
"Bias"
,
{
bias
});
desc
.
SetInput
(
"ResidualData"
,
{
bias1
});
desc
.
SetInput
(
"ResidualData"
,
{
bias1
});
desc
.
SetAttr
(
"activation"
,
activation
);
desc
.
SetAttr
(
"activation"
,
activation
);
desc
.
SetOutput
(
"Output"
,
{
output
});
desc
.
SetOutput
(
"Output"
,
{
output
});
desc
.
SetAttr
(
"is_test"
,
true
);
desc
.
SetAttr
(
"is_test"
,
true
);
desc
.
SetAttr
(
"use_cudnn"
,
false
);
desc
.
Flush
();
return
*
desc
.
Proto
();
return
*
desc
.
Proto
();
}
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvElementwiseAdd2ActFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
ConvElementwiseAdd2ActFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
const
std
::
string
pattern_name
=
"conv_elementwise_add_act_fuse"
;
const
std
::
string
pattern_name
=
"conv_elementwise_add
2
_act_fuse"
;
FusePassBase
::
Init
(
pattern_name
,
graph
.
get
());
FusePassBase
::
Init
(
pattern_name
,
graph
.
get
());
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
...
@@ -76,22 +78,23 @@ std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
...
@@ -76,22 +78,23 @@ std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
framework
::
OpDesc
new_op_desc
(
new_op_proto
,
nullptr
);
framework
::
OpDesc
new_op_desc
(
new_op_proto
,
nullptr
);
// Create a new node for the fused op.
// Create a new node for the fused op.
graph
->
CreateOpNode
(
&
new_op_desc
);
auto
*
new_conv_op
=
graph
->
CreateOpNode
(
&
new_op_desc
);
// Link inputs and outputs.
// Link inputs and outputs.
PADDLE_ENFORCE
(
subgraph
.
count
(
x
));
PADDLE_ENFORCE
(
subgraph
.
count
(
x
));
auto
*
conv_in_node
=
subgraph
.
at
(
x
);
auto
*
conv_in_node
=
subgraph
.
at
(
x
);
IR_NODE_LINK_TO
(
conv_in_node
,
conv_op
);
// Input
IR_NODE_LINK_TO
(
conv_in_node
,
new_
conv_op
);
// Input
IR_NODE_LINK_TO
(
conv_filter
,
conv_op
);
// Filter
IR_NODE_LINK_TO
(
conv_filter
,
new_
conv_op
);
// Filter
IR_NODE_LINK_TO
(
conv_op
,
conv_out
);
// Output
IR_NODE_LINK_TO
(
elementwise_add_in_y
,
new_conv_op
);
// Bias
IR_NODE_LINK_TO
(
elementwise_add_in_y
,
conv_op
);
// Bias
IR_NODE_LINK_TO
(
elementwise_add_in_y
_1
,
new_conv_op
);
// Bias
IR_NODE_LINK_TO
(
elementwise_add_in_y_1
,
conv_op
);
// Bias
IR_NODE_LINK_TO
(
new_conv_op
,
act_out
);
// Output
// Delete the unneeded nodes.
// Delete the unneeded nodes.
GraphSafeRemoveNodes
(
graph
.
get
(),
GraphSafeRemoveNodes
(
{
conv_op
,
elementwise_add_op
,
elementwise_add_op_1
,
graph
.
get
(),
elementwise_add_out
});
{
conv_op
,
conv_out
,
elementwise_add_op
,
elementwise_add_op_1
,
elementwise_add_out
,
elementwise_add_out_1
,
act_op
});
};
};
gpd
(
graph
.
get
(),
handler
);
gpd
(
graph
.
get
(),
handler
);
return
graph
;
return
graph
;
...
...
paddle/fluid/framework/ir/graph.cc
浏览文件 @
4743c9cd
...
@@ -23,66 +23,8 @@ limitations under the License. */
...
@@ -23,66 +23,8 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
namespace
{
void
CheckProgram
(
const
ProgramDesc
&
program
)
{
#define _INT(role) static_cast<int>(role)
std
::
map
<
int
,
bool
>
visit
;
for
(
OpDesc
*
op
:
program
.
Block
(
0
).
AllOps
())
{
// For backward compatibility, some program doesn't have role added.
if
(
!
op
->
HasAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
continue
;
int
role_id
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
visit
[
role_id
]
=
true
;
switch
(
role_id
)
{
case
_INT
(
OpRole
::
kForward
):
if
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
))
!=
visit
.
end
())
{
LOG
(
ERROR
)
<<
"Cannot add backward operator before forward operator "
<<
op
->
Type
();
}
break
;
case
_INT
(
OpRole
::
kBackward
):
case
_INT
(
OpRole
::
kBackward
)
|
_INT
(
OpRole
::
kLoss
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kOptimize
))
==
visit
.
end
(),
"Cannot add backward operator %s after optimize operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kForward
)
|
_INT
(
OpRole
::
kLoss
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
)
|
_INT
(
OpRole
::
kLoss
))
==
visit
.
end
(),
"Cannot add backward|loss operator before "
"forward|loss operator %s."
,
op
->
Type
());
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kOptimize
))
==
visit
.
end
(),
"Cannot add forward|loss operator %s after optimize operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kOptimize
):
case
_INT
(
OpRole
::
kOptimize
)
|
_INT
(
OpRole
::
kLRSched
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
))
!=
visit
.
end
(),
"Optimize operators %s must follow backward operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kLRSched
):
case
_INT
(
OpRole
::
kDist
):
case
_INT
(
OpRole
::
kRPC
):
case
_INT
(
OpRole
::
kNotSpecified
):
break
;
default:
LOG
(
FATAL
)
<<
"Unknown operator role. Don't add new role because "
"you don't know what you are doing."
;
}
}
#undef _INT
}
}
// namespace
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
CheckProgram
(
program_
);
auto
var_nodes
=
InitFromProgram
(
program_
);
auto
var_nodes
=
InitFromProgram
(
program_
);
ResolveHazard
(
var_nodes
);
ResolveHazard
(
var_nodes
);
}
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
4743c9cd
...
@@ -1101,9 +1101,7 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
...
@@ -1101,9 +1101,7 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
return
out_var
;
return
out_var
;
}
}
std
::
unordered_set
<
std
::
string
>
conv_act_set
({
"identity"
,
"sigmoid"
,
"relu"
,
std
::
unordered_set
<
std
::
string
>
conv_act_set
({
"identity"
,
"relu"
});
"relu6"
,
"relux"
,
"tanh"
,
"band_pass"
});
PDNode
*
patterns
::
ConvElementwiseaddAct
::
operator
()(
PDNode
*
conv_in
)
{
PDNode
*
patterns
::
ConvElementwiseaddAct
::
operator
()(
PDNode
*
conv_in
)
{
conv_in
->
AsInput
();
conv_in
->
AsInput
();
...
@@ -1169,13 +1167,13 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
...
@@ -1169,13 +1167,13 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
->
AsInput
();
->
AsInput
();
auto
elementwise_add_out
=
pattern
->
NewNode
(
elementwise_add_out_repr
())
auto
elementwise_add_out
=
pattern
->
NewNode
(
elementwise_add_out_repr
())
->
assert_is_op_output
(
"elementwise_add"
)
->
assert_is_op_output
(
"elementwise_add"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
X
"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
Y
"
)
->
AsIntermediate
();
->
AsIntermediate
();
auto
elementwise_add_op_1
=
pattern
->
NewNode
(
elementwise_add_op_1_repr
())
auto
elementwise_add_op_1
=
pattern
->
NewNode
(
elementwise_add_op_1_repr
())
->
assert_is_op
(
"elementwise_add"
);
->
assert_is_op
(
"elementwise_add"
);
auto
elementwise_add_in_y_1
=
pattern
->
NewNode
(
elementwise_add_in_y_1_repr
())
auto
elementwise_add_in_y_1
=
pattern
->
NewNode
(
elementwise_add_in_y_1_repr
())
->
assert_is_op_input
(
"elementwise_add"
,
"
Y
"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
X
"
)
->
AsInput
();
->
AsInput
();
auto
elementwise_add_out_1
=
pattern
->
NewNode
(
elementwise_add_out_1_repr
())
auto
elementwise_add_out_1
=
pattern
->
NewNode
(
elementwise_add_out_1_repr
())
->
assert_is_op_output
(
"elementwise_add"
)
->
assert_is_op_output
(
"elementwise_add"
)
...
@@ -1203,8 +1201,8 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
...
@@ -1203,8 +1201,8 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
conv_op
->
LinksFrom
({
conv_in
,
conv_filter
}).
LinksTo
({
conv_out
});
conv_op
->
LinksFrom
({
conv_in
,
conv_filter
}).
LinksTo
({
conv_out
});
elementwise_add_op
->
LinksFrom
({
conv_out
,
elementwise_add_in_y
})
elementwise_add_op
->
LinksFrom
({
conv_out
,
elementwise_add_in_y
})
.
LinksTo
({
elementwise_add_out
});
.
LinksTo
({
elementwise_add_out
});
elementwise_add_op_1
->
LinksFrom
(
elementwise_add_op_1
->
LinksFrom
(
{
elementwise_add_out
,
elementwise_add_in_y_1
})
{
elementwise_add_out
,
elementwise_add_in_y
_1
});
.
LinksTo
({
elementwise_add_out
_1
});
act_op
->
LinksFrom
({
elementwise_add_out_1
}).
LinksTo
({
act_out
});
act_op
->
LinksFrom
({
elementwise_add_out_1
}).
LinksTo
({
act_out
});
return
act_out
;
return
act_out
;
}
}
...
@@ -1236,6 +1234,78 @@ PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) {
...
@@ -1236,6 +1234,78 @@ PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) {
return
elementwise_add_out
;
return
elementwise_add_out
;
}
}
PDNode
*
patterns
::
ConvAffineChannel
::
operator
()(
paddle
::
framework
::
ir
::
PDNode
*
conv_input
,
bool
with_eltwise_add
)
{
// Create Operators
conv_input
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
auto
*
conv_op
=
pattern
->
NewNode
(
conv_repr
())
->
assert_is_op
(
"conv2d"
);
PDNode
*
eltwise_op
=
nullptr
;
if
(
with_eltwise_add
)
{
eltwise_op
=
pattern
->
NewNode
(
eltwise_repr
())
->
assert_is_op
(
"elementwise_add"
);
}
auto
*
affine_channel_op
=
pattern
->
NewNode
(
affine_channel_repr
())
->
assert_is_op
(
"affine_channel"
);
// Create variables
// Conv Filter
auto
*
conv_weight_var
=
pattern
->
NewNode
(
conv_weight_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Filter"
);
auto
*
conv_out_var
=
pattern
->
NewNode
(
conv_out_repr
())
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"conv2d"
);
PDNode
*
eltwise_y_in_var
=
nullptr
;
PDNode
*
eltwise_out_var
=
nullptr
;
if
(
with_eltwise_add
)
{
// Conv output as Bias input
conv_out_var
->
assert_is_op_input
(
"elementwise_add"
,
"X"
);
// Bias
eltwise_y_in_var
=
pattern
->
NewNode
(
eltwise_y_in_repr
())
->
assert_is_op_input
(
"elementwise_add"
,
"Y"
)
->
AsInput
();
eltwise_out_var
=
pattern
->
NewNode
(
eltwise_out_repr
())
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"elementwise_add"
);
}
else
{
// Conv output as AffineChannel input
conv_out_var
->
assert_is_op_input
(
"affine_channel"
,
"X"
);
}
// AC Scale
auto
*
ac_scale_var
=
pattern
->
NewNode
(
ac_scale_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"affine_channel"
,
"Scale"
);
// AC Bias
auto
*
ac_bias_var
=
pattern
->
NewNode
(
ac_bias_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"affine_channel"
,
"Bias"
);
// AC output
auto
*
ac_out_var
=
pattern
->
NewNode
(
ac_out_repr
())
->
AsOutput
()
->
assert_is_op_output
(
"affine_channel"
);
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
}).
LinksTo
({
conv_out_var
});
if
(
with_eltwise_add
)
{
eltwise_op
->
LinksFrom
({
conv_out_var
,
eltwise_y_in_var
})
.
LinksTo
({
eltwise_out_var
});
affine_channel_op
->
LinksFrom
({
eltwise_out_var
,
ac_scale_var
,
ac_bias_var
})
.
LinksTo
({
ac_out_var
});
}
else
{
affine_channel_op
->
LinksFrom
({
conv_out_var
,
ac_scale_var
,
ac_bias_var
})
.
LinksTo
({
ac_out_var
});
}
return
ac_out_var
;
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
4743c9cd
...
@@ -734,6 +734,38 @@ struct ConvElementwiseadd : public PatternBase {
...
@@ -734,6 +734,38 @@ struct ConvElementwiseadd : public PatternBase {
PATTERN_DECL_NODE
(
elementwise_add_out
);
PATTERN_DECL_NODE
(
elementwise_add_out
);
};
};
// Conv with affine_channel
// op: conv + (elementwise_add +) affine_channel
// named nodes:
// conv_weight, conv_out, conv,
// ac_x, ac_scale, ac_bias
// affine_channel, ac_out
struct
ConvAffineChannel
:
public
PatternBase
{
ConvAffineChannel
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"conv_affine_channel"
)
{}
PDNode
*
operator
()(
PDNode
*
conv_input
,
bool
with_eltwise_add
);
// declare operator node's name
PATTERN_DECL_NODE
(
conv
);
PATTERN_DECL_NODE
(
affine_channel
);
PATTERN_DECL_NODE
(
eltwise
);
// ELEMENTWISE_ADD
// CONV inputs
PATTERN_DECL_NODE
(
conv_weight
);
// Filter
// CONV outputs
PATTERN_DECL_NODE
(
conv_out
);
// tmp
// ELTWISE inputs
PATTERN_DECL_NODE
(
eltwise_y_in
);
// ELTWISE outputs
PATTERN_DECL_NODE
(
eltwise_out
);
// tmp
// AC(Affine_Channel) inputs
PATTERN_DECL_NODE
(
ac_scale
);
PATTERN_DECL_NODE
(
ac_bias
);
// AC outputs
PATTERN_DECL_NODE
(
ac_out
);
// Out
};
}
// namespace patterns
}
// namespace patterns
// Link two ir::Nodes from each other.
// Link two ir::Nodes from each other.
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
4743c9cd
...
@@ -215,7 +215,7 @@ class Vector {
...
@@ -215,7 +215,7 @@ class Vector {
auto
stream
=
dev_ctx
->
stream
();
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
->
ptr
();
void
*
src
=
gpu_
->
ptr
();
void
*
dst
=
cpu_
.
data
();
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
gpu_
->
size
(),
stream
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
}
}
...
@@ -261,7 +261,7 @@ class Vector {
...
@@ -261,7 +261,7 @@ class Vector {
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
paddle
::
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
gpu_
->
size
(),
stream
);
}
}
...
@@ -284,7 +284,7 @@ class Vector {
...
@@ -284,7 +284,7 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
std
::
vector
<
T
>
cpu_
;
mutable
memory
::
AllocationPtr
gpu_
;
mutable
paddle
::
memory
::
AllocationPtr
gpu_
;
mutable
int
flag_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/framework/op_proto_maker.cc
浏览文件 @
4743c9cd
...
@@ -82,10 +82,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
...
@@ -82,10 +82,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
.
SetDefault
(
""
);
.
SetDefault
(
""
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
OpCreationCallstackAttrName
(),
"Callstack for Op Creatation."
)
.
SetDefault
({});
Validate
();
Validate
();
}
}
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
4743c9cd
...
@@ -47,7 +47,6 @@ class OpProtoAndCheckerMaker {
...
@@ -47,7 +47,6 @@ class OpProtoAndCheckerMaker {
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpCreationCallstackAttrName
()
{
return
"op_callstack"
;
}
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
4743c9cd
...
@@ -23,6 +23,7 @@ limitations under the License. */
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/details/op_registry.h"
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
4743c9cd
...
@@ -16,15 +16,10 @@ limitations under the License. */
...
@@ -16,15 +16,10 @@ limitations under the License. */
#include <glog/logging.h>
#include <glog/logging.h>
#include <algorithm>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
...
@@ -162,10 +157,7 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
...
@@ -162,10 +157,7 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
}
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
try
{
if
(
VLOG_IS_ON
(
4
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
...
@@ -175,54 +167,21 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
...
@@ -175,54 +167,21 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif
#endif
}
}
// The profile has a process-wide mutex, results in serious performance
// The profile has a process-wide mutex, results in serious performance issue
// issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if
(
platform
::
IsProfileEnabled
())
{
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
RunImpl
(
scope
,
place
);
}
else
{
}
else
{
RunImpl
(
scope
,
place
);
RunImpl
(
scope
,
place
);
}
}
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
catch
(
platform
::
EnforceNotMet
exception
)
{
if
(
Attrs
().
count
(
"sub_block"
)
!=
0
)
{
throw
exception
;
}
auto
&
callstack
=
Attr
<
std
::
vector
<
std
::
string
>>
(
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
());
if
(
callstack
.
empty
())
{
throw
exception
;
}
std
::
ostringstream
sout
;
sout
<<
"Invoke operator "
<<
Type
()
<<
" error.
\n
"
;
sout
<<
"Python Callstacks:
\n
"
;
for
(
auto
&
line
:
callstack
)
{
sout
<<
line
;
}
sout
<<
"C++ Callstacks:
\n
"
;
sout
<<
exception
.
err_str_
;
exception
.
err_str_
=
sout
.
str
();
throw
exception
;
}
catch
(...)
{
std
::
rethrow_exception
(
std
::
current_exception
());
}
}
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
if
(
inputs_
.
find
(
name
)
!=
inputs_
.
end
())
{
return
inputs_
.
find
(
name
)
!=
inputs_
.
end
();
return
true
;
}
else
{
return
false
;
}
}
}
std
::
string
OperatorBase
::
Input
(
const
std
::
string
&
name
)
const
{
std
::
string
OperatorBase
::
Input
(
const
std
::
string
&
name
)
const
{
...
@@ -421,7 +380,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
...
@@ -421,7 +380,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
return
&
(
var
.
Get
<
SelectedRows
>
().
value
());
return
&
(
var
.
Get
<
SelectedRows
>
().
value
());
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
.
Type
().
name
(
));
ToTypeName
(
var
.
Type
()
));
}
}
}
}
...
@@ -432,7 +391,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
...
@@ -432,7 +391,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
}
}
...
@@ -526,7 +485,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
...
@@ -526,7 +485,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
var
->
IsType
<
LoDTensor
>
(),
"should be LoDTensor, but the received type is %s"
,
"should be LoDTensor, but the received type is %s"
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
return
&
(
var
->
Get
<
LoDTensor
>
());
return
&
(
var
->
Get
<
LoDTensor
>
());
});
});
return
res
;
return
res
;
...
@@ -545,7 +504,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
...
@@ -545,7 +504,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
&
(
var
->
Get
<
LoDTensor
>
());
return
&
(
var
->
Get
<
LoDTensor
>
());
});
});
return
res
;
return
res
;
...
@@ -574,7 +533,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
...
@@ -574,7 +533,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
var
->
GetMutable
<
LoDTensor
>
();
return
var
->
GetMutable
<
LoDTensor
>
();
});
});
return
res
;
return
res
;
...
@@ -816,7 +775,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -816,7 +775,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s."
,
"type_id is %s."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
}
}
...
@@ -839,7 +798,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -839,7 +798,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
}
}
...
...
paddle/fluid/framework/operator.h
浏览文件 @
4743c9cd
...
@@ -49,6 +49,8 @@ constexpr char kTempVarName[] = "@TEMP@";
...
@@ -49,6 +49,8 @@ constexpr char kTempVarName[] = "@TEMP@";
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
constexpr
char
kGradVarSuffix
[]
=
"@GRAD"
;
constexpr
char
kGradVarSuffix
[]
=
"@GRAD"
;
constexpr
size_t
kGradVarSuffixSize
=
5U
;
/// Variables with this suffix are supposed to be filled up with zeros.
/// Variables with this suffix are supposed to be filled up with zeros.
constexpr
char
kZeroVarSuffix
[]
=
"@ZERO"
;
constexpr
char
kZeroVarSuffix
[]
=
"@ZERO"
;
...
@@ -60,7 +62,11 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
...
@@ -60,7 +62,11 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
extern
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
extern
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
inline
std
::
string
GradVarName
(
const
std
::
string
&
var_name
)
{
inline
std
::
string
GradVarName
(
const
std
::
string
&
var_name
)
{
return
var_name
+
kGradVarSuffix
;
std
::
string
result
;
result
.
reserve
(
var_name
.
size
()
+
kGradVarSuffixSize
);
result
+=
var_name
;
result
+=
kGradVarSuffix
;
return
result
;
}
}
proto
::
VarType
::
Type
GetDataTypeOfVar
(
const
Variable
*
var
);
proto
::
VarType
::
Type
GetDataTypeOfVar
(
const
Variable
*
var
);
...
@@ -110,8 +116,8 @@ class OperatorBase {
...
@@ -110,8 +116,8 @@ class OperatorBase {
bool
HasAttr
(
const
std
::
string
&
name
)
const
{
return
attrs_
.
count
(
name
);
}
bool
HasAttr
(
const
std
::
string
&
name
)
const
{
return
attrs_
.
count
(
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
const
T
&
Attr
(
const
std
::
string
&
name
)
const
{
inline
const
T
&
Attr
(
const
std
::
string
&
name
)
const
{
PADDLE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
"%s should be in AttributeMap"
,
PADDLE_ENFORCE
(
attrs_
.
find
(
name
)
!=
attrs_
.
end
()
,
name
);
"%s should be in AttributeMap"
,
name
);
return
boost
::
get
<
T
>
(
attrs_
.
at
(
name
));
return
boost
::
get
<
T
>
(
attrs_
.
at
(
name
));
}
}
const
AttributeMap
&
Attrs
()
const
{
return
attrs_
;
}
const
AttributeMap
&
Attrs
()
const
{
return
attrs_
;
}
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
4743c9cd
...
@@ -367,6 +367,7 @@ void ParallelExecutor::BCastParamsToDevices(
...
@@ -367,6 +367,7 @@ void ParallelExecutor::BCastParamsToDevices(
if
(
paddle
::
platform
::
is_gpu_place
(
main_tensor
.
place
()))
{
if
(
paddle
::
platform
::
is_gpu_place
(
main_tensor
.
place
()))
{
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
vector
<
void
*>
buffers
;
std
::
vector
<
void
*>
buffers
;
buffers
.
reserve
(
member_
->
places_
.
size
());
size_t
numel
=
main_tensor
.
numel
();
size_t
numel
=
main_tensor
.
numel
();
ncclDataType_t
data_type
=
platform
::
ToNCCLDataType
(
main_tensor
.
type
());
ncclDataType_t
data_type
=
platform
::
ToNCCLDataType
(
main_tensor
.
type
());
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
...
@@ -400,9 +401,7 @@ void ParallelExecutor::BCastParamsToDevices(
...
@@ -400,9 +401,7 @@ void ParallelExecutor::BCastParamsToDevices(
#endif
#endif
}
else
{
}
else
{
platform
::
CPUPlace
cpu
;
platform
::
CPUPlace
cpu
;
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
1
;
i
<
member_
->
places_
.
size
();
++
i
)
{
if
(
i
==
0
)
continue
;
auto
local_scope
=
member_
->
local_scopes_
[
i
];
auto
local_scope
=
member_
->
local_scopes_
[
i
];
auto
*
t
=
local_scope
->
Var
(
var
)
->
GetMutable
<
LoDTensor
>
();
auto
*
t
=
local_scope
->
Var
(
var
)
->
GetMutable
<
LoDTensor
>
();
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
4743c9cd
...
@@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
...
@@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
Variable
*
Scope
::
VarInternal
(
const
std
::
string
&
name
)
{
Variable
*
Scope
::
VarInternal
(
const
std
::
string
&
name
)
{
auto
*
v
=
FindVarLocally
(
name
);
auto
*
v
=
FindVarLocally
(
name
);
if
(
v
!=
nullptr
)
return
v
;
if
(
v
!=
nullptr
)
return
v
;
v
=
new
Variable
();
v
=
new
Variable
();
vars_
[
name
].
reset
(
v
);
vars_
.
emplace
(
name
,
std
::
unique_ptr
<
Variable
>
(
v
)
);
VLOG
(
3
)
<<
"Create variable "
<<
name
;
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
return
v
;
}
}
...
...
paddle/fluid/framework/scope_pool.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/threadpool.h"
namespace
paddle
{
namespace
framework
{
ScopePool
&
ScopePool
::
Instance
()
{
// NOLINT
static
ScopePool
pool
;
return
pool
;
}
void
ScopePool
::
DeleteScope
(
Scope
*
scope
)
{
delete
scope
;
}
void
ScopePool
::
Insert
(
std
::
unique_ptr
<
Scope
>
&&
s
)
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
scopes_
.
insert
(
s
.
release
());
}
void
ScopePool
::
Remove
(
Scope
*
s
)
{
size_t
has_scope
;
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
has_scope
=
scopes_
.
erase
(
s
);
}
PADDLE_ENFORCE
(
has_scope
>
0
,
"Delete non-existing global scope"
);
DeleteScope
(
s
);
}
ScopePool
::~
ScopePool
()
{
Clear
();
}
void
ScopePool
::
Clear
()
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
for
(
auto
*
s
:
scopes_
)
{
DeleteScope
(
s
);
}
scopes_
.
clear
();
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/scope_pool.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mutex> // NOLINT
#include <unordered_set>
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
class
ScopePool
{
public:
static
ScopePool
&
Instance
();
// NOLINT
void
Insert
(
std
::
unique_ptr
<
Scope
>
&&
s
);
void
Remove
(
Scope
*
s
);
void
Clear
();
~
ScopePool
();
private:
ScopePool
()
=
default
;
static
void
DeleteScope
(
Scope
*
scope
);
std
::
unordered_set
<
Scope
*>
scopes_
;
std
::
mutex
mtx_
;
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type.h
浏览文件 @
4743c9cd
...
@@ -19,52 +19,50 @@ limitations under the License. */
...
@@ -19,52 +19,50 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
template
<
typename
T
>
template
<
typename
T
>
inline
bool
IsType
(
const
std
::
type_index
&
type
_index
)
{
inline
bool
IsType
(
const
std
::
type_index
&
type
)
{
return
type
_index
==
std
::
type_index
(
typeid
(
T
)
);
return
type
==
typeid
(
T
);
}
}
inline
proto
::
VarType
::
Type
ToVarType
(
std
::
type_index
type
)
{
inline
proto
::
VarType
::
Type
ToVarType
(
int
type
)
{
if
(
IsType
<
LoDTensor
>
(
type
))
{
switch
(
type
)
{
return
proto
::
VarType_Type_LOD_TENSOR
;
case
proto
::
VarType
::
LOD_TENSOR
:
}
else
if
(
IsType
<
LoDRankTable
>
(
type
))
{
case
proto
::
VarType
::
SELECTED_ROWS
:
return
proto
::
VarType_Type_LOD_RANK_TABLE
;
case
proto
::
VarType
::
LOD_RANK_TABLE
:
}
else
if
(
IsType
<
LoDTensorArray
>
(
type
))
{
case
proto
::
VarType
::
LOD_TENSOR_ARRAY
:
return
proto
::
VarType_Type_LOD_TENSOR_ARRAY
;
case
proto
::
VarType
::
READER
:
}
else
if
(
IsType
<
SelectedRows
>
(
type
))
{
return
static_cast
<
proto
::
VarType
::
Type
>
(
type
);
return
proto
::
VarType_Type_SELECTED_ROWS
;
default:
}
else
if
(
IsType
<
ReaderHolder
>
(
type
))
{
PADDLE_THROW
(
"ToVarType:Unsupported type %d"
,
type
);
return
proto
::
VarType_Type_READER
;
}
else
{
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
}
}
}
}
template
<
typename
Visitor
>
template
<
typename
Visitor
>
inline
void
VisitVarType
(
const
framework
::
Variable
&
var
,
Visitor
visitor
)
{
inline
void
VisitVarType
(
const
framework
::
Variable
&
var
,
Visitor
visitor
)
{
switch
(
ToVarType
(
var
.
Type
()
))
{
switch
(
var
.
Type
(
))
{
case
proto
::
VarType
_Type_
LOD_TENSOR
:
case
proto
::
VarType
::
LOD_TENSOR
:
visitor
(
var
.
Get
<
LoDTensor
>
());
visitor
(
var
.
Get
<
LoDTensor
>
());
return
;
return
;
case
proto
::
VarType
_Type_
LOD_RANK_TABLE
:
case
proto
::
VarType
::
LOD_RANK_TABLE
:
visitor
(
var
.
Get
<
LoDRankTable
>
());
visitor
(
var
.
Get
<
LoDRankTable
>
());
return
;
return
;
case
proto
::
VarType
_Type_
LOD_TENSOR_ARRAY
:
case
proto
::
VarType
::
LOD_TENSOR_ARRAY
:
visitor
(
var
.
Get
<
LoDTensorArray
>
());
visitor
(
var
.
Get
<
LoDTensorArray
>
());
return
;
return
;
case
proto
::
VarType
_Type_
SELECTED_ROWS
:
case
proto
::
VarType
::
SELECTED_ROWS
:
visitor
(
var
.
Get
<
SelectedRows
>
());
visitor
(
var
.
Get
<
SelectedRows
>
());
return
;
return
;
case
proto
::
VarType
_Type_
READER
:
case
proto
::
VarType
::
READER
:
visitor
(
var
.
Get
<
ReaderHolder
>
());
visitor
(
var
.
Get
<
ReaderHolder
>
());
return
;
return
;
default:
default:
PADDLE_THROW
(
"Not supported visit type, %
d"
,
ToVarTyp
e
(
var
.
Type
()));
PADDLE_THROW
(
"Not supported visit type, %
s"
,
ToTypeNam
e
(
var
.
Type
()));
}
}
}
}
...
...
paddle/fluid/framework/var_type_inference_test.cc
浏览文件 @
4743c9cd
...
@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
...
@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
op
->
InferVarType
(
prog
.
MutableBlock
(
0
));
op
->
InferVarType
(
prog
.
MutableBlock
(
0
));
ASSERT_EQ
(
proto
::
VarType
_Type_
LOD_TENSOR
,
ASSERT_EQ
(
proto
::
VarType
::
LOD_TENSOR
,
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
GetType
());
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
GetType
());
}
}
...
...
paddle/fluid/framework/var_type_traits.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/macros.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace
paddle
{
namespace
framework
{
// Besides registering variable type id, it is helpful to register a
// var_id -> std::type_index map (for example, get type names according to id)
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
VarIdToTypeIndexMapInitializerImpl
{
template
<
typename
MapType1
,
typename
MapType2
>
static
void
Init
(
MapType1
*
id_to_type
,
MapType2
*
type_to_id
)
{
using
Type
=
typename
std
::
tuple_element
<
kStart
,
VarTypeRegistry
::
ArgTuple
>::
type
;
static_assert
(
!
std
::
is_same
<
Type
,
void
>::
value
,
"Type cannot be void"
);
constexpr
int
kId
=
VarTypeTrait
<
Type
>::
kId
;
auto
type
=
std
::
type_index
(
typeid
(
Type
));
PADDLE_ENFORCE
(
id_to_type
->
count
(
kId
)
==
0
,
"Registered duplicate type id %d for type %s"
,
kId
,
type
.
name
());
PADDLE_ENFORCE
(
type_to_id
->
count
(
type
)
==
0
,
"Registered duplicate type_index %s for id %d"
,
type
.
name
(),
kId
);
id_to_type
->
emplace
(
kId
,
type
);
type_to_id
->
emplace
(
type
,
kId
);
VarIdToTypeIndexMapInitializerImpl
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Init
(
id_to_type
,
type_to_id
);
}
};
template
<
int
kStart
,
int
kEnd
>
struct
VarIdToTypeIndexMapInitializerImpl
<
kStart
,
kEnd
,
true
>
{
template
<
typename
MapType1
,
typename
MapType2
>
static
void
Init
(
MapType1
*
,
MapType2
*
)
{}
};
// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map and std::type_index -> var_id map
using
VarIdToTypeIndexMapInitializer
=
VarIdToTypeIndexMapInitializerImpl
<
0
,
VarTypeRegistry
::
kRegisteredTypeNum
,
VarTypeRegistry
::
kRegisteredTypeNum
==
0
>
;
struct
VarIdToTypeIndexMapHolder
{
DISABLE_COPY_AND_ASSIGN
(
VarIdToTypeIndexMapHolder
);
public:
static
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
auto
it
=
Instance
().
id_to_type_map_
.
find
(
var_id
);
PADDLE_ENFORCE
(
it
!=
Instance
().
id_to_type_map_
.
end
(),
"VarId %d is not registered."
,
var_id
);
return
it
->
second
;
}
static
int
ToTypeId
(
const
std
::
type_index
&
type
)
{
auto
it
=
Instance
().
type_to_id_map_
.
find
(
type
);
PADDLE_ENFORCE
(
it
!=
Instance
().
type_to_id_map_
.
end
(),
"VarType %s is not registered."
,
type
.
name
());
return
it
->
second
;
}
private:
VarIdToTypeIndexMapHolder
()
{
VarIdToTypeIndexMapInitializer
::
Init
(
&
id_to_type_map_
,
&
type_to_id_map_
);
}
static
const
VarIdToTypeIndexMapHolder
&
Instance
()
{
static
const
VarIdToTypeIndexMapHolder
instance
;
return
instance
;
}
std
::
unordered_map
<
int
,
std
::
type_index
>
id_to_type_map_
;
std
::
unordered_map
<
std
::
type_index
,
int
>
type_to_id_map_
;
};
}
// namespace detail
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
return
detail
::
VarIdToTypeIndexMapHolder
::
ToTypeIndex
(
var_id
);
}
const
char
*
ToTypeName
(
int
var_id
)
{
return
ToTypeIndex
(
var_id
).
name
();
}
int
ToTypeId
(
const
std
::
type_index
&
type
)
{
return
detail
::
VarIdToTypeIndexMapHolder
::
ToTypeId
(
type
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#ifndef _WIN32
#include <nccl.h>
#endif
#endif
// Users should add forward declarations here
namespace
paddle
{
namespace
platform
{
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
class
Communicator
;
#endif
#endif
}
// namespace platform
namespace
framework
{
class
Tensor
;
class
LoDTensor
;
class
SelectedRows
;
class
LoDRankTable
;
class
ReaderHolder
;
class
Scope
;
}
// namespace framework
namespace
operators
{
template
<
typename
T
>
class
AlgorithmsCache
;
class
CudnnRNNCache
;
namespace
reader
{
class
LoDTensorBlockingQueueHolder
;
}
// namespace reader
}
// namespace operators
}
// namespace paddle
namespace
paddle
{
namespace
framework
{
const
char
*
ToTypeName
(
int
var_id
);
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
);
int
ToTypeId
(
const
std
::
type_index
&
type
);
namespace
detail
{
template
<
bool
kStop
,
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
,
typename
...
Args
>
struct
TypePosFinderImpl
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
TypePosFinderImpl
<
kStart
+
2
==
kEnd
,
kStart
+
1
,
kEnd
,
T1
,
Args
...
>::
kPos
;
};
template
<
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
>
struct
TypePosFinderImpl
<
true
,
kStart
,
kEnd
,
T1
,
T2
>
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
-
1
;
};
// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template
<
typename
T
,
typename
...
Args
>
struct
TypePosFinder
{
static
constexpr
int
kPos
=
TypePosFinderImpl
<
sizeof
...(
Args
)
==
1
,
0
,
sizeof
...(
Args
),
T
,
Args
...
>::
kPos
;
};
template
<
typename
...
Args
>
struct
VarTypeRegistryImpl
{
static
constexpr
size_t
kRegisteredTypeNum
=
sizeof
...(
Args
);
using
ArgTuple
=
std
::
tuple
<
Args
...
>
;
// TypePos() returns the position in which T is inside Args...
// If T is not inside Args..., return -1
template
<
typename
T
>
static
constexpr
int
TypePos
()
{
return
TypePosFinder
<
T
,
Args
...
>::
kPos
;
}
// IsRegistered() returns whether T is registered inside RegistryImpl
template
<
typename
T
>
static
constexpr
bool
IsRegistered
()
{
return
TypePos
<
T
>
()
>=
0
;
}
};
}
// namespace detail
#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) \
template <> \
struct VarTypeTrait<type> { \
static_assert(VarTypeRegistry::IsRegistered<type>(), \
"Must be registered type"); \
using Type = type; \
static constexpr int kId = static_cast<int>(proto_id); \
}
/**
* The following codes are designed to register variable types.
* Only registered types can be stored in Variable.
* This registry mechanism is designed to speed up Variable.
*
* Caution: If you want to add more var types, please consider carefully
* whether you really need to add it.
*/
// Users should add other variable types below.
// Paddle would generate unique Ids for each registered variable types.
using
VarTypeRegistry
=
detail
::
VarTypeRegistryImpl
<
Tensor
,
LoDTensor
,
SelectedRows
,
std
::
vector
<
Scope
*>
,
LoDRankTable
,
LoDTensorArray
,
platform
::
PlaceList
,
ReaderHolder
,
std
::
string
,
Scope
*
,
std
::
map
<
size_t
,
Tensor
>
,
operators
::
reader
::
LoDTensorBlockingQueueHolder
,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
ncclUniqueId
,
platform
::
Communicator
,
#endif
operators
::
AlgorithmsCache
<
cudnnConvolutionFwdAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdDataAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdFilterAlgo_t
>
,
operators
::
CudnnRNNCache
,
#endif
int
,
float
>
;
template
<
typename
T
>
struct
VarTypeTrait
{
static_assert
(
VarTypeRegistry
::
IsRegistered
<
T
>
(),
"Must be registered type"
);
using
Type
=
T
;
/**
* Unique VarType Id generation.
*
* The auto-generated id should not be the same as any protobuf id defined in
* framework.proto. Therefore, we generate id by adding the type pos and
* maximum protobuf id (i.e., proto::VarType::TUPLE).
*
* However, we may need more protobuf id in the future.
* To avoid changing this auto id generation algorithm frequently, we
* generate id by adding the type pos and twice of maximum protobuf id (i.e.,
* proto::VarType::TUPLE).
*/
static
constexpr
int
kId
=
VarTypeRegistry
::
TypePos
<
T
>
()
+
static_cast
<
int
>
(
proto
::
VarType
::
TUPLE
)
*
2
;
};
// Users should set some of variable type ids to be what is defined in
// framework.proto below
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensor
,
proto
::
VarType
::
LOD_TENSOR
);
REG_PROTO_VAR_TYPE_TRAIT
(
SelectedRows
,
proto
::
VarType
::
SELECTED_ROWS
);
REG_PROTO_VAR_TYPE_TRAIT
(
std
::
vector
<
Scope
*>
,
proto
::
VarType
::
STEP_SCOPES
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDRankTable
,
proto
::
VarType
::
LOD_RANK_TABLE
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensorArray
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
);
REG_PROTO_VAR_TYPE_TRAIT
(
platform
::
PlaceList
,
proto
::
VarType
::
PLACE_LIST
);
REG_PROTO_VAR_TYPE_TRAIT
(
ReaderHolder
,
proto
::
VarType
::
READER
);
REG_PROTO_VAR_TYPE_TRAIT
(
int
,
proto
::
VarType
::
INT32
);
REG_PROTO_VAR_TYPE_TRAIT
(
float
,
proto
::
VarType
::
FP32
);
/** End of variable type registration */
template
<
typename
T
>
inline
constexpr
bool
IsRegisteredVarType
()
{
return
VarTypeRegistry
::
IsRegistered
<
T
>
();
}
#undef REG_PROTO_VAR_TYPE_TRAIT
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits_test.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace
paddle
{
namespace
framework
{
template
<
int
kPos
,
int
kEnd
,
bool
kStop
>
struct
TypeIndexChecker
{
template
<
typename
SetType1
,
typename
SetType2
>
static
void
Check
(
SetType1
*
var_id_set
,
SetType2
*
type_index_set
)
{
using
Type
=
typename
std
::
tuple_element
<
kPos
,
VarTypeRegistry
::
ArgTuple
>::
type
;
static_assert
(
std
::
is_same
<
typename
VarTypeTrait
<
Type
>::
Type
,
Type
>::
value
,
"Type must be the same"
);
constexpr
auto
kId
=
VarTypeTrait
<
Type
>::
kId
;
std
::
type_index
actual_type
(
typeid
(
Type
));
EXPECT_EQ
(
std
::
string
(
ToTypeName
(
kId
)),
std
::
string
(
actual_type
.
name
()));
EXPECT_EQ
(
ToTypeIndex
(
kId
),
actual_type
);
EXPECT_EQ
(
ToTypeId
(
actual_type
),
kId
);
EXPECT_EQ
(
ToTypeIndex
(
ToTypeId
(
actual_type
)),
actual_type
);
EXPECT_EQ
(
ToTypeId
(
ToTypeIndex
(
kId
)),
kId
);
EXPECT_TRUE
(
var_id_set
->
count
(
kId
)
==
0
);
// NOLINT
EXPECT_TRUE
(
type_index_set
->
count
(
actual_type
)
==
0
);
// NOLINT
var_id_set
->
insert
(
kId
);
type_index_set
->
insert
(
std
::
type_index
(
typeid
(
Type
)));
TypeIndexChecker
<
kPos
+
1
,
kEnd
,
kPos
+
1
==
kEnd
>::
Check
(
var_id_set
,
type_index_set
);
}
};
template
<
int
kPos
,
int
kEnd
>
struct
TypeIndexChecker
<
kPos
,
kEnd
,
true
>
{
template
<
typename
SetType1
,
typename
SetType2
>
static
void
Check
(
SetType1
*
,
SetType2
*
)
{}
};
TEST
(
var_type_traits
,
check_no_duplicate_registry
)
{
constexpr
size_t
kRegisteredNum
=
VarTypeRegistry
::
kRegisteredTypeNum
;
std
::
unordered_set
<
int
>
var_id_set
;
std
::
unordered_set
<
std
::
type_index
>
type_index_set
;
TypeIndexChecker
<
0
,
kRegisteredNum
,
kRegisteredNum
==
0
>::
Check
(
&
var_id_set
,
&
type_index_set
);
}
template
<
typename
T
>
bool
CheckVarId
(
int
proto_id
)
{
static_assert
(
std
::
is_same
<
typename
VarTypeTrait
<
T
>::
Type
,
T
>::
value
,
"Type must be the same"
);
return
VarTypeTrait
<
T
>::
kId
==
proto_id
;
}
TEST
(
var_type_traits
,
check_proto_type_id
)
{
ASSERT_TRUE
(
CheckVarId
<
LoDTensor
>
(
proto
::
VarType
::
LOD_TENSOR
));
ASSERT_TRUE
(
CheckVarId
<
SelectedRows
>
(
proto
::
VarType
::
SELECTED_ROWS
));
ASSERT_TRUE
(
CheckVarId
<
std
::
vector
<
Scope
*>>
(
proto
::
VarType
::
STEP_SCOPES
));
ASSERT_TRUE
(
CheckVarId
<
LoDRankTable
>
(
proto
::
VarType
::
LOD_RANK_TABLE
));
ASSERT_TRUE
(
CheckVarId
<
LoDTensorArray
>
(
proto
::
VarType
::
LOD_TENSOR_ARRAY
));
ASSERT_TRUE
(
CheckVarId
<
platform
::
PlaceList
>
(
proto
::
VarType
::
PLACE_LIST
));
ASSERT_TRUE
(
CheckVarId
<
ReaderHolder
>
(
proto
::
VarType
::
READER
));
ASSERT_TRUE
(
CheckVarId
<
int
>
(
proto
::
VarType
::
INT32
));
ASSERT_TRUE
(
CheckVarId
<
float
>
(
proto
::
VarType
::
FP32
));
ASSERT_EQ
(
proto
::
VarType_Type_LOD_TENSOR
,
proto
::
VarType
::
LOD_TENSOR
);
ASSERT_EQ
(
proto
::
VarType_Type_SELECTED_ROWS
,
proto
::
VarType
::
SELECTED_ROWS
);
ASSERT_EQ
(
proto
::
VarType_Type_STEP_SCOPES
,
proto
::
VarType
::
STEP_SCOPES
);
ASSERT_EQ
(
proto
::
VarType_Type_LOD_RANK_TABLE
,
proto
::
VarType
::
LOD_RANK_TABLE
);
ASSERT_EQ
(
proto
::
VarType_Type_LOD_TENSOR_ARRAY
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
);
ASSERT_EQ
(
proto
::
VarType_Type_PLACE_LIST
,
proto
::
VarType
::
PLACE_LIST
);
ASSERT_EQ
(
proto
::
VarType_Type_READER
,
proto
::
VarType
::
READER
);
ASSERT_EQ
(
proto
::
VarType_Type_FEED_MINIBATCH
,
proto
::
VarType
::
FEED_MINIBATCH
);
ASSERT_EQ
(
proto
::
VarType_Type_FETCH_LIST
,
proto
::
VarType
::
FETCH_LIST
);
ASSERT_EQ
(
proto
::
VarType_Type_RAW
,
proto
::
VarType
::
RAW
);
ASSERT_EQ
(
proto
::
VarType_Type_TUPLE
,
proto
::
VarType
::
TUPLE
);
ASSERT_EQ
(
proto
::
VarType_Type_INT32
,
proto
::
VarType
::
INT32
);
ASSERT_EQ
(
proto
::
VarType_Type_FP32
,
proto
::
VarType
::
FP32
);
}
TEST
(
var_type_traits
,
test_registry
)
{
using
Registry
=
detail
::
VarTypeRegistryImpl
<
int8_t
,
int32_t
,
size_t
,
double
>
;
ASSERT_TRUE
(
Registry
::
TypePos
<
int8_t
>
()
==
0
);
ASSERT_TRUE
(
Registry
::
TypePos
<
int32_t
>
()
==
1
);
ASSERT_TRUE
(
Registry
::
TypePos
<
size_t
>
()
==
2
);
ASSERT_TRUE
(
Registry
::
TypePos
<
double
>
()
==
3
);
ASSERT_TRUE
(
Registry
::
TypePos
<
float
>
()
==
-
1
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/variable.h
浏览文件 @
4743c9cd
...
@@ -18,7 +18,7 @@
...
@@ -18,7 +18,7 @@
#include <typeindex>
#include <typeindex>
#include <typeinfo>
#include <typeinfo>
#include "paddle/fluid/
platform/enforce
.h"
#include "paddle/fluid/
framework/var_type_traits
.h"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -27,10 +27,14 @@ class Variable {
...
@@ -27,10 +27,14 @@ class Variable {
public:
public:
template
<
typename
T
>
template
<
typename
T
>
const
T
&
Get
()
const
{
const
T
&
Get
()
const
{
static_assert
(
IsRegisteredVarType
<
T
>
(),
"Not registered type. Please register T inside var_type_traits.h"
);
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Variable must hold some thing"
);
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Variable must hold some thing"
);
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
return
*
static_cast
<
const
T
*>
(
holder_
->
Ptr
());
return
*
static_cast
<
const
T
*>
(
holder_
->
Ptr
());
}
}
...
@@ -39,61 +43,61 @@ class Variable {
...
@@ -39,61 +43,61 @@ class Variable {
template
<
typename
T
>
template
<
typename
T
>
T
*
GetMutable
()
{
T
*
GetMutable
()
{
if
(
!
holder_
)
{
if
(
!
holder_
)
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
(
new
T
()
));
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
());
}
else
{
}
else
{
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
}
}
return
static_cast
<
T
*>
(
holder_
->
Ptr
());
return
static_cast
<
T
*>
(
holder_
->
Ptr
());
}
}
template
<
typename
T
>
template
<
typename
T
>
bool
IsType
()
const
{
bool
IsType
()
const
{
return
holder_
!=
nullptr
&&
return
holder_
&&
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
;
std
::
type_index
(
typeid
(
T
))
==
std
::
type_index
(
holder_
->
Type
());
}
}
void
Clear
()
{
holder_
.
reset
();
}
void
Clear
()
{
holder_
.
reset
();
}
std
::
type_index
Type
()
const
{
int
Type
()
const
{
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Must hold memory"
);
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Must hold memory"
);
return
holder_
->
Type
();
return
holder_
->
Type
();
}
}
private:
private:
struct
Placeholder
{
struct
Placeholder
{
virtual
~
Placeholder
()
{}
virtual
~
Placeholder
()
=
default
;
virtual
const
std
::
type_info
&
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
inline
int
Type
()
const
{
return
type_
;
}
inline
const
void
*
Ptr
()
const
{
return
ptr_
;
}
inline
void
*
Ptr
()
{
return
ptr_
;
}
protected:
inline
void
Init
(
void
*
p
,
int
type
)
{
ptr_
=
p
;
type_
=
type
;
}
void
*
ptr_
;
int
type_
;
};
};
// Placeholder hides type T, so it doesn't appear as a template
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
// parameter of Variable.
template
<
typename
T
>
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
struct
PlaceholderImpl
:
public
Placeholder
{
explicit
PlaceholderImpl
(
T
*
ptr
)
:
ptr_
(
ptr
),
type_
(
typeid
(
T
))
{}
static_assert
(
IsRegisteredVarType
<
T
>
(),
virtual
const
std
::
type_info
&
Type
()
const
{
return
type_
;
}
"Not registered type. Please register T inside var_type_traits.h"
);
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
ptr_
.
get
()
);
}
PlaceholderImpl
()
{
this
->
Init
(
&
obj_
,
VarTypeTrait
<
T
>::
kId
);
}
std
::
unique_ptr
<
T
>
ptr_
;
private:
const
std
::
type_info
&
type
_
;
T
obj
_
;
};
};
std
::
unique_ptr
<
Placeholder
>
// pointers to a PlaceholderImpl object indeed.
holder_
;
// pointers to a PlaceholderImpl object indeed.
std
::
unique_ptr
<
Placeholder
>
holder_
;
// name_ is only meaningful with a Scope and accessible by it.
//
// NOTE: Please don't expose name_ by adding methods like
// Variable::Name or Scope::VarName! A variable could have a human
// readable name or an auto-generated scope-unique name. In the
// former case, the caller knows the name and doesn't need to access
// the name; in the latter case, the variable should be identified
// by its address but not the unreadable name.
friend
class
Scope
;
const
std
::
string
*
name_
;
};
};
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/framework/variable_test.cc
浏览文件 @
4743c9cd
...
@@ -16,27 +16,28 @@
...
@@ -16,27 +16,28 @@
#include <string>
#include <string>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable.h"
TEST
(
Variable
,
GetMutable
)
{
namespace
paddle
{
using
paddle
::
framework
::
Variable
;
namespace
framework
{
struct
Tensor
{
int
content_
;
};
TEST
(
Variable
,
GetMutable
)
{
std
::
unique_ptr
<
Variable
>
v
(
new
Variable
());
std
::
unique_ptr
<
Variable
>
v
(
new
Variable
());
Tensor
*
t
=
v
->
GetMutable
<
Tensor
>
();
auto
*
t
=
v
->
GetMutable
<
std
::
string
>
();
t
->
content_
=
1234
;
*
t
=
"1234"
;
const
Tensor
&
tt
=
v
->
Get
<
Tensor
>
();
const
auto
&
tt
=
v
->
Get
<
std
::
string
>
();
EXPECT_EQ
(
1234
,
tt
.
content_
);
EXPECT_EQ
(
"1234"
,
tt
);
try
{
try
{
v
->
GetMutable
<
std
::
string
>
();
v
->
GetMutable
<
Tensor
>
();
}
catch
(
std
::
exception
&
e
)
{
}
catch
(
std
::
exception
&
e
)
{
return
;
return
;
}
}
EXPECT_TRUE
(
false
);
EXPECT_TRUE
(
false
);
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
4743c9cd
...
@@ -69,17 +69,17 @@ void TestWord2vecPrediction(const std::string& model_path) {
...
@@ -69,17 +69,17 @@ void TestWord2vecPrediction(const std::string& model_path) {
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs
;
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
_EQ
(
outputs
.
size
(),
1UL
);
// Check the output buffer size and result of each tid.
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
PADDLE_ENFORCE
_EQ
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
static_cast
<
size_t
>
(
5UL
),
num_elements
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
static_cast
<
size_t
>
(
5UL
),
num_elements
);
i
++
)
{
i
++
)
{
LOG
(
INFO
)
<<
"data: "
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
]
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())
[
i
];
<<
" result: "
<<
result
[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
result
[
i
]);
}
}
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
4743c9cd
...
@@ -127,6 +127,7 @@ struct Argument {
...
@@ -127,6 +127,7 @@ struct Argument {
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
DECL_ARGUMENT_FIELD
(
tensorrt_max_batch_size
,
TensorRtMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_max_batch_size
,
TensorRtMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_workspace_size
,
TensorRtWorkspaceSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_workspace_size
,
TensorRtWorkspaceSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_min_subgraph_size
,
TensorRtMinSubgraphSize
,
int
);
// The program transformed by IR analysis phase.
// The program transformed by IR analysis phase.
DECL_ARGUMENT_UNIQUE_FIELD
(
ir_analyzed_program
,
IrAnalyzedProgram
,
DECL_ARGUMENT_UNIQUE_FIELD
(
ir_analyzed_program
,
IrAnalyzedProgram
,
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
4743c9cd
...
@@ -75,6 +75,8 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -75,6 +75,8 @@ void IRPassManager::CreatePasses(Argument *argument,
argument
->
tensorrt_node_teller_ptr
());
argument
->
tensorrt_node_teller_ptr
());
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
pass
->
Set
(
"min_subgraph_size"
,
new
int
(
argument
->
tensorrt_min_subgraph_size
()));
}
}
// graph_ = pass->Apply(std::move(graph_));
// graph_ = pass->Apply(std::move(graph_));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
4743c9cd
...
@@ -12,12 +12,14 @@
...
@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include
"paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
#include
<algorithm>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -36,7 +38,8 @@ std::unique_ptr<framework::ir::Graph> analysis::TensorRtSubgraphPass::ApplyImpl(
...
@@ -36,7 +38,8 @@ std::unique_ptr<framework::ir::Graph> analysis::TensorRtSubgraphPass::ApplyImpl(
auto
teller
=
auto
teller
=
Get
<
SubgraphDetector
::
NodeInsideSubgraphTeller
>
(
"tensorrt_node_teller"
);
Get
<
SubgraphDetector
::
NodeInsideSubgraphTeller
>
(
"tensorrt_node_teller"
);
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
2
/*min subgraph size*/
);
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
Get
<
int
>
(
"min_subgraph_size"
)
/*min subgraph size*/
);
fuser
();
fuser
();
for
(
auto
*
node
:
graph
->
Nodes
())
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
...
@@ -197,10 +200,26 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
...
@@ -197,10 +200,26 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
std
::
vector
<
std
::
string
>
ExtractParameters
(
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
unordered_set
<
Node
*>
&
nodes
)
{
const
std
::
unordered_set
<
Node
*>
&
nodes
)
{
// We can judge whether a variable is a parameter by
// its presistable property, but sometimes the presistable
// of the feed op output is true, so we have to identify it.
std
::
vector
<
std
::
string
>
feed_outputs
;
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsOp
())
continue
;
std
::
string
op_type
=
node
->
Op
()
->
Type
();
if
(
op_type
==
"feed"
)
{
std
::
vector
<
std
::
string
>
output_names
=
node
->
Op
()
->
OutputArgumentNames
();
std
::
copy
(
output_names
.
begin
(),
output_names
.
end
(),
std
::
back_inserter
(
feed_outputs
));
}
}
std
::
vector
<
std
::
string
>
parameters
;
std
::
vector
<
std
::
string
>
parameters
;
for
(
const
auto
&
node
:
nodes
)
{
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsVar
())
continue
;
if
(
!
node
->
IsVar
())
continue
;
if
(
node
->
Var
()
->
Persistable
())
{
if
(
node
->
Var
()
->
Persistable
()
&&
std
::
find
(
feed_outputs
.
begin
(),
feed_outputs
.
end
(),
node
->
Name
())
==
feed_outputs
.
end
())
{
parameters
.
push_back
(
node
->
Name
());
parameters
.
push_back
(
node
->
Name
());
}
}
}
}
...
@@ -215,4 +234,5 @@ REGISTER_PASS(tensorrt_subgraph_pass,
...
@@ -215,4 +234,5 @@ REGISTER_PASS(tensorrt_subgraph_pass,
paddle
::
inference
::
analysis
::
TensorRtSubgraphPass
)
paddle
::
inference
::
analysis
::
TensorRtSubgraphPass
)
.
RequirePassAttr
(
"tensorrt_node_teller"
)
.
RequirePassAttr
(
"tensorrt_node_teller"
)
.
RequirePassAttr
(
"max_batch_size"
)
.
RequirePassAttr
(
"max_batch_size"
)
.
RequirePassAttr
(
"workspace_size"
);
.
RequirePassAttr
(
"workspace_size"
)
.
RequirePassAttr
(
"min_subgraph_size"
);
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
4743c9cd
...
@@ -57,6 +57,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
...
@@ -57,6 +57,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
use_tensorrt_
=
other
.
use_tensorrt_
;
use_tensorrt_
=
other
.
use_tensorrt_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_min_subgraph_size_
=
other
.
tensorrt_min_subgraph_size_
;
model_from_memory_
=
other
.
model_from_memory_
;
model_from_memory_
=
other
.
model_from_memory_
;
if
(
use_gpu
)
{
if
(
use_gpu
)
{
...
@@ -89,6 +90,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
...
@@ -89,6 +90,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
use_tensorrt_
=
other
.
use_tensorrt_
;
use_tensorrt_
=
other
.
use_tensorrt_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_min_subgraph_size_
=
other
.
tensorrt_min_subgraph_size_
;
model_from_memory_
=
other
.
model_from_memory_
;
model_from_memory_
=
other
.
model_from_memory_
;
pass_builder_
=
std
::
move
(
other
.
pass_builder_
);
pass_builder_
=
std
::
move
(
other
.
pass_builder_
);
...
@@ -105,12 +107,14 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
...
@@ -105,12 +107,14 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
}
}
void
contrib
::
AnalysisConfig
::
EnableTensorRtEngine
(
int
workspace_size
,
void
contrib
::
AnalysisConfig
::
EnableTensorRtEngine
(
int
workspace_size
,
int
max_batch_size
)
{
int
max_batch_size
,
int
min_subgraph_size
)
{
use_tensorrt_
=
true
;
use_tensorrt_
=
true
;
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
// Append after the infer_clean pass.
tensorrt_min_subgraph_size_
=
min_subgraph_size
;
pass_builder
()
->
InsertPass
(
1
,
"tensorrt_subgraph_pass"
);
// Append after the conv+affine_channel fuse pass.
pass_builder
()
->
InsertPass
(
3
,
"tensorrt_subgraph_pass"
);
}
}
void
contrib
::
AnalysisConfig
::
SetModelBuffer
(
const
char
*
prog_buffer
,
void
contrib
::
AnalysisConfig
::
SetModelBuffer
(
const
char
*
prog_buffer
,
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
4743c9cd
...
@@ -328,6 +328,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
...
@@ -328,6 +328,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
argument_
.
SetUseTensorRT
(
true
);
argument_
.
SetUseTensorRT
(
true
);
argument_
.
SetTensorRtWorkspaceSize
(
config_
.
tensorrt_workspace_size_
);
argument_
.
SetTensorRtWorkspaceSize
(
config_
.
tensorrt_workspace_size_
);
argument_
.
SetTensorRtMaxBatchSize
(
config_
.
tensorrt_max_batchsize_
);
argument_
.
SetTensorRtMaxBatchSize
(
config_
.
tensorrt_max_batchsize_
);
argument_
.
SetTensorRtMinSubgraphSize
(
config_
.
tensorrt_min_subgraph_size_
);
}
}
if
(
config_
.
use_mkldnn_
)
{
if
(
config_
.
use_mkldnn_
)
{
...
...
paddle/fluid/inference/api/details/reset_tensor_array.cc
浏览文件 @
4743c9cd
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// TODO(Superjomn) should avoid the case when a TensorArray is a
// TODO(Superjomn) should avoid the case when a TensorArray is a
// parameter.
// parameter.
if
(
var_name
==
"feed"
||
var_name
==
"fetch"
)
continue
;
if
(
var_name
==
"feed"
||
var_name
==
"fetch"
)
continue
;
if
(
var
->
Type
()
==
typeid
(
framework
::
LoDTensorArray
))
{
if
(
var
->
IsType
<
framework
::
LoDTensorArray
>
(
))
{
VLOG
(
4
)
<<
"collect "
<<
var_name
;
VLOG
(
4
)
<<
"collect "
<<
var_name
;
arrays_
.
push_back
(
var
->
GetMutable
<
framework
::
LoDTensorArray
>
());
arrays_
.
push_back
(
var
->
GetMutable
<
framework
::
LoDTensorArray
>
());
}
}
...
...
paddle/fluid/inference/api/details/reset_tensor_array.h
浏览文件 @
4743c9cd
...
@@ -27,8 +27,11 @@ namespace details {
...
@@ -27,8 +27,11 @@ namespace details {
// training phase.
// training phase.
struct
TensorArrayBatchCleaner
{
struct
TensorArrayBatchCleaner
{
TensorArrayBatchCleaner
()
{
TensorArrayBatchCleaner
()
{
valid_types_
.
insert
(
typeid
(
framework
::
Tensor
));
constexpr
auto
kTensorId
=
framework
::
VarTypeTrait
<
framework
::
Tensor
>::
kId
;
valid_types_
.
insert
(
typeid
(
framework
::
LoDTensor
));
constexpr
auto
kLoDTensorId
=
framework
::
VarTypeTrait
<
framework
::
LoDTensor
>::
kId
;
valid_types_
.
insert
(
kTensorId
);
valid_types_
.
insert
(
kLoDTensorId
);
}
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
// bool(trick), because some of them are containers, and some operators just
...
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
...
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
bool
no_tensor_flag_
{
true
};
bool
no_tensor_flag_
{
true
};
std
::
vector
<
framework
::
LoDTensorArray
*>
arrays_
;
std
::
vector
<
framework
::
LoDTensorArray
*>
arrays_
;
std
::
unordered_set
<
std
::
type_index
>
valid_types_
;
std
::
unordered_set
<
int
>
valid_types_
;
std
::
unordered_set
<
framework
::
Variable
*>
no_tensor_vars_
;
std
::
unordered_set
<
framework
::
Variable
*>
no_tensor_vars_
;
};
};
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
4743c9cd
...
@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
...
@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
}
}
template
<
typename
T
>
static
void
TensorAssignData
(
PaddleTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
,
const
std
::
vector
<
size_t
>
&
lod
)
{
int
size
=
lod
[
lod
.
size
()
-
1
];
tensor
->
shape
.
assign
({
size
,
1
});
tensor
->
lod
.
assign
({
lod
});
TensorAssignData
(
tensor
,
data
);
}
template
<
typename
T
>
template
<
typename
T
>
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
4743c9cd
...
@@ -49,7 +49,7 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -49,7 +49,7 @@ struct AnalysisConfig : public NativeConfig {
bool
use_feed_fetch_ops
{
true
};
bool
use_feed_fetch_ops
{
true
};
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
int
max_batch_size
=
1
);
int
max_batch_size
=
1
,
int
min_subgraph_size
=
3
);
bool
use_tensorrt
()
const
{
return
use_tensorrt_
;
}
bool
use_tensorrt
()
const
{
return
use_tensorrt_
;
}
void
EnableMKLDNN
();
void
EnableMKLDNN
();
...
@@ -69,8 +69,19 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -69,8 +69,19 @@ struct AnalysisConfig : public NativeConfig {
bool
use_tensorrt_
{
false
};
bool
use_tensorrt_
{
false
};
bool
use_mkldnn_
{
false
};
bool
use_mkldnn_
{
false
};
std
::
unordered_set
<
std
::
string
>
mkldnn_enabled_op_types_
;
std
::
unordered_set
<
std
::
string
>
mkldnn_enabled_op_types_
;
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int
tensorrt_workspace_size_
;
int
tensorrt_workspace_size_
;
// While TensorRT allows an engine optimized for a given max batch size
// to run at any smaller size, the performance for those smaller
// sizes may not be as well-optimized. Therefore, Max batch is best
// equivalent to the runtime batch size.
int
tensorrt_max_batchsize_
;
int
tensorrt_max_batchsize_
;
// We transform the Ops that can be converted into TRT layer in the model,
// and aggregate these Ops into subgraphs for TRT execution.
// We set this variable to control the minimum number of nodes in the
// subgraph, 3 as default value.
int
tensorrt_min_subgraph_size_
{
3
};
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
model_from_memory_
{
false
};
bool
model_from_memory_
{
false
};
};
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
4743c9cd
...
@@ -119,6 +119,8 @@ class GpuPassStrategy : public PassStrategy {
...
@@ -119,6 +119,8 @@ class GpuPassStrategy : public PassStrategy {
GpuPassStrategy
()
:
PassStrategy
({})
{
GpuPassStrategy
()
:
PassStrategy
({})
{
passes_
.
assign
({
passes_
.
assign
({
"infer_clean_graph_pass"
,
//
"infer_clean_graph_pass"
,
//
"conv_affine_channel_fuse_pass"
,
//
"conv_eltwiseadd_affine_channel_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_elementwise_add_act_fuse_pass"
,
//
"conv_elementwise_add_act_fuse_pass"
,
//
"conv_elementwise_add2_act_fuse_pass"
,
//
"conv_elementwise_add2_act_fuse_pass"
,
//
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
4743c9cd
...
@@ -108,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
...
@@ -108,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
inference_analysis_api_test_with_fake_data
(
test_analyzer_resnet50
inference_analysis_api_test_with_fake_data
(
test_analyzer_resnet50
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/resnet50"
analyzer_resnet50_tester.cc
"resnet50_model.tar.gz"
)
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/resnet50"
analyzer_resnet50_tester.cc
"resnet50_model.tar.gz"
)
# seq_pool1
inference_analysis_api_test_with_fake_data
(
test_analyzer_seq_pool1
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/seq_pool1"
analyzer_seq_pool1_tester.cc
"seq_pool1.tar.gz"
)
# mobilenet with depthwise_conv op
# mobilenet with depthwise_conv op
inference_analysis_api_test_with_fake_data
(
test_analyzer_mobilenet_depthwise_conv
inference_analysis_api_test_with_fake_data
(
test_analyzer_mobilenet_depthwise_conv
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/mobilenet_depthwise_conv"
analyzer_resnet50_tester.cc
"mobilenet_model.tar.gz"
)
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/mobilenet_depthwise_conv"
analyzer_resnet50_tester.cc
"mobilenet_model.tar.gz"
)
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
4743c9cd
...
@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
auto
one_batch
=
data
->
NextBatch
();
auto
one_batch
=
data
->
NextBatch
();
PaddleTensor
input_tensor
;
PaddleTensor
input_tensor
;
input_tensor
.
name
=
"word"
;
input_tensor
.
name
=
"word"
;
input_tensor
.
shape
.
assign
({
static_cast
<
int
>
(
one_batch
.
data
.
size
()),
1
});
input_tensor
.
lod
.
assign
({
one_batch
.
lod
});
input_tensor
.
dtype
=
PaddleDType
::
INT64
;
input_tensor
.
dtype
=
PaddleDType
::
INT64
;
TensorAssignData
<
int64_t
>
(
&
input_tensor
,
{
one_batch
.
data
});
TensorAssignData
<
int64_t
>
(
&
input_tensor
,
{
one_batch
.
data
}
,
one_batch
.
lod
);
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
input_slots
->
assign
({
input_tensor
});
}
}
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
浏览文件 @
4743c9cd
...
@@ -19,11 +19,9 @@ namespace inference {
...
@@ -19,11 +19,9 @@ namespace inference {
using
contrib
::
AnalysisConfig
;
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
query
_data_all
,
title_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
query
,
title
;
std
::
vector
<
size_t
>
lod1
,
lod2
;
std
::
vector
<
size_t
>
lod1
,
lod2
;
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
:
batch_size
(
batch_size
)
{
...
@@ -33,22 +31,9 @@ struct DataRecord {
...
@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord
data
;
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
query_data_all
.
size
())
{
if
(
batch_end
<=
query
.
size
())
{
data
.
query_data_all
.
assign
(
query_data_all
.
begin
()
+
batch_iter
,
GetInputPerBatch
(
query
,
&
data
.
query
,
&
data
.
lod1
,
batch_iter
,
batch_end
);
query_data_all
.
begin
()
+
batch_end
);
GetInputPerBatch
(
title
,
&
data
.
title
,
&
data
.
lod2
,
batch_iter
,
batch_end
);
data
.
title_data_all
.
assign
(
title_data_all
.
begin
()
+
batch_iter
,
title_data_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
lod1
.
push_back
(
0
);
data
.
lod2
.
push_back
(
0
);
CHECK
(
!
data
.
query_data_all
.
empty
());
CHECK
(
!
data
.
title_data_all
.
empty
());
CHECK_EQ
(
data
.
query_data_all
.
size
(),
data
.
title_data_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
query_data_all
.
size
();
j
++
)
{
// calculate lod
data
.
lod1
.
push_back
(
data
.
lod1
.
back
()
+
data
.
query_data_all
[
j
].
size
());
data
.
lod2
.
push_back
(
data
.
lod2
.
back
()
+
data
.
title_data_all
[
j
].
size
());
}
}
}
batch_iter
+=
batch_size
;
batch_iter
+=
batch_size
;
return
data
;
return
data
;
...
@@ -67,8 +52,8 @@ struct DataRecord {
...
@@ -67,8 +52,8 @@ struct DataRecord {
// load title data
// load title data
std
::
vector
<
int64_t
>
title_data
;
std
::
vector
<
int64_t
>
title_data
;
split_to_int64
(
data
[
1
],
' '
,
&
title_data
);
split_to_int64
(
data
[
1
],
' '
,
&
title_data
);
query
_data_all
.
push_back
(
std
::
move
(
query_data
));
query
.
push_back
(
std
::
move
(
query_data
));
title
_data_all
.
push_back
(
std
::
move
(
title_data
));
title
.
push_back
(
std
::
move
(
title_data
));
}
}
num_samples
=
num_lines
;
num_samples
=
num_lines
;
}
}
...
@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_query_tensor
.
name
=
"left"
;
lod_query_tensor
.
name
=
"left"
;
lod_title_tensor
.
name
=
"right"
;
lod_title_tensor
.
name
=
"right"
;
auto
one_batch
=
data
->
NextBatch
();
auto
one_batch
=
data
->
NextBatch
();
int
size1
=
one_batch
.
lod1
[
one_batch
.
lod1
.
size
()
-
1
];
// token batch size
int
size2
=
one_batch
.
lod2
[
one_batch
.
lod2
.
size
()
-
1
];
// token batch size
lod_query_tensor
.
shape
.
assign
({
size1
,
1
});
lod_query_tensor
.
lod
.
assign
({
one_batch
.
lod1
});
lod_title_tensor
.
shape
.
assign
({
size2
,
1
});
lod_title_tensor
.
lod
.
assign
({
one_batch
.
lod2
});
// assign data
// assign data
TensorAssignData
<
int64_t
>
(
&
lod_query_tensor
,
one_batch
.
query
_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_query_tensor
,
one_batch
.
query
,
one_batch
.
lod1
);
TensorAssignData
<
int64_t
>
(
&
lod_title_tensor
,
one_batch
.
title
_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_title_tensor
,
one_batch
.
title
,
one_batch
.
lod2
);
// Set inputs.
// Set inputs.
input_slots
->
assign
({
lod_query_tensor
,
lod_title_tensor
});
input_slots
->
assign
({
lod_query_tensor
,
lod_title_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
4743c9cd
...
@@ -19,11 +19,9 @@ namespace inference {
...
@@ -19,11 +19,9 @@ namespace inference {
using
contrib
::
AnalysisConfig
;
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
word
_data_all
,
mention_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
word
,
mention
;
std
::
vector
<
size_t
>
lod
;
// two inputs have the same lod info.
std
::
vector
<
size_t
>
lod
;
// two inputs have the same lod info.
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
:
batch_size
(
batch_size
)
{
...
@@ -33,20 +31,10 @@ struct DataRecord {
...
@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord
data
;
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
word_data_all
.
size
())
{
if
(
batch_end
<=
word
.
size
())
{
data
.
word_data_all
.
assign
(
word_data_all
.
begin
()
+
batch_iter
,
GetInputPerBatch
(
word
,
&
data
.
word
,
&
data
.
lod
,
batch_iter
,
batch_end
);
word_data_all
.
begin
()
+
batch_end
);
GetInputPerBatch
(
mention
,
&
data
.
mention
,
&
data
.
lod
,
batch_iter
,
data
.
mention_data_all
.
assign
(
mention_data_all
.
begin
()
+
batch_iter
,
batch_end
);
mention_data_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
lod
.
push_back
(
0
);
CHECK
(
!
data
.
word_data_all
.
empty
());
CHECK
(
!
data
.
mention_data_all
.
empty
());
CHECK_EQ
(
data
.
word_data_all
.
size
(),
data
.
mention_data_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
word_data_all
.
size
();
j
++
)
{
// calculate lod
data
.
lod
.
push_back
(
data
.
lod
.
back
()
+
data
.
word_data_all
[
j
].
size
());
}
}
}
batch_iter
+=
batch_size
;
batch_iter
+=
batch_size
;
return
data
;
return
data
;
...
@@ -65,8 +53,8 @@ struct DataRecord {
...
@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data
// load mention data
std
::
vector
<
int64_t
>
mention_data
;
std
::
vector
<
int64_t
>
mention_data
;
split_to_int64
(
data
[
3
],
' '
,
&
mention_data
);
split_to_int64
(
data
[
3
],
' '
,
&
mention_data
);
word
_data_all
.
push_back
(
std
::
move
(
word_data
));
word
.
push_back
(
std
::
move
(
word_data
));
mention
_data_all
.
push_back
(
std
::
move
(
mention_data
));
mention
.
push_back
(
std
::
move
(
mention_data
));
}
}
num_samples
=
num_lines
;
num_samples
=
num_lines
;
}
}
...
@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_word_tensor
.
name
=
"word"
;
lod_word_tensor
.
name
=
"word"
;
lod_mention_tensor
.
name
=
"mention"
;
lod_mention_tensor
.
name
=
"mention"
;
auto
one_batch
=
data
->
NextBatch
();
auto
one_batch
=
data
->
NextBatch
();
int
size
=
one_batch
.
lod
[
one_batch
.
lod
.
size
()
-
1
];
// token batch size
lod_word_tensor
.
shape
.
assign
({
size
,
1
});
lod_word_tensor
.
lod
.
assign
({
one_batch
.
lod
});
lod_mention_tensor
.
shape
.
assign
({
size
,
1
});
lod_mention_tensor
.
lod
.
assign
({
one_batch
.
lod
});
// assign data
// assign data
TensorAssignData
<
int64_t
>
(
&
lod_word_tensor
,
one_batch
.
word_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_word_tensor
,
one_batch
.
word
,
one_batch
.
lod
);
TensorAssignData
<
int64_t
>
(
&
lod_mention_tensor
,
one_batch
.
mention_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_mention_tensor
,
one_batch
.
mention
,
one_batch
.
lod
);
// Set inputs.
// Set inputs.
input_slots
->
assign
({
lod_word_tensor
,
lod_mention_tensor
});
input_slots
->
assign
({
lod_word_tensor
,
lod_mention_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
4743c9cd
...
@@ -18,12 +18,9 @@ namespace paddle {
...
@@ -18,12 +18,9 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
struct
DataRecord
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
title1_all
,
title2_all
,
title3_all
,
l1_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
title1
,
title2
,
title3
,
l1
;
std
::
vector
<
std
::
vector
<
int64_t
>>
title1
,
title2
,
title3
,
l1
;
std
::
vector
<
size_t
>
title1_lod
,
title2_lod
,
title3_lod
,
l1_lod
;
std
::
vector
<
size_t
>
lod1
,
lod2
,
lod3
,
l1_lod
;
size_t
batch_iter
{
0
};
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
:
batch_size
(
batch_size
)
{
...
@@ -33,41 +30,11 @@ struct DataRecord {
...
@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord
data
;
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
title1_all
.
size
())
{
if
(
batch_end
<=
title1
.
size
())
{
data
.
title1_all
.
assign
(
title1_all
.
begin
()
+
batch_iter
,
GetInputPerBatch
(
title1
,
&
data
.
title1
,
&
data
.
lod1
,
batch_iter
,
batch_end
);
title1_all
.
begin
()
+
batch_end
);
GetInputPerBatch
(
title2
,
&
data
.
title2
,
&
data
.
lod2
,
batch_iter
,
batch_end
);
data
.
title2_all
.
assign
(
title2_all
.
begin
()
+
batch_iter
,
GetInputPerBatch
(
title3
,
&
data
.
title3
,
&
data
.
lod3
,
batch_iter
,
batch_end
);
title2_all
.
begin
()
+
batch_end
);
GetInputPerBatch
(
l1
,
&
data
.
l1
,
&
data
.
l1_lod
,
batch_iter
,
batch_end
);
data
.
title3_all
.
assign
(
title3_all
.
begin
()
+
batch_iter
,
title3_all
.
begin
()
+
batch_end
);
data
.
l1_all
.
assign
(
l1_all
.
begin
()
+
batch_iter
,
l1_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
title1_lod
.
push_back
(
0
);
data
.
title2_lod
.
push_back
(
0
);
data
.
title3_lod
.
push_back
(
0
);
data
.
l1_lod
.
push_back
(
0
);
CHECK
(
!
data
.
title1_all
.
empty
());
CHECK
(
!
data
.
title2_all
.
empty
());
CHECK
(
!
data
.
title3_all
.
empty
());
CHECK
(
!
data
.
l1_all
.
empty
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title2_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title3_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
l1_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
title1_all
.
size
();
j
++
)
{
data
.
title1
.
push_back
(
data
.
title1_all
[
j
]);
data
.
title2
.
push_back
(
data
.
title2_all
[
j
]);
data
.
title3
.
push_back
(
data
.
title3_all
[
j
]);
data
.
l1
.
push_back
(
data
.
l1_all
[
j
]);
// calculate lod
data
.
title1_lod
.
push_back
(
data
.
title1_lod
.
back
()
+
data
.
title1_all
[
j
].
size
());
data
.
title2_lod
.
push_back
(
data
.
title2_lod
.
back
()
+
data
.
title2_all
[
j
].
size
());
data
.
title3_lod
.
push_back
(
data
.
title3_lod
.
back
()
+
data
.
title3_all
[
j
].
size
());
data
.
l1_lod
.
push_back
(
data
.
l1_lod
.
back
()
+
data
.
l1_all
[
j
].
size
());
}
}
}
batch_iter
+=
batch_size
;
batch_iter
+=
batch_size
;
return
data
;
return
data
;
...
@@ -92,10 +59,10 @@ struct DataRecord {
...
@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data
// load l1 data
std
::
vector
<
int64_t
>
l1_data
;
std
::
vector
<
int64_t
>
l1_data
;
split_to_int64
(
data
[
3
],
' '
,
&
l1_data
);
split_to_int64
(
data
[
3
],
' '
,
&
l1_data
);
title1
_all
.
push_back
(
std
::
move
(
title1_data
));
title1
.
push_back
(
std
::
move
(
title1_data
));
title2
_all
.
push_back
(
std
::
move
(
title2_data
));
title2
.
push_back
(
std
::
move
(
title2_data
));
title3
_all
.
push_back
(
std
::
move
(
title3_data
));
title3
.
push_back
(
std
::
move
(
title3_data
));
l1
_all
.
push_back
(
std
::
move
(
l1_data
));
l1
.
push_back
(
std
::
move
(
l1_data
));
}
}
num_samples
=
num_lines
;
num_samples
=
num_lines
;
}
}
...
@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
...
@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
title3_tensor
.
name
=
"title3"
;
title3_tensor
.
name
=
"title3"
;
l1_tensor
.
name
=
"l1"
;
l1_tensor
.
name
=
"l1"
;
auto
one_batch
=
data
->
NextBatch
();
auto
one_batch
=
data
->
NextBatch
();
int
title1_size
=
one_batch
.
title1_lod
[
one_batch
.
title1_lod
.
size
()
-
1
];
title1_tensor
.
shape
.
assign
({
title1_size
,
1
});
title1_tensor
.
lod
.
assign
({
one_batch
.
title1_lod
});
int
title2_size
=
one_batch
.
title2_lod
[
one_batch
.
title2_lod
.
size
()
-
1
];
title2_tensor
.
shape
.
assign
({
title2_size
,
1
});
title2_tensor
.
lod
.
assign
({
one_batch
.
title2_lod
});
int
title3_size
=
one_batch
.
title3_lod
[
one_batch
.
title3_lod
.
size
()
-
1
];
title3_tensor
.
shape
.
assign
({
title3_size
,
1
});
title3_tensor
.
lod
.
assign
({
one_batch
.
title3_lod
});
int
l1_size
=
one_batch
.
l1_lod
[
one_batch
.
l1_lod
.
size
()
-
1
];
l1_tensor
.
shape
.
assign
({
l1_size
,
1
});
l1_tensor
.
lod
.
assign
({
one_batch
.
l1_lod
});
// assign data
// assign data
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
);
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
,
one_batch
.
lod1
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
,
one_batch
.
lod2
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
,
one_batch
.
lod3
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
,
one_batch
.
l1_lod
);
// Set inputs.
// Set inputs.
input_slots
->
assign
({
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
});
input_slots
->
assign
({
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
0 → 100644
浏览文件 @
4743c9cd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
param_file
=
FLAGS_infer_model
+
"/params"
;
cfg
->
prog_file
=
FLAGS_infer_model
+
"/model"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
specify_input_name
=
true
;
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_paddle_num_threads
);
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
std
::
vector
<
std
::
string
>
feed_names
=
{
"slot10000_embed"
,
"slot10001_embed"
,
"slot10004_embed"
,
"slot10005_embed"
,
"slot10008_embed"
,
"slot10009_embed"
,
"slot10012_embed"
,
"slot10013_embed"
,
"slot10108_embed"
,
"slot13324_embed"
,
"slot13325_embed"
,
"slot13326_embed"
,
"slot13327_embed"
,
"slot13328_embed"
,
"slot13329_embed"
,
"slot13330_embed"
,
"slot13331_embed"
,
"slot15501_embed"
,
"slot15502_embed"
,
"slot15503_embed"
,
"slot15504_embed"
,
"slot15505_embed"
,
"slot15506_embed"
,
"slot15507_embed"
,
"slot15508_embed"
,
"slot15516_embed"
,
"slot15519_embed"
,
"slot15523_embed"
,
"slot15531_embed"
,
"slot15533_embed"
,
"slot15548_embed"
,
"slot15564_embed"
,
"slot15565_embed"
,
"slot15566_embed"
,
"slot15570_embed"
,
"slot15571_embed"
,
"slot15572_embed"
,
"slot15573_embed"
,
"slot15574_embed"
,
"slot15575_embed"
,
"slot15576_embed"
,
"slot15577_embed"
,
"slot15579_embed"
,
"slot15581_embed"
,
"slot15582_embed"
,
"slot15583_embed"
,
"slot15584_embed"
,
"slot5016_embed"
,
"slot5021_embed"
,
"slot6002_embed"
,
"slot6003_embed"
,
"slot6004_embed"
,
"slot6005_embed"
,
"slot6006_embed"
,
"slot6007_embed"
,
"slot6008_embed"
,
"slot6009_embed"
,
"slot6011_embed"
,
"slot6014_embed"
,
"slot6015_embed"
,
"slot6023_embed"
,
"slot6024_embed"
,
"slot6025_embed"
,
"slot6027_embed"
,
"slot6029_embed"
,
"slot6031_embed"
,
"slot6034_embed"
,
"slot6035_embed"
,
"slot6036_embed"
,
"slot6037_embed"
,
"slot6039_embed"
,
"slot6048_embed"
,
"slot6050_embed"
,
"slot6058_embed"
,
"slot6059_embed"
,
"slot6060_embed"
,
"slot6066_embed"
,
"slot6067_embed"
,
"slot6068_embed"
,
"slot6069_embed"
,
"slot6070_embed"
,
"slot6071_embed"
,
"slot6072_embed"
,
"slot6073_embed"
,
"slot6182_embed"
,
"slot6183_embed"
,
"slot6184_embed"
,
"slot6185_embed"
,
"slot6186_embed"
,
"slot6188_embed"
,
"slot6189_embed"
,
"slot6190_embed"
,
"slot6201_embed"
,
"slot6202_embed"
,
"slot6203_embed"
,
"slot6247_embed"
,
"slot6248_embed"
,
"slot6250_embed"
,
"slot6251_embed"
,
"slot6807_embed"
,
"slot6808_embed"
,
"slot6809_embed"
,
"slot6810_embed"
,
"slot6811_embed"
,
"slot6812_embed"
,
"slot6813_embed"
,
"slot6814_embed"
,
"slot6815_embed"
,
"slot6816_embed"
,
"slot6817_embed"
,
"slot6818_embed"
,
"slot6819_embed"
,
"slot6820_embed"
,
"slot6822_embed"
,
"slot6823_embed"
,
"slot6826_embed"
,
"slot7002_embed"
,
"slot7003_embed"
,
"slot7004_embed"
,
"slot7005_embed"
,
"slot7006_embed"
,
"slot7008_embed"
,
"slot7009_embed"
,
"slot7010_embed"
,
"slot7011_embed"
,
"slot7013_embed"
,
"slot7014_embed"
,
"slot7015_embed"
,
"slot7016_embed"
,
"slot7017_embed"
,
"slot7019_embed"
,
"slot7100_embed"
,
"slot7506_embed"
,
"slot7507_embed"
,
"slot7514_embed"
,
"slot7515_embed"
,
"slot7516_embed"
};
SetFakeImageInput
(
inputs
,
FLAGS_infer_model
,
true
,
"model"
,
"params"
,
&
feed_names
);
}
// Easy for profiling independently.
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
TEST
(
Analyzer_seq_pool1
,
profile
)
{
profile
();
}
// Check the fuse status
TEST
(
Analyzer_seq_pool1
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
int
num_ops
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
LOG
(
INFO
)
<<
"num_ops: "
<<
num_ops
;
EXPECT_EQ
(
num_ops
,
314
);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
4743c9cd
...
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
...
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
const
std
::
string
&
dirname
,
bool
is_combined
=
true
,
const
std
::
string
&
dirname
,
bool
is_combined
=
true
,
std
::
string
model_filename
=
"model"
,
std
::
string
model_filename
=
"model"
,
std
::
string
params_filename
=
"params"
)
{
std
::
string
params_filename
=
"params"
,
const
std
::
vector
<
std
::
string
>
*
feed_names
=
nullptr
)
{
// Set fake_image_data
// Set fake_image_data
PADDLE_ENFORCE_EQ
(
FLAGS_test_all_data
,
0
,
"Only have single batch of data."
);
PADDLE_ENFORCE_EQ
(
FLAGS_test_all_data
,
0
,
"Only have single batch of data."
);
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
GetFeedTargetShapes
(
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
GetFeedTargetShapes
(
...
@@ -146,29 +147,47 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
...
@@ -146,29 +147,47 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
os
<<
"}
\n
"
;
os
<<
"}
\n
"
;
}
}
LOG
(
INFO
)
<<
os
.
str
();
LOG
(
INFO
)
<<
os
.
str
();
if
(
feed_names
)
{
int
dim1
=
feed_target_shapes
[
0
][
1
];
PADDLE_ENFORCE_EQ
(
feed_names
->
size
(),
feed_target_shapes
.
size
());
int
dim2
=
feed_target_shapes
[
0
][
2
];
}
int
dim3
=
feed_target_shapes
[
0
][
3
];
std
::
vector
<
PaddleTensor
>
input_slots
(
feed_target_shapes
.
size
());
for
(
size_t
i
=
0
;
i
<
feed_target_shapes
.
size
();
++
i
)
{
PaddleTensor
input
;
const
auto
&
feed_shape
=
feed_target_shapes
[
i
];
std
::
vector
<
int
>
shape
({
FLAGS_batch_size
,
dim1
,
dim2
,
dim3
});
auto
&
input
=
input_slots
[
i
];
std
::
vector
<
int
>
shape
({
FLAGS_batch_size
});
for
(
size_t
s
=
1
;
s
<
feed_shape
.
size
();
++
s
)
{
shape
.
push_back
(
static_cast
<
int
>
(
feed_shape
[
s
]));
}
if
(
feed_names
)
{
input
.
name
=
(
*
feed_names
)[
i
];
}
input
.
shape
=
shape
;
input
.
shape
=
shape
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
// fill input data, for profile easily, do not use random data here.
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size
=
FLAGS_batch_size
*
dim1
*
dim2
*
dim3
;
input
.
data
.
Resize
(
len
*
sizeof
(
float
))
;
input
.
data
.
Resize
(
size
*
sizeof
(
float
)
);
input
.
lod
.
assign
({{
0
,
static_cast
<
size_t
>
(
FLAGS_batch_size
)}}
);
float
*
input_data
=
static_cast
<
float
*>
(
input
.
data
.
data
());
float
*
input_data
=
static_cast
<
float
*>
(
input
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
// fill input data, for profile easily, do not use random data here.
*
(
input_data
+
i
)
=
static_cast
<
float
>
(
i
)
/
size
;
for
(
size_t
j
=
0
;
j
<
len
;
++
j
)
{
*
(
input_data
+
j
)
=
static_cast
<
float
>
(
j
)
/
len
;
}
}
}
std
::
vector
<
PaddleTensor
>
input_slots
;
input_slots
.
assign
({
input
});
(
*
inputs
).
emplace_back
(
input_slots
);
(
*
inputs
).
emplace_back
(
input_slots
);
}
}
void
GetInputPerBatch
(
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
in
,
std
::
vector
<
std
::
vector
<
int64_t
>>
*
out
,
std
::
vector
<
size_t
>
*
lod
,
size_t
batch_iter
,
size_t
batch_end
)
{
lod
->
clear
();
lod
->
push_back
(
0
);
for
(
auto
it
=
in
.
begin
()
+
batch_iter
;
it
<
in
.
begin
()
+
batch_end
;
it
++
)
{
out
->
push_back
(
*
it
);
lod
->
push_back
(
lod
->
back
()
+
(
*
it
).
size
());
// calculate lod
}
}
void
TestOneThreadPrediction
(
void
TestOneThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
...
...
paddle/fluid/inference/tests/test.cmake
浏览文件 @
4743c9cd
...
@@ -3,14 +3,16 @@ set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
...
@@ -3,14 +3,16 @@ set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
"A path setting inference demo download directories."
)
"A path setting inference demo download directories."
)
function
(
inference_download install_dir url filename
)
function
(
inference_download install_dir url filename
)
message
(
STATUS
"Download inference test stuff from
${
url
}
/
${
filename
}
"
)
message
(
STATUS
"Download inference test stuff from
${
url
}
/
${
filename
}
"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
install_dir
}
"
)
file
(
DOWNLOAD
"
${
url
}
/
${
filename
}
"
"
${
install_dir
}
/
${
filename
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
install_dir
}
&& wget -q
${
url
}
/
${
filename
}
"
)
message
(
STATUS
"finish downloading
${
filename
}
"
)
message
(
STATUS
"finish downloading
${
filename
}
"
)
endfunction
()
endfunction
()
function
(
inference_download_and_uncompress install_dir url filename
)
function
(
inference_download_and_uncompress install_dir url filename
)
inference_download
(
${
install_dir
}
${
url
}
${
filename
}
)
inference_download
(
${
install_dir
}
${
url
}
${
filename
}
)
execute_process
(
COMMAND bash -c
"cd
${
install_dir
}
&& tar xzf
${
filename
}
"
)
execute_process
(
COMMAND
${
CMAKE_COMMAND
}
-E tar xzf
${
install_dir
}
/
${
filename
}
WORKING_DIRECTORY
${
install_dir
}
)
endfunction
()
endfunction
()
set
(
WORD2VEC_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/word2vec"
)
set
(
WORD2VEC_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/word2vec"
)
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
4743c9cd
...
@@ -46,7 +46,7 @@ endif()
...
@@ -46,7 +46,7 @@ endif()
register_operators
(
EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
register_operators
(
EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cudnn 7 above
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
AND NOT WIN32
)
if
(
WITH_GPU
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
else
()
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
4743c9cd
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
}
else
{
}
else
{
PADDLE_THROW
(
"Unexpected branch, input variable type is %s"
,
PADDLE_THROW
(
"Unexpected branch, input variable type is %s"
,
in_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
in_var
->
Type
()
));
}
}
PADDLE_ENFORCE_NOT_NULL
(
input
);
PADDLE_ENFORCE_NOT_NULL
(
input
);
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
4743c9cd
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
auto
&
og_inside
=
auto
&
og_inside
=
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
),
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
),
"Cannot find inside gradient %s"
,
inside_og_name
);
"Cannot find inside gradient %s"
,
inside_og_name
);
if
(
framework
::
IsType
<
framework
::
LoDTensor
>
(
og_outside
.
Type
()
))
{
if
(
og_outside
.
IsType
<
framework
::
LoDTensor
>
(
))
{
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
auto
&
inside_tensor
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
ShareDataWith
(
outside_tensor
);
inside_tensor
.
ShareDataWith
(
outside_tensor
);
}
else
if
(
framework
::
IsType
<
framework
::
LoDTensorArray
>
(
}
else
if
(
og_outside
.
IsType
<
framework
::
LoDTensorArray
>
())
{
og_outside
.
Type
()))
{
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
var
->
IsType
<
LoDTensor
>
(),
var
->
IsType
<
LoDTensor
>
(),
"Currently the type of var only can be LoDTensorArray, "
"Currently the type of var only can be LoDTensorArray, "
"or LoDTensor, but the received var[%s] is %s."
,
"or LoDTensor, but the received var[%s] is %s."
,
inside_grad_name
,
var
->
Type
().
name
(
));
inside_grad_name
,
framework
::
ToTypeName
(
var
->
Type
()
));
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
...
...
paddle/fluid/operators/conv_fusion_op.cu.cc
浏览文件 @
4743c9cd
...
@@ -22,7 +22,7 @@ DECLARE_bool(cudnn_exhaustive_search);
...
@@ -22,7 +22,7 @@ DECLARE_bool(cudnn_exhaustive_search);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
#if CUDNN_VERSION >= 7
001
#if CUDNN_VERSION >= 7
100
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
using
ScopedTensorDescriptor
=
platform
::
ScopedTensorDescriptor
;
using
ScopedTensorDescriptor
=
platform
::
ScopedTensorDescriptor
;
using
ScopedFilterDescriptor
=
platform
::
ScopedFilterDescriptor
;
using
ScopedFilterDescriptor
=
platform
::
ScopedFilterDescriptor
;
...
@@ -161,9 +161,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
...
@@ -161,9 +161,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE
(
workspace_size_in_bytes
,
workspace_size_limit
,
PADDLE_ENFORCE_LE
(
workspace_size_in_bytes
,
workspace_size_limit
,
"workspace_size to be allocated exceeds the limit"
);
"workspace_size to be allocated exceeds the limit"
);
if
((
activation
==
"identity"
)
&&
if
((
activation
==
"identity"
)
&&
(
!
residual
))
{
(
algo
!=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
)
&&
(
!
residual
))
{
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
// But test in some case, the speed is slower, change to use
// But test in some case, the speed is slower, change to use
...
@@ -204,7 +202,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
...
@@ -204,7 +202,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
#if CUDNN_VERSION >= 7
001
#if CUDNN_VERSION >= 7
100
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
conv2d_fusion
,
ops
::
CUDNNConvFusionOpKernel
<
float
>
,
REGISTER_OP_CUDA_KERNEL
(
conv2d_fusion
,
ops
::
CUDNNConvFusionOpKernel
<
float
>
,
ops
::
CUDNNConvFusionOpKernel
<
double
>
);
ops
::
CUDNNConvFusionOpKernel
<
double
>
);
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
浏览文件 @
4743c9cd
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -22,239 +22,6 @@ namespace operators {
...
@@ -22,239 +22,6 @@ namespace operators {
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
Tensor
reserve_data_
;
Tensor
workspace_data_
;
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
framework
::
ExecutionContext
&
ctx
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
template
<
typename
T
>
template
<
typename
T
>
class
CudnnLSTMGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CudnnLSTMGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
auto
input_w_numel
=
w
->
numel
();
auto
input_w_numel
=
w
->
numel
();
auto
batch_size
=
x
->
dims
()[
1
];
auto
batch_size
=
x
->
dims
()[
1
];
cudnn_rnn_cache
->
init
(
handle
,
ctx
,
max_len
,
batch_size
,
input
_size
,
cudnn_rnn_cache
->
init
(
handle
,
ctx
.
GetPlace
(),
max_len
,
batch
_size
,
hidden_size
,
num_layers
,
dropout_prob
,
is_bidirec
,
input_size
,
hidden_size
,
num_layers
,
dropout_prob
,
seed
,
input_w_numel
);
is_bidirec
,
seed
,
input_w_numel
);
}
}
auto
run_seq_len
=
x
->
dims
()[
0
];
auto
run_seq_len
=
x
->
dims
()[
0
];
...
...
paddle/fluid/operators/cudnn_rnn_cache.h
0 → 100644
浏览文件 @
4743c9cd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
framework
::
Tensor
reserve_data_
;
framework
::
Tensor
workspace_data_
;
framework
::
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
platform
::
Place
&
place
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
place
);
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
place
);
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
place
);
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/cum_op.h
浏览文件 @
4743c9cd
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/detail/safe_ref.h
浏览文件 @
4743c9cd
...
@@ -25,7 +25,7 @@ namespace detail {
...
@@ -25,7 +25,7 @@ namespace detail {
*/
*/
template
<
typename
T
,
typename
...
ARGS
>
template
<
typename
T
,
typename
...
ARGS
>
inline
T
&
Ref
(
T
*
ptr
,
ARGS
&&
...
args
)
{
inline
T
&
Ref
(
T
*
ptr
,
ARGS
&&
...
args
)
{
PADDLE_ENFORCE
(
ptr
!=
nullptr
,
args
...
);
PADDLE_ENFORCE
(
ptr
!=
nullptr
,
::
paddle
::
string
::
Sprintf
(
args
...)
);
return
*
ptr
;
return
*
ptr
;
}
}
...
...
paddle/fluid/operators/distributed/proto_encoder_helper.h
浏览文件 @
4743c9cd
...
@@ -84,7 +84,9 @@ class ProtoEncodeHelper {
...
@@ -84,7 +84,9 @@ class ProtoEncodeHelper {
~
ProtoEncodeHelper
()
{
~
ProtoEncodeHelper
()
{
#define REPLACE_ENFORCE_GLOG 1
#define REPLACE_ENFORCE_GLOG 1
// Make sure callers didn't do operations that went over max_size promised
// Make sure callers didn't do operations that went over max_size promised
if
(
paddle
::
platform
::
is_error
(
p_
<=
limit_
))
{
paddle
::
platform
::
throw_on_error
(
p_
<=
limit_
);
paddle
::
platform
::
throw_on_error
(
p_
<=
limit_
);
}
#undef REPLACE_ENFORCE_GLOG
#undef REPLACE_ENFORCE_GLOG
}
}
...
...
paddle/fluid/operators/distributed_ops/CMakeLists.txt
浏览文件 @
4743c9cd
...
@@ -33,7 +33,7 @@ register_operators(EXCLUDES gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS})
...
@@ -33,7 +33,7 @@ register_operators(EXCLUDES gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS})
if
(
WITH_GPU AND NOT WIN32
)
if
(
WITH_GPU AND NOT WIN32
)
set
(
DISTRIBUTE_DEPS
${
DISTRIBUTE_DEPS
}
nccl_common
)
set
(
DISTRIBUTE_DEPS
${
DISTRIBUTE_DEPS
}
nccl_common
)
op_library
(
gen_nccl_id_op
${
DISTRIBUTE_DEPS
}
nccl_common
)
op_library
(
gen_nccl_id_op
DEPS
${
DISTRIBUTE_DEPS
}
nccl_common
)
endif
()
endif
()
set
(
OPERATOR_DEPS
${
OPERATOR_DEPS
}
${
DISTRIBUTE_DEPS
}
PARENT_SCOPE
)
set
(
OPERATOR_DEPS
${
OPERATOR_DEPS
}
${
DISTRIBUTE_DEPS
}
PARENT_SCOPE
)
...
...
paddle/fluid/operators/distributed_ops/split_ids_op.h
浏览文件 @
4743c9cd
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_THROW
(
"% should be LoDTensor or SelectedRows, but the received type is %s"
,
"% should be LoDTensor or SelectedRows, but the received type is %s"
,
ctx
.
Inputs
(
"Ids"
)[
0
],
ids_var
->
Type
().
name
(
));
ctx
.
Inputs
(
"Ids"
)[
0
],
framework
::
ToTypeName
(
ids_var
->
Type
()
));
}
}
}
}
};
};
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.h
浏览文件 @
4743c9cd
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
}
else
{
}
else
{
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
x_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_var
->
Type
()
));
}
}
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/lrn_mkldnn_op.cc
浏览文件 @
4743c9cd
...
@@ -50,8 +50,8 @@ template <typename T>
...
@@ -50,8 +50,8 @@ template <typename T>
class
LRNMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
LRNMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
std
::
is_same
<
T
,
float
>::
value
,
const
bool
is_float_type
=
std
::
is_same
<
T
,
float
>::
value
;
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
is_float_type
,
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"MKLDNN LRN must use CPUPlace."
);
"MKLDNN LRN must use CPUPlace."
);
...
@@ -132,8 +132,8 @@ template <typename T>
...
@@ -132,8 +132,8 @@ template <typename T>
class
LRNMKLDNNGradOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
LRNMKLDNNGradOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
std
::
is_same
<
T
,
float
>::
value
,
const
bool
is_float_type
=
std
::
is_same
<
T
,
float
>::
value
;
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
is_float_type
,
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"MKLDNN LRN must use CPUPlace."
);
"MKLDNN LRN must use CPUPlace."
);
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
...
...
paddle/fluid/operators/optimizers/adadelta_op.h
浏览文件 @
4743c9cd
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
avg_squared_grad_out_tensor
=
auto
avg_squared_grad_out_tensor
=
...
...
paddle/fluid/operators/optimizers/adagrad_op.h
浏览文件 @
4743c9cd
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/adam_op.h
浏览文件 @
4743c9cd
...
@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
...
@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
detail
::
Ref
;
using
paddle
::
operators
::
detail
::
Ref
;
...
...
paddle/fluid/operators/optimizers/adamax_op.h
浏览文件 @
4743c9cd
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
浏览文件 @
4743c9cd
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
4743c9cd
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
*
param_out
=
ctx
.
Output
<
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
Tensor
>
(
"ParamOut"
);
auto
*
sq_accum_out
=
ctx
.
Output
<
Tensor
>
(
"SquaredAccumOut"
);
auto
*
sq_accum_out
=
ctx
.
Output
<
Tensor
>
(
"SquaredAccumOut"
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
4743c9cd
...
@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
...
@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
PADDLE_THROW
(
PADDLE_THROW
(
string
::
Sprintf
(
"MomentumOp only supports LoDTensor or SelectedRows "
string
::
Sprintf
(
"MomentumOp only supports LoDTensor or SelectedRows "
"gradient, but the received Variable Type is %s"
,
"gradient, but the received Variable Type is %s"
,
grad_var
->
Type
().
name
(
)));
framework
::
ToTypeName
(
grad_var
->
Type
()
)));
}
}
}
}
};
};
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
4743c9cd
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
...
...
paddle/fluid/operators/split_lod_tensor_op.cc
浏览文件 @
4743c9cd
...
@@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
...
@@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
}
}
auto
*
mask_data
=
cpu_mask
->
data
<
bool
>
();
auto
*
mask_data
=
cpu_mask
->
data
<
bool
>
();
std
::
vector
<
std
::
vector
<
CopyRange
>>
copy_ranges
(
mask_dim
[
0
]
);
std
::
vector
<
std
::
vector
<
CopyRange
>>
copy_ranges
(
2
);
// set out_true/out_false lod
// set out_true/out_false lod
for
(
size_t
t
=
0
;
t
<
2
;
t
++
)
{
for
(
size_t
t
=
0
;
t
<
2
;
t
++
)
{
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
浏览文件 @
4743c9cd
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
}
}
};
};
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
4743c9cd
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
}
}
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
x_vars
[
0
]
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_vars
[
0
]
->
Type
()
));
}
}
};
};
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
4743c9cd
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
}
}
};
};
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
4743c9cd
...
@@ -140,68 +140,72 @@ struct EOFException : public std::exception {
...
@@ -140,68 +140,72 @@ struct EOFException : public std::exception {
#define LIKELY(condition) (condition)
#define LIKELY(condition) (condition)
#endif
#endif
inline
bool
is_error
(
bool
stat
)
{
return
!
stat
;
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
bool
stat
,
const
Args
&
...
args
)
{
bool
stat
,
const
Args
&
...
args
)
{
if
(
UNLIKELY
(
!
(
stat
)))
{
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
throw
std
::
runtime_error
(
string
::
Sprintf
(
args
...));
throw
std
::
runtime_error
(
string
::
Sprintf
(
args
...));
#else
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
#endif
}
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
inline
bool
is_error
(
cudaError_t
e
)
{
return
UNLIKELY
(
e
);
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cudaError_t
e
,
const
Args
&
...
args
)
{
cudaError_t
e
,
const
Args
&
...
args
)
{
if
(
UNLIKELY
(
e
))
{
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
throw
thrust
::
system_error
(
e
,
thrust
::
cuda_category
(),
throw
thrust
::
system_error
(
e
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
string
::
Sprintf
(
args
...));
#else
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
#endif
}
}
inline
bool
is_error
(
curandStatus_t
stat
)
{
return
stat
!=
CURAND_STATUS_SUCCESS
;
}
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
curandStatus_t
stat
,
const
Args
&
...
args
)
{
curandStatus_t
stat
,
const
Args
&
...
args
)
{
if
(
stat
!=
CURAND_STATUS_SUCCESS
)
{
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
throw
thrust
::
system_error
(
cudaErrorLaunchFailure
,
thrust
::
cuda_category
(),
throw
thrust
::
system_error
(
cudaErrorLaunchFailure
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
string
::
Sprintf
(
args
...));
#else
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
#endif
}
}
inline
bool
is_error
(
cudnnStatus_t
stat
)
{
return
stat
!=
CUDNN_STATUS_SUCCESS
;
}
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cudnnStatus_t
stat
,
const
Args
&
...
args
)
{
cudnnStatus_t
stat
,
const
Args
&
...
args
)
{
if
(
stat
==
CUDNN_STATUS_SUCCESS
)
{
return
;
}
else
{
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
throw
std
::
runtime_error
(
platform
::
dynload
::
cudnnGetErrorString
(
stat
)
+
throw
std
::
runtime_error
(
platform
::
dynload
::
cudnnGetErrorString
(
stat
)
+
string
::
Sprintf
(
args
...));
string
::
Sprintf
(
args
...));
#else
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
#endif
}
}
inline
bool
is_error
(
cublasStatus_t
stat
)
{
return
stat
!=
CUBLAS_STATUS_SUCCESS
;
}
}
template
<
typename
...
Args
>
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cublasStatus_t
stat
,
const
Args
&
...
args
)
{
cublasStatus_t
stat
,
const
Args
&
...
args
)
{
std
::
string
err
;
std
::
string
err
;
if
(
stat
==
CUBLAS_STATUS_SUCCESS
)
{
if
(
stat
==
CUBLAS_STATUS_NOT_INITIALIZED
)
{
return
;
}
else
if
(
stat
==
CUBLAS_STATUS_NOT_INITIALIZED
)
{
err
=
"CUBLAS: not initialized, "
;
err
=
"CUBLAS: not initialized, "
;
}
else
if
(
stat
==
CUBLAS_STATUS_ALLOC_FAILED
)
{
}
else
if
(
stat
==
CUBLAS_STATUS_ALLOC_FAILED
)
{
err
=
"CUBLAS: alloc failed, "
;
err
=
"CUBLAS: alloc failed, "
;
...
@@ -254,21 +258,49 @@ inline void throw_on_error(T e) {
...
@@ -254,21 +258,49 @@ inline void throw_on_error(T e) {
#define PADDLE_THROW(...) \
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
#define __PADDLE_THROW_ERROR_I(_, _9, _8, _7, _6, _5, _4, _3, _2, X_, ...) X_;
#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \
::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG));
#define __PADDLE_THROW_ON_ERROR(COND, ...) \
__PADDLE_THROW_ERROR_I( \
__VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
__THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__))
#define __PADDLE_UNARY_COMPARE(COND, ...) \
do { \
auto __cond = COND; \
if (UNLIKELY(::paddle::platform::is_error(__cond))) { \
__PADDLE_THROW_ON_ERROR(__cond, __VA_ARGS__); \
} \
} while (0)
#ifndef REPLACE_ENFORCE_GLOG
#ifndef REPLACE_ENFORCE_GLOG
#define
PADDLE_ENFORCE(...)
\
#define
__PADDLE_ENFORCE_I(COND, ...)
\
do { \
do { \
try { \
try { \
::paddle::platform::throw_on_error(__VA_ARGS__);
\
__PADDLE_UNARY_COMPARE(COND, __VA_ARGS__);
\
} catch (...) { \
} catch (...) { \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
__FILE__, __LINE__); \
__FILE__, __LINE__); \
} \
} \
} while (
false
)
} while (
0
)
#else
#else
#define
PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(
__VA_ARGS__);
#define
__PADDLE_ENFORCE_I(COND, ...) __PADDLE_UNARY_COMPARE(COND,
__VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG
#endif // REPLACE_ENFORCE_GLOG
#define __PADDLE_ENFORCE(__args) __PADDLE_ENFORCE_I __args
#define PADDLE_ENFORCE(...) __PADDLE_ENFORCE((__VA_ARGS__))
#define PADDLE_THROW_EOF() \
#define PADDLE_THROW_EOF() \
do { \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
4743c9cd
...
@@ -37,6 +37,25 @@ TEST(ENFORCE, FAILED) {
...
@@ -37,6 +37,25 @@ TEST(ENFORCE, FAILED) {
HasPrefix
(
StringPiece
(
error
.
what
()),
"Enforce is not ok 123 at all"
));
HasPrefix
(
StringPiece
(
error
.
what
()),
"Enforce is not ok 123 at all"
));
}
}
EXPECT_TRUE
(
caught_exception
);
EXPECT_TRUE
(
caught_exception
);
caught_exception
=
false
;
try
{
PADDLE_ENFORCE
(
false
,
"Enforce is not ok at all"
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
EXPECT_TRUE
(
HasPrefix
(
StringPiece
(
error
.
what
()),
"Enforce is not ok at all"
));
}
EXPECT_TRUE
(
caught_exception
);
caught_exception
=
false
;
try
{
PADDLE_ENFORCE
(
false
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
EXPECT_NE
(
std
::
string
(
error
.
what
()).
find
(
" at "
),
0
);
}
EXPECT_TRUE
(
caught_exception
);
}
}
TEST
(
ENFORCE
,
NO_ARG_OK
)
{
TEST
(
ENFORCE
,
NO_ARG_OK
)
{
...
...
paddle/fluid/platform/float16_test.cc
浏览文件 @
4743c9cd
...
@@ -12,6 +12,7 @@ limitations under the License. */
...
@@ -12,6 +12,7 @@ limitations under the License. */
#include <vector>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
...
...
paddle/fluid/platform/float16_test.cu
浏览文件 @
4743c9cd
...
@@ -11,6 +11,7 @@ limitations under the License. */
...
@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/float16.h"
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <bitset>
#include <bitset>
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
4743c9cd
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer
)
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer
scope_pool
)
if
(
WITH_PYTHON
)
if
(
WITH_PYTHON
)
list
(
APPEND PYBIND_DEPS py_func_op
)
list
(
APPEND PYBIND_DEPS py_func_op
)
endif
()
endif
()
...
...
paddle/fluid/pybind/const_value.cc
浏览文件 @
4743c9cd
...
@@ -49,9 +49,6 @@ void BindConstValue(pybind11::module* m) {
...
@@ -49,9 +49,6 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker
.
def
(
op_proto_and_checker_maker
.
def
(
"kOpNameScopeAttrName"
,
"kOpNameScopeAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
op_proto_and_checker_maker
.
def
(
"kOpCreationCallstackAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
);
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
4743c9cd
...
@@ -32,6 +32,7 @@ limitations under the License. */
...
@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layer.h"
...
@@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) {
...
@@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) {
return
paddle
::
operators
::
AppendPythonCallableObjectAndReturnId
(
py_obj
);
return
paddle
::
operators
::
AppendPythonCallableObjectAndReturnId
(
py_obj
);
});
});
m
.
add_object
(
"_cleanup"
,
py
::
capsule
([]()
{
ScopePool
::
Instance
().
Clear
();
}));
py
::
class_
<
imperative
::
VarBase
,
PyVarBase
>
(
m
,
"VarBase"
,
R"DOC()DOC"
)
py
::
class_
<
imperative
::
VarBase
,
PyVarBase
>
(
m
,
"VarBase"
,
R"DOC()DOC"
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<>
())
.
def
(
"_run_backward"
,
.
def
(
"_run_backward"
,
...
@@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle.
},
},
py
::
return_value_policy
::
copy
);
py
::
return_value_policy
::
copy
);
py
::
class_
<
Scope
>
(
m
,
"Scope"
,
R"DOC(
py
::
class_
<
Scope
>
(
m
,
"
_
Scope"
,
R"DOC(
Scope is an association of a name to Variable. All variables belong to Scope.
Scope is an association of a name to Variable. All variables belong to Scope.
Variables in a parent scope can be retrieved from local scope.
Variables in a parent scope can be retrieved from local scope.
...
@@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle.
param.set(param_array, place)
param.set(param_array, place)
)DOC"
)
)DOC"
)
.
def
(
"_remove_from_pool"
,
[](
Scope
&
self
)
{
ScopePool
::
Instance
().
Remove
(
&
self
);
})
.
def
(
"var"
,
.
def
(
"var"
,
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
return
self
.
Var
(
name
);
return
self
.
Var
(
name
);
},
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
py
::
init
<>
())
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
py
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
auto
*
s
=
new
Scope
();
ScopePool
::
Instance
().
Insert
(
std
::
unique_ptr
<
Scope
>
(
s
));
return
s
;
},
py
::
return_value_policy
::
reference
);
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python.
//! Python str. If you want a str object, you should cast them in Python.
m
.
def
(
"get_all_op_protos"
,
[]()
->
std
::
vector
<
py
::
bytes
>
{
m
.
def
(
"get_all_op_protos"
,
[]()
->
std
::
vector
<
py
::
bytes
>
{
...
...
paddle/fluid/string/printf.h
浏览文件 @
4743c9cd
...
@@ -87,7 +87,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
...
@@ -87,7 +87,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
template
<
typename
...
Args
>
template
<
typename
...
Args
>
std
::
string
Sprintf
(
const
Args
&
...
args
)
{
std
::
string
Sprintf
(
const
Args
&
...
args
)
{
std
::
ostringstream
oss
;
std
::
ostringstream
oss
;
Fprintf
(
oss
,
"
"
);
Fprintf
(
oss
,
"
%s"
,
args
...
);
return
oss
.
str
();
return
oss
.
str
();
}
}
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
4743c9cd
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
#=================================================
#=================================================
# Utils
# Utils
#=================================================
#=================================================
...
@@ -418,13 +417,6 @@ EOF
...
@@ -418,13 +417,6 @@ EOF
else
else
ctest
--output-on-failure
ctest
--output-on-failure
fi
fi
# make install should also be test when unittest
make
install
-j
`
nproc
`
pip
install
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
paddle version
fi
fi
fi
}
}
...
@@ -922,6 +914,7 @@ function main() {
...
@@ -922,6 +914,7 @@ function main() {
;;
;;
assert_api
)
assert_api
)
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
assert_api_spec_approvals
;;
;;
test_inference
)
test_inference
)
gen_capi_package
gen_capi_package
...
@@ -946,6 +939,15 @@ function main() {
...
@@ -946,6 +939,15 @@ function main() {
run_test
run_test
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
;;
;;
cmake_gen
)
cmake_gen
${
PYTHON_ABI
:-
""
}
;;
gen_fluid_lib
)
gen_fluid_lib
;;
test_fluid_lib
)
test_fluid_lib
;;
*
)
*
)
print_usage
print_usage
exit
0
exit
0
...
...
python/paddle/fluid/__init__.py
浏览文件 @
4743c9cd
...
@@ -46,7 +46,7 @@ from . import transpiler
...
@@ -46,7 +46,7 @@ from . import transpiler
from
.
import
distribute_lookup_table
from
.
import
distribute_lookup_table
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.data_feeder
import
DataFeeder
from
.data_feeder
import
DataFeeder
from
.core
import
LoDTensor
,
LoDTensorArray
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
from
.core
import
LoDTensor
,
LoDTensorArray
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
,
_Scope
from
.transpiler
import
DistributeTranspiler
,
\
from
.transpiler
import
DistributeTranspiler
,
\
memory_optimize
,
release_memory
,
DistributeTranspilerConfig
memory_optimize
,
release_memory
,
DistributeTranspilerConfig
from
.lod_tensor
import
create_lod_tensor
,
create_random_int_lodtensor
from
.lod_tensor
import
create_lod_tensor
,
create_random_int_lodtensor
...
...
python/paddle/fluid/executor.py
浏览文件 @
4743c9cd
...
@@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True):
...
@@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True):
assert
isinstance
(
name
,
str
)
assert
isinstance
(
name
,
str
)
if
scope
is
None
:
if
scope
is
None
:
scope
=
global_scope
()
scope
=
global_scope
()
assert
isinstance
(
scope
,
core
.
Scope
)
assert
isinstance
(
scope
,
core
.
_
Scope
)
var
=
scope
.
find_var
(
name
)
var
=
scope
.
find_var
(
name
)
assert
var
is
not
None
,
(
assert
var
is
not
None
,
(
...
...
python/paddle/fluid/framework.py
浏览文件 @
4743c9cd
...
@@ -20,7 +20,6 @@ import os
...
@@ -20,7 +20,6 @@ import os
import
re
import
re
import
six
import
six
import
sys
import
sys
import
traceback
import
numpy
as
np
import
numpy
as
np
...
@@ -605,10 +604,6 @@ class Operator(object):
...
@@ -605,10 +604,6 @@ class Operator(object):
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
del
op_attrs
[
role_var_name
]
del
op_attrs
[
role_var_name
]
callstack_var_name
=
op_maker
.
kOpCreationCallstackAttrName
()
op_attrs
[
callstack_var_name
]
=
list
(
reversed
(
traceback
.
format_stack
()))[
1
:]
if
len
(
self
.
desc
.
type
())
!=
0
:
if
len
(
self
.
desc
.
type
())
!=
0
:
return
return
if
type
is
None
:
if
type
is
None
:
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
4743c9cd
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
trainers_endpoints
),
"num_trainers == len(end_points)"
trainers_endpoints
),
"num_trainers == len(end_points)"
build_strategy
.
trainers_endpoints
=
trainers_endpoints
build_strategy
.
trainers_endpoints
=
trainers_endpoints
# step
5: get persistable_vars, parameter
_vars, places. persistable_vars
# step
6: get persistable
_vars, places. persistable_vars
# need be broadcast to other local_scope.
# need be broadcast to other local_scope.
persistable_vars
=
set
([
persistable_vars
=
set
([
cpt
.
to_text
(
v
.
name
)
for
v
in
[
cpt
.
to_text
(
v
.
name
)
for
v
in
[
...
@@ -164,7 +164,7 @@ class ParallelExecutor(object):
...
@@ -164,7 +164,7 @@ class ParallelExecutor(object):
places
=
list
(
map
(
place_obj
,
self
.
_places
))
places
=
list
(
map
(
place_obj
,
self
.
_places
))
# step
6
: init ParallelExecutor
# step
7
: init ParallelExecutor
self
.
executor
=
core
.
ParallelExecutor
(
self
.
executor
=
core
.
ParallelExecutor
(
places
,
persistable_vars
,
main
.
desc
,
places
,
persistable_vars
,
main
.
desc
,
cpt
.
to_text
(
loss_name
)
cpt
.
to_text
(
loss_name
)
...
...
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
浏览文件 @
4743c9cd
...
@@ -185,6 +185,8 @@ def main(use_cuda, parallel):
...
@@ -185,6 +185,8 @@ def main(use_cuda, parallel):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
on_ci
=
bool
(
int
(
os
.
environ
.
get
(
"SKIP_UNSTABLE_CI"
,
'0'
)))
if
not
on_ci
:
for
use_cuda
in
(
False
,
True
):
for
use_cuda
in
(
False
,
True
):
for
parallel
in
(
False
,
True
):
for
parallel
in
(
False
,
True
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
4743c9cd
...
@@ -15,6 +15,18 @@
...
@@ -15,6 +15,18 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
unittest
import
unittest
from
test_dist_base
import
TestDistBase
from
test_dist_base
import
TestDistBase
import
os
def
skip_ci
(
func
):
on_ci
=
bool
(
int
(
os
.
environ
.
get
(
"SKIP_UNSTABLE_CI"
,
'0'
)))
def
__func__
(
*
args
,
**
kwargs
):
if
on_ci
:
return
return
func
(
*
args
,
**
kwargs
)
return
__func__
class
TestDistSeResneXt2x2
(
TestDistBase
):
class
TestDistSeResneXt2x2
(
TestDistBase
):
...
@@ -22,6 +34,7 @@ class TestDistSeResneXt2x2(TestDistBase):
...
@@ -22,6 +34,7 @@ class TestDistSeResneXt2x2(TestDistBase):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_use_reader_alloc
=
False
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
...
@@ -32,6 +45,7 @@ class TestDistseResnXt2x2WithMemopt(TestDistBase):
...
@@ -32,6 +45,7 @@ class TestDistseResnXt2x2WithMemopt(TestDistBase):
self
.
_mem_opt
=
True
self
.
_mem_opt
=
True
self
.
_use_reader_alloc
=
False
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
...
@@ -41,6 +55,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
...
@@ -41,6 +55,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_use_reader_alloc
=
False
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
...
...
python/paddle/fluid/tests/unittests/test_operator_desc.py
浏览文件 @
4743c9cd
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set
(
mul_op
.
attr_names
),
set
(
mul_op
.
attr_names
),
set
([
set
([
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"op_namescope"
,
"op_callstack"
"op_namescope"
]))
]))
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
...
...
python/paddle/fluid/tests/unittests/test_py_func_op.py
浏览文件 @
4743c9cd
...
@@ -26,7 +26,7 @@ os.environ['CPU_NUM'] = str(dev_cnt)
...
@@ -26,7 +26,7 @@ os.environ['CPU_NUM'] = str(dev_cnt)
def
dummy_func_with_no_input
():
def
dummy_func_with_no_input
():
return
float
(
1.0
)
return
np
.
array
([
0
],
dtype
=
'float32'
)
def
dummy_func_with_no_output
(
x
):
def
dummy_func_with_no_output
(
x
):
...
@@ -105,7 +105,7 @@ def simple_fc_net(img, label, use_py_func_op):
...
@@ -105,7 +105,7 @@ def simple_fc_net(img, label, use_py_func_op):
name
=
'test_tmp_var'
,
dtype
=
'float32'
,
shape
=
[
1
])
name
=
'test_tmp_var'
,
dtype
=
'float32'
,
shape
=
[
1
])
fluid
.
layers
.
py_func
(
fluid
.
layers
.
py_func
(
func
=
dummy_func_with_no_input
,
x
=
None
,
out
=
dummy_var
)
func
=
dummy_func_with_no_input
,
x
=
None
,
out
=
dummy_var
)
loss
+=
dummy_var
fluid
.
layers
.
py_func
(
func
=
dummy_func_with_no_output
,
x
=
loss
,
out
=
None
)
fluid
.
layers
.
py_func
(
func
=
dummy_func_with_no_output
,
x
=
loss
,
out
=
None
)
loss
=
fluid
.
layers
.
mean
(
loss
)
loss
=
fluid
.
layers
.
mean
(
loss
)
...
@@ -174,7 +174,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase):
...
@@ -174,7 +174,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase):
self
.
assertAlmostEqual
(
max_diff
,
0
,
delta
=
1e-3
)
self
.
assertAlmostEqual
(
max_diff
,
0
,
delta
=
1e-3
)
class
TestPyFuncOpUseParallelExecutor
(
unittest
.
TestCase
):
class
TestPyFuncOpUseParallelExecutor
(
TestPyFuncOpUseExecutor
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
use_parallel_executor
=
True
self
.
use_parallel_executor
=
True
...
...
python/paddle/fluid/tests/unittests/test_weight_decay.py
0 → 100644
浏览文件 @
4743c9cd
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
contextlib
import
unittest
from
functools
import
partial
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
def
get_places
():
places
=
[]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
@
contextlib
.
contextmanager
def
prog_scope_guard
(
main_prog
,
startup_prog
):
scope
=
fluid
.
core
.
Scope
()
with
fluid
.
unique_name
.
guard
():
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
yield
def
bow_net
(
data
,
label
,
dict_dim
,
is_sparse
=
False
,
emb_dim
=
128
,
hid_dim
=
128
,
hid_dim2
=
96
,
class_dim
=
2
):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
is_sparse
=
is_sparse
,
size
=
[
dict_dim
,
emb_dim
])
bow
=
fluid
.
layers
.
sequence_pool
(
input
=
emb
,
pool_type
=
'sum'
)
bow_tanh
=
fluid
.
layers
.
tanh
(
bow
)
fc_1
=
fluid
.
layers
.
fc
(
input
=
bow_tanh
,
size
=
hid_dim
,
act
=
"tanh"
)
fc_2
=
fluid
.
layers
.
fc
(
input
=
fc_1
,
size
=
hid_dim2
,
act
=
"tanh"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_2
],
size
=
class_dim
,
act
=
"softmax"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
return
avg_cost
class
TestWeightDecay
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
self
.
word_dict
),
batch_size
=
4
)()
self
.
train_data
=
[
next
(
reader
)
for
_
in
range
(
5
)]
self
.
learning_rate
=
.
5
def
run_executor
(
self
,
place
,
feed_list
,
loss
):
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
main_prog
=
fluid
.
default_main_program
()
loss_set
=
[]
for
data
in
self
.
train_data
:
out
=
exe
.
run
(
main_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
.
name
])
print
(
"loss %s"
%
(
np
.
average
(
out
)))
loss_set
.
append
(
np
.
average
(
out
))
return
loss_set
def
run_parallel_exe
(
self
,
place
,
feed_list
,
loss
,
use_cuda
=
True
,
use_reduce
=
False
,
use_fast_executor
=
False
,
use_ir_memory_optimize
=
False
):
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
exec_strategy
=
fluid
.
ExecutionStrategy
()
if
use_fast_executor
:
exec_strategy
.
use_experimental_executor
=
True
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
\
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
parallel_exe
=
fluid
.
ParallelExecutor
(
use_cuda
,
loss_name
=
loss
.
name
,
exec_strategy
=
exec_strategy
,
build_strategy
=
build_strategy
)
loss_set
=
[]
for
data
in
self
.
train_data
:
out
=
parallel_exe
.
run
(
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
.
name
])
print
(
"loss %s"
%
(
np
.
average
(
out
)))
loss_set
.
append
(
np
.
average
(
out
))
return
loss_set
def
check_weight_decay
(
self
,
place
,
model
,
use_parallel_exe
=
False
,
use_reduce
=
False
):
main_prog
=
fluid
.
framework
.
Program
()
startup_prog
=
fluid
.
framework
.
Program
()
startup_prog
.
random_seed
=
1
with
prog_scope_guard
(
main_prog
=
main_prog
,
startup_prog
=
startup_prog
):
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
avg_cost
=
model
(
data
,
label
,
len
(
self
.
word_dict
))
param_list
=
[(
var
,
var
*
self
.
learning_rate
)
for
var
in
main_prog
.
block
(
0
).
all_parameters
()]
optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
self
.
learning_rate
)
optimizer
.
minimize
(
avg_cost
)
for
params
in
param_list
:
updated_p
=
fluid
.
layers
.
elementwise_sub
(
x
=
params
[
0
],
y
=
params
[
1
])
fluid
.
layers
.
assign
(
input
=
updated_p
,
output
=
params
[
0
])
if
use_parallel_exe
:
loss
=
self
.
run_parallel_exe
(
place
,
[
data
,
label
],
loss
=
avg_cost
,
use_cuda
=
True
,
use_reduce
=
use_reduce
)
else
:
loss
=
self
.
run_executor
(
place
,
[
data
,
label
],
loss
=
avg_cost
)
return
loss
def
test_weight_decay
(
self
):
model
=
partial
(
bow_net
,
is_sparse
=
False
)
for
place
in
get_places
():
loss
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
False
)
loss2
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
True
,
use_reduce
=
False
)
for
i
in
range
(
len
(
loss
)):
assert
np
.
isclose
(
a
=
loss
[
i
],
b
=
loss2
[
i
],
rtol
=
5e-5
)
loss3
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
True
,
use_reduce
=
True
)
for
i
in
range
(
len
(
loss
)):
assert
np
.
isclose
(
a
=
loss
[
i
],
b
=
loss3
[
i
],
rtol
=
5e-5
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/transpiler/inference_transpiler.py
浏览文件 @
4743c9cd
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录