Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
4743c9cd
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4743c9cd
编写于
12月 27, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into parallel_graph_mode
上级
8cad371a
4048cfa9
变更
101
展开全部
隐藏空白更改
内联
并排
Showing
101 changed file
with
2266 addition
and
1057 deletion
+2266
-1057
Dockerfile
Dockerfile
+38
-38
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+2
-2
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-1
cmake/simd.cmake
cmake/simd.cmake
+35
-38
paddle/contrib/float16/float16_transpiler.py
paddle/contrib/float16/float16_transpiler.py
+1
-1
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-5
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+21
-25
paddle/fluid/framework/data_device_transform_test.cu
paddle/fluid/framework/data_device_transform_test.cu
+1
-0
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+1
-1
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
...uid/framework/details/fast_threaded_ssa_graph_executor.cc
+1
-0
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+199
-208
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+11
-8
paddle/fluid/framework/details/variable_visitor.cc
paddle/fluid/framework/details/variable_visitor.cc
+2
-2
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
+222
-0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
+49
-0
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
...fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
+14
-11
paddle/fluid/framework/ir/graph.cc
paddle/fluid/framework/ir/graph.cc
+0
-58
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+77
-7
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+32
-0
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+5
-5
paddle/fluid/framework/op_proto_maker.cc
paddle/fluid/framework/op_proto_maker.cc
+0
-4
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+0
-1
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+2
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+25
-66
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+9
-3
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+2
-3
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-3
paddle/fluid/framework/scope_pool.cc
paddle/fluid/framework/scope_pool.cc
+54
-0
paddle/fluid/framework/scope_pool.h
paddle/fluid/framework/scope_pool.h
+46
-0
paddle/fluid/framework/var_type.h
paddle/fluid/framework/var_type.h
+20
-22
paddle/fluid/framework/var_type_inference_test.cc
paddle/fluid/framework/var_type_inference_test.cc
+1
-1
paddle/fluid/framework/var_type_traits.cc
paddle/fluid/framework/var_type_traits.cc
+119
-0
paddle/fluid/framework/var_type_traits.h
paddle/fluid/framework/var_type_traits.h
+195
-0
paddle/fluid/framework/var_type_traits_test.cc
paddle/fluid/framework/var_type_traits_test.cc
+120
-0
paddle/fluid/framework/variable.h
paddle/fluid/framework/variable.h
+35
-31
paddle/fluid/framework/variable_test.cc
paddle/fluid/framework/variable_test.cc
+12
-11
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+4
-4
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+1
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+24
-4
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+7
-3
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-0
paddle/fluid/inference/api/details/reset_tensor_array.cc
paddle/fluid/inference/api/details/reset_tensor_array.cc
+1
-1
paddle/fluid/inference/api/details/reset_tensor_array.h
paddle/fluid/inference/api/details/reset_tensor_array.h
+6
-3
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+10
-0
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+12
-1
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+7
-5
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+4
-0
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+1
-3
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
+9
-30
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+11
-27
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+15
-61
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+117
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+39
-20
paddle/fluid/inference/tests/test.cmake
paddle/fluid/inference/tests/test.cmake
+5
-3
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+3
-4
paddle/fluid/operators/conv_fusion_op.cu.cc
paddle/fluid/operators/conv_fusion_op.cu.cc
+3
-5
paddle/fluid/operators/cudnn_lstm_op.cu.cc
paddle/fluid/operators/cudnn_lstm_op.cu.cc
+4
-237
paddle/fluid/operators/cudnn_rnn_cache.h
paddle/fluid/operators/cudnn_rnn_cache.h
+255
-0
paddle/fluid/operators/cum_op.h
paddle/fluid/operators/cum_op.h
+2
-0
paddle/fluid/operators/detail/safe_ref.h
paddle/fluid/operators/detail/safe_ref.h
+1
-1
paddle/fluid/operators/distributed/proto_encoder_helper.h
paddle/fluid/operators/distributed/proto_encoder_helper.h
+3
-1
paddle/fluid/operators/distributed_ops/CMakeLists.txt
paddle/fluid/operators/distributed_ops/CMakeLists.txt
+1
-1
paddle/fluid/operators/distributed_ops/split_ids_op.h
paddle/fluid/operators/distributed_ops/split_ids_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_mul_op.h
paddle/fluid/operators/elementwise/elementwise_mul_op.h
+1
-1
paddle/fluid/operators/lrn_mkldnn_op.cc
paddle/fluid/operators/lrn_mkldnn_op.cc
+4
-4
paddle/fluid/operators/optimizers/adadelta_op.h
paddle/fluid/operators/optimizers/adadelta_op.h
+4
-2
paddle/fluid/operators/optimizers/adagrad_op.h
paddle/fluid/operators/optimizers/adagrad_op.h
+2
-1
paddle/fluid/operators/optimizers/adam_op.h
paddle/fluid/operators/optimizers/adam_op.h
+2
-1
paddle/fluid/operators/optimizers/adamax_op.h
paddle/fluid/operators/optimizers/adamax_op.h
+4
-2
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+4
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+4
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+1
-1
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+2
-1
paddle/fluid/operators/split_lod_tensor_op.cc
paddle/fluid/operators/split_lod_tensor_op.cc
+1
-1
paddle/fluid/operators/sum_mkldnn_op.cc
paddle/fluid/operators/sum_mkldnn_op.cc
+1
-1
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+1
-1
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+1
-1
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+60
-28
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+19
-0
paddle/fluid/platform/float16_test.cc
paddle/fluid/platform/float16_test.cc
+1
-0
paddle/fluid/platform/float16_test.cu
paddle/fluid/platform/float16_test.cu
+1
-0
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+0
-3
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+15
-2
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+1
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+10
-8
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+1
-1
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+0
-5
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-2
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
.../image_classification/test_image_classification_resnet.py
+7
-5
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+15
-0
python/paddle/fluid/tests/unittests/test_operator_desc.py
python/paddle/fluid/tests/unittests/test_operator_desc.py
+1
-1
python/paddle/fluid/tests/unittests/test_py_func_op.py
python/paddle/fluid/tests/unittests/test_py_func_op.py
+3
-3
python/paddle/fluid/tests/unittests/test_weight_decay.py
python/paddle/fluid/tests/unittests/test_weight_decay.py
+188
-0
python/paddle/fluid/transpiler/inference_transpiler.py
python/paddle/fluid/transpiler/inference_transpiler.py
+1
-1
未找到文件。
Dockerfile
浏览文件 @
4743c9cd
...
...
@@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed.
RUN
pip3
install
-U
wheel
&&
\
pip3
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.6
install
-U
wheel
&&
\
pip3.6
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.6
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.7
install
-U
wheel
&&
\
pip3.7
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.7
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
RUN
pip3
--no-cache-dir
install
-U
wheel
&&
\
pip3
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.6
--no-cache-dir
install
-U
wheel
&&
\
pip3.6
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.6
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
pip3.7
--no-cache-dir
install
-U
wheel
&&
\
pip3.7
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip3.7
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
&&
\
easy_install
-U
pip
&&
\
pip
install
-U
pip setuptools wheel
&&
\
pip
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
install
sphinx-rtd-theme
==
0.1.9 recommonmark
RUN
pip3
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3
install
opencv-python
&&
\
pip3.6
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.6
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.6
install
opencv-python
&&
\
pip3.7
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.7
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.7
install
opencv-python
&&
\
pip
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
install
opencv-python
pip
--no-cache-dir
install
-U
pip setuptools wheel
&&
\
pip
--no-cache-dir
install
-U
docopt PyYAML
sphinx
==
1.5.6
&&
\
pip
--no-cache-dir
install
sphinx-rtd-theme
==
0.1.9 recommonmark
RUN
pip3
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3
--no-cache-dir
install
opencv-python
&&
\
pip3.6
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.6
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.6
--no-cache-dir
install
opencv-python
&&
\
pip3.7
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip3.7
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip3.7
--no-cache-dir
install
opencv-python
&&
\
pip
--no-cache-dir
install
'pre-commit==1.10.4'
'ipython==5.3.0'
&&
\
pip
--no-cache-dir
install
'ipykernel==4.6.0'
'jupyter==1.0.0'
&&
\
pip
--no-cache-dir
install
opencv-python
#For docstring checker
RUN
pip3
install
pylint pytest astroid isort
RUN
pip3.6
install
pylint pytest astroid isort
RUN
pip3.7
install
pylint pytest astroid isort
RUN
pip
install
pylint pytest astroid isort LinkChecker
RUN
pip3
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip3.6
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip3.7
--no-cache-dir
install
pylint pytest astroid isort
RUN
pip
--no-cache-dir
install
pylint pytest astroid isort LinkChecker
COPY
./python/requirements.txt /root/
RUN
pip3
install
-r
/root/requirements.txt
RUN
pip3.6
install
-r
/root/requirements.txt
RUN
pip3.7
install
-r
/root/requirements.txt
RUN
pip
install
-r
/root/requirements.txt
RUN
pip3
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip3.6
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip3.7
--no-cache-dir
install
-r
/root/requirements.txt
RUN
pip
--no-cache-dir
install
-r
/root/requirements.txt
# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN
apt-get
install
-y
libssl-dev libffi-dev
RUN
pip3
install
certifi urllib3[secure]
RUN
pip3.6
install
certifi urllib3[secure]
RUN
pip3.7
install
certifi urllib3[secure]
RUN
pip
install
certifi urllib3[secure]
RUN
apt-get
install
-y
libssl-dev libffi-dev
&&
apt-get clean
-y
RUN
pip3
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip3.6
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip3.7
--no-cache-dir
install
certifi urllib3[secure]
RUN
pip
--no-cache-dir
install
certifi urllib3[secure]
# Install woboq_codebrowser to /woboq
...
...
cmake/external/mkldnn.cmake
浏览文件 @
4743c9cd
...
...
@@ -106,10 +106,10 @@ else(WIN32)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
COMMAND
${
CMAKE_COMMAND
}
-E copy
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
DEPENDS mkldnn
)
DEPENDS mkldnn
shared_mkldnn
)
endif
(
WIN32
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
ADD_DEPENDENCIES
(
mkldnn_shared_lib
${
MKLDNN_PROJECT
}
mkldnn
)
IF
(
WITH_C_API
)
INSTALL
(
FILES
${
MKLDNN_SHARED_LIB
}
DESTINATION lib
)
ENDIF
()
...
...
cmake/inference_lib.cmake
浏览文件 @
4743c9cd
...
...
@@ -136,7 +136,7 @@ if (WITH_MKLDNN)
copy
(
mkldnn_lib
SRCS
${
MKLDNN_INC_DIR
}
${
MKLDNN_SHARED_LIB
}
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DEPS mkldnn
DEPS mkldnn
_shared_lib
)
endif
()
...
...
cmake/simd.cmake
浏览文件 @
4743c9cd
...
...
@@ -57,46 +57,43 @@ int main()
return 0;
}"
SSE3_FOUND
)
# disable AVX by default on windows
if
(
NOT WIN32
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/contrib/float16/float16_transpiler.py
浏览文件 @
4743c9cd
...
...
@@ -60,7 +60,7 @@ class Float16Transpiler:
raise
TypeError
(
"place should be as CPUPlace/CUDAPlace type"
)
if
scope
is
None
:
scope
=
global_scope
()
if
not
isinstance
(
scope
,
core
.
Scope
):
if
not
isinstance
(
scope
,
core
.
_
Scope
):
raise
TypeError
(
"scope should be as Scope type or None"
)
self
.
scope
=
scope
...
...
paddle/fluid/API.spec
浏览文件 @
4743c9cd
...
...
@@ -464,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
4743c9cd
...
...
@@ -7,27 +7,17 @@ function(windows_symbolic TARGET)
cmake_parse_arguments
(
windows_symbolic
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
final_path
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
windows_symbolic_PATH
}
)
foreach
(
src
${
windows_symbolic_SRCS
}
)
get_filename_component
(
src
${
src
}
NAME_WE
)
if
(
NOT EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc OR NOT EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cu
)
message
(
FATAL
"
${
src
}
.cc and
${
src
}
.cu must exsits, and
${
src
}
.cu must be symbolic file."
)
endif
()
#only copy the xx.cu to.xx.cu when the content are modified
set
(
copy_flag 1
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu
)
file
(
READ
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc SOURCE_STR
)
file
(
READ
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu TARGET_STR
)
if
(
SOURCE_STR STREQUAL TARGET_STR
)
set
(
copy_flag 0
)
endif
()
endif
()
if
(
copy_flag
)
add_custom_command
(
OUTPUT .
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E remove
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
src
}
.cc"
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
endif
(
copy_flag
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
get_filename_component
(
src
${
src
}
NAME_WE
)
if
(
NOT EXISTS
${
final_path
}
/
${
src
}
.cc OR NOT EXISTS
${
final_path
}
/
${
src
}
.cu
)
message
(
FATAL
"
${
src
}
.cc and
${
src
}
.cu must exsits, and
${
src
}
.cu must be symbolic file."
)
endif
()
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy_if_different
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
endforeach
()
endfunction
()
...
...
@@ -78,17 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
cc_test
(
reader_test SRCS reader_test.cc DEPS reader
)
cc_test
(
variable_test SRCS variable_test.cc
)
cc_library
(
threadpool SRCS threadpool.cc DEPS enforce
)
cc_test
(
threadpool_test SRCS threadpool_test.cc DEPS threadpool
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
)
cc_library
(
var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto
)
if
(
WITH_GPU
)
target_link_libraries
(
var_type_traits dynload_cuda
)
endif
()
cc_test
(
var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool var_type_traits
)
cc_library
(
scope_pool SRCS scope_pool.cc DEPS scope
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_test
(
variable_test SRCS variable_test.cc DEPS tensor var_type_traits
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
nv_test
(
data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function
)
DEPS operator op_registry device_context math_function
scope
)
if
(
WITH_GPU
)
if
(
WIN32
)
...
...
paddle/fluid/framework/data_device_transform_test.cu
浏览文件 @
4743c9cd
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
4743c9cd
...
...
@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
}
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
浏览文件 @
4743c9cd
...
...
@@ -120,6 +120,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
ClearFetchOp
(
graph_
.
get
(),
&
fetch_ops
);
return
fetches
;
}
void
FastThreadedSSAGraphExecutor
::
RunOpAsync
(
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
*
op_deps
,
OpHandleBase
*
op
,
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
4743c9cd
此差异已折叠。
点击以展开。
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
4743c9cd
...
...
@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
#endif
int
GetVarDeviceID
(
const
ir
::
Graph
&
graph
,
const
std
::
string
&
varname
,
const
std
::
string
&
varname
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
bool
IsScaleLossOp
(
ir
::
Node
*
node
)
const
;
...
...
@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
std
::
unordered_map
<
std
::
string
,
int
>
*
sharded_var_device
)
const
;
std
::
vector
<
std
::
string
>
FindDistTrainSendVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
const
;
void
CreateComputationalOps
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
size_t
num_places
)
const
;
...
...
@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
int
dev_id
)
const
;
int
GetOpDeviceID
(
const
ir
::
Graph
&
graph
,
ir
::
Node
*
node
,
ir
::
Node
*
node
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
sharded_var_device
)
const
;
void
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
;
...
...
@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
;
std
::
vector
<
ir
::
Node
*>
SortForReduceMode
(
const
std
::
vector
<
ir
::
Node
*>
&
)
const
;
int
GetOpDeviceID
(
ir
::
Node
*
node
,
const
std
::
unordered_map
<
std
::
string
,
int
>
&
shared_var_device
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
*
delay_ops
)
const
;
mutable
std
::
string
loss_var_name_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
...
...
paddle/fluid/framework/details/variable_visitor.cc
浏览文件 @
4743c9cd
...
...
@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
->
GetMutable
<
SelectedRows
>
());
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
->
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
}
else
if
(
var
.
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
.
Get
<
SelectedRows
>
());
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
.
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
.
Type
()
));
}
}
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
4743c9cd
...
...
@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
}
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
}
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
4743c9cd
...
...
@@ -45,6 +45,7 @@ pass_library(is_test_pass base)
pass_library
(
conv_elementwise_add_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add2_act_fuse_pass inference
)
pass_library
(
conv_elementwise_add_fuse_pass inference
)
pass_library
(
conv_affine_channel_fuse_pass inference
)
if
(
WITH_MKLDNN
)
pass_library
(
mkldnn_placement_pass base
)
pass_library
(
depthwise_conv_mkldnn_pass base
)
...
...
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
#include <functional>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
#define GET_CONV_BN_NODES(pattern_name) \
/* OPERATORS */
\
GET_IR_NODE_FROM_SUBGRAPH(conv, conv, pattern_name); \
GET_IR_NODE_FROM_SUBGRAPH(affine_channel, affine_channel, pattern_name); \
/* CONV inputs */
\
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, pattern_name); \
/* CONV outputs */
\
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, pattern_name); \
/* Affine Channel inputs */
\
GET_IR_NODE_FROM_SUBGRAPH(ac_scale, ac_scale, pattern_name); \
GET_IR_NODE_FROM_SUBGRAPH(ac_bias, ac_bias, pattern_name); \
/* Affine channel outputs */
\
GET_IR_NODE_FROM_SUBGRAPH(ac_out, ac_out, pattern_name);
/* Out */
void
recompute_bias_and_weights
(
const
Scope
*
scope
,
ir
::
Node
*
conv_weight
,
const
ir
::
Node
&
ac_scale
,
const
LoDTensor
&
ac_bias_tensor
,
LoDTensor
*
eltwise_y_in_tensor
)
{
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
EigenMatrixArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>>
;
// Re-compute bias of conv2d from AffineChannel
PADDLE_ENFORCE_EQ
(
eltwise_y_in_tensor
->
dims
(),
ac_bias_tensor
.
dims
());
auto
*
scale_tensor
=
scope
->
FindVar
(
ac_scale
.
Name
())
->
GetMutable
<
LoDTensor
>
();
ConstEigenVectorArrayMap
scale_array
(
scale_tensor
->
data
<
float
>
(),
scale_tensor
->
numel
(),
1
);
ConstEigenVectorArrayMap
ac_bias_array
(
ac_bias_tensor
.
data
<
float
>
(),
ac_bias_tensor
.
numel
(),
1
);
EigenVectorArrayMap
eltwise_y_in_array
(
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
eltwise_y_in_tensor
->
numel
(),
1
);
eltwise_y_in_array
=
(
eltwise_y_in_array
*
scale_array
)
+
ac_bias_array
;
// Re-compute weight of conv2d from AffineChannel
auto
*
weights
=
scope
->
FindVar
(
conv_weight
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
weights_shape
=
weights
->
dims
();
auto
weights_shape_2d
=
flatten_to_2d
(
weights_shape
,
1
);
EigenMatrixArrayMap
weights_array_2d
(
weights
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weights_shape_2d
[
0
],
weights_shape_2d
[
1
]);
weights_array_2d
.
colwise
()
*=
scale_array
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvAffineChannelFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
auto
*
scope
=
param_scope
();
PADDLE_ENFORCE
(
scope
);
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
patterns
::
PDNodeName
(
name_scope_
,
"conv_input"
))
->
AsInput
()
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
patterns
::
ConvAffineChannel
conv_ac_pattern
(
gpd
.
mutable_pattern
(),
name_scope_
);
conv_ac_pattern
(
conv_input
,
false
/*with_eltwise_add*/
);
int
found_conv_ac_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvAffineChannel fuse"
;
GET_CONV_BN_NODES
(
conv_ac_pattern
);
// check if fuse can be done and if MKL-DNN should be used
FuseOptions
fuse_option
=
FindFuseOption
(
*
conv
,
*
affine_channel
);
if
(
fuse_option
==
DO_NOT_FUSE
)
{
VLOG
(
3
)
<<
"do not perform conv+affinechannel fuse"
;
return
;
}
// Create eltwise_y (conv bias) variable
VarDesc
eltwise_y_in_desc
(
patterns
::
PDNodeName
(
name_scope_
,
"eltwise_y_in"
));
eltwise_y_in_desc
.
SetPersistable
(
true
);
auto
*
eltwise_y_in_node
=
g
->
CreateVarNode
(
&
eltwise_y_in_desc
);
auto
*
eltwise_y_in_tensor
=
scope
->
Var
(
eltwise_y_in_node
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Get affine_channel bias
auto
*
ac_bias_tensor
=
scope
->
FindVar
(
ac_bias
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Initialize eltwise_y
eltwise_y_in_tensor
->
Resize
(
ac_bias_tensor
->
dims
());
std
::
fill_n
(
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
eltwise_y_in_tensor
->
numel
(),
0.0
f
);
// update weights and biases
recompute_bias_and_weights
(
scope
,
conv_weight
,
*
ac_scale
,
*
ac_bias_tensor
,
eltwise_y_in_tensor
);
// create an elementwise add node.
OpDesc
desc
;
desc
.
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
conv_out
->
Name
()}));
desc
.
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
eltwise_y_in_node
->
Name
()}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
ac_out
->
Name
()}));
desc
.
SetType
(
"elementwise_add"
);
desc
.
SetAttr
(
"axis"
,
1
);
auto
eltwise_op
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
ac_scale
,
ac_bias
,
affine_channel
});
IR_NODE_LINK_TO
(
conv_out
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_y_in_node
,
eltwise_op
);
IR_NODE_LINK_TO
(
eltwise_op
,
ac_out
);
found_conv_ac_count
++
;
};
gpd
(
graph
.
get
(),
handler
);
AddStatis
(
found_conv_ac_count
);
return
graph
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvEltwiseAddAffineChannelFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PADDLE_ENFORCE
(
graph
.
get
());
FusePassBase
::
Init
(
name_scope_
,
graph
.
get
());
auto
*
scope
=
param_scope
();
PADDLE_ENFORCE
(
scope
);
GraphPatternDetector
gpd
;
auto
*
conv_input
=
gpd
.
mutable_pattern
()
->
NewNode
(
patterns
::
PDNodeName
(
name_scope_
,
"conv_input"
))
->
AsInput
()
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
patterns
::
ConvAffineChannel
conv_ac_pattern
(
gpd
.
mutable_pattern
(),
name_scope_
);
conv_ac_pattern
(
conv_input
,
true
/*with_eltwise_add*/
);
int
found_conv_ac_count
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
g
)
{
VLOG
(
4
)
<<
"handle ConvBN fuse"
;
GET_CONV_BN_NODES
(
conv_ac_pattern
);
// OPERATORS
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise
,
eltwise
,
conv_ac_pattern
);
// BIAS inputs
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise_y_in
,
eltwise_y_in
,
conv_ac_pattern
);
// BIAS outputs
GET_IR_NODE_FROM_SUBGRAPH
(
eltwise_out
,
eltwise_out
,
conv_ac_pattern
);
// Get eltwise_y (conv bias) variable
auto
*
eltwise_y_in_tensor
=
scope
->
FindVar
(
eltwise_y_in
->
Name
())
->
GetMutable
<
LoDTensor
>
();
// Get batch norm bias
auto
*
ac_bias_tensor
=
scope
->
FindVar
(
ac_bias
->
Name
())
->
GetMutable
<
LoDTensor
>
();
recompute_bias_and_weights
(
scope
,
conv_weight
,
*
ac_scale
,
*
ac_bias_tensor
,
eltwise_y_in_tensor
);
// Update the elementwise_add node
eltwise
->
Op
()
->
SetAttr
(
"axis"
,
1
);
eltwise
->
Op
()
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
ac_out
->
Name
()}));
GraphSafeRemoveNodes
(
graph
.
get
(),
{
ac_scale
,
ac_bias
,
affine_channel
,
eltwise_out
});
IR_NODE_LINK_TO
(
eltwise
,
ac_out
);
found_conv_ac_count
++
;
};
gpd
(
graph
.
get
(),
handler
);
AddStatis
(
found_conv_ac_count
);
return
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
conv_affine_channel_fuse_pass
,
paddle
::
framework
::
ir
::
ConvAffineChannelFusePass
);
REGISTER_PASS
(
conv_eltwiseadd_affine_channel_fuse_pass
,
paddle
::
framework
::
ir
::
ConvEltwiseAddAffineChannelFusePass
);
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
/*
* Fuse the Conv and ConvAffineChannel.
*/
class
ConvAffineChannelFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvAffineChannelFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"conv_affine_channel_fuse"
};
};
class
ConvEltwiseAddAffineChannelFusePass
:
public
FusePassBase
{
public:
virtual
~
ConvEltwiseAddAffineChannelFusePass
()
{}
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
;
const
std
::
string
name_scope_
{
"conv_eltwiseadd_affine_channel_fuse"
};
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
浏览文件 @
4743c9cd
...
...
@@ -40,18 +40,20 @@ framework::proto::OpDesc PrepareOpDesc(
const
std
::
string
&
output
)
{
auto
proto
=
base_desc
;
framework
::
OpDesc
desc
(
proto
,
nullptr
);
desc
.
SetType
(
"conv2d_fusion"
);
desc
.
SetInput
(
"Bias"
,
{
bias
});
desc
.
SetInput
(
"ResidualData"
,
{
bias1
});
desc
.
SetAttr
(
"activation"
,
activation
);
desc
.
SetOutput
(
"Output"
,
{
output
});
desc
.
SetAttr
(
"is_test"
,
true
);
desc
.
SetAttr
(
"use_cudnn"
,
false
);
desc
.
Flush
();
return
*
desc
.
Proto
();
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvElementwiseAdd2ActFusePass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
const
std
::
string
pattern_name
=
"conv_elementwise_add_act_fuse"
;
const
std
::
string
pattern_name
=
"conv_elementwise_add
2
_act_fuse"
;
FusePassBase
::
Init
(
pattern_name
,
graph
.
get
());
GraphPatternDetector
gpd
;
...
...
@@ -76,22 +78,23 @@ std::unique_ptr<ir::Graph> ConvElementwiseAdd2ActFusePass::ApplyImpl(
framework
::
OpDesc
new_op_desc
(
new_op_proto
,
nullptr
);
// Create a new node for the fused op.
graph
->
CreateOpNode
(
&
new_op_desc
);
auto
*
new_conv_op
=
graph
->
CreateOpNode
(
&
new_op_desc
);
// Link inputs and outputs.
PADDLE_ENFORCE
(
subgraph
.
count
(
x
));
auto
*
conv_in_node
=
subgraph
.
at
(
x
);
IR_NODE_LINK_TO
(
conv_in_node
,
conv_op
);
// Input
IR_NODE_LINK_TO
(
conv_filter
,
conv_op
);
// Filter
IR_NODE_LINK_TO
(
conv_op
,
conv_out
);
// Output
IR_NODE_LINK_TO
(
elementwise_add_in_y
,
conv_op
);
// Bias
IR_NODE_LINK_TO
(
elementwise_add_in_y_1
,
conv_op
);
// Bias
IR_NODE_LINK_TO
(
conv_in_node
,
new_
conv_op
);
// Input
IR_NODE_LINK_TO
(
conv_filter
,
new_
conv_op
);
// Filter
IR_NODE_LINK_TO
(
elementwise_add_in_y
,
new_conv_op
);
// Bias
IR_NODE_LINK_TO
(
elementwise_add_in_y
_1
,
new_conv_op
);
// Bias
IR_NODE_LINK_TO
(
new_conv_op
,
act_out
);
// Output
// Delete the unneeded nodes.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
conv_op
,
elementwise_add_op
,
elementwise_add_op_1
,
elementwise_add_out
});
GraphSafeRemoveNodes
(
graph
.
get
(),
{
conv_op
,
conv_out
,
elementwise_add_op
,
elementwise_add_op_1
,
elementwise_add_out
,
elementwise_add_out_1
,
act_op
});
};
gpd
(
graph
.
get
(),
handler
);
return
graph
;
...
...
paddle/fluid/framework/ir/graph.cc
浏览文件 @
4743c9cd
...
...
@@ -23,66 +23,8 @@ limitations under the License. */
namespace
paddle
{
namespace
framework
{
namespace
ir
{
namespace
{
void
CheckProgram
(
const
ProgramDesc
&
program
)
{
#define _INT(role) static_cast<int>(role)
std
::
map
<
int
,
bool
>
visit
;
for
(
OpDesc
*
op
:
program
.
Block
(
0
).
AllOps
())
{
// For backward compatibility, some program doesn't have role added.
if
(
!
op
->
HasAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
continue
;
int
role_id
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
visit
[
role_id
]
=
true
;
switch
(
role_id
)
{
case
_INT
(
OpRole
::
kForward
):
if
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
))
!=
visit
.
end
())
{
LOG
(
ERROR
)
<<
"Cannot add backward operator before forward operator "
<<
op
->
Type
();
}
break
;
case
_INT
(
OpRole
::
kBackward
):
case
_INT
(
OpRole
::
kBackward
)
|
_INT
(
OpRole
::
kLoss
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kOptimize
))
==
visit
.
end
(),
"Cannot add backward operator %s after optimize operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kForward
)
|
_INT
(
OpRole
::
kLoss
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
)
|
_INT
(
OpRole
::
kLoss
))
==
visit
.
end
(),
"Cannot add backward|loss operator before "
"forward|loss operator %s."
,
op
->
Type
());
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kOptimize
))
==
visit
.
end
(),
"Cannot add forward|loss operator %s after optimize operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kOptimize
):
case
_INT
(
OpRole
::
kOptimize
)
|
_INT
(
OpRole
::
kLRSched
):
PADDLE_ENFORCE
(
visit
.
find
(
_INT
(
OpRole
::
kBackward
))
!=
visit
.
end
(),
"Optimize operators %s must follow backward operator."
,
op
->
Type
());
break
;
case
_INT
(
OpRole
::
kLRSched
):
case
_INT
(
OpRole
::
kDist
):
case
_INT
(
OpRole
::
kRPC
):
case
_INT
(
OpRole
::
kNotSpecified
):
break
;
default:
LOG
(
FATAL
)
<<
"Unknown operator role. Don't add new role because "
"you don't know what you are doing."
;
}
}
#undef _INT
}
}
// namespace
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
CheckProgram
(
program_
);
auto
var_nodes
=
InitFromProgram
(
program_
);
ResolveHazard
(
var_nodes
);
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
4743c9cd
...
...
@@ -1101,9 +1101,7 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
return
out_var
;
}
std
::
unordered_set
<
std
::
string
>
conv_act_set
({
"identity"
,
"sigmoid"
,
"relu"
,
"relu6"
,
"relux"
,
"tanh"
,
"band_pass"
});
std
::
unordered_set
<
std
::
string
>
conv_act_set
({
"identity"
,
"relu"
});
PDNode
*
patterns
::
ConvElementwiseaddAct
::
operator
()(
PDNode
*
conv_in
)
{
conv_in
->
AsInput
();
...
...
@@ -1169,13 +1167,13 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
->
AsInput
();
auto
elementwise_add_out
=
pattern
->
NewNode
(
elementwise_add_out_repr
())
->
assert_is_op_output
(
"elementwise_add"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
X
"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
Y
"
)
->
AsIntermediate
();
auto
elementwise_add_op_1
=
pattern
->
NewNode
(
elementwise_add_op_1_repr
())
->
assert_is_op
(
"elementwise_add"
);
auto
elementwise_add_in_y_1
=
pattern
->
NewNode
(
elementwise_add_in_y_1_repr
())
->
assert_is_op_input
(
"elementwise_add"
,
"
Y
"
)
->
assert_is_op_input
(
"elementwise_add"
,
"
X
"
)
->
AsInput
();
auto
elementwise_add_out_1
=
pattern
->
NewNode
(
elementwise_add_out_1_repr
())
->
assert_is_op_output
(
"elementwise_add"
)
...
...
@@ -1203,8 +1201,8 @@ PDNode *patterns::ConvElementwiseadd2Act::operator()(PDNode *conv_in) {
conv_op
->
LinksFrom
({
conv_in
,
conv_filter
}).
LinksTo
({
conv_out
});
elementwise_add_op
->
LinksFrom
({
conv_out
,
elementwise_add_in_y
})
.
LinksTo
({
elementwise_add_out
});
elementwise_add_op_1
->
LinksFrom
(
{
elementwise_add_out
,
elementwise_add_in_y
_1
});
elementwise_add_op_1
->
LinksFrom
(
{
elementwise_add_out
,
elementwise_add_in_y_1
})
.
LinksTo
({
elementwise_add_out
_1
});
act_op
->
LinksFrom
({
elementwise_add_out_1
}).
LinksTo
({
act_out
});
return
act_out
;
}
...
...
@@ -1236,6 +1234,78 @@ PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) {
return
elementwise_add_out
;
}
PDNode
*
patterns
::
ConvAffineChannel
::
operator
()(
paddle
::
framework
::
ir
::
PDNode
*
conv_input
,
bool
with_eltwise_add
)
{
// Create Operators
conv_input
->
assert_is_op_input
(
"conv2d"
,
"Input"
);
auto
*
conv_op
=
pattern
->
NewNode
(
conv_repr
())
->
assert_is_op
(
"conv2d"
);
PDNode
*
eltwise_op
=
nullptr
;
if
(
with_eltwise_add
)
{
eltwise_op
=
pattern
->
NewNode
(
eltwise_repr
())
->
assert_is_op
(
"elementwise_add"
);
}
auto
*
affine_channel_op
=
pattern
->
NewNode
(
affine_channel_repr
())
->
assert_is_op
(
"affine_channel"
);
// Create variables
// Conv Filter
auto
*
conv_weight_var
=
pattern
->
NewNode
(
conv_weight_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"conv2d"
,
"Filter"
);
auto
*
conv_out_var
=
pattern
->
NewNode
(
conv_out_repr
())
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"conv2d"
);
PDNode
*
eltwise_y_in_var
=
nullptr
;
PDNode
*
eltwise_out_var
=
nullptr
;
if
(
with_eltwise_add
)
{
// Conv output as Bias input
conv_out_var
->
assert_is_op_input
(
"elementwise_add"
,
"X"
);
// Bias
eltwise_y_in_var
=
pattern
->
NewNode
(
eltwise_y_in_repr
())
->
assert_is_op_input
(
"elementwise_add"
,
"Y"
)
->
AsInput
();
eltwise_out_var
=
pattern
->
NewNode
(
eltwise_out_repr
())
->
AsIntermediate
()
->
assert_is_only_output_of_op
(
"elementwise_add"
);
}
else
{
// Conv output as AffineChannel input
conv_out_var
->
assert_is_op_input
(
"affine_channel"
,
"X"
);
}
// AC Scale
auto
*
ac_scale_var
=
pattern
->
NewNode
(
ac_scale_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"affine_channel"
,
"Scale"
);
// AC Bias
auto
*
ac_bias_var
=
pattern
->
NewNode
(
ac_bias_repr
())
->
AsInput
()
->
assert_is_persistable_var
()
->
assert_is_op_input
(
"affine_channel"
,
"Bias"
);
// AC output
auto
*
ac_out_var
=
pattern
->
NewNode
(
ac_out_repr
())
->
AsOutput
()
->
assert_is_op_output
(
"affine_channel"
);
conv_op
->
LinksFrom
({
conv_input
,
conv_weight_var
}).
LinksTo
({
conv_out_var
});
if
(
with_eltwise_add
)
{
eltwise_op
->
LinksFrom
({
conv_out_var
,
eltwise_y_in_var
})
.
LinksTo
({
eltwise_out_var
});
affine_channel_op
->
LinksFrom
({
eltwise_out_var
,
ac_scale_var
,
ac_bias_var
})
.
LinksTo
({
ac_out_var
});
}
else
{
affine_channel_op
->
LinksFrom
({
conv_out_var
,
ac_scale_var
,
ac_bias_var
})
.
LinksTo
({
ac_out_var
});
}
return
ac_out_var
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
4743c9cd
...
...
@@ -734,6 +734,38 @@ struct ConvElementwiseadd : public PatternBase {
PATTERN_DECL_NODE
(
elementwise_add_out
);
};
// Conv with affine_channel
// op: conv + (elementwise_add +) affine_channel
// named nodes:
// conv_weight, conv_out, conv,
// ac_x, ac_scale, ac_bias
// affine_channel, ac_out
struct
ConvAffineChannel
:
public
PatternBase
{
ConvAffineChannel
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"conv_affine_channel"
)
{}
PDNode
*
operator
()(
PDNode
*
conv_input
,
bool
with_eltwise_add
);
// declare operator node's name
PATTERN_DECL_NODE
(
conv
);
PATTERN_DECL_NODE
(
affine_channel
);
PATTERN_DECL_NODE
(
eltwise
);
// ELEMENTWISE_ADD
// CONV inputs
PATTERN_DECL_NODE
(
conv_weight
);
// Filter
// CONV outputs
PATTERN_DECL_NODE
(
conv_out
);
// tmp
// ELTWISE inputs
PATTERN_DECL_NODE
(
eltwise_y_in
);
// ELTWISE outputs
PATTERN_DECL_NODE
(
eltwise_out
);
// tmp
// AC(Affine_Channel) inputs
PATTERN_DECL_NODE
(
ac_scale
);
PATTERN_DECL_NODE
(
ac_bias
);
// AC outputs
PATTERN_DECL_NODE
(
ac_out
);
// Out
};
}
// namespace patterns
// Link two ir::Nodes from each other.
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
4743c9cd
...
...
@@ -215,8 +215,8 @@ class Vector {
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
->
ptr
();
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
dev_ctx
->
Wait
();
}
...
...
@@ -261,8 +261,8 @@ class Vector {
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
paddle
::
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
}
void
ImmutableCPU
()
const
{
...
...
@@ -284,7 +284,7 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
memory
::
AllocationPtr
gpu_
;
mutable
paddle
::
memory
::
AllocationPtr
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/framework/op_proto_maker.cc
浏览文件 @
4743c9cd
...
...
@@ -82,10 +82,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr
<
std
::
string
>
(
OpNamescopeAttrName
(),
"Operator name with namesope."
)
.
SetDefault
(
""
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
OpCreationCallstackAttrName
(),
"Callstack for Op Creatation."
)
.
SetDefault
({});
Validate
();
}
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
4743c9cd
...
...
@@ -47,7 +47,6 @@ class OpProtoAndCheckerMaker {
static
const
char
*
OpRoleAttrName
()
{
return
"op_role"
;
}
static
const
char
*
OpRoleVarAttrName
()
{
return
"op_role_var"
;
}
static
const
char
*
OpNamescopeAttrName
()
{
return
"op_namescope"
;
}
static
const
char
*
OpCreationCallstackAttrName
()
{
return
"op_callstack"
;
}
void
operator
()(
proto
::
OpProto
*
proto
,
OpAttrChecker
*
attr_checker
);
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
4743c9cd
...
...
@@ -23,7 +23,8 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include "glog/logging.h" // For VLOG()
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/framework.pb.h"
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
4743c9cd
...
...
@@ -16,15 +16,10 @@ limitations under the License. */
#include <glog/logging.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
...
...
@@ -162,67 +157,31 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
try
{
if
(
VLOG_IS_ON
(
4
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
if
(
platform
::
is_gpu_place
(
place
))
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
PADDLE_THROW
(
"Cannot run operator on place %s"
,
place
);
#else
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
auto
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
platform
::
SetDeviceId
(
dev_id
);
#endif
}
// The profile has a process-wide mutex, results in serious performance
// issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
}
else
{
RunImpl
(
scope
,
place
);
}
if
(
VLOG_IS_ON
(
3
))
{
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
}
catch
(
platform
::
EnforceNotMet
exception
)
{
if
(
Attrs
().
count
(
"sub_block"
)
!=
0
)
{
throw
exception
;
}
auto
&
callstack
=
Attr
<
std
::
vector
<
std
::
string
>>
(
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
());
}
if
(
callstack
.
empty
())
{
throw
exception
;
}
std
::
ostringstream
sout
;
sout
<<
"Invoke operator "
<<
Type
()
<<
" error.
\n
"
;
sout
<<
"Python Callstacks:
\n
"
;
for
(
auto
&
line
:
callstack
)
{
sout
<<
line
;
}
sout
<<
"C++ Callstacks:
\n
"
;
sout
<<
exception
.
err_str_
;
exception
.
err_str_
=
sout
.
str
();
throw
exception
;
}
catch
(...)
{
std
::
rethrow_exception
(
std
::
current_exception
());
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
}
else
{
RunImpl
(
scope
,
place
);
}
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
}
bool
OperatorBase
::
HasInputs
(
const
std
::
string
&
name
)
const
{
if
(
inputs_
.
find
(
name
)
!=
inputs_
.
end
())
{
return
true
;
}
else
{
return
false
;
}
return
inputs_
.
find
(
name
)
!=
inputs_
.
end
();
}
std
::
string
OperatorBase
::
Input
(
const
std
::
string
&
name
)
const
{
...
...
@@ -421,7 +380,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
return
&
(
var
.
Get
<
SelectedRows
>
().
value
());
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
.
Type
().
name
(
));
ToTypeName
(
var
.
Type
()
));
}
}
...
...
@@ -432,7 +391,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -526,7 +485,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
"should be LoDTensor, but the received type is %s"
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
return
&
(
var
->
Get
<
LoDTensor
>
());
});
return
res
;
...
...
@@ -545,7 +504,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
&
(
var
->
Get
<
LoDTensor
>
());
});
return
res
;
...
...
@@ -574,7 +533,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
var
->
GetMutable
<
LoDTensor
>
();
});
return
res
;
...
...
@@ -816,7 +775,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -839,7 +798,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
...
...
paddle/fluid/framework/operator.h
浏览文件 @
4743c9cd
...
...
@@ -49,6 +49,8 @@ constexpr char kTempVarName[] = "@TEMP@";
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
constexpr
char
kGradVarSuffix
[]
=
"@GRAD"
;
constexpr
size_t
kGradVarSuffixSize
=
5U
;
/// Variables with this suffix are supposed to be filled up with zeros.
constexpr
char
kZeroVarSuffix
[]
=
"@ZERO"
;
...
...
@@ -60,7 +62,11 @@ constexpr char kNewGradSuffix[] = "@NEWGRAD@";
extern
std
::
vector
<
std
::
tuple
<
platform
::
Place
,
LibraryType
>>
kKernelPriority
;
inline
std
::
string
GradVarName
(
const
std
::
string
&
var_name
)
{
return
var_name
+
kGradVarSuffix
;
std
::
string
result
;
result
.
reserve
(
var_name
.
size
()
+
kGradVarSuffixSize
);
result
+=
var_name
;
result
+=
kGradVarSuffix
;
return
result
;
}
proto
::
VarType
::
Type
GetDataTypeOfVar
(
const
Variable
*
var
);
...
...
@@ -110,8 +116,8 @@ class OperatorBase {
bool
HasAttr
(
const
std
::
string
&
name
)
const
{
return
attrs_
.
count
(
name
);
}
template
<
typename
T
>
inline
const
T
&
Attr
(
const
std
::
string
&
name
)
const
{
PADDLE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
"%s should be in AttributeMap"
,
name
);
PADDLE_ENFORCE
(
attrs_
.
find
(
name
)
!=
attrs_
.
end
()
,
"%s should be in AttributeMap"
,
name
);
return
boost
::
get
<
T
>
(
attrs_
.
at
(
name
));
}
const
AttributeMap
&
Attrs
()
const
{
return
attrs_
;
}
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
4743c9cd
...
...
@@ -367,6 +367,7 @@ void ParallelExecutor::BCastParamsToDevices(
if
(
paddle
::
platform
::
is_gpu_place
(
main_tensor
.
place
()))
{
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
vector
<
void
*>
buffers
;
buffers
.
reserve
(
member_
->
places_
.
size
());
size_t
numel
=
main_tensor
.
numel
();
ncclDataType_t
data_type
=
platform
::
ToNCCLDataType
(
main_tensor
.
type
());
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
...
...
@@ -400,9 +401,7 @@ void ParallelExecutor::BCastParamsToDevices(
#endif
}
else
{
platform
::
CPUPlace
cpu
;
for
(
size_t
i
=
0
;
i
<
member_
->
places_
.
size
();
++
i
)
{
if
(
i
==
0
)
continue
;
for
(
size_t
i
=
1
;
i
<
member_
->
places_
.
size
();
++
i
)
{
auto
local_scope
=
member_
->
local_scopes_
[
i
];
auto
*
t
=
local_scope
->
Var
(
var
)
->
GetMutable
<
LoDTensor
>
();
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
4743c9cd
...
...
@@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
Variable
*
Scope
::
VarInternal
(
const
std
::
string
&
name
)
{
auto
*
v
=
FindVarLocally
(
name
);
if
(
v
!=
nullptr
)
return
v
;
v
=
new
Variable
();
vars_
[
name
].
reset
(
v
);
vars_
.
emplace
(
name
,
std
::
unique_ptr
<
Variable
>
(
v
)
);
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
}
...
...
paddle/fluid/framework/scope_pool.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/threadpool.h"
namespace
paddle
{
namespace
framework
{
ScopePool
&
ScopePool
::
Instance
()
{
// NOLINT
static
ScopePool
pool
;
return
pool
;
}
void
ScopePool
::
DeleteScope
(
Scope
*
scope
)
{
delete
scope
;
}
void
ScopePool
::
Insert
(
std
::
unique_ptr
<
Scope
>
&&
s
)
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
scopes_
.
insert
(
s
.
release
());
}
void
ScopePool
::
Remove
(
Scope
*
s
)
{
size_t
has_scope
;
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
has_scope
=
scopes_
.
erase
(
s
);
}
PADDLE_ENFORCE
(
has_scope
>
0
,
"Delete non-existing global scope"
);
DeleteScope
(
s
);
}
ScopePool
::~
ScopePool
()
{
Clear
();
}
void
ScopePool
::
Clear
()
{
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
for
(
auto
*
s
:
scopes_
)
{
DeleteScope
(
s
);
}
scopes_
.
clear
();
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/scope_pool.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mutex> // NOLINT
#include <unordered_set>
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
framework
{
class
ScopePool
{
public:
static
ScopePool
&
Instance
();
// NOLINT
void
Insert
(
std
::
unique_ptr
<
Scope
>
&&
s
);
void
Remove
(
Scope
*
s
);
void
Clear
();
~
ScopePool
();
private:
ScopePool
()
=
default
;
static
void
DeleteScope
(
Scope
*
scope
);
std
::
unordered_set
<
Scope
*>
scopes_
;
std
::
mutex
mtx_
;
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type.h
浏览文件 @
4743c9cd
...
...
@@ -19,52 +19,50 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
>
inline
bool
IsType
(
const
std
::
type_index
&
type
_index
)
{
return
type
_index
==
std
::
type_index
(
typeid
(
T
)
);
inline
bool
IsType
(
const
std
::
type_index
&
type
)
{
return
type
==
typeid
(
T
);
}
inline
proto
::
VarType
::
Type
ToVarType
(
std
::
type_index
type
)
{
if
(
IsType
<
LoDTensor
>
(
type
))
{
return
proto
::
VarType_Type_LOD_TENSOR
;
}
else
if
(
IsType
<
LoDRankTable
>
(
type
))
{
return
proto
::
VarType_Type_LOD_RANK_TABLE
;
}
else
if
(
IsType
<
LoDTensorArray
>
(
type
))
{
return
proto
::
VarType_Type_LOD_TENSOR_ARRAY
;
}
else
if
(
IsType
<
SelectedRows
>
(
type
))
{
return
proto
::
VarType_Type_SELECTED_ROWS
;
}
else
if
(
IsType
<
ReaderHolder
>
(
type
))
{
return
proto
::
VarType_Type_READER
;
}
else
{
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
inline
proto
::
VarType
::
Type
ToVarType
(
int
type
)
{
switch
(
type
)
{
case
proto
::
VarType
::
LOD_TENSOR
:
case
proto
::
VarType
::
SELECTED_ROWS
:
case
proto
::
VarType
::
LOD_RANK_TABLE
:
case
proto
::
VarType
::
LOD_TENSOR_ARRAY
:
case
proto
::
VarType
::
READER
:
return
static_cast
<
proto
::
VarType
::
Type
>
(
type
);
default:
PADDLE_THROW
(
"ToVarType:Unsupported type %d"
,
type
);
}
}
template
<
typename
Visitor
>
inline
void
VisitVarType
(
const
framework
::
Variable
&
var
,
Visitor
visitor
)
{
switch
(
ToVarType
(
var
.
Type
()
))
{
case
proto
::
VarType
_Type_
LOD_TENSOR
:
switch
(
var
.
Type
(
))
{
case
proto
::
VarType
::
LOD_TENSOR
:
visitor
(
var
.
Get
<
LoDTensor
>
());
return
;
case
proto
::
VarType
_Type_
LOD_RANK_TABLE
:
case
proto
::
VarType
::
LOD_RANK_TABLE
:
visitor
(
var
.
Get
<
LoDRankTable
>
());
return
;
case
proto
::
VarType
_Type_
LOD_TENSOR_ARRAY
:
case
proto
::
VarType
::
LOD_TENSOR_ARRAY
:
visitor
(
var
.
Get
<
LoDTensorArray
>
());
return
;
case
proto
::
VarType
_Type_
SELECTED_ROWS
:
case
proto
::
VarType
::
SELECTED_ROWS
:
visitor
(
var
.
Get
<
SelectedRows
>
());
return
;
case
proto
::
VarType
_Type_
READER
:
case
proto
::
VarType
::
READER
:
visitor
(
var
.
Get
<
ReaderHolder
>
());
return
;
default:
PADDLE_THROW
(
"Not supported visit type, %
d"
,
ToVarTyp
e
(
var
.
Type
()));
PADDLE_THROW
(
"Not supported visit type, %
s"
,
ToTypeNam
e
(
var
.
Type
()));
}
}
...
...
paddle/fluid/framework/var_type_inference_test.cc
浏览文件 @
4743c9cd
...
...
@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
op
->
InferVarType
(
prog
.
MutableBlock
(
0
));
ASSERT_EQ
(
proto
::
VarType
_Type_
LOD_TENSOR
,
ASSERT_EQ
(
proto
::
VarType
::
LOD_TENSOR
,
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
GetType
());
}
...
...
paddle/fluid/framework/var_type_traits.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/macros.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace
paddle
{
namespace
framework
{
// Besides registering variable type id, it is helpful to register a
// var_id -> std::type_index map (for example, get type names according to id)
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
VarIdToTypeIndexMapInitializerImpl
{
template
<
typename
MapType1
,
typename
MapType2
>
static
void
Init
(
MapType1
*
id_to_type
,
MapType2
*
type_to_id
)
{
using
Type
=
typename
std
::
tuple_element
<
kStart
,
VarTypeRegistry
::
ArgTuple
>::
type
;
static_assert
(
!
std
::
is_same
<
Type
,
void
>::
value
,
"Type cannot be void"
);
constexpr
int
kId
=
VarTypeTrait
<
Type
>::
kId
;
auto
type
=
std
::
type_index
(
typeid
(
Type
));
PADDLE_ENFORCE
(
id_to_type
->
count
(
kId
)
==
0
,
"Registered duplicate type id %d for type %s"
,
kId
,
type
.
name
());
PADDLE_ENFORCE
(
type_to_id
->
count
(
type
)
==
0
,
"Registered duplicate type_index %s for id %d"
,
type
.
name
(),
kId
);
id_to_type
->
emplace
(
kId
,
type
);
type_to_id
->
emplace
(
type
,
kId
);
VarIdToTypeIndexMapInitializerImpl
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Init
(
id_to_type
,
type_to_id
);
}
};
template
<
int
kStart
,
int
kEnd
>
struct
VarIdToTypeIndexMapInitializerImpl
<
kStart
,
kEnd
,
true
>
{
template
<
typename
MapType1
,
typename
MapType2
>
static
void
Init
(
MapType1
*
,
MapType2
*
)
{}
};
// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map and std::type_index -> var_id map
using
VarIdToTypeIndexMapInitializer
=
VarIdToTypeIndexMapInitializerImpl
<
0
,
VarTypeRegistry
::
kRegisteredTypeNum
,
VarTypeRegistry
::
kRegisteredTypeNum
==
0
>
;
struct
VarIdToTypeIndexMapHolder
{
DISABLE_COPY_AND_ASSIGN
(
VarIdToTypeIndexMapHolder
);
public:
static
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
auto
it
=
Instance
().
id_to_type_map_
.
find
(
var_id
);
PADDLE_ENFORCE
(
it
!=
Instance
().
id_to_type_map_
.
end
(),
"VarId %d is not registered."
,
var_id
);
return
it
->
second
;
}
static
int
ToTypeId
(
const
std
::
type_index
&
type
)
{
auto
it
=
Instance
().
type_to_id_map_
.
find
(
type
);
PADDLE_ENFORCE
(
it
!=
Instance
().
type_to_id_map_
.
end
(),
"VarType %s is not registered."
,
type
.
name
());
return
it
->
second
;
}
private:
VarIdToTypeIndexMapHolder
()
{
VarIdToTypeIndexMapInitializer
::
Init
(
&
id_to_type_map_
,
&
type_to_id_map_
);
}
static
const
VarIdToTypeIndexMapHolder
&
Instance
()
{
static
const
VarIdToTypeIndexMapHolder
instance
;
return
instance
;
}
std
::
unordered_map
<
int
,
std
::
type_index
>
id_to_type_map_
;
std
::
unordered_map
<
std
::
type_index
,
int
>
type_to_id_map_
;
};
}
// namespace detail
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
return
detail
::
VarIdToTypeIndexMapHolder
::
ToTypeIndex
(
var_id
);
}
const
char
*
ToTypeName
(
int
var_id
)
{
return
ToTypeIndex
(
var_id
).
name
();
}
int
ToTypeId
(
const
std
::
type_index
&
type
)
{
return
detail
::
VarIdToTypeIndexMapHolder
::
ToTypeId
(
type
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits.h
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#ifndef _WIN32
#include <nccl.h>
#endif
#endif
// Users should add forward declarations here
namespace
paddle
{
namespace
platform
{
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
class
Communicator
;
#endif
#endif
}
// namespace platform
namespace
framework
{
class
Tensor
;
class
LoDTensor
;
class
SelectedRows
;
class
LoDRankTable
;
class
ReaderHolder
;
class
Scope
;
}
// namespace framework
namespace
operators
{
template
<
typename
T
>
class
AlgorithmsCache
;
class
CudnnRNNCache
;
namespace
reader
{
class
LoDTensorBlockingQueueHolder
;
}
// namespace reader
}
// namespace operators
}
// namespace paddle
namespace
paddle
{
namespace
framework
{
const
char
*
ToTypeName
(
int
var_id
);
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
);
int
ToTypeId
(
const
std
::
type_index
&
type
);
namespace
detail
{
template
<
bool
kStop
,
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
,
typename
...
Args
>
struct
TypePosFinderImpl
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
TypePosFinderImpl
<
kStart
+
2
==
kEnd
,
kStart
+
1
,
kEnd
,
T1
,
Args
...
>::
kPos
;
};
template
<
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
>
struct
TypePosFinderImpl
<
true
,
kStart
,
kEnd
,
T1
,
T2
>
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
-
1
;
};
// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template
<
typename
T
,
typename
...
Args
>
struct
TypePosFinder
{
static
constexpr
int
kPos
=
TypePosFinderImpl
<
sizeof
...(
Args
)
==
1
,
0
,
sizeof
...(
Args
),
T
,
Args
...
>::
kPos
;
};
template
<
typename
...
Args
>
struct
VarTypeRegistryImpl
{
static
constexpr
size_t
kRegisteredTypeNum
=
sizeof
...(
Args
);
using
ArgTuple
=
std
::
tuple
<
Args
...
>
;
// TypePos() returns the position in which T is inside Args...
// If T is not inside Args..., return -1
template
<
typename
T
>
static
constexpr
int
TypePos
()
{
return
TypePosFinder
<
T
,
Args
...
>::
kPos
;
}
// IsRegistered() returns whether T is registered inside RegistryImpl
template
<
typename
T
>
static
constexpr
bool
IsRegistered
()
{
return
TypePos
<
T
>
()
>=
0
;
}
};
}
// namespace detail
#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) \
template <> \
struct VarTypeTrait<type> { \
static_assert(VarTypeRegistry::IsRegistered<type>(), \
"Must be registered type"); \
using Type = type; \
static constexpr int kId = static_cast<int>(proto_id); \
}
/**
* The following codes are designed to register variable types.
* Only registered types can be stored in Variable.
* This registry mechanism is designed to speed up Variable.
*
* Caution: If you want to add more var types, please consider carefully
* whether you really need to add it.
*/
// Users should add other variable types below.
// Paddle would generate unique Ids for each registered variable types.
using
VarTypeRegistry
=
detail
::
VarTypeRegistryImpl
<
Tensor
,
LoDTensor
,
SelectedRows
,
std
::
vector
<
Scope
*>
,
LoDRankTable
,
LoDTensorArray
,
platform
::
PlaceList
,
ReaderHolder
,
std
::
string
,
Scope
*
,
std
::
map
<
size_t
,
Tensor
>
,
operators
::
reader
::
LoDTensorBlockingQueueHolder
,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
ncclUniqueId
,
platform
::
Communicator
,
#endif
operators
::
AlgorithmsCache
<
cudnnConvolutionFwdAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdDataAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdFilterAlgo_t
>
,
operators
::
CudnnRNNCache
,
#endif
int
,
float
>
;
template
<
typename
T
>
struct
VarTypeTrait
{
static_assert
(
VarTypeRegistry
::
IsRegistered
<
T
>
(),
"Must be registered type"
);
using
Type
=
T
;
/**
* Unique VarType Id generation.
*
* The auto-generated id should not be the same as any protobuf id defined in
* framework.proto. Therefore, we generate id by adding the type pos and
* maximum protobuf id (i.e., proto::VarType::TUPLE).
*
* However, we may need more protobuf id in the future.
* To avoid changing this auto id generation algorithm frequently, we
* generate id by adding the type pos and twice of maximum protobuf id (i.e.,
* proto::VarType::TUPLE).
*/
static
constexpr
int
kId
=
VarTypeRegistry
::
TypePos
<
T
>
()
+
static_cast
<
int
>
(
proto
::
VarType
::
TUPLE
)
*
2
;
};
// Users should set some of variable type ids to be what is defined in
// framework.proto below
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensor
,
proto
::
VarType
::
LOD_TENSOR
);
REG_PROTO_VAR_TYPE_TRAIT
(
SelectedRows
,
proto
::
VarType
::
SELECTED_ROWS
);
REG_PROTO_VAR_TYPE_TRAIT
(
std
::
vector
<
Scope
*>
,
proto
::
VarType
::
STEP_SCOPES
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDRankTable
,
proto
::
VarType
::
LOD_RANK_TABLE
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensorArray
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
);
REG_PROTO_VAR_TYPE_TRAIT
(
platform
::
PlaceList
,
proto
::
VarType
::
PLACE_LIST
);
REG_PROTO_VAR_TYPE_TRAIT
(
ReaderHolder
,
proto
::
VarType
::
READER
);
REG_PROTO_VAR_TYPE_TRAIT
(
int
,
proto
::
VarType
::
INT32
);
REG_PROTO_VAR_TYPE_TRAIT
(
float
,
proto
::
VarType
::
FP32
);
/** End of variable type registration */
template
<
typename
T
>
inline
constexpr
bool
IsRegisteredVarType
()
{
return
VarTypeRegistry
::
IsRegistered
<
T
>
();
}
#undef REG_PROTO_VAR_TYPE_TRAIT
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits_test.cc
0 → 100644
浏览文件 @
4743c9cd
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace
paddle
{
namespace
framework
{
template
<
int
kPos
,
int
kEnd
,
bool
kStop
>
struct
TypeIndexChecker
{
template
<
typename
SetType1
,
typename
SetType2
>
static
void
Check
(
SetType1
*
var_id_set
,
SetType2
*
type_index_set
)
{
using
Type
=
typename
std
::
tuple_element
<
kPos
,
VarTypeRegistry
::
ArgTuple
>::
type
;
static_assert
(
std
::
is_same
<
typename
VarTypeTrait
<
Type
>::
Type
,
Type
>::
value
,
"Type must be the same"
);
constexpr
auto
kId
=
VarTypeTrait
<
Type
>::
kId
;
std
::
type_index
actual_type
(
typeid
(
Type
));
EXPECT_EQ
(
std
::
string
(
ToTypeName
(
kId
)),
std
::
string
(
actual_type
.
name
()));
EXPECT_EQ
(
ToTypeIndex
(
kId
),
actual_type
);
EXPECT_EQ
(
ToTypeId
(
actual_type
),
kId
);
EXPECT_EQ
(
ToTypeIndex
(
ToTypeId
(
actual_type
)),
actual_type
);
EXPECT_EQ
(
ToTypeId
(
ToTypeIndex
(
kId
)),
kId
);
EXPECT_TRUE
(
var_id_set
->
count
(
kId
)
==
0
);
// NOLINT
EXPECT_TRUE
(
type_index_set
->
count
(
actual_type
)
==
0
);
// NOLINT
var_id_set
->
insert
(
kId
);
type_index_set
->
insert
(
std
::
type_index
(
typeid
(
Type
)));
TypeIndexChecker
<
kPos
+
1
,
kEnd
,
kPos
+
1
==
kEnd
>::
Check
(
var_id_set
,
type_index_set
);
}
};
template
<
int
kPos
,
int
kEnd
>
struct
TypeIndexChecker
<
kPos
,
kEnd
,
true
>
{
template
<
typename
SetType1
,
typename
SetType2
>
static
void
Check
(
SetType1
*
,
SetType2
*
)
{}
};
TEST
(
var_type_traits
,
check_no_duplicate_registry
)
{
constexpr
size_t
kRegisteredNum
=
VarTypeRegistry
::
kRegisteredTypeNum
;
std
::
unordered_set
<
int
>
var_id_set
;
std
::
unordered_set
<
std
::
type_index
>
type_index_set
;
TypeIndexChecker
<
0
,
kRegisteredNum
,
kRegisteredNum
==
0
>::
Check
(
&
var_id_set
,
&
type_index_set
);
}
template
<
typename
T
>
bool
CheckVarId
(
int
proto_id
)
{
static_assert
(
std
::
is_same
<
typename
VarTypeTrait
<
T
>::
Type
,
T
>::
value
,
"Type must be the same"
);
return
VarTypeTrait
<
T
>::
kId
==
proto_id
;
}
TEST
(
var_type_traits
,
check_proto_type_id
)
{
ASSERT_TRUE
(
CheckVarId
<
LoDTensor
>
(
proto
::
VarType
::
LOD_TENSOR
));
ASSERT_TRUE
(
CheckVarId
<
SelectedRows
>
(
proto
::
VarType
::
SELECTED_ROWS
));
ASSERT_TRUE
(
CheckVarId
<
std
::
vector
<
Scope
*>>
(
proto
::
VarType
::
STEP_SCOPES
));
ASSERT_TRUE
(
CheckVarId
<
LoDRankTable
>
(
proto
::
VarType
::
LOD_RANK_TABLE
));
ASSERT_TRUE
(
CheckVarId
<
LoDTensorArray
>
(
proto
::
VarType
::
LOD_TENSOR_ARRAY
));
ASSERT_TRUE
(
CheckVarId
<
platform
::
PlaceList
>
(
proto
::
VarType
::
PLACE_LIST
));
ASSERT_TRUE
(
CheckVarId
<
ReaderHolder
>
(
proto
::
VarType
::
READER
));
ASSERT_TRUE
(
CheckVarId
<
int
>
(
proto
::
VarType
::
INT32
));
ASSERT_TRUE
(
CheckVarId
<
float
>
(
proto
::
VarType
::
FP32
));
ASSERT_EQ
(
proto
::
VarType_Type_LOD_TENSOR
,
proto
::
VarType
::
LOD_TENSOR
);
ASSERT_EQ
(
proto
::
VarType_Type_SELECTED_ROWS
,
proto
::
VarType
::
SELECTED_ROWS
);
ASSERT_EQ
(
proto
::
VarType_Type_STEP_SCOPES
,
proto
::
VarType
::
STEP_SCOPES
);
ASSERT_EQ
(
proto
::
VarType_Type_LOD_RANK_TABLE
,
proto
::
VarType
::
LOD_RANK_TABLE
);
ASSERT_EQ
(
proto
::
VarType_Type_LOD_TENSOR_ARRAY
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
);
ASSERT_EQ
(
proto
::
VarType_Type_PLACE_LIST
,
proto
::
VarType
::
PLACE_LIST
);
ASSERT_EQ
(
proto
::
VarType_Type_READER
,
proto
::
VarType
::
READER
);
ASSERT_EQ
(
proto
::
VarType_Type_FEED_MINIBATCH
,
proto
::
VarType
::
FEED_MINIBATCH
);
ASSERT_EQ
(
proto
::
VarType_Type_FETCH_LIST
,
proto
::
VarType
::
FETCH_LIST
);
ASSERT_EQ
(
proto
::
VarType_Type_RAW
,
proto
::
VarType
::
RAW
);
ASSERT_EQ
(
proto
::
VarType_Type_TUPLE
,
proto
::
VarType
::
TUPLE
);
ASSERT_EQ
(
proto
::
VarType_Type_INT32
,
proto
::
VarType
::
INT32
);
ASSERT_EQ
(
proto
::
VarType_Type_FP32
,
proto
::
VarType
::
FP32
);
}
TEST
(
var_type_traits
,
test_registry
)
{
using
Registry
=
detail
::
VarTypeRegistryImpl
<
int8_t
,
int32_t
,
size_t
,
double
>
;
ASSERT_TRUE
(
Registry
::
TypePos
<
int8_t
>
()
==
0
);
ASSERT_TRUE
(
Registry
::
TypePos
<
int32_t
>
()
==
1
);
ASSERT_TRUE
(
Registry
::
TypePos
<
size_t
>
()
==
2
);
ASSERT_TRUE
(
Registry
::
TypePos
<
double
>
()
==
3
);
ASSERT_TRUE
(
Registry
::
TypePos
<
float
>
()
==
-
1
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/variable.h
浏览文件 @
4743c9cd
...
...
@@ -18,7 +18,7 @@
#include <typeindex>
#include <typeinfo>
#include "paddle/fluid/
platform/enforce
.h"
#include "paddle/fluid/
framework/var_type_traits
.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -27,10 +27,14 @@ class Variable {
public:
template
<
typename
T
>
const
T
&
Get
()
const
{
static_assert
(
IsRegisteredVarType
<
T
>
(),
"Not registered type. Please register T inside var_type_traits.h"
);
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Variable must hold some thing"
);
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
return
*
static_cast
<
const
T
*>
(
holder_
->
Ptr
());
}
...
...
@@ -39,61 +43,61 @@ class Variable {
template
<
typename
T
>
T
*
GetMutable
()
{
if
(
!
holder_
)
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
(
new
T
()
));
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
());
}
else
{
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
}
return
static_cast
<
T
*>
(
holder_
->
Ptr
());
}
template
<
typename
T
>
bool
IsType
()
const
{
return
holder_
!=
nullptr
&&
std
::
type_index
(
typeid
(
T
))
==
std
::
type_index
(
holder_
->
Type
());
return
holder_
&&
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
;
}
void
Clear
()
{
holder_
.
reset
();
}
std
::
type_index
Type
()
const
{
int
Type
()
const
{
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Must hold memory"
);
return
holder_
->
Type
();
}
private:
struct
Placeholder
{
virtual
~
Placeholder
()
{}
virtual
const
std
::
type_info
&
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
virtual
~
Placeholder
()
=
default
;
inline
int
Type
()
const
{
return
type_
;
}
inline
const
void
*
Ptr
()
const
{
return
ptr_
;
}
inline
void
*
Ptr
()
{
return
ptr_
;
}
protected:
inline
void
Init
(
void
*
p
,
int
type
)
{
ptr_
=
p
;
type_
=
type
;
}
void
*
ptr_
;
int
type_
;
};
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
explicit
PlaceholderImpl
(
T
*
ptr
)
:
ptr_
(
ptr
),
type_
(
typeid
(
T
))
{}
virtual
const
std
::
type_info
&
Type
()
const
{
return
type_
;
}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
ptr_
.
get
()
);
}
static_assert
(
IsRegisteredVarType
<
T
>
(),
"Not registered type. Please register T inside var_type_traits.h"
);
PlaceholderImpl
()
{
this
->
Init
(
&
obj_
,
VarTypeTrait
<
T
>::
kId
);
}
std
::
unique_ptr
<
T
>
ptr_
;
const
std
::
type_info
&
type
_
;
private:
T
obj
_
;
};
std
::
unique_ptr
<
Placeholder
>
holder_
;
// pointers to a PlaceholderImpl object indeed.
// name_ is only meaningful with a Scope and accessible by it.
//
// NOTE: Please don't expose name_ by adding methods like
// Variable::Name or Scope::VarName! A variable could have a human
// readable name or an auto-generated scope-unique name. In the
// former case, the caller knows the name and doesn't need to access
// the name; in the latter case, the variable should be identified
// by its address but not the unreadable name.
friend
class
Scope
;
const
std
::
string
*
name_
;
// pointers to a PlaceholderImpl object indeed.
std
::
unique_ptr
<
Placeholder
>
holder_
;
};
}
// namespace framework
...
...
paddle/fluid/framework/variable_test.cc
浏览文件 @
4743c9cd
...
...
@@ -16,27 +16,28 @@
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
TEST
(
Variable
,
GetMutable
)
{
using
paddle
::
framework
::
Variable
;
struct
Tensor
{
int
content_
;
};
namespace
paddle
{
namespace
framework
{
TEST
(
Variable
,
GetMutable
)
{
std
::
unique_ptr
<
Variable
>
v
(
new
Variable
());
Tensor
*
t
=
v
->
GetMutable
<
Tensor
>
();
t
->
content_
=
1234
;
auto
*
t
=
v
->
GetMutable
<
std
::
string
>
();
*
t
=
"1234"
;
const
Tensor
&
tt
=
v
->
Get
<
Tensor
>
();
EXPECT_EQ
(
1234
,
tt
.
content_
);
const
auto
&
tt
=
v
->
Get
<
std
::
string
>
();
EXPECT_EQ
(
"1234"
,
tt
);
try
{
v
->
GetMutable
<
std
::
string
>
();
v
->
GetMutable
<
Tensor
>
();
}
catch
(
std
::
exception
&
e
)
{
return
;
}
EXPECT_TRUE
(
false
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
4743c9cd
...
...
@@ -69,17 +69,17 @@ void TestWord2vecPrediction(const std::string& model_path) {
std
::
vector
<
PaddleTensor
>
outputs
;
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
_EQ
(
outputs
.
size
(),
1UL
);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
PADDLE_ENFORCE
_EQ
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
static_cast
<
size_t
>
(
5UL
),
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())
[
i
];
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
]
<<
" result: "
<<
result
[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
4743c9cd
...
...
@@ -127,6 +127,7 @@ struct Argument {
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
DECL_ARGUMENT_FIELD
(
tensorrt_max_batch_size
,
TensorRtMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_workspace_size
,
TensorRtWorkspaceSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_min_subgraph_size
,
TensorRtMinSubgraphSize
,
int
);
// The program transformed by IR analysis phase.
DECL_ARGUMENT_UNIQUE_FIELD
(
ir_analyzed_program
,
IrAnalyzedProgram
,
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
4743c9cd
...
...
@@ -75,6 +75,8 @@ void IRPassManager::CreatePasses(Argument *argument,
argument
->
tensorrt_node_teller_ptr
());
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
pass
->
Set
(
"min_subgraph_size"
,
new
int
(
argument
->
tensorrt_min_subgraph_size
()));
}
// graph_ = pass->Apply(std::move(graph_));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
4743c9cd
...
...
@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include
"paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
#include
<algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -36,7 +38,8 @@ std::unique_ptr<framework::ir::Graph> analysis::TensorRtSubgraphPass::ApplyImpl(
auto
teller
=
Get
<
SubgraphDetector
::
NodeInsideSubgraphTeller
>
(
"tensorrt_node_teller"
);
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
2
/*min subgraph size*/
);
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
Get
<
int
>
(
"min_subgraph_size"
)
/*min subgraph size*/
);
fuser
();
for
(
auto
*
node
:
graph
->
Nodes
())
{
...
...
@@ -197,10 +200,26 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
unordered_set
<
Node
*>
&
nodes
)
{
// We can judge whether a variable is a parameter by
// its presistable property, but sometimes the presistable
// of the feed op output is true, so we have to identify it.
std
::
vector
<
std
::
string
>
feed_outputs
;
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsOp
())
continue
;
std
::
string
op_type
=
node
->
Op
()
->
Type
();
if
(
op_type
==
"feed"
)
{
std
::
vector
<
std
::
string
>
output_names
=
node
->
Op
()
->
OutputArgumentNames
();
std
::
copy
(
output_names
.
begin
(),
output_names
.
end
(),
std
::
back_inserter
(
feed_outputs
));
}
}
std
::
vector
<
std
::
string
>
parameters
;
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsVar
())
continue
;
if
(
node
->
Var
()
->
Persistable
())
{
if
(
node
->
Var
()
->
Persistable
()
&&
std
::
find
(
feed_outputs
.
begin
(),
feed_outputs
.
end
(),
node
->
Name
())
==
feed_outputs
.
end
())
{
parameters
.
push_back
(
node
->
Name
());
}
}
...
...
@@ -215,4 +234,5 @@ REGISTER_PASS(tensorrt_subgraph_pass,
paddle
::
inference
::
analysis
::
TensorRtSubgraphPass
)
.
RequirePassAttr
(
"tensorrt_node_teller"
)
.
RequirePassAttr
(
"max_batch_size"
)
.
RequirePassAttr
(
"workspace_size"
);
.
RequirePassAttr
(
"workspace_size"
)
.
RequirePassAttr
(
"min_subgraph_size"
);
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
4743c9cd
...
...
@@ -57,6 +57,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
use_tensorrt_
=
other
.
use_tensorrt_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_min_subgraph_size_
=
other
.
tensorrt_min_subgraph_size_
;
model_from_memory_
=
other
.
model_from_memory_
;
if
(
use_gpu
)
{
...
...
@@ -89,6 +90,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
use_tensorrt_
=
other
.
use_tensorrt_
;
tensorrt_max_batchsize_
=
other
.
tensorrt_max_batchsize_
;
tensorrt_workspace_size_
=
other
.
tensorrt_workspace_size_
;
tensorrt_min_subgraph_size_
=
other
.
tensorrt_min_subgraph_size_
;
model_from_memory_
=
other
.
model_from_memory_
;
pass_builder_
=
std
::
move
(
other
.
pass_builder_
);
...
...
@@ -105,12 +107,14 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
}
void
contrib
::
AnalysisConfig
::
EnableTensorRtEngine
(
int
workspace_size
,
int
max_batch_size
)
{
int
max_batch_size
,
int
min_subgraph_size
)
{
use_tensorrt_
=
true
;
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
// Append after the infer_clean pass.
pass_builder
()
->
InsertPass
(
1
,
"tensorrt_subgraph_pass"
);
tensorrt_min_subgraph_size_
=
min_subgraph_size
;
// Append after the conv+affine_channel fuse pass.
pass_builder
()
->
InsertPass
(
3
,
"tensorrt_subgraph_pass"
);
}
void
contrib
::
AnalysisConfig
::
SetModelBuffer
(
const
char
*
prog_buffer
,
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
4743c9cd
...
...
@@ -328,6 +328,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
argument_
.
SetUseTensorRT
(
true
);
argument_
.
SetTensorRtWorkspaceSize
(
config_
.
tensorrt_workspace_size_
);
argument_
.
SetTensorRtMaxBatchSize
(
config_
.
tensorrt_max_batchsize_
);
argument_
.
SetTensorRtMinSubgraphSize
(
config_
.
tensorrt_min_subgraph_size_
);
}
if
(
config_
.
use_mkldnn_
)
{
...
...
paddle/fluid/inference/api/details/reset_tensor_array.cc
浏览文件 @
4743c9cd
...
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// TODO(Superjomn) should avoid the case when a TensorArray is a
// parameter.
if
(
var_name
==
"feed"
||
var_name
==
"fetch"
)
continue
;
if
(
var
->
Type
()
==
typeid
(
framework
::
LoDTensorArray
))
{
if
(
var
->
IsType
<
framework
::
LoDTensorArray
>
(
))
{
VLOG
(
4
)
<<
"collect "
<<
var_name
;
arrays_
.
push_back
(
var
->
GetMutable
<
framework
::
LoDTensorArray
>
());
}
...
...
paddle/fluid/inference/api/details/reset_tensor_array.h
浏览文件 @
4743c9cd
...
...
@@ -27,8 +27,11 @@ namespace details {
// training phase.
struct
TensorArrayBatchCleaner
{
TensorArrayBatchCleaner
()
{
valid_types_
.
insert
(
typeid
(
framework
::
Tensor
));
valid_types_
.
insert
(
typeid
(
framework
::
LoDTensor
));
constexpr
auto
kTensorId
=
framework
::
VarTypeTrait
<
framework
::
Tensor
>::
kId
;
constexpr
auto
kLoDTensorId
=
framework
::
VarTypeTrait
<
framework
::
LoDTensor
>::
kId
;
valid_types_
.
insert
(
kTensorId
);
valid_types_
.
insert
(
kLoDTensorId
);
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
...
...
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
bool
no_tensor_flag_
{
true
};
std
::
vector
<
framework
::
LoDTensorArray
*>
arrays_
;
std
::
unordered_set
<
std
::
type_index
>
valid_types_
;
std
::
unordered_set
<
int
>
valid_types_
;
std
::
unordered_set
<
framework
::
Variable
*>
no_tensor_vars_
;
};
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
4743c9cd
...
...
@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
template
<
typename
T
>
static
void
TensorAssignData
(
PaddleTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
,
const
std
::
vector
<
size_t
>
&
lod
)
{
int
size
=
lod
[
lod
.
size
()
-
1
];
tensor
->
shape
.
assign
({
size
,
1
});
tensor
->
lod
.
assign
({
lod
});
TensorAssignData
(
tensor
,
data
);
}
template
<
typename
T
>
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
4743c9cd
...
...
@@ -49,7 +49,7 @@ struct AnalysisConfig : public NativeConfig {
bool
use_feed_fetch_ops
{
true
};
void
EnableTensorRtEngine
(
int
workspace_size
=
1
<<
20
,
int
max_batch_size
=
1
);
int
max_batch_size
=
1
,
int
min_subgraph_size
=
3
);
bool
use_tensorrt
()
const
{
return
use_tensorrt_
;
}
void
EnableMKLDNN
();
...
...
@@ -69,8 +69,19 @@ struct AnalysisConfig : public NativeConfig {
bool
use_tensorrt_
{
false
};
bool
use_mkldnn_
{
false
};
std
::
unordered_set
<
std
::
string
>
mkldnn_enabled_op_types_
;
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int
tensorrt_workspace_size_
;
// While TensorRT allows an engine optimized for a given max batch size
// to run at any smaller size, the performance for those smaller
// sizes may not be as well-optimized. Therefore, Max batch is best
// equivalent to the runtime batch size.
int
tensorrt_max_batchsize_
;
// We transform the Ops that can be converted into TRT layer in the model,
// and aggregate these Ops into subgraphs for TRT execution.
// We set this variable to control the minimum number of nodes in the
// subgraph, 3 as default value.
int
tensorrt_min_subgraph_size_
{
3
};
std
::
unique_ptr
<
PassStrategy
>
pass_builder_
;
bool
model_from_memory_
{
false
};
};
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
4743c9cd
...
...
@@ -118,11 +118,13 @@ class GpuPassStrategy : public PassStrategy {
public:
GpuPassStrategy
()
:
PassStrategy
({})
{
passes_
.
assign
({
"infer_clean_graph_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_elementwise_add_act_fuse_pass"
,
//
"conv_elementwise_add2_act_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
"infer_clean_graph_pass"
,
//
"conv_affine_channel_fuse_pass"
,
//
"conv_eltwiseadd_affine_channel_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_elementwise_add_act_fuse_pass"
,
//
"conv_elementwise_add2_act_fuse_pass"
,
//
"conv_elementwise_add_fuse_pass"
,
//
});
}
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
4743c9cd
...
...
@@ -108,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
inference_analysis_api_test_with_fake_data
(
test_analyzer_resnet50
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/resnet50"
analyzer_resnet50_tester.cc
"resnet50_model.tar.gz"
)
# seq_pool1
inference_analysis_api_test_with_fake_data
(
test_analyzer_seq_pool1
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/seq_pool1"
analyzer_seq_pool1_tester.cc
"seq_pool1.tar.gz"
)
# mobilenet with depthwise_conv op
inference_analysis_api_test_with_fake_data
(
test_analyzer_mobilenet_depthwise_conv
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/mobilenet_depthwise_conv"
analyzer_resnet50_tester.cc
"mobilenet_model.tar.gz"
)
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
4743c9cd
...
...
@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
auto
one_batch
=
data
->
NextBatch
();
PaddleTensor
input_tensor
;
input_tensor
.
name
=
"word"
;
input_tensor
.
shape
.
assign
({
static_cast
<
int
>
(
one_batch
.
data
.
size
()),
1
});
input_tensor
.
lod
.
assign
({
one_batch
.
lod
});
input_tensor
.
dtype
=
PaddleDType
::
INT64
;
TensorAssignData
<
int64_t
>
(
&
input_tensor
,
{
one_batch
.
data
});
TensorAssignData
<
int64_t
>
(
&
input_tensor
,
{
one_batch
.
data
}
,
one_batch
.
lod
);
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
}
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
浏览文件 @
4743c9cd
...
...
@@ -19,11 +19,9 @@ namespace inference {
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
query
_data_all
,
title_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
query
,
title
;
std
::
vector
<
size_t
>
lod1
,
lod2
;
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
...
...
@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
query_data_all
.
size
())
{
data
.
query_data_all
.
assign
(
query_data_all
.
begin
()
+
batch_iter
,
query_data_all
.
begin
()
+
batch_end
);
data
.
title_data_all
.
assign
(
title_data_all
.
begin
()
+
batch_iter
,
title_data_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
lod1
.
push_back
(
0
);
data
.
lod2
.
push_back
(
0
);
CHECK
(
!
data
.
query_data_all
.
empty
());
CHECK
(
!
data
.
title_data_all
.
empty
());
CHECK_EQ
(
data
.
query_data_all
.
size
(),
data
.
title_data_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
query_data_all
.
size
();
j
++
)
{
// calculate lod
data
.
lod1
.
push_back
(
data
.
lod1
.
back
()
+
data
.
query_data_all
[
j
].
size
());
data
.
lod2
.
push_back
(
data
.
lod2
.
back
()
+
data
.
title_data_all
[
j
].
size
());
}
if
(
batch_end
<=
query
.
size
())
{
GetInputPerBatch
(
query
,
&
data
.
query
,
&
data
.
lod1
,
batch_iter
,
batch_end
);
GetInputPerBatch
(
title
,
&
data
.
title
,
&
data
.
lod2
,
batch_iter
,
batch_end
);
}
batch_iter
+=
batch_size
;
return
data
;
...
...
@@ -67,8 +52,8 @@ struct DataRecord {
// load title data
std
::
vector
<
int64_t
>
title_data
;
split_to_int64
(
data
[
1
],
' '
,
&
title_data
);
query
_data_all
.
push_back
(
std
::
move
(
query_data
));
title
_data_all
.
push_back
(
std
::
move
(
title_data
));
query
.
push_back
(
std
::
move
(
query_data
));
title
.
push_back
(
std
::
move
(
title_data
));
}
num_samples
=
num_lines
;
}
...
...
@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_query_tensor
.
name
=
"left"
;
lod_title_tensor
.
name
=
"right"
;
auto
one_batch
=
data
->
NextBatch
();
int
size1
=
one_batch
.
lod1
[
one_batch
.
lod1
.
size
()
-
1
];
// token batch size
int
size2
=
one_batch
.
lod2
[
one_batch
.
lod2
.
size
()
-
1
];
// token batch size
lod_query_tensor
.
shape
.
assign
({
size1
,
1
});
lod_query_tensor
.
lod
.
assign
({
one_batch
.
lod1
});
lod_title_tensor
.
shape
.
assign
({
size2
,
1
});
lod_title_tensor
.
lod
.
assign
({
one_batch
.
lod2
});
// assign data
TensorAssignData
<
int64_t
>
(
&
lod_query_tensor
,
one_batch
.
query
_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_title_tensor
,
one_batch
.
title
_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_query_tensor
,
one_batch
.
query
,
one_batch
.
lod1
);
TensorAssignData
<
int64_t
>
(
&
lod_title_tensor
,
one_batch
.
title
,
one_batch
.
lod2
);
// Set inputs.
input_slots
->
assign
({
lod_query_tensor
,
lod_title_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
4743c9cd
...
...
@@ -19,11 +19,9 @@ namespace inference {
using
contrib
::
AnalysisConfig
;
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
word
_data_all
,
mention_data_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
word
,
mention
;
std
::
vector
<
size_t
>
lod
;
// two inputs have the same lod info.
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
...
...
@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
word_data_all
.
size
())
{
data
.
word_data_all
.
assign
(
word_data_all
.
begin
()
+
batch_iter
,
word_data_all
.
begin
()
+
batch_end
);
data
.
mention_data_all
.
assign
(
mention_data_all
.
begin
()
+
batch_iter
,
mention_data_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
lod
.
push_back
(
0
);
CHECK
(
!
data
.
word_data_all
.
empty
());
CHECK
(
!
data
.
mention_data_all
.
empty
());
CHECK_EQ
(
data
.
word_data_all
.
size
(),
data
.
mention_data_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
word_data_all
.
size
();
j
++
)
{
// calculate lod
data
.
lod
.
push_back
(
data
.
lod
.
back
()
+
data
.
word_data_all
[
j
].
size
());
}
if
(
batch_end
<=
word
.
size
())
{
GetInputPerBatch
(
word
,
&
data
.
word
,
&
data
.
lod
,
batch_iter
,
batch_end
);
GetInputPerBatch
(
mention
,
&
data
.
mention
,
&
data
.
lod
,
batch_iter
,
batch_end
);
}
batch_iter
+=
batch_size
;
return
data
;
...
...
@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data
std
::
vector
<
int64_t
>
mention_data
;
split_to_int64
(
data
[
3
],
' '
,
&
mention_data
);
word
_data_all
.
push_back
(
std
::
move
(
word_data
));
mention
_data_all
.
push_back
(
std
::
move
(
mention_data
));
word
.
push_back
(
std
::
move
(
word_data
));
mention
.
push_back
(
std
::
move
(
mention_data
));
}
num_samples
=
num_lines
;
}
...
...
@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_word_tensor
.
name
=
"word"
;
lod_mention_tensor
.
name
=
"mention"
;
auto
one_batch
=
data
->
NextBatch
();
int
size
=
one_batch
.
lod
[
one_batch
.
lod
.
size
()
-
1
];
// token batch size
lod_word_tensor
.
shape
.
assign
({
size
,
1
});
lod_word_tensor
.
lod
.
assign
({
one_batch
.
lod
});
lod_mention_tensor
.
shape
.
assign
({
size
,
1
});
lod_mention_tensor
.
lod
.
assign
({
one_batch
.
lod
});
// assign data
TensorAssignData
<
int64_t
>
(
&
lod_word_tensor
,
one_batch
.
word_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_mention_tensor
,
one_batch
.
mention_data_all
);
TensorAssignData
<
int64_t
>
(
&
lod_word_tensor
,
one_batch
.
word
,
one_batch
.
lod
);
TensorAssignData
<
int64_t
>
(
&
lod_mention_tensor
,
one_batch
.
mention
,
one_batch
.
lod
);
// Set inputs.
input_slots
->
assign
({
lod_word_tensor
,
lod_mention_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
4743c9cd
...
...
@@ -18,12 +18,9 @@ namespace paddle {
namespace
inference
{
struct
DataRecord
{
std
::
vector
<
std
::
vector
<
int64_t
>>
title1_all
,
title2_all
,
title3_all
,
l1_all
;
std
::
vector
<
std
::
vector
<
int64_t
>>
title1
,
title2
,
title3
,
l1
;
std
::
vector
<
size_t
>
title1_lod
,
title2_lod
,
title3_lod
,
l1_lod
;
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
std
::
vector
<
size_t
>
lod1
,
lod2
,
lod3
,
l1_lod
;
size_t
batch_iter
{
0
},
batch_size
{
1
},
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
...
...
@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord
data
;
size_t
batch_end
=
batch_iter
+
batch_size
;
// NOTE skip the final batch, if no enough data is provided.
if
(
batch_end
<=
title1_all
.
size
())
{
data
.
title1_all
.
assign
(
title1_all
.
begin
()
+
batch_iter
,
title1_all
.
begin
()
+
batch_end
);
data
.
title2_all
.
assign
(
title2_all
.
begin
()
+
batch_iter
,
title2_all
.
begin
()
+
batch_end
);
data
.
title3_all
.
assign
(
title3_all
.
begin
()
+
batch_iter
,
title3_all
.
begin
()
+
batch_end
);
data
.
l1_all
.
assign
(
l1_all
.
begin
()
+
batch_iter
,
l1_all
.
begin
()
+
batch_end
);
// Prepare LoDs
data
.
title1_lod
.
push_back
(
0
);
data
.
title2_lod
.
push_back
(
0
);
data
.
title3_lod
.
push_back
(
0
);
data
.
l1_lod
.
push_back
(
0
);
CHECK
(
!
data
.
title1_all
.
empty
());
CHECK
(
!
data
.
title2_all
.
empty
());
CHECK
(
!
data
.
title3_all
.
empty
());
CHECK
(
!
data
.
l1_all
.
empty
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title2_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
title3_all
.
size
());
CHECK_EQ
(
data
.
title1_all
.
size
(),
data
.
l1_all
.
size
());
for
(
size_t
j
=
0
;
j
<
data
.
title1_all
.
size
();
j
++
)
{
data
.
title1
.
push_back
(
data
.
title1_all
[
j
]);
data
.
title2
.
push_back
(
data
.
title2_all
[
j
]);
data
.
title3
.
push_back
(
data
.
title3_all
[
j
]);
data
.
l1
.
push_back
(
data
.
l1_all
[
j
]);
// calculate lod
data
.
title1_lod
.
push_back
(
data
.
title1_lod
.
back
()
+
data
.
title1_all
[
j
].
size
());
data
.
title2_lod
.
push_back
(
data
.
title2_lod
.
back
()
+
data
.
title2_all
[
j
].
size
());
data
.
title3_lod
.
push_back
(
data
.
title3_lod
.
back
()
+
data
.
title3_all
[
j
].
size
());
data
.
l1_lod
.
push_back
(
data
.
l1_lod
.
back
()
+
data
.
l1_all
[
j
].
size
());
}
if
(
batch_end
<=
title1
.
size
())
{
GetInputPerBatch
(
title1
,
&
data
.
title1
,
&
data
.
lod1
,
batch_iter
,
batch_end
);
GetInputPerBatch
(
title2
,
&
data
.
title2
,
&
data
.
lod2
,
batch_iter
,
batch_end
);
GetInputPerBatch
(
title3
,
&
data
.
title3
,
&
data
.
lod3
,
batch_iter
,
batch_end
);
GetInputPerBatch
(
l1
,
&
data
.
l1
,
&
data
.
l1_lod
,
batch_iter
,
batch_end
);
}
batch_iter
+=
batch_size
;
return
data
;
...
...
@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data
std
::
vector
<
int64_t
>
l1_data
;
split_to_int64
(
data
[
3
],
' '
,
&
l1_data
);
title1
_all
.
push_back
(
std
::
move
(
title1_data
));
title2
_all
.
push_back
(
std
::
move
(
title2_data
));
title3
_all
.
push_back
(
std
::
move
(
title3_data
));
l1
_all
.
push_back
(
std
::
move
(
l1_data
));
title1
.
push_back
(
std
::
move
(
title1_data
));
title2
.
push_back
(
std
::
move
(
title2_data
));
title3
.
push_back
(
std
::
move
(
title3_data
));
l1
.
push_back
(
std
::
move
(
l1_data
));
}
num_samples
=
num_lines
;
}
...
...
@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
title3_tensor
.
name
=
"title3"
;
l1_tensor
.
name
=
"l1"
;
auto
one_batch
=
data
->
NextBatch
();
int
title1_size
=
one_batch
.
title1_lod
[
one_batch
.
title1_lod
.
size
()
-
1
];
title1_tensor
.
shape
.
assign
({
title1_size
,
1
});
title1_tensor
.
lod
.
assign
({
one_batch
.
title1_lod
});
int
title2_size
=
one_batch
.
title2_lod
[
one_batch
.
title2_lod
.
size
()
-
1
];
title2_tensor
.
shape
.
assign
({
title2_size
,
1
});
title2_tensor
.
lod
.
assign
({
one_batch
.
title2_lod
});
int
title3_size
=
one_batch
.
title3_lod
[
one_batch
.
title3_lod
.
size
()
-
1
];
title3_tensor
.
shape
.
assign
({
title3_size
,
1
});
title3_tensor
.
lod
.
assign
({
one_batch
.
title3_lod
});
int
l1_size
=
one_batch
.
l1_lod
[
one_batch
.
l1_lod
.
size
()
-
1
];
l1_tensor
.
shape
.
assign
({
l1_size
,
1
});
l1_tensor
.
lod
.
assign
({
one_batch
.
l1_lod
});
// assign data
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
);
TensorAssignData
<
int64_t
>
(
&
title1_tensor
,
one_batch
.
title1
,
one_batch
.
lod1
);
TensorAssignData
<
int64_t
>
(
&
title2_tensor
,
one_batch
.
title2
,
one_batch
.
lod2
);
TensorAssignData
<
int64_t
>
(
&
title3_tensor
,
one_batch
.
title3
,
one_batch
.
lod3
);
TensorAssignData
<
int64_t
>
(
&
l1_tensor
,
one_batch
.
l1
,
one_batch
.
l1_lod
);
// Set inputs.
input_slots
->
assign
({
title1_tensor
,
title2_tensor
,
title3_tensor
,
l1_tensor
});
for
(
auto
&
tensor
:
*
input_slots
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
0 → 100644
浏览文件 @
4743c9cd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
param_file
=
FLAGS_infer_model
+
"/params"
;
cfg
->
prog_file
=
FLAGS_infer_model
+
"/model"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
specify_input_name
=
true
;
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_paddle_num_threads
);
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
std
::
vector
<
std
::
string
>
feed_names
=
{
"slot10000_embed"
,
"slot10001_embed"
,
"slot10004_embed"
,
"slot10005_embed"
,
"slot10008_embed"
,
"slot10009_embed"
,
"slot10012_embed"
,
"slot10013_embed"
,
"slot10108_embed"
,
"slot13324_embed"
,
"slot13325_embed"
,
"slot13326_embed"
,
"slot13327_embed"
,
"slot13328_embed"
,
"slot13329_embed"
,
"slot13330_embed"
,
"slot13331_embed"
,
"slot15501_embed"
,
"slot15502_embed"
,
"slot15503_embed"
,
"slot15504_embed"
,
"slot15505_embed"
,
"slot15506_embed"
,
"slot15507_embed"
,
"slot15508_embed"
,
"slot15516_embed"
,
"slot15519_embed"
,
"slot15523_embed"
,
"slot15531_embed"
,
"slot15533_embed"
,
"slot15548_embed"
,
"slot15564_embed"
,
"slot15565_embed"
,
"slot15566_embed"
,
"slot15570_embed"
,
"slot15571_embed"
,
"slot15572_embed"
,
"slot15573_embed"
,
"slot15574_embed"
,
"slot15575_embed"
,
"slot15576_embed"
,
"slot15577_embed"
,
"slot15579_embed"
,
"slot15581_embed"
,
"slot15582_embed"
,
"slot15583_embed"
,
"slot15584_embed"
,
"slot5016_embed"
,
"slot5021_embed"
,
"slot6002_embed"
,
"slot6003_embed"
,
"slot6004_embed"
,
"slot6005_embed"
,
"slot6006_embed"
,
"slot6007_embed"
,
"slot6008_embed"
,
"slot6009_embed"
,
"slot6011_embed"
,
"slot6014_embed"
,
"slot6015_embed"
,
"slot6023_embed"
,
"slot6024_embed"
,
"slot6025_embed"
,
"slot6027_embed"
,
"slot6029_embed"
,
"slot6031_embed"
,
"slot6034_embed"
,
"slot6035_embed"
,
"slot6036_embed"
,
"slot6037_embed"
,
"slot6039_embed"
,
"slot6048_embed"
,
"slot6050_embed"
,
"slot6058_embed"
,
"slot6059_embed"
,
"slot6060_embed"
,
"slot6066_embed"
,
"slot6067_embed"
,
"slot6068_embed"
,
"slot6069_embed"
,
"slot6070_embed"
,
"slot6071_embed"
,
"slot6072_embed"
,
"slot6073_embed"
,
"slot6182_embed"
,
"slot6183_embed"
,
"slot6184_embed"
,
"slot6185_embed"
,
"slot6186_embed"
,
"slot6188_embed"
,
"slot6189_embed"
,
"slot6190_embed"
,
"slot6201_embed"
,
"slot6202_embed"
,
"slot6203_embed"
,
"slot6247_embed"
,
"slot6248_embed"
,
"slot6250_embed"
,
"slot6251_embed"
,
"slot6807_embed"
,
"slot6808_embed"
,
"slot6809_embed"
,
"slot6810_embed"
,
"slot6811_embed"
,
"slot6812_embed"
,
"slot6813_embed"
,
"slot6814_embed"
,
"slot6815_embed"
,
"slot6816_embed"
,
"slot6817_embed"
,
"slot6818_embed"
,
"slot6819_embed"
,
"slot6820_embed"
,
"slot6822_embed"
,
"slot6823_embed"
,
"slot6826_embed"
,
"slot7002_embed"
,
"slot7003_embed"
,
"slot7004_embed"
,
"slot7005_embed"
,
"slot7006_embed"
,
"slot7008_embed"
,
"slot7009_embed"
,
"slot7010_embed"
,
"slot7011_embed"
,
"slot7013_embed"
,
"slot7014_embed"
,
"slot7015_embed"
,
"slot7016_embed"
,
"slot7017_embed"
,
"slot7019_embed"
,
"slot7100_embed"
,
"slot7506_embed"
,
"slot7507_embed"
,
"slot7514_embed"
,
"slot7515_embed"
,
"slot7516_embed"
};
SetFakeImageInput
(
inputs
,
FLAGS_infer_model
,
true
,
"model"
,
"params"
,
&
feed_names
);
}
// Easy for profiling independently.
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
TEST
(
Analyzer_seq_pool1
,
profile
)
{
profile
();
}
// Check the fuse status
TEST
(
Analyzer_seq_pool1
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
int
num_ops
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
()),
&
num_ops
);
LOG
(
INFO
)
<<
"num_ops: "
<<
num_ops
;
EXPECT_EQ
(
num_ops
,
314
);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
4743c9cd
...
...
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
const
std
::
string
&
dirname
,
bool
is_combined
=
true
,
std
::
string
model_filename
=
"model"
,
std
::
string
params_filename
=
"params"
)
{
std
::
string
params_filename
=
"params"
,
const
std
::
vector
<
std
::
string
>
*
feed_names
=
nullptr
)
{
// Set fake_image_data
PADDLE_ENFORCE_EQ
(
FLAGS_test_all_data
,
0
,
"Only have single batch of data."
);
std
::
vector
<
std
::
vector
<
int64_t
>>
feed_target_shapes
=
GetFeedTargetShapes
(
...
...
@@ -146,29 +147,47 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
os
<<
"}
\n
"
;
}
LOG
(
INFO
)
<<
os
.
str
();
int
dim1
=
feed_target_shapes
[
0
][
1
];
int
dim2
=
feed_target_shapes
[
0
][
2
];
int
dim3
=
feed_target_shapes
[
0
][
3
];
PaddleTensor
input
;
std
::
vector
<
int
>
shape
({
FLAGS_batch_size
,
dim1
,
dim2
,
dim3
});
input
.
shape
=
shape
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
// fill input data, for profile easily, do not use random data here.
size_t
size
=
FLAGS_batch_size
*
dim1
*
dim2
*
dim3
;
input
.
data
.
Resize
(
size
*
sizeof
(
float
));
float
*
input_data
=
static_cast
<
float
*>
(
input
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
*
(
input_data
+
i
)
=
static_cast
<
float
>
(
i
)
/
size
;
if
(
feed_names
)
{
PADDLE_ENFORCE_EQ
(
feed_names
->
size
(),
feed_target_shapes
.
size
());
}
std
::
vector
<
PaddleTensor
>
input_slots
(
feed_target_shapes
.
size
());
for
(
size_t
i
=
0
;
i
<
feed_target_shapes
.
size
();
++
i
)
{
const
auto
&
feed_shape
=
feed_target_shapes
[
i
];
auto
&
input
=
input_slots
[
i
];
std
::
vector
<
int
>
shape
({
FLAGS_batch_size
});
for
(
size_t
s
=
1
;
s
<
feed_shape
.
size
();
++
s
)
{
shape
.
push_back
(
static_cast
<
int
>
(
feed_shape
[
s
]));
}
if
(
feed_names
)
{
input
.
name
=
(
*
feed_names
)[
i
];
}
input
.
shape
=
shape
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
input
.
data
.
Resize
(
len
*
sizeof
(
float
));
input
.
lod
.
assign
({{
0
,
static_cast
<
size_t
>
(
FLAGS_batch_size
)}});
float
*
input_data
=
static_cast
<
float
*>
(
input
.
data
.
data
());
// fill input data, for profile easily, do not use random data here.
for
(
size_t
j
=
0
;
j
<
len
;
++
j
)
{
*
(
input_data
+
j
)
=
static_cast
<
float
>
(
j
)
/
len
;
}
}
std
::
vector
<
PaddleTensor
>
input_slots
;
input_slots
.
assign
({
input
});
(
*
inputs
).
emplace_back
(
input_slots
);
}
void
GetInputPerBatch
(
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
in
,
std
::
vector
<
std
::
vector
<
int64_t
>>
*
out
,
std
::
vector
<
size_t
>
*
lod
,
size_t
batch_iter
,
size_t
batch_end
)
{
lod
->
clear
();
lod
->
push_back
(
0
);
for
(
auto
it
=
in
.
begin
()
+
batch_iter
;
it
<
in
.
begin
()
+
batch_end
;
it
++
)
{
out
->
push_back
(
*
it
);
lod
->
push_back
(
lod
->
back
()
+
(
*
it
).
size
());
// calculate lod
}
}
void
TestOneThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
...
...
paddle/fluid/inference/tests/test.cmake
浏览文件 @
4743c9cd
...
...
@@ -3,14 +3,16 @@ set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
"A path setting inference demo download directories."
)
function
(
inference_download install_dir url filename
)
message
(
STATUS
"Download inference test stuff from
${
url
}
/
${
filename
}
"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
install_dir
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
install_dir
}
&& wget -q
${
url
}
/
${
filename
}
"
)
file
(
DOWNLOAD
"
${
url
}
/
${
filename
}
"
"
${
install_dir
}
/
${
filename
}
"
)
message
(
STATUS
"finish downloading
${
filename
}
"
)
endfunction
()
function
(
inference_download_and_uncompress install_dir url filename
)
inference_download
(
${
install_dir
}
${
url
}
${
filename
}
)
execute_process
(
COMMAND bash -c
"cd
${
install_dir
}
&& tar xzf
${
filename
}
"
)
execute_process
(
COMMAND
${
CMAKE_COMMAND
}
-E tar xzf
${
install_dir
}
/
${
filename
}
WORKING_DIRECTORY
${
install_dir
}
)
endfunction
()
set
(
WORD2VEC_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/word2vec"
)
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
4743c9cd
...
...
@@ -46,7 +46,7 @@ endif()
register_operators
(
EXCLUDES py_func_op warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
AND NOT WIN32
)
if
(
WITH_GPU
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
4743c9cd
...
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
}
else
{
PADDLE_THROW
(
"Unexpected branch, input variable type is %s"
,
in_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
in_var
->
Type
()
));
}
PADDLE_ENFORCE_NOT_NULL
(
input
);
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
4743c9cd
...
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
auto
&
og_inside
=
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
),
"Cannot find inside gradient %s"
,
inside_og_name
);
if
(
framework
::
IsType
<
framework
::
LoDTensor
>
(
og_outside
.
Type
()
))
{
if
(
og_outside
.
IsType
<
framework
::
LoDTensor
>
(
))
{
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
ShareDataWith
(
outside_tensor
);
}
else
if
(
framework
::
IsType
<
framework
::
LoDTensorArray
>
(
og_outside
.
Type
()))
{
}
else
if
(
og_outside
.
IsType
<
framework
::
LoDTensorArray
>
())
{
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
...
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
var
->
IsType
<
LoDTensor
>
(),
"Currently the type of var only can be LoDTensorArray, "
"or LoDTensor, but the received var[%s] is %s."
,
inside_grad_name
,
var
->
Type
().
name
(
));
inside_grad_name
,
framework
::
ToTypeName
(
var
->
Type
()
));
if
(
var
->
IsType
<
LoDTensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
...
...
paddle/fluid/operators/conv_fusion_op.cu.cc
浏览文件 @
4743c9cd
...
...
@@ -22,7 +22,7 @@ DECLARE_bool(cudnn_exhaustive_search);
namespace
paddle
{
namespace
operators
{
#if CUDNN_VERSION >= 7
001
#if CUDNN_VERSION >= 7
100
using
Tensor
=
framework
::
Tensor
;
using
ScopedTensorDescriptor
=
platform
::
ScopedTensorDescriptor
;
using
ScopedFilterDescriptor
=
platform
::
ScopedFilterDescriptor
;
...
...
@@ -161,9 +161,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE
(
workspace_size_in_bytes
,
workspace_size_limit
,
"workspace_size to be allocated exceeds the limit"
);
if
((
activation
==
"identity"
)
&&
(
algo
!=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
)
&&
(
!
residual
))
{
if
((
activation
==
"identity"
)
&&
(
!
residual
))
{
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
// But test in some case, the speed is slower, change to use
...
...
@@ -204,7 +202,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace paddle
#if CUDNN_VERSION >= 7
001
#if CUDNN_VERSION >= 7
100
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
conv2d_fusion
,
ops
::
CUDNNConvFusionOpKernel
<
float
>
,
ops
::
CUDNNConvFusionOpKernel
<
double
>
);
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
浏览文件 @
4743c9cd
...
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -22,239 +22,6 @@ namespace operators {
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
Tensor
reserve_data_
;
Tensor
workspace_data_
;
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
framework
::
ExecutionContext
&
ctx
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
template
<
typename
T
>
class
CudnnLSTMGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
auto
input_w_numel
=
w
->
numel
();
auto
batch_size
=
x
->
dims
()[
1
];
cudnn_rnn_cache
->
init
(
handle
,
ctx
,
max_len
,
batch_size
,
input
_size
,
hidden_size
,
num_layers
,
dropout_prob
,
is_bidirec
,
seed
,
input_w_numel
);
cudnn_rnn_cache
->
init
(
handle
,
ctx
.
GetPlace
(),
max_len
,
batch
_size
,
input_size
,
hidden_size
,
num_layers
,
dropout_prob
,
is_bidirec
,
seed
,
input_w_numel
);
}
auto
run_seq_len
=
x
->
dims
()[
0
];
...
...
paddle/fluid/operators/cudnn_rnn_cache.h
0 → 100644
浏览文件 @
4743c9cd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
framework
::
Tensor
reserve_data_
;
framework
::
Tensor
workspace_data_
;
framework
::
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
platform
::
Place
&
place
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
place
);
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
place
);
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
place
);
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/cum_op.h
浏览文件 @
4743c9cd
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/detail/safe_ref.h
浏览文件 @
4743c9cd
...
...
@@ -25,7 +25,7 @@ namespace detail {
*/
template
<
typename
T
,
typename
...
ARGS
>
inline
T
&
Ref
(
T
*
ptr
,
ARGS
&&
...
args
)
{
PADDLE_ENFORCE
(
ptr
!=
nullptr
,
args
...
);
PADDLE_ENFORCE
(
ptr
!=
nullptr
,
::
paddle
::
string
::
Sprintf
(
args
...)
);
return
*
ptr
;
}
...
...
paddle/fluid/operators/distributed/proto_encoder_helper.h
浏览文件 @
4743c9cd
...
...
@@ -84,7 +84,9 @@ class ProtoEncodeHelper {
~
ProtoEncodeHelper
()
{
#define REPLACE_ENFORCE_GLOG 1
// Make sure callers didn't do operations that went over max_size promised
paddle
::
platform
::
throw_on_error
(
p_
<=
limit_
);
if
(
paddle
::
platform
::
is_error
(
p_
<=
limit_
))
{
paddle
::
platform
::
throw_on_error
(
p_
<=
limit_
);
}
#undef REPLACE_ENFORCE_GLOG
}
...
...
paddle/fluid/operators/distributed_ops/CMakeLists.txt
浏览文件 @
4743c9cd
...
...
@@ -33,7 +33,7 @@ register_operators(EXCLUDES gen_nccl_id_op DEPS ${DISTRIBUTE_DEPS})
if
(
WITH_GPU AND NOT WIN32
)
set
(
DISTRIBUTE_DEPS
${
DISTRIBUTE_DEPS
}
nccl_common
)
op_library
(
gen_nccl_id_op
${
DISTRIBUTE_DEPS
}
nccl_common
)
op_library
(
gen_nccl_id_op
DEPS
${
DISTRIBUTE_DEPS
}
nccl_common
)
endif
()
set
(
OPERATOR_DEPS
${
OPERATOR_DEPS
}
${
DISTRIBUTE_DEPS
}
PARENT_SCOPE
)
...
...
paddle/fluid/operators/distributed_ops/split_ids_op.h
浏览文件 @
4743c9cd
...
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
}
else
{
PADDLE_THROW
(
"% should be LoDTensor or SelectedRows, but the received type is %s"
,
ctx
.
Inputs
(
"Ids"
)[
0
],
ids_var
->
Type
().
name
(
));
ctx
.
Inputs
(
"Ids"
)[
0
],
framework
::
ToTypeName
(
ids_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.h
浏览文件 @
4743c9cd
...
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
}
else
{
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
x_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_var
->
Type
()
));
}
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/lrn_mkldnn_op.cc
浏览文件 @
4743c9cd
...
...
@@ -50,8 +50,8 @@ template <typename T>
class
LRNMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
std
::
is_same
<
T
,
float
>::
value
,
"MKLDNN LRN must use float data."
);
const
bool
is_float_type
=
std
::
is_same
<
T
,
float
>::
value
;
PADDLE_ENFORCE
(
is_float_type
,
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"MKLDNN LRN must use CPUPlace."
);
...
...
@@ -132,8 +132,8 @@ template <typename T>
class
LRNMKLDNNGradOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
std
::
is_same
<
T
,
float
>::
value
,
"MKLDNN LRN must use float data."
);
const
bool
is_float_type
=
std
::
is_same
<
T
,
float
>::
value
;
PADDLE_ENFORCE
(
is_float_type
,
"MKLDNN LRN must use float data."
);
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"MKLDNN LRN must use CPUPlace."
);
PADDLE_ENFORCE
(
...
...
paddle/fluid/operators/optimizers/adadelta_op.h
浏览文件 @
4743c9cd
...
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
avg_squared_grad_out_tensor
=
...
...
paddle/fluid/operators/optimizers/adagrad_op.h
浏览文件 @
4743c9cd
...
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/adam_op.h
浏览文件 @
4743c9cd
...
...
@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
detail
::
Ref
;
...
...
paddle/fluid/operators/optimizers/adamax_op.h
浏览文件 @
4743c9cd
...
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
浏览文件 @
4743c9cd
...
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
4743c9cd
...
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
*
param_out
=
ctx
.
Output
<
Tensor
>
(
"ParamOut"
);
auto
*
sq_accum_out
=
ctx
.
Output
<
Tensor
>
(
"SquaredAccumOut"
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
4743c9cd
...
...
@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
PADDLE_THROW
(
string
::
Sprintf
(
"MomentumOp only supports LoDTensor or SelectedRows "
"gradient, but the received Variable Type is %s"
,
grad_var
->
Type
().
name
(
)));
framework
::
ToTypeName
(
grad_var
->
Type
()
)));
}
}
};
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
4743c9cd
...
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
...
...
paddle/fluid/operators/split_lod_tensor_op.cc
浏览文件 @
4743c9cd
...
...
@@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
}
auto
*
mask_data
=
cpu_mask
->
data
<
bool
>
();
std
::
vector
<
std
::
vector
<
CopyRange
>>
copy_ranges
(
mask_dim
[
0
]
);
std
::
vector
<
std
::
vector
<
CopyRange
>>
copy_ranges
(
2
);
// set out_true/out_false lod
for
(
size_t
t
=
0
;
t
<
2
;
t
++
)
{
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
浏览文件 @
4743c9cd
...
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
4743c9cd
...
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
}
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
x_vars
[
0
]
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_vars
[
0
]
->
Type
()
));
}
};
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
4743c9cd
...
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
4743c9cd
...
...
@@ -140,68 +140,72 @@ struct EOFException : public std::exception {
#define LIKELY(condition) (condition)
#endif
inline
bool
is_error
(
bool
stat
)
{
return
!
stat
;
}
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
bool
stat
,
const
Args
&
...
args
)
{
if
(
UNLIKELY
(
!
(
stat
)))
{
#ifndef REPLACE_ENFORCE_GLOG
throw
std
::
runtime_error
(
string
::
Sprintf
(
args
...));
throw
std
::
runtime_error
(
string
::
Sprintf
(
args
...));
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
}
}
#ifdef PADDLE_WITH_CUDA
inline
bool
is_error
(
cudaError_t
e
)
{
return
UNLIKELY
(
e
);
}
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cudaError_t
e
,
const
Args
&
...
args
)
{
if
(
UNLIKELY
(
e
))
{
#ifndef REPLACE_ENFORCE_GLOG
throw
thrust
::
system_error
(
e
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
throw
thrust
::
system_error
(
e
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
}
}
inline
bool
is_error
(
curandStatus_t
stat
)
{
return
stat
!=
CURAND_STATUS_SUCCESS
;
}
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
curandStatus_t
stat
,
const
Args
&
...
args
)
{
if
(
stat
!=
CURAND_STATUS_SUCCESS
)
{
#ifndef REPLACE_ENFORCE_GLOG
throw
thrust
::
system_error
(
cudaErrorLaunchFailure
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
throw
thrust
::
system_error
(
cudaErrorLaunchFailure
,
thrust
::
cuda_category
(),
string
::
Sprintf
(
args
...));
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
}
}
inline
bool
is_error
(
cudnnStatus_t
stat
)
{
return
stat
!=
CUDNN_STATUS_SUCCESS
;
}
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cudnnStatus_t
stat
,
const
Args
&
...
args
)
{
if
(
stat
==
CUDNN_STATUS_SUCCESS
)
{
return
;
}
else
{
#ifndef REPLACE_ENFORCE_GLOG
throw
std
::
runtime_error
(
platform
::
dynload
::
cudnnGetErrorString
(
stat
)
+
string
::
Sprintf
(
args
...));
throw
std
::
runtime_error
(
platform
::
dynload
::
cudnnGetErrorString
(
stat
)
+
string
::
Sprintf
(
args
...));
#else
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
LOG
(
FATAL
)
<<
string
::
Sprintf
(
args
...);
#endif
}
}
inline
bool
is_error
(
cublasStatus_t
stat
)
{
return
stat
!=
CUBLAS_STATUS_SUCCESS
;
}
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
cublasStatus_t
stat
,
const
Args
&
...
args
)
{
std
::
string
err
;
if
(
stat
==
CUBLAS_STATUS_SUCCESS
)
{
return
;
}
else
if
(
stat
==
CUBLAS_STATUS_NOT_INITIALIZED
)
{
if
(
stat
==
CUBLAS_STATUS_NOT_INITIALIZED
)
{
err
=
"CUBLAS: not initialized, "
;
}
else
if
(
stat
==
CUBLAS_STATUS_ALLOC_FAILED
)
{
err
=
"CUBLAS: alloc failed, "
;
...
...
@@ -254,21 +258,49 @@ inline void throw_on_error(T e) {
#define PADDLE_THROW(...) \
throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__)
#define __PADDLE_THROW_ERROR_I(_, _9, _8, _7, _6, _5, _4, _3, _2, X_, ...) X_;
#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \
::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG));
#define __PADDLE_THROW_ON_ERROR(COND, ...) \
__PADDLE_THROW_ERROR_I( \
__VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
__THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__))
#define __PADDLE_UNARY_COMPARE(COND, ...) \
do { \
auto __cond = COND; \
if (UNLIKELY(::paddle::platform::is_error(__cond))) { \
__PADDLE_THROW_ON_ERROR(__cond, __VA_ARGS__); \
} \
} while (0)
#ifndef REPLACE_ENFORCE_GLOG
#define
PADDLE_ENFORCE(...)
\
#define
__PADDLE_ENFORCE_I(COND, ...)
\
do { \
try { \
::paddle::platform::throw_on_error(__VA_ARGS__);
\
__PADDLE_UNARY_COMPARE(COND, __VA_ARGS__);
\
} catch (...) { \
throw ::paddle::platform::EnforceNotMet(std::current_exception(), \
__FILE__, __LINE__); \
} \
} while (
false
)
} while (
0
)
#else
#define
PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(
__VA_ARGS__);
#define
__PADDLE_ENFORCE_I(COND, ...) __PADDLE_UNARY_COMPARE(COND,
__VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG
#define __PADDLE_ENFORCE(__args) __PADDLE_ENFORCE_I __args
#define PADDLE_ENFORCE(...) __PADDLE_ENFORCE((__VA_ARGS__))
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
4743c9cd
...
...
@@ -37,6 +37,25 @@ TEST(ENFORCE, FAILED) {
HasPrefix
(
StringPiece
(
error
.
what
()),
"Enforce is not ok 123 at all"
));
}
EXPECT_TRUE
(
caught_exception
);
caught_exception
=
false
;
try
{
PADDLE_ENFORCE
(
false
,
"Enforce is not ok at all"
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
EXPECT_TRUE
(
HasPrefix
(
StringPiece
(
error
.
what
()),
"Enforce is not ok at all"
));
}
EXPECT_TRUE
(
caught_exception
);
caught_exception
=
false
;
try
{
PADDLE_ENFORCE
(
false
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
EXPECT_NE
(
std
::
string
(
error
.
what
()).
find
(
" at "
),
0
);
}
EXPECT_TRUE
(
caught_exception
);
}
TEST
(
ENFORCE
,
NO_ARG_OK
)
{
...
...
paddle/fluid/platform/float16_test.cc
浏览文件 @
4743c9cd
...
...
@@ -12,6 +12,7 @@ limitations under the License. */
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/init.h"
...
...
paddle/fluid/platform/float16_test.cu
浏览文件 @
4743c9cd
...
...
@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <bitset>
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
4743c9cd
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer
)
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer
scope_pool
)
if
(
WITH_PYTHON
)
list
(
APPEND PYBIND_DEPS py_func_op
)
endif
()
...
...
paddle/fluid/pybind/const_value.cc
浏览文件 @
4743c9cd
...
...
@@ -49,9 +49,6 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker
.
def
(
"kOpNameScopeAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpNamescopeAttrName
);
op_proto_and_checker_maker
.
def
(
"kOpCreationCallstackAttrName"
,
framework
::
OpProtoAndCheckerMaker
::
OpCreationCallstackAttrName
);
}
}
// namespace pybind
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
4743c9cd
...
...
@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope_pool.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h"
...
...
@@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) {
return
paddle
::
operators
::
AppendPythonCallableObjectAndReturnId
(
py_obj
);
});
m
.
add_object
(
"_cleanup"
,
py
::
capsule
([]()
{
ScopePool
::
Instance
().
Clear
();
}));
py
::
class_
<
imperative
::
VarBase
,
PyVarBase
>
(
m
,
"VarBase"
,
R"DOC()DOC"
)
.
def
(
py
::
init
<>
())
.
def
(
"_run_backward"
,
...
...
@@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle.
},
py
::
return_value_policy
::
copy
);
py
::
class_
<
Scope
>
(
m
,
"Scope"
,
R"DOC(
py
::
class_
<
Scope
>
(
m
,
"
_
Scope"
,
R"DOC(
Scope is an association of a name to Variable. All variables belong to Scope.
Variables in a parent scope can be retrieved from local scope.
...
...
@@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle.
param.set(param_array, place)
)DOC"
)
.
def
(
"_remove_from_pool"
,
[](
Scope
&
self
)
{
ScopePool
::
Instance
().
Remove
(
&
self
);
})
.
def
(
"var"
,
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
return
self
.
Var
(
name
);
},
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
py
::
init
<>
())
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
py
::
return_value_policy
::
reference
)
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
auto
*
s
=
new
Scope
();
ScopePool
::
Instance
().
Insert
(
std
::
unique_ptr
<
Scope
>
(
s
));
return
s
;
},
py
::
return_value_policy
::
reference
);
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python.
m
.
def
(
"get_all_op_protos"
,
[]()
->
std
::
vector
<
py
::
bytes
>
{
...
...
paddle/fluid/string/printf.h
浏览文件 @
4743c9cd
...
...
@@ -87,7 +87,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
template
<
typename
...
Args
>
std
::
string
Sprintf
(
const
Args
&
...
args
)
{
std
::
ostringstream
oss
;
Fprintf
(
oss
,
"
"
);
Fprintf
(
oss
,
"
%s"
,
args
...
);
return
oss
.
str
();
}
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
4743c9cd
...
...
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#=================================================
# Utils
#=================================================
...
...
@@ -418,13 +417,6 @@ EOF
else
ctest
--output-on-failure
fi
# make install should also be test when unittest
make
install
-j
`
nproc
`
pip
install
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
paddle version
fi
fi
}
...
...
@@ -922,6 +914,7 @@ function main() {
;;
assert_api
)
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
assert_api_spec_approvals
;;
test_inference
)
gen_capi_package
...
...
@@ -946,6 +939,15 @@ function main() {
run_test
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
;;
cmake_gen
)
cmake_gen
${
PYTHON_ABI
:-
""
}
;;
gen_fluid_lib
)
gen_fluid_lib
;;
test_fluid_lib
)
test_fluid_lib
;;
*
)
print_usage
exit
0
...
...
python/paddle/fluid/__init__.py
浏览文件 @
4743c9cd
...
...
@@ -46,7 +46,7 @@ from . import transpiler
from
.
import
distribute_lookup_table
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.data_feeder
import
DataFeeder
from
.core
import
LoDTensor
,
LoDTensorArray
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
from
.core
import
LoDTensor
,
LoDTensorArray
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
,
_Scope
from
.transpiler
import
DistributeTranspiler
,
\
memory_optimize
,
release_memory
,
DistributeTranspilerConfig
from
.lod_tensor
import
create_lod_tensor
,
create_random_int_lodtensor
...
...
python/paddle/fluid/executor.py
浏览文件 @
4743c9cd
...
...
@@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True):
assert
isinstance
(
name
,
str
)
if
scope
is
None
:
scope
=
global_scope
()
assert
isinstance
(
scope
,
core
.
Scope
)
assert
isinstance
(
scope
,
core
.
_
Scope
)
var
=
scope
.
find_var
(
name
)
assert
var
is
not
None
,
(
...
...
python/paddle/fluid/framework.py
浏览文件 @
4743c9cd
...
...
@@ -20,7 +20,6 @@ import os
import
re
import
six
import
sys
import
traceback
import
numpy
as
np
...
...
@@ -605,10 +604,6 @@ class Operator(object):
if
role_var_name
in
op_attrs
and
len
(
op_attrs
[
role_var_name
])
==
0
:
del
op_attrs
[
role_var_name
]
callstack_var_name
=
op_maker
.
kOpCreationCallstackAttrName
()
op_attrs
[
callstack_var_name
]
=
list
(
reversed
(
traceback
.
format_stack
()))[
1
:]
if
len
(
self
.
desc
.
type
())
!=
0
:
return
if
type
is
None
:
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
4743c9cd
...
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
trainers_endpoints
),
"num_trainers == len(end_points)"
build_strategy
.
trainers_endpoints
=
trainers_endpoints
# step
5: get persistable_vars, parameter
_vars, places. persistable_vars
# step
6: get persistable
_vars, places. persistable_vars
# need be broadcast to other local_scope.
persistable_vars
=
set
([
cpt
.
to_text
(
v
.
name
)
for
v
in
[
...
...
@@ -164,7 +164,7 @@ class ParallelExecutor(object):
places
=
list
(
map
(
place_obj
,
self
.
_places
))
# step
6
: init ParallelExecutor
# step
7
: init ParallelExecutor
self
.
executor
=
core
.
ParallelExecutor
(
places
,
persistable_vars
,
main
.
desc
,
cpt
.
to_text
(
loss_name
)
...
...
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
浏览文件 @
4743c9cd
...
...
@@ -185,8 +185,10 @@ def main(use_cuda, parallel):
if
__name__
==
'__main__'
:
for
use_cuda
in
(
False
,
True
):
for
parallel
in
(
False
,
True
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
continue
main
(
use_cuda
=
use_cuda
,
parallel
=
parallel
)
on_ci
=
bool
(
int
(
os
.
environ
.
get
(
"SKIP_UNSTABLE_CI"
,
'0'
)))
if
not
on_ci
:
for
use_cuda
in
(
False
,
True
):
for
parallel
in
(
False
,
True
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
continue
main
(
use_cuda
=
use_cuda
,
parallel
=
parallel
)
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
4743c9cd
...
...
@@ -15,6 +15,18 @@
from
__future__
import
print_function
import
unittest
from
test_dist_base
import
TestDistBase
import
os
def
skip_ci
(
func
):
on_ci
=
bool
(
int
(
os
.
environ
.
get
(
"SKIP_UNSTABLE_CI"
,
'0'
)))
def
__func__
(
*
args
,
**
kwargs
):
if
on_ci
:
return
return
func
(
*
args
,
**
kwargs
)
return
__func__
class
TestDistSeResneXt2x2
(
TestDistBase
):
...
...
@@ -22,6 +34,7 @@ class TestDistSeResneXt2x2(TestDistBase):
self
.
_sync_mode
=
True
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
...
...
@@ -32,6 +45,7 @@ class TestDistseResnXt2x2WithMemopt(TestDistBase):
self
.
_mem_opt
=
True
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1e-7
)
...
...
@@ -41,6 +55,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
self
.
_sync_mode
=
False
self
.
_use_reader_alloc
=
False
@
skip_ci
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
...
...
python/paddle/fluid/tests/unittests/test_operator_desc.py
浏览文件 @
4743c9cd
...
...
@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set
(
mul_op
.
attr_names
),
set
([
"x_num_col_dims"
,
"y_num_col_dims"
,
"op_role"
,
"op_role_var"
,
"op_namescope"
,
"op_callstack"
"op_namescope"
]))
self
.
assertEqual
(
mul_op
.
has_attr
(
"x_num_col_dims"
),
True
)
self
.
assertEqual
(
mul_op
.
attr_type
(
"x_num_col_dims"
),
core
.
AttrType
.
INT
)
...
...
python/paddle/fluid/tests/unittests/test_py_func_op.py
浏览文件 @
4743c9cd
...
...
@@ -26,7 +26,7 @@ os.environ['CPU_NUM'] = str(dev_cnt)
def
dummy_func_with_no_input
():
return
float
(
1.0
)
return
np
.
array
([
0
],
dtype
=
'float32'
)
def
dummy_func_with_no_output
(
x
):
...
...
@@ -105,7 +105,7 @@ def simple_fc_net(img, label, use_py_func_op):
name
=
'test_tmp_var'
,
dtype
=
'float32'
,
shape
=
[
1
])
fluid
.
layers
.
py_func
(
func
=
dummy_func_with_no_input
,
x
=
None
,
out
=
dummy_var
)
loss
+=
dummy_var
fluid
.
layers
.
py_func
(
func
=
dummy_func_with_no_output
,
x
=
loss
,
out
=
None
)
loss
=
fluid
.
layers
.
mean
(
loss
)
...
...
@@ -174,7 +174,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase):
self
.
assertAlmostEqual
(
max_diff
,
0
,
delta
=
1e-3
)
class
TestPyFuncOpUseParallelExecutor
(
unittest
.
TestCase
):
class
TestPyFuncOpUseParallelExecutor
(
TestPyFuncOpUseExecutor
):
def
setUp
(
self
):
self
.
use_parallel_executor
=
True
...
...
python/paddle/fluid/tests/unittests/test_weight_decay.py
0 → 100644
浏览文件 @
4743c9cd
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
contextlib
import
unittest
from
functools
import
partial
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
def
get_places
():
places
=
[]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
return
places
@
contextlib
.
contextmanager
def
prog_scope_guard
(
main_prog
,
startup_prog
):
scope
=
fluid
.
core
.
Scope
()
with
fluid
.
unique_name
.
guard
():
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
yield
def
bow_net
(
data
,
label
,
dict_dim
,
is_sparse
=
False
,
emb_dim
=
128
,
hid_dim
=
128
,
hid_dim2
=
96
,
class_dim
=
2
):
"""
BOW net
This model is from https://github.com/PaddlePaddle/models:
fluid/PaddleNLP/text_classification/nets.py
"""
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
is_sparse
=
is_sparse
,
size
=
[
dict_dim
,
emb_dim
])
bow
=
fluid
.
layers
.
sequence_pool
(
input
=
emb
,
pool_type
=
'sum'
)
bow_tanh
=
fluid
.
layers
.
tanh
(
bow
)
fc_1
=
fluid
.
layers
.
fc
(
input
=
bow_tanh
,
size
=
hid_dim
,
act
=
"tanh"
)
fc_2
=
fluid
.
layers
.
fc
(
input
=
fc_1
,
size
=
hid_dim2
,
act
=
"tanh"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_2
],
size
=
class_dim
,
act
=
"softmax"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
return
avg_cost
class
TestWeightDecay
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
reader
=
paddle
.
batch
(
paddle
.
dataset
.
imdb
.
train
(
self
.
word_dict
),
batch_size
=
4
)()
self
.
train_data
=
[
next
(
reader
)
for
_
in
range
(
5
)]
self
.
learning_rate
=
.
5
def
run_executor
(
self
,
place
,
feed_list
,
loss
):
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
main_prog
=
fluid
.
default_main_program
()
loss_set
=
[]
for
data
in
self
.
train_data
:
out
=
exe
.
run
(
main_prog
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
.
name
])
print
(
"loss %s"
%
(
np
.
average
(
out
)))
loss_set
.
append
(
np
.
average
(
out
))
return
loss_set
def
run_parallel_exe
(
self
,
place
,
feed_list
,
loss
,
use_cuda
=
True
,
use_reduce
=
False
,
use_fast_executor
=
False
,
use_ir_memory_optimize
=
False
):
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
exe
.
run
(
fluid
.
default_startup_program
())
exec_strategy
=
fluid
.
ExecutionStrategy
()
if
use_fast_executor
:
exec_strategy
.
use_experimental_executor
=
True
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
\
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
parallel_exe
=
fluid
.
ParallelExecutor
(
use_cuda
,
loss_name
=
loss
.
name
,
exec_strategy
=
exec_strategy
,
build_strategy
=
build_strategy
)
loss_set
=
[]
for
data
in
self
.
train_data
:
out
=
parallel_exe
.
run
(
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
.
name
])
print
(
"loss %s"
%
(
np
.
average
(
out
)))
loss_set
.
append
(
np
.
average
(
out
))
return
loss_set
def
check_weight_decay
(
self
,
place
,
model
,
use_parallel_exe
=
False
,
use_reduce
=
False
):
main_prog
=
fluid
.
framework
.
Program
()
startup_prog
=
fluid
.
framework
.
Program
()
startup_prog
.
random_seed
=
1
with
prog_scope_guard
(
main_prog
=
main_prog
,
startup_prog
=
startup_prog
):
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
avg_cost
=
model
(
data
,
label
,
len
(
self
.
word_dict
))
param_list
=
[(
var
,
var
*
self
.
learning_rate
)
for
var
in
main_prog
.
block
(
0
).
all_parameters
()]
optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
self
.
learning_rate
)
optimizer
.
minimize
(
avg_cost
)
for
params
in
param_list
:
updated_p
=
fluid
.
layers
.
elementwise_sub
(
x
=
params
[
0
],
y
=
params
[
1
])
fluid
.
layers
.
assign
(
input
=
updated_p
,
output
=
params
[
0
])
if
use_parallel_exe
:
loss
=
self
.
run_parallel_exe
(
place
,
[
data
,
label
],
loss
=
avg_cost
,
use_cuda
=
True
,
use_reduce
=
use_reduce
)
else
:
loss
=
self
.
run_executor
(
place
,
[
data
,
label
],
loss
=
avg_cost
)
return
loss
def
test_weight_decay
(
self
):
model
=
partial
(
bow_net
,
is_sparse
=
False
)
for
place
in
get_places
():
loss
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
False
)
loss2
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
True
,
use_reduce
=
False
)
for
i
in
range
(
len
(
loss
)):
assert
np
.
isclose
(
a
=
loss
[
i
],
b
=
loss2
[
i
],
rtol
=
5e-5
)
loss3
=
self
.
check_weight_decay
(
place
,
model
,
use_parallel_exe
=
True
,
use_reduce
=
True
)
for
i
in
range
(
len
(
loss
)):
assert
np
.
isclose
(
a
=
loss
[
i
],
b
=
loss3
[
i
],
rtol
=
5e-5
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/transpiler/inference_transpiler.py
浏览文件 @
4743c9cd
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录