Repository: 机器未来 / Paddle (forked from PaddlePaddle/Paddle)

Commit 5ef123c7
Authored on Nov 15, 2018 by Tao Luo

Merge branch 'develop' into dam_fc

Parents: 980a6753, d3aed98d

Showing 98 changed files with 2,308 additions and 765 deletions (+2308 -765)
CMakeLists.txt  +7 -0
cmake/cuda.cmake  +7 -2
cmake/cudnn.cmake  +6 -1
cmake/external/anakin.cmake  +5 -3
cmake/external/boost.cmake  +6 -12
cmake/external/gflags.cmake  +7 -2
cmake/external/glog.cmake  +5 -1
cmake/external/openblas.cmake  +1 -4
cmake/external/protobuf.cmake  +8 -2
cmake/external/python.cmake  +42 -0
cmake/external/xxhash.cmake  +45 -16
cmake/generic.cmake  +44 -1
cmake/inference_lib.cmake  +138 -117
doc/v2/dev/contribute_to_paddle_en.md  +1 -1
paddle/fluid/CMakeLists.txt  +2 -1
paddle/fluid/framework/CMakeLists.txt  +24 -8
paddle/fluid/framework/details/build_strategy.cc  +10 -6
paddle/fluid/framework/details/build_strategy.h  +10 -1
paddle/fluid/framework/executor.cc  +21 -1
paddle/fluid/framework/garbage_collector.h  +1 -1
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc  +9 -9
paddle/fluid/framework/ir/node.cc  +5 -0
paddle/fluid/framework/ir/node.h  +4 -0
paddle/fluid/framework/ir/pass.h  +18 -18
paddle/fluid/framework/ngraph_bridge.cc  +39 -0
paddle/fluid/framework/ngraph_bridge.h  +58 -0
paddle/fluid/framework/ngraph_operator.cc  +220 -0
paddle/fluid/framework/ngraph_operator.h  +72 -0
paddle/fluid/framework/operator.cc  +7 -4
paddle/fluid/inference/CMakeLists.txt  +35 -6
paddle/fluid/inference/analysis/helper.h  +1 -14
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc  +1 -1
paddle/fluid/inference/api/analysis_predictor.cc  +1 -0
paddle/fluid/inference/api/analysis_predictor_tester.cc  +1 -1
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc  +1 -1
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc  +1 -1
paddle/fluid/inference/api/helper.h  +5 -0
paddle/fluid/inference/api/paddle_analysis_config.h  +2 -0
paddle/fluid/inference/api/paddle_pass_builder.h  +2 -2
paddle/fluid/inference/tensorrt/CMakeLists.txt  +1 -0
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt  +6 -3
paddle/fluid/inference/tensorrt/convert/concat_op.cc  +1 -1
paddle/fluid/inference/tensorrt/convert/split_op.cc  +75 -0
paddle/fluid/inference/tensorrt/convert/test_split_op.cc  +53 -0
paddle/fluid/inference/tensorrt/engine.cc  +6 -0
paddle/fluid/inference/tensorrt/engine.h  +5 -0
paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt  +1 -0
paddle/fluid/inference/tensorrt/plugin/serialize.h  +111 -0
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu  +81 -0
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h  +74 -0
paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc  +61 -0
paddle/fluid/inference/tensorrt/plugin/trt_plugin.h  +80 -0
paddle/fluid/inference/tests/api/CMakeLists.txt  +1 -2
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc  +6 -4
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc  +4 -2
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc  +6 -3
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc  +4 -2
paddle/fluid/inference/tests/api/config_printer.h  +79 -0
paddle/fluid/inference/tests/api/tester_helper.h  +64 -23
paddle/fluid/inference/tests/api/trt_models_tester.cc  +123 -122
paddle/fluid/operators/CMakeLists.txt  +5 -2
paddle/fluid/operators/conv_cudnn_op.cu.cc  +8 -2
paddle/fluid/operators/detection/roi_perspective_transform_op.cu  +4 -0
paddle/fluid/operators/elementwise_op_function.h  +26 -0
paddle/fluid/operators/grid_sampler_op.h  +2 -1
paddle/fluid/operators/math/CMakeLists.txt  +10 -9
paddle/fluid/operators/math/softmax.cc  +4 -2
paddle/fluid/operators/math/softmax.cu  +8 -3
paddle/fluid/operators/math/softmax.h  +1 -1
paddle/fluid/operators/math/softmax_impl.h  +37 -4
paddle/fluid/operators/pad_constant_like_op.cc  +1 -1
paddle/fluid/operators/roi_pool_op.cc  +1 -1
paddle/fluid/operators/softmax_op.h  +6 -1
paddle/fluid/operators/softmax_with_cross_entropy_op.h  +2 -2
paddle/fluid/operators/unpool_op.cc  +2 -2
paddle/fluid/platform/init.cc  +10 -0
paddle/fluid/platform/nccl_helper.h  +2 -0
paddle/fluid/platform/port.h  +2 -7
paddle/fluid/platform/variant.h  +8 -0
paddle/fluid/pybind/CMakeLists.txt  +11 -3
paddle/fluid/pybind/pybind.cc  +40 -10
paddle/scripts/paddle_build.sh  +15 -0
python/CMakeLists.txt  +34 -15
python/paddle/fluid/__init__.py  +8 -3
python/paddle/fluid/contrib/inferencer.py  +3 -1
python/paddle/fluid/contrib/trainer.py  +2 -1
python/paddle/fluid/layers/io.py  +61 -58
python/paddle/fluid/layers/nn.py  +192 -179
python/paddle/fluid/layers/ops.py  +21 -21
python/paddle/fluid/tests/unittests/test_dist_base.py  +1 -1
python/paddle/fluid/tests/unittests/test_pass_builder.py  +6 -1
python/requirements.txt  +1 -1
python/setup.py.in  +40 -24
CMakeLists.txt
@@ -26,6 +26,11 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
   "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 if(WIN32)
   set(CMAKE_STATIC_LIBRARY_PREFIX lib)
+  add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+  set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+  set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
 endif(WIN32)

 if(NOT CMAKE_CROSSCOMPILING)
@@ -66,6 +71,8 @@ option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
 option(WITH_CONTRIB "Compile the third-party contributation" OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_ANAKIN "Compile with Anakin library" OFF)
+option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
+option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
 option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization." OFF)
cmake/cuda.cmake
@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
 if(NOT WITH_DSO)
   # TODO(panyx0718): CUPTI only allows DSO?
   list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
+  if(WIN32)
+    set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+  endif(WIN32)
 endif(NOT WITH_DSO)

 # setting nvcc arch flags
@@ -196,10 +199,12 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
     list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
   endif()
 else(NOT WIN32)
-  if(CMAKE_BUILD_TYPE STREQUAL "Release")
+  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    list(APPEND CUDA_NVCC_FLAGS "-g -G")
+  elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
     list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
   else()
-    message(FATAL "Windows only support Release build now. Please set visual studio build type to Release, x64 build.")
+    message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
   endif()
 endif(NOT WIN32)
cmake/cudnn.cmake
@@ -2,7 +2,12 @@ if(NOT WITH_GPU)
     return()
 endif()

-set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
+if(WIN32)
+    set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
+else(WIN32)
+    set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
+endif(WIN32)

 find_path(CUDNN_INCLUDE_DIR cudnn.h
     PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
     $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
cmake/external/anakin.cmake
@@ -58,19 +58,21 @@ ExternalProject_Add(
     -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
     -DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
     -DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
+    -DBUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN}
+    -DBUILD_CROSS_PLANTFORM=${ANAKIN_BUILD_CROSS_PLANTFORM}
     ${EXTERNAL_OPTIONAL_ARGS}
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
 )

 message(STATUS "Anakin for inference is enabled")
 message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
+add_dependencies(extern_anakin protobuf mklml)

 add_library(anakin_shared SHARED IMPORTED GLOBAL)
 set_property(TARGET anakin_shared PROPERTY IMPORTED_LOCATION ${ANAKIN_SHARED_LIB})
-add_dependencies(anakin_shared extern_anakin protobuf mklml)
+add_dependencies(anakin_shared extern_anakin)

 add_library(anakin_saber SHARED IMPORTED GLOBAL)
 set_property(TARGET anakin_saber PROPERTY IMPORTED_LOCATION ${ANAKIN_SABER_LIB})
-add_dependencies(anakin_saber extern_anakin protobuf mklml)
+add_dependencies(anakin_saber extern_anakin)

 list(APPEND external_project_dependencies anakin_shared anakin_saber)
cmake/external/boost.cmake
@@ -28,34 +28,28 @@ if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
   set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
   set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
 endif()

+IF(WIN32)
+  MESSAGE(WARNING, "In windows, boost can not be downloaded automaticlly, please build it manually and put it at " ${THIRD_PARTY_PATH}install/boost)
+else()
   MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
+ENDIF(WIN32)

 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
-set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
+set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}" CACHE PATH "boost include directory." FORCE)
 set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
 include_directories(${BOOST_INCLUDE_DIR})

+if(NOT WIN32)
 ExternalProject_Add(
     ${BOOST_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DOWNLOAD_DIR          ${BOOST_DOWNLOAD_DIR}
-    DOWNLOAD_COMMAND      wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
-                          && tar zxf ${BOOST_TAR}.tar.gz
+    URL                   ${BOOST_URL}
     DOWNLOAD_NO_PROGRESS  1
     PREFIX                ${BOOST_SOURCES_DIR}
     CONFIGURE_COMMAND     ""
     BUILD_COMMAND         ""
     INSTALL_COMMAND       ""
     UPDATE_COMMAND        ""
 )
+endif(NOT WIN32)

 if(${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
     set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
cmake/external/gflags.cmake
@@ -35,7 +35,12 @@ ExternalProject_Add(
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                     -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+                    -DBUILD_STATIC_LIBS=ON
                     -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
                     -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                     -DBUILD_TESTING=OFF
@@ -48,8 +53,8 @@ ExternalProject_Add(
 IF(WIN32)
   IF(NOT EXISTS "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib")
     add_custom_command(TARGET extern_gflags POST_BUILD
-        COMMAND cmake -E rename ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
+        COMMAND cmake -E copy ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
     )
   ENDIF()
 ENDIF(WIN32)
 ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
cmake/external/glog.cmake
@@ -46,7 +46,11 @@ ExternalProject_Add(
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                     -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
                     -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
                     -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
                     -DCMAKE_POSITION_INDEPENDENT_CODE=ON
@@ -63,7 +67,7 @@ ExternalProject_Add(
 IF(WIN32)
   IF(NOT EXISTS "${GLOG_INSTALL_DIR}/lib/libglog.lib")
     add_custom_command(TARGET extern_glog POST_BUILD
-        COMMAND cmake -E rename ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
+        COMMAND cmake -E copy ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
     )
   ENDIF()
 ENDIF(WIN32)
cmake/external/openblas.cmake
@@ -17,12 +17,8 @@ IF(USE_EIGEN_FOR_BLAS)
 ENDIF(USE_EIGEN_FOR_BLAS)

 INCLUDE(cblas)

-# IF(WIN32 AND NOT ${CBLAS_FOUND})
 IF(NOT ${CBLAS_FOUND})
     INCLUDE(ExternalProject)
     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@@ -34,6 +30,7 @@ IF(NOT ${CBLAS_FOUND})
         CACHE FILEPATH "openblas library." FORCE)
     ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
     IF(WIN32)
         SET(CBLAS_FOUND true)
         MESSAGE(WARNING, "In windows, openblas only support msvc build, please build it manually and put it at " ${CBLAS_INSTALL_DIR})
cmake/external/protobuf.cmake
@@ -140,7 +140,6 @@ endmacro()
 set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
 IF(WIN32)
   SET(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf)
-  MESSAGE(WARNING, "In windows, protobuf only support msvc build, please build it manually and put it at " ${PROTOBUF_ROOT})
 ENDIF(WIN32)
 if(NOT "${PROTOBUF_ROOT}" STREQUAL "")
@@ -188,13 +187,20 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
     SET(OPTIONAL_ARGS
         "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
         "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
-        "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
         "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
+        "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
+        "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}"
+        "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
+        "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
+        "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
        "-Dprotobuf_WITH_ZLIB=ON"
        "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}"
         ${EXTERNAL_OPTIONAL_ARGS})
     SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
 ENDIF()
+IF(WIN32)
+    SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64")
+ENDIF()

 SET(PROTOBUF_REPO "https://github.com/google/protobuf.git")
 SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
cmake/external/python.cmake
@@ -21,6 +21,48 @@ INCLUDE(python_module)
 FIND_PACKAGE(PythonInterp ${PY_VERSION})
 FIND_PACKAGE(PythonLibs ${PY_VERSION})
+if(WIN32)
+  execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+    "from distutils import sysconfig as s;import sys;import struct;
+print(sys.prefix);
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+"
+    RESULT_VARIABLE _PYTHON_SUCCESS
+    OUTPUT_VARIABLE _PYTHON_VALUES
+    ERROR_VARIABLE _PYTHON_ERROR_VALUE)
+
+  if(NOT _PYTHON_SUCCESS MATCHES 0)
+    set(PYTHONLIBS_FOUND FALSE)
+    return()
+  endif()
+
+  # Convert the process output into a list
+  string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
+  string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
+  list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
+  list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
+
+  # Make sure all directory separators are '/'
+  string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
+
+  set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+
+  # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
+  # original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
+  if(NOT EXISTS "${PYTHON_LIBRARY}")
+    get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
+    set(PYTHON_LIBRARY "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+  endif()
+
+  # raise an error if the python libs are still not found.
+  if(NOT EXISTS "${PYTHON_LIBRARY}")
+    message(FATAL_ERROR "Python libraries not found")
+  endif()
+  SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+endif(WIN32)

 # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
 ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
cmake/external/xxhash.cmake
@@ -14,23 +14,52 @@ ELSE()
   ENDIF(APPLE)
 ENDIF()

+if(WIN32)
+    ExternalProject_Add(
+        extern_xxhash
+        ${EXTERNAL_PROJECT_LOG_ARGS}
+        GIT_REPOSITORY  "https://github.com/Cyan4973/xxHash"
+        GIT_TAG         "v0.6.5"
+        PREFIX          ${XXHASH_SOURCE_DIR}
+        DOWNLOAD_NAME   "xxhash"
+        UPDATE_COMMAND  ""
+        BUILD_IN_SOURCE 1
+        PATCH_COMMAND
+        CONFIGURE_COMMAND
+        ${CMAKE_COMMAND} ${XXHASH_SOURCE_DIR}/src/extern_xxhash/cmake_unofficial
+        -DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR}
+        -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+        -DBUILD_XXHSUM=OFF
+        -DCMAKE_GENERATOR_PLATFORM=x64
+        -DBUILD_SHARED_LIBS=OFF
+        ${OPTIONAL_CACHE_ARGS}
+        TEST_COMMAND      ""
+    )
+else()
 ExternalProject_Add(
     extern_xxhash
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY  "https://github.com/Cyan4973/xxHash"
     GIT_TAG         "v0.6.5"
     PREFIX          ${XXHASH_SOURCE_DIR}
     DOWNLOAD_NAME   "xxhash"
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
     BUILD_IN_SOURCE 1
     PATCH_COMMAND
     BUILD_COMMAND   ${BUILD_CMD}
     INSTALL_COMMAND export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
     TEST_COMMAND    ""
 )
+endif()

-set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
+if(WIN32)
+    set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
+else()
+    set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
+endif()

 INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
 add_library(xxhash STATIC IMPORTED GLOBAL)
cmake/generic.cmake
@@ -266,7 +266,11 @@ function(cc_library TARGET_NAME)
       if("${cc_library_DEPS};" MATCHES "python;")
         list(REMOVE_ITEM cc_library_DEPS python)
         add_dependencies(${TARGET_NAME} python)
-        target_link_libraries(${TARGET_NAME} "-Wl,-undefined,dynamic_lookup")
+        if(WIN32)
+          target_link_libraries(${TARGET_NAME} ${PYTHON_LIBRARIES})
+        else()
+          target_link_libraries(${TARGET_NAME} "-Wl,-undefined,dynamic_lookup")
+        endif(WIN32)
       endif()
       target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
       add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
@@ -288,6 +292,45 @@ function(cc_library TARGET_NAME)
   endif(cc_library_SRCS)
 endfunction(cc_library)

+# The link operation under windows may exceeds the maximum characters limit, simply break the link command
+# into multiple link opeartion can fix that, say
+# original:
+#     lib /out:target.lib a.lib b.lib c.lib d.lib
+# after:
+#     1. lib /out:dummy_lib_1.lib a.lib b.lib
+#     2. lib /out:dummy_lib_2.lib c.lib d.lib
+#     1. lib /out:target.lib dummy_lib_1.lib dummy_lib_2.lib
+function(sep_library TARGET_NAME)
+  set(options STATIC static SHARED shared)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(sep_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  set(dummy_index 1)
+  set(dummy_offset 1)
+  # the dummy target would be consisted of limit size libraries
+  set(dummy_limit 50)
+  list(LENGTH sep_library_DEPS sep_all_len)
+  foreach(v ${sep_library_DEPS})
+    list(APPEND dummy_list ${v})
+    list(LENGTH dummy_list listlen)
+    if((${listlen} GREATER ${dummy_limit}) OR (${dummy_offset} EQUAL ${sep_all_len}))
+      message("create dummy library ${TARGET_NAME}_dummy_lib_${dummy_index} for ${TARGET_NAME}")
+      cc_library(${TARGET_NAME}_dummy_lib_${dummy_index} STATIC DEPS ${dummy_list})
+      foreach(i ${dummy_list})
+        list(REMOVE_AT dummy_list 0)
+      endforeach()
+      list(APPEND ${TARGET_NAME}_dummy_list ${TARGET_NAME}_dummy_lib_${dummy_index})
+      MATH(EXPR dummy_index "${dummy_index}+1")
+    endif()
+    MATH(EXPR dummy_offset "${dummy_offset}+1")
+  endforeach()
+  if(${sep_library_SHARED})
+    cc_library(${TARGET_NAME} SHARED SRCS ${sep_library_SRCS} DEPS ${${TARGET_NAME}_dummy_list})
+  else(${sep_library_SHARED})
+    cc_library(${TARGET_NAME} STATIC SRCS ${sep_library_SRCS} DEPS ${${TARGET_NAME}_dummy_list})
+  endif(${sep_library_SHARED})
+endfunction(sep_library)
+
 function(cc_binary TARGET_NAME)
   set(options "")
   set(oneValueArgs "")
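For readers unfamiliar with the grouping trick the sep_library comment describes, below is a minimal, self-contained C++ sketch of the same chunking logic (ChunkDeps and all names are illustrative only, not part of Paddle): a group is flushed once it exceeds the limit, or when the input is exhausted, which matches the GREATER/EQUAL test in the CMake function above.

// Illustrative sketch of sep_library's dependency chunking, not Paddle code.
#include <iostream>
#include <string>
#include <vector>

std::vector<std::vector<std::string>> ChunkDeps(
    const std::vector<std::string>& deps, size_t limit) {
  std::vector<std::vector<std::string>> groups;
  std::vector<std::string> current;
  for (size_t i = 0; i < deps.size(); ++i) {
    current.push_back(deps[i]);
    // Flush when the group exceeds `limit` or the input ends, mirroring
    // the (listlen GREATER limit) OR (offset EQUAL len) condition above.
    if (current.size() > limit || i + 1 == deps.size()) {
      groups.push_back(current);
      current.clear();
    }
  }
  return groups;
}

int main() {
  std::vector<std::string> deps = {"a.lib", "b.lib", "c.lib", "d.lib"};
  auto groups = ChunkDeps(deps, 2);
  for (size_t g = 0; g < groups.size(); ++g) {
    std::cout << "dummy_lib_" << g + 1 << ":";
    for (const auto& d : groups[g]) std::cout << " " << d;
    std::cout << "\n";  // dummy_lib_1: a.lib b.lib c.lib / dummy_lib_2: d.lib
  }
}

The final target then links only the dummy archives, keeping each individual link command well under the Windows command-length limit.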
cmake/inference_lib.cmake
@@ -22,144 +22,165 @@ function(copy TARGET)
     list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
     list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
     if (NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len})
         message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
     endif ()
     math(EXPR len "${copy_lib_SRCS_len} - 1")
     add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
     foreach (index RANGE ${len})
         list(GET copy_lib_SRCS ${index} src)
         list(GET copy_lib_DSTS ${index} dst)
-        add_custom_command(TARGET ${TARGET} PRE_BUILD
-                COMMAND mkdir -p "${dst}"
-                COMMAND cp -r "${src}" "${dst}"
-                COMMENT "copying ${src} -> ${dst}")
+        if (WIN32)  # windows cmd shell will not expand wildcard automatically.
+            # below expand the files,libs and copy them by rules.
+            file(GLOB header_files ${src} "*.h")
+            file(GLOB static_lib_files ${src} "*.lib")
+            file(GLOB dll_lib_files ${src} "*.dll")
+            set(src_files ${header_files} ${static_lib_files} ${dll_lib_files})
+            if (NOT "${src_files}" STREQUAL "")
+                list(REMOVE_DUPLICATES src_files)
+            endif ()
+            add_custom_command(TARGET ${TARGET} PRE_BUILD
+                    COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}")
+            foreach (src_file ${src_files})
+                add_custom_command(TARGET ${TARGET} PRE_BUILD
+                        COMMAND ${CMAKE_COMMAND} -E copy "${src_file}" "${dst}"
+                        COMMENT "copying ${src_file} -> ${dst}")
+            endforeach ()
+        else (WIN32)  # not windows
+            add_custom_command(TARGET ${TARGET} PRE_BUILD
+                    COMMAND mkdir -p "${dst}"
+                    COMMAND cp -r "${src}" "${dst}"
+                    COMMENT "copying ${src} -> ${dst}")
+        endif (WIN32)
     endforeach ()
 endfunction()

 # third party
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3")
 copy(eigen3_lib
         SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen
         DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported
         DEPS eigen3)

 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/gflags")
 copy(gflags_lib
         SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
         DSTS ${dst_dir} ${dst_dir}/lib
         DEPS gflags)

 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/glog")
 copy(glog_lib
         SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
         DSTS ${dst_dir} ${dst_dir}/lib
         DEPS glog)

 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost/")
 copy(boost_lib
         SRCS ${BOOST_INCLUDE_DIR}/boost
         DSTS ${dst_dir}
         DEPS boost)

 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
 copy(xxhash_lib
         SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
         DSTS ${dst_dir} ${dst_dir}/lib
         DEPS xxhash)

 if (NOT PROTOBUF_FOUND)
     set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
     copy(protobuf_lib
             SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY}
             DSTS ${dst_dir} ${dst_dir}/lib
             DEPS extern_protobuf)
 endif ()

 if (NOT CBLAS_FOUND)
     set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
     copy(openblas_lib
             SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
             DSTS ${dst_dir} ${dst_dir}
             DEPS extern_openblas)
 elseif (WITH_MKLML)
     set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml")
     copy(mklml_lib
             SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
             DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
             DEPS mklml)
 endif ()

 if (WITH_MKLDNN)
     set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn")
     copy(mkldnn_lib
             SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
             DSTS ${dst_dir} ${dst_dir}/lib
             DEPS mkldnn)
 endif ()

 if (NOT WIN32)
     if (NOT MOBILE_INFERENCE AND NOT RPI)
         set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
         copy(snappy_lib
                 SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
                 DSTS ${dst_dir} ${dst_dir}/lib
                 DEPS snappy)

         set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream")
         copy(snappystream_lib
                 SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
                 DSTS ${dst_dir} ${dst_dir}/lib
                 DEPS snappystream)

         set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib")
         copy(zlib_lib
                 SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
                 DSTS ${dst_dir} ${dst_dir}/lib
                 DEPS zlib)
     endif ()
 endif (NOT WIN32)

 # paddle fluid module
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
 set(module "framework")
 if (NOT WIN32)
     set(framework_lib_deps framework_py_proto)
 endif (NOT WIN32)
 copy(framework_lib DEPS ${framework_lib_deps}
         SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
         ${src_dir}/${module}/ir/*.h
         DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir)

 set(module "memory")
 copy(memory_lib
         SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h
         DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail)

 set(inference_deps paddle_fluid_shared paddle_fluid)

 set(module "inference/api")
 if (WITH_ANAKIN AND WITH_MKL)
     copy(anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
             SRCS
             ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api*  # compiled anakin api
             ${ANAKIN_INSTALL_DIR}  # anakin release
             DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
     list(APPEND inference_deps anakin_inference_lib)
 endif ()

 set(module "inference")
 copy(inference_lib DEPS ${inference_deps}
@@ -167,30 +188,30 @@ copy(inference_lib DEPS ${inference_deps}
         ${src_dir}/${module}/api/paddle_*.h
         ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
         DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module})

 set(module "platform")
 copy(platform_lib DEPS profiler_py_proto
         SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
         DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details)

 set(module "string")
 copy(string_lib
         SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/tinyformat/*.h
         DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat)

 set(module "pybind")
 copy(pybind_lib
         SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
         DSTS ${dst_dir}/${module})

 # CMakeCache Info
 copy(cmake_cache
         SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
         DSTS ${FLUID_INSTALL_DIR})

 # This command generates a complete fluid library for both train and inference
 add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})

@@ -198,9 +219,9 @@ add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
 # Following commands generate a inference-only fluid library
 # third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}
 copy(third_party DEPS fluid_lib_dist
         SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
         DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR})

 # only need libpaddle_fluid.so/a and paddle_*.h for inference-only library
 copy(inference_api_lib DEPS fluid_lib_dist
@@ -213,20 +234,20 @@ add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)

 # paddle fluid version
 function(version version_file)
     execute_process(
             COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
             WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
             OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
     file(WRITE ${version_file}
             "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
             "WITH_MKL: ${WITH_MKL}\n"
             "WITH_MKLDNN: ${WITH_MKLDNN}\n"
             "WITH_GPU: ${WITH_GPU}\n")
     if (WITH_GPU)
         file(APPEND ${version_file}
                 "CUDA version: ${CUDA_VERSION}\n"
                 "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
     endif ()
 endfunction()
 version(${FLUID_INSTALL_DIR}/version.txt)
 version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)
doc/v2/dev/contribute_to_paddle_en.md
 ../../../CONTRIBUTING.md
 \ No newline at end of file
paddle/fluid/CMakeLists.txt
@@ -4,11 +4,12 @@ add_subdirectory(framework)
 add_subdirectory(operators)
 add_subdirectory(string)
-if (NOT WIN32)
 add_subdirectory(pybind)
+if (NOT WIN32)
 add_subdirectory(recordio)
 endif(NOT WIN32)
 # NOTE: please add subdirectory inference at last.
 add_subdirectory(inference)
 add_subdirectory(train)
paddle/fluid/framework/CMakeLists.txt
@@ -136,20 +136,32 @@ cc_library(version SRCS version.cc)
 cc_test(version_test SRCS version_test.cc DEPS version)

 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
+cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto)
+if(NOT WIN32)
+cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
+        shape_inference data_transform lod_tensor profiler)
+endif(NOT WIN32)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)

-if(NOT WIN32)
 py_proto_compile(framework_py_proto SRCS framework.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
+if(NOT WIN32)
 add_custom_command(TARGET framework_py_proto POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
     COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
     COMMENT "Copy generated python proto into directory paddle/fluid/proto."
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+else(NOT WIN32)
+string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
+add_custom_command(TARGET framework_py_proto POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
+    COMMAND copy /Y *.py ${proto_dstpath}
+    COMMENT "Copy generated python proto into directory paddle/fluid/proto."
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif(NOT WIN32)

 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
@@ -163,10 +175,14 @@ if(WITH_DISTRIBUTE)
     set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
     set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
-    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+    if(NOT WIN32)
+        cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph_operator)
+    else(NOT WIN32)
+        cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+    endif(NOT WIN32)
     cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
 endif()

 if(NOT WIN32)
     cc_library(parallel_executor SRCS parallel_executor.cc DEPS
         threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
paddle/fluid/framework/details/build_strategy.cc
@@ -79,9 +79,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
   BuildStrategy strategy_;
 };

-std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
-    const {
+std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
+    bool finalize_strategy) const {
+  if (is_finalized_) {
+    return pass_builder_;
+  }
   pass_builder_.reset(new ParallelExecutorPassBuilder(*this));
+  if (finalize_strategy) {
+    is_finalized_ = true;
+  }
   return pass_builder_;
 }
@@ -95,10 +101,8 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
 #else
                 const bool use_cuda) const {
 #endif
-  // Create a default one if not initialized by user.
+  // Create a default one if not finalized by user.
   if (!pass_builder_) {
-    CreatePassesFromStrategy();
+    CreatePassesFromStrategy(false);
   }
   std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));
paddle/fluid/framework/details/build_strategy.h
@@ -75,12 +75,20 @@ struct BuildStrategy {
   bool remove_unnecessary_lock_{false};

+  // NOTE:
+  // Before you add new options, think if it's a general strategy that works
+  // with other strategy. If not, the strategy should be created through
+  // CreatePassesFromStrategy and the pass can be managed separately.
+
   // User normally doesn't need to call this API.
   // The PassBuilder allows for more customized insert, remove of passes
   // from python side.
   // A new PassBuilder is created based on configs defined above and
   // passes are owned by the PassBuilder.
-  std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy() const;
+  std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy(
+      bool finalize_strategy) const;
+
+  bool IsFinalized() const { return is_finalized_; }

   // Apply the passes built by the pass_builder_. The passes will be
   // applied to the Program and output an ir::Graph.
@@ -97,6 +105,7 @@ struct BuildStrategy {
 #endif

  private:
+  mutable bool is_finalized_ = false;
   mutable std::shared_ptr<ir::PassBuilder> pass_builder_;
 };
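The new CreatePassesFromStrategy(bool finalize_strategy) plus IsFinalized() follow a common finalize-once caching idiom for const objects. A minimal standalone C++ sketch of that idiom only (stand-in types, not Paddle's actual BuildStrategy/PassBuilder API):

// Illustrative sketch of the finalize-once pattern, not Paddle code.
#include <iostream>
#include <memory>

struct PassBuilderLike {
  int n_passes = 3;
};

struct StrategyLike {
  std::shared_ptr<PassBuilderLike> Create(bool finalize) const {
    if (is_finalized_) return builder_;  // frozen: reuse the cached builder
    builder_ = std::make_shared<PassBuilderLike>();
    if (finalize) is_finalized_ = true;  // later Create() calls become no-ops
    return builder_;
  }

 private:
  // `mutable` lets a const method maintain the cache, as in BuildStrategy.
  mutable bool is_finalized_ = false;
  mutable std::shared_ptr<PassBuilderLike> builder_;
};

int main() {
  StrategyLike s;
  auto b1 = s.Create(true);   // caller customizes passes; strategy is frozen
  auto b2 = s.Create(false);  // internal call; returns the same builder
  std::cout << (b1 == b2) << "\n";  // prints 1
}

The point of the flag is that once a user has customized the pass list, an internal call such as Apply() can no longer silently rebuild and discard those customizations.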
paddle/fluid/framework/executor.cc
浏览文件 @
5ef123c7
...
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/operators/detail/macros.h"
...
@@ -25,6 +26,7 @@ limitations under the License. */
 DECLARE_bool(benchmark);
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
+DEFINE_bool(use_ngraph, false, "Use NGRAPH to run");

 namespace paddle {
 namespace framework {
...
@@ -81,6 +83,24 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
   }
 }

+static void EnableFusedOp(ExecutorPrepareContext* ctx) {
+#ifdef PADDLE_WITH_NGRAPH
+  VLOG(3) << "use_ngraph=True";
+  auto intervals = FusedOperator::FusedOpIntervals(&ctx->ops_);
+  for (auto& interval : intervals) {
+    auto* fused_op = new FusedOperator(ctx->prog_, ctx->block_id_,
+                                       interval.at(0), interval.at(1));
+    *interval[0] = std::unique_ptr<OperatorBase>(fused_op);
+  }
+  for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
+    ctx->ops_.erase(it->at(0) + 1, it->at(1));
+  }
+#else
+  LOG(WARNING)
+      << "'NGRAPH' is not supported, Please re-compile with WITH_NGRAPH option";
+#endif
+}
+
 Executor::Executor(const platform::Place& place) : place_(place) {}

 void Executor::Close() {
...
@@ -338,6 +358,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
   for (auto& op_desc : block.AllOps()) {
     ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
   }
+  if (FLAGS_use_ngraph) EnableFusedOp(ctx.get());
   return ctx;
 }
...
@@ -486,6 +507,5 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
         << "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
 #endif
 }
-
 }  // namespace framework
 }  // namespace paddle
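The EnableFusedOp rewrite above follows a replace-then-erase idiom: the first op of every fusable run is overwritten with the FusedOperator, and the tails are erased in reverse so the iterators stored for earlier intervals stay valid. A self-contained sketch of the same idiom on plain strings, runnable without the Paddle headers:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

int main() {
  std::vector<std::unique_ptr<std::string>> ops;
  for (auto s : {"a", "b", "c", "d", "e"})
    ops.emplace_back(new std::string(s));

  // One fusable run covering ops[1..3]; each interval stores (begin, end)
  // iterators, mirroring FusedOpIntervals.
  using It = std::vector<std::unique_ptr<std::string>>::iterator;
  std::vector<std::vector<It>> intervals = {{ops.begin() + 1, ops.begin() + 4}};

  // Overwrite the head of each run with the fused replacement.
  for (auto& interval : intervals)
    *interval[0] = std::unique_ptr<std::string>(new std::string("fused(b,c,d)"));
  // Erase the tails in reverse so earlier iterators are still valid.
  for (auto it = intervals.rbegin(); it != intervals.rend(); ++it)
    ops.erase(it->at(0) + 1, it->at(1));

  for (auto& op : ops) std::cout << *op << "\n";  // a, fused(b,c,d), e
  return 0;
}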
paddle/fluid/framework/garbage_collector.h
...
@@ -29,7 +29,7 @@ template <typename T>
 class GarbageCollector {
  public:
   GarbageCollector(const platform::Place &place, size_t max_memory_size)
-      : max_memory_size_(std::max(max_memory_size, static_cast<size_t>(1))) {
+      : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
     garbages_.reset(new std::deque<T *>());
     dev_ctx_ = platform::DeviceContextPool::Instance().Get(place);
   }
...
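The only change here is wrapping std::max in parentheses. A minimal sketch of why that matters on Windows, where <windows.h> can inject a function-style max macro (simulated below with a local #define):

#include <algorithm>
#include <cstddef>
#include <iostream>

// Stand-in for the function-style macro that <windows.h> would inject.
#define max(a, b) (((a) > (b)) ? (a) : (b))

int main() {
  std::size_t requested = 0;
  // std::max(requested, static_cast<std::size_t>(1)) would not compile here:
  // the preprocessor expands `max(...)` before name lookup ever runs.
  // Parenthesizing (std::max) blocks macro expansion, because the `max`
  // token is not immediately followed by `(`.
  std::size_t clamped = (std::max)(requested, static_cast<std::size_t>(1));
  std::cout << clamped << "\n";  // prints 1
  return 0;
}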
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
...
@@ -211,12 +211,12 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
   VLOG(30) << "LSTMWeight resized to " << out->dims();

   float* out_data = out->mutable_data<float>(platform::CPUPlace());
-  std::array<const float*, 4> tensors(
-      {{W_forget_w0.data<float>(), W_input_w0.data<float>(),
-        W_output_w0.data<float>(), W_cell_w0.data<float>()}});
-  std::array<const float*, 4> tensors1(
-      {{W_forget_w1.data<float>(), W_input_w1.data<float>(),
-        W_output_w1.data<float>(), W_cell_w1.data<float>()}});
+  std::array<const float*, 4> tensors{
+      {W_forget_w0.data<float>(), W_input_w0.data<float>(),
+       W_output_w0.data<float>(), W_cell_w0.data<float>()}};
+  std::array<const float*, 4> tensors1{
+      {W_forget_w1.data<float>(), W_input_w1.data<float>(),
+       W_output_w1.data<float>(), W_cell_w1.data<float>()}};

   for (int row = 0; row < D; row++) {
     for (int col = 0; col < 4; col++) {
...
@@ -238,9 +238,9 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
 void PrepareLSTMBias(const LoDTensor& B_forget, const LoDTensor& B_input,
                      const LoDTensor& B_output, const LoDTensor& B_cell,
                      LoDTensor* out) {
-  std::array<const float*, 4> tensors(
-      {{B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
-        B_cell.data<float>()}});
+  std::array<const float*, 4> tensors{
+      {B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
+       B_cell.data<float>()}};
   PADDLE_ENFORCE_EQ(B_forget.dims().size(), 1);
   int D = B_forget.dims()[0];
...
paddle/fluid/framework/ir/node.cc
...
@@ -17,7 +17,12 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
+// msvc15 don't support constexpr in correct way.
+#if !defined(_WIN32)
 constexpr char Node::kControlDepVarName[];
+#else
+const char Node::kControlDepVarName[] = "__control_var";
+#endif

 std::unique_ptr<Node> CreateNodeForTest(const std::string &name,
                                         Node::Type type) {
...
paddle/fluid/framework/ir/node.h
...
@@ -55,7 +55,11 @@ class Node {
   }

   enum class Type { kOperation, kVariable };
+#if !defined(_WIN32)  // msvc not support constexpr correctly.
   static constexpr char kControlDepVarName[] = "__control_var";
+#else
+  static const char kControlDepVarName[];
+#endif

   Type NodeType() const { return type_; }
...
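A self-contained sketch of the pattern this guard enables: before C++17 an odr-used static constexpr array member still needs an out-of-line definition, and the affected MSVC versions mishandle the constexpr spelling, so the Windows branch falls back to a plain const member defined in node.cc:

#include <cstdio>

struct Node {
#if !defined(_WIN32)
  static constexpr char kControlDepVarName[] = "__control_var";
#else
  static const char kControlDepVarName[];
#endif
};

// Out-of-line definition: constexpr form on POSIX toolchains, plain const
// with the initializer moved here on MSVC.
#if !defined(_WIN32)
constexpr char Node::kControlDepVarName[];
#else
const char Node::kControlDepVarName[] = "__control_var";
#endif

int main() {
  std::printf("%s\n", Node::kControlDepVarName);
  return 0;
}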
paddle/fluid/framework/ir/pass.h
...
@@ -197,26 +197,26 @@ struct PassRegistrar : public Registrar {
                                       msg)

 // Register a new pass that can be applied on the IR.
 #define REGISTER_PASS(pass_type, pass_class)                \
   STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                      \
       __reg_pass__##pass_type,                              \
       "REGISTER_PASS must be called in global namespace");  \
   static ::paddle::framework::ir::PassRegistrar<pass_class> \
       __pass_registrar_##pass_type##__(#pass_type);         \
   int TouchPassRegistrar_##pass_type() {                    \
     __pass_registrar_##pass_type##__.Touch();               \
     return 0;                                               \
   }                                                         \
   static ::paddle::framework::ir::PassRegistrar<pass_class> \
-      &__pass_tmp_registrar_##pass_type##__ __attribute__((unused)) = \
+      &__pass_tmp_registrar_##pass_type##__ UNUSED =        \
           __pass_registrar_##pass_type##__

 #define USE_PASS(pass_type)                           \
   STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                \
       __use_pass_itself_##pass_type,                  \
       "USE_PASS must be called in global namespace"); \
   extern int TouchPassRegistrar_##pass_type();        \
-  static int use_pass_itself_##pass_type##_ __attribute__((unused)) = \
+  static int use_pass_itself_##pass_type##_ UNUSED =  \
       TouchPassRegistrar_##pass_type()

 }  // namespace ir
...
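The diff swaps the GCC-only __attribute__((unused)) for a portable UNUSED token. A minimal sketch of what such a token could expand to; the exact spelling of Paddle's UNUSED macro lives elsewhere in the tree, so this definition is an assumption:

// Assumed portable definition: a no-op on MSVC, the GCC/Clang attribute
// elsewhere, so registrar variables don't trigger -Wunused-variable.
#if defined(_WIN32)
#define UNUSED
#else
#define UNUSED __attribute__((unused))
#endif

static int force_registration UNUSED = 0;  // kept alive without a warning

int main() { return 0; }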
paddle/fluid/framework/ngraph_bridge.cc
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <functional>
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "ngraph/ngraph.hpp"
namespace paddle {
namespace framework {

std::map<std::string,
         std::function<void(const std::shared_ptr<OperatorBase>&,
                            std::shared_ptr<std::unordered_map<
                                std::string, std::shared_ptr<ngraph::Node>>>)>>
    NgraphBridge::NG_NODE_MAP = {};

void NgraphBridge::build_graph(const std::shared_ptr<OperatorBase>& op) {
  auto& op_type = op->Type();
  NG_NODE_MAP[op_type](op, ngb_node_map);
}

}  // namespace framework
}  // namespace paddle
#endif
paddle/fluid/framework/ngraph_bridge.h
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h"
#include "ngraph/ngraph.hpp"
namespace paddle {
namespace framework {

class NgraphBridge {
 public:
  static std::map<
      std::string,
      std::function<void(const std::shared_ptr<OperatorBase>&,
                         std::shared_ptr<std::unordered_map<
                             std::string, std::shared_ptr<ngraph::Node>>>)>>
      NG_NODE_MAP;

  explicit NgraphBridge(
      std::shared_ptr<
          std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
          var_node_map)
      : ngb_node_map(var_node_map) {}

  void build_graph(const std::shared_ptr<OperatorBase>& op);

 private:
  std::shared_ptr<
      std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
      ngb_node_map;
};

}  // namespace framework
}  // namespace paddle
#endif
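NG_NODE_MAP maps an op type to a builder that appends the matching ngraph::Node to the shared var-to-node map. A hedged sketch of how one entry could be registered; the relu wiring below is illustrative only, since this diff does not add any concrete builders:

#ifdef PADDLE_WITH_NGRAPH
#include "paddle/fluid/framework/ngraph_bridge.h"

#include "ngraph/ngraph.hpp"

namespace paddle {
namespace framework {

// Hypothetical registration: translate a fluid "relu" op into an nGraph node.
static void RegisterReluBuilder() {
  NgraphBridge::NG_NODE_MAP["relu"] =
      [](const std::shared_ptr<OperatorBase>& op,
         std::shared_ptr<std::unordered_map<
             std::string, std::shared_ptr<ngraph::Node>>> node_map) {
        auto& x_name = op->Input("X");  // single-input, single-output op
        auto& out_name = op->Output("Out");
        (*node_map)[out_name] =
            std::make_shared<ngraph::op::Relu>(node_map->at(x_name));
      };
}

}  // namespace framework
}  // namespace paddle
#endif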
paddle/fluid/framework/ngraph_operator.cc
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <glog/logging.h>
#include <algorithm>
#include <map>
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/ngraph_operator.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/framework/var_type.h"
namespace paddle {
namespace framework {

static std::map<proto::VarType::Type, ngraph::element::Type> pd2ng_type_map = {
    {proto::VarType::FP32, ngraph::element::f32},
    {proto::VarType::FP64, ngraph::element::f64},
    {proto::VarType::INT32, ngraph::element::i32},
    {proto::VarType::INT64, ngraph::element::i64},
    {proto::VarType::BOOL, ngraph::element::boolean},
};

typedef enum {    /* nGraph support state on ops          */
  FULL_TRAIN,     /* Support full ops for train           */
  PARTIAL_TRAIN,  /* Support partial ops for train        */
  FULL_TEST,      /* Support full list of ops for test    */
  PARTIAL_TEST    /* Support partial list of ops for test */
} op_state;

class NgraphOperator {
 public:
  explicit NgraphOperator(
      const Scope& scope, const platform::Place& place,
      const std::vector<std::shared_ptr<OperatorBase>>& ops,
      const std::unordered_map<std::string, ngraph::element::Type>&
          var_type_map,
      const std::unordered_set<std::string>& persist,
      const std::unordered_set<std::string>& fetches,
      const std::unordered_set<std::string>& post_op_inputs,
      op_state ng_op_state)
      : scope_(scope),
        place_(place),
        fused_ops_(ops),
        var_type_map_(var_type_map),
        persistables_(persist),
        fetches_(fetches),
        post_op_inputs_(post_op_inputs),
        ng_op_state_(ng_op_state) {}

  void Run(const Scope& scope, const platform::Place& place) const;

 private:
  static std::unordered_map<std::string, std::shared_ptr<ngraph::Function>>
      func_cache;
  const Scope& scope_;
  const platform::Place& place_;
  std::vector<std::shared_ptr<OperatorBase>> fused_ops_;
  std::unordered_map<std::string, ngraph::element::Type> var_type_map_;
  std::unordered_set<std::string> persistables_;
  std::unordered_set<std::string> fetches_;
  std::unordered_set<std::string> post_op_inputs_;
  op_state ng_op_state_;
};

std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
FusedOperator::FusedOpIntervals(
    std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops) {
  std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
      intervals;
  if (ops->empty()) {
    return intervals;
  }
  size_t size = ops->size();
  size_t left = 0;
  while (left < size && ops->at(left)->Type() != kFeedOpType) {
    ++left;
  }
  if (left == size) {
    return intervals;
  }
  while (left < size && ops->at(left)->Type() == kFeedOpType) {
    ++left;
  }

  size_t right = left;
  while (right < size && ops->at(right)->Type() != kFetchOpType) {
    ++right;
  }
  if (right == size) {
    return intervals;
  }
  if (left >= right) return intervals;

  // (left, right - 1) represents indices between feed and fetch
  size_t pivot = left;
  while (pivot < right) {
    auto op_type = ops->at(pivot)->Type();
    if (paddle::framework::NgraphBridge::NG_NODE_MAP.find(op_type) ==
        paddle::framework::NgraphBridge::NG_NODE_MAP.end()) {
      ++pivot;
    } else {
      size_t start = pivot, end = start;
      while (pivot < right &&
             (paddle::framework::NgraphBridge::NG_NODE_MAP.find(
                  ops->at(pivot)->Type()) !=
              paddle::framework::NgraphBridge::NG_NODE_MAP.end())) {
        ++pivot;
        ++end;
      }
      std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>
          interval = {ops->begin() + start, ops->begin() + end};
      intervals.push_back(interval);
    }
  }  // end while

  return intervals;
}

FusedOperator::FusedOperator(
    const ProgramDesc& prog, size_t block_id,
    std::vector<std::unique_ptr<OperatorBase>>::iterator start,
    std::vector<std::unique_ptr<OperatorBase>>::iterator end,
    const std::string& type, const VariableNameMap& inputs,
    const VariableNameMap& outputs, const AttributeMap& attrs)
    : OperatorBase(type, inputs, outputs, attrs),
      pdesc_(prog),
      block_(block_id) {
  for (std::vector<std::unique_ptr<OperatorBase>>::iterator it = start;
       it != end; ++it) {
    fused_ops_.push_back(std::move(*it));
  }

  for (std::vector<std::unique_ptr<OperatorBase>>::iterator it = end;
       (*it)->Type() != kFetchOpType; ++it) {
    for (auto& var_name_item : (*it)->Inputs()) {
      for (auto& var_name : var_name_item.second) {
        post_op_inputs_.insert(var_name);
      }
    }
  }

  if ((*(start - 1))->Type() == kFeedOpType &&
      (*end)->Type() == kFetchOpType) {
    is_full_ = true;
  }

  Process();
}

void FusedOperator::Process() {
  auto& bdesc = pdesc_.Block(block_);
  for (auto& var : bdesc.AllVars()) {
    if (!(var->GetType() == proto::VarType::SELECTED_ROWS ||
          var->GetType() == proto::VarType::LOD_TENSOR ||
          var->GetType() == proto::VarType::LOD_TENSOR_ARRAY)) {
      continue;
    }

    auto var_name = var->Name();
    if (var->Name() == framework::kEmptyVarName) {
      continue;
    }

    if (var_name != "fetch" && var_name != "feed") {
      auto pd_type = var->GetDataType();
      if (pd2ng_type_map.find(pd_type) == pd2ng_type_map.end()) {
        PADDLE_THROW("Data type of var %s not found in pd2ng_type_map",
                     var_name);
      }
      var_type_map_[var_name] = pd2ng_type_map[pd_type];
    }

    if (var->Persistable()) {
      persistables_.insert(var->Name());
    }
  }

  for (auto* op : bdesc.AllOps()) {
    if (op->Type() == kFetchOpType) {
      std::string fetch_target_name = op->Input("X")[0];
      fetches_.insert(fetch_target_name);
    }
  }
}

void FusedOperator::RunImpl(const Scope& scope,
                            const platform::Place& place) const {
  op_state ng_op_state = PARTIAL_TEST;
  auto& bdesc = pdesc_.Block(block_);
  for (auto* op : bdesc.AllOps()) {
    if (op->Type().find("_grad") != std::string::npos) {
      ng_op_state = PARTIAL_TRAIN;
      break;
    }
  }

  if (is_full_) {
    ng_op_state = ng_op_state == PARTIAL_TEST ? FULL_TEST : FULL_TRAIN;
  }

  NgraphOperator ngraph_op(scope, place, fused_ops_, var_type_map_,
                           persistables_, fetches_, post_op_inputs_,
                           ng_op_state);
  ngraph_op.Run(scope, place);
}

}  // namespace framework
}  // namespace paddle
#endif
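FusedOpIntervals boils down to one linear scan: skip the leading feed ops, stop at the first fetch op, and collect maximal runs of ops whose types appear in NG_NODE_MAP. A self-contained sketch of that scan with op types as plain strings, runnable without Paddle or nGraph:

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<std::string> ops = {"feed", "feed", "mul",
                                  "relu", "while", "mul", "fetch"};
  // Stand-in for NG_NODE_MAP membership: op types the bridge can convert.
  std::unordered_set<std::string> supported = {"mul", "relu"};

  size_t left = 0;
  while (left < ops.size() && ops[left] == "feed") ++left;
  size_t right = left;
  while (right < ops.size() && ops[right] != "fetch") ++right;

  // Collect [start, end) runs of convertible ops between feed and fetch.
  for (size_t pivot = left; pivot < right;) {
    if (!supported.count(ops[pivot])) {
      ++pivot;
      continue;
    }
    size_t start = pivot;
    while (pivot < right && supported.count(ops[pivot])) ++pivot;
    std::cout << "fusable run: [" << start << ", " << pivot << ")\n";
  }
  return 0;  // prints [2, 4) and [5, 6)
}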
paddle/fluid/framework/ngraph_operator.h
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <atomic>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/variant.h"
#include "ngraph/ngraph.hpp"
namespace paddle {
namespace framework {

class FusedOperator : public OperatorBase {
 public:
  static std::vector<
      std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
  FusedOpIntervals(
      std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops);

  explicit FusedOperator(
      const ProgramDesc& prog, size_t block_id,
      std::vector<std::unique_ptr<OperatorBase>>::iterator start,
      std::vector<std::unique_ptr<OperatorBase>>::iterator end,
      const std::string& type = "fused_op", const VariableNameMap& inputs = {},
      const VariableNameMap& outputs = {}, const AttributeMap& attrs = {});

  void RunImpl(const Scope& scope, const platform::Place& place) const final;

 private:
  const ProgramDesc pdesc_;
  size_t block_;
  std::vector<std::shared_ptr<OperatorBase>> fused_ops_;
  std::unordered_map<std::string, ngraph::element::Type> var_type_map_;
  std::unordered_set<std::string> persistables_;
  std::unordered_set<std::string> fetches_;
  std::unordered_set<std::string> post_op_inputs_;
  bool is_full_ = false;

  void Process();
};

}  // namespace framework
}  // namespace paddle
#endif
paddle/fluid/framework/operator.cc
...
@@ -150,14 +150,17 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
 #endif
   }

   // The profiler has a process-wide mutex, which results in serious
   // performance issues in concurrency scenarios. Here we use an `if` to fix
   // this issue. Please do not remove the `if`; ask @Superjomn if there are
   // any concerns.
+#ifndef _WIN32
   if (platform::IsProfileEnabled()) {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     platform::RecordEvent record_event(Type(), pool.Get(place));
     RunImpl(scope, place);
-  } else {
+  } else
+#endif
+  {
     RunImpl(scope, place);
   }
   VLOG(30) << place << " " << DebugStringEx(&scope);
...
paddle/fluid/inference/CMakeLists.txt
...
@@ -16,9 +16,21 @@ cc_library(paddle_fluid_api
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})

 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
+get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THRID_PARTYS)
+
+if (WIN32)
+list(APPEND fluid_third_partys gflags glog protobuf cblas)
+endif(WIN32)

 # paddle_fluid_origin exclude inference api interface
-cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+if(WIN32)
+  sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+endif(WIN32)

 add_subdirectory(api)
...
@@ -28,8 +40,16 @@ set(SHARED_INFERENCE_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)

 # Create static library
-cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS}
-           zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder)
+if(WIN32)
+  sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
+              analysis_config paddle_pass_builder)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
+             analysis_config paddle_pass_builder)
+endif(WIN32)

 if(NOT APPLE)
   # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
...
@@ -38,11 +58,20 @@ if(NOT APPLE)
 endif()

 # Create shared library
-cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
-           DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
+if(WIN32)
+  sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
+              DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
+  target_link_libraries(paddle_fluid_shared shlwapi)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
+             DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
+endif()

 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
-if(NOT APPLE)
+if(NOT APPLE AND NOT WIN32)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
   set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.map")
   set_target_properties(paddle_fluid_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
...
paddle/fluid/inference/analysis/helper.h
...
@@ -26,6 +26,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/port.h"

 namespace paddle {
 namespace inference {
...
@@ -124,20 +125,6 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) {
   return *var->GetMutable<T>();
 }

-static void ExecShellCommand(const std::string &cmd, std::string *message) {
-  char buffer[128];
-  std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose);
-  if (!pipe) {
-    LOG(ERROR) << "error running command: " << cmd;
-    return;
-  }
-  while (!feof(pipe.get())) {
-    if (fgets(buffer, 128, pipe.get()) != nullptr) {
-      *message += buffer;
-    }
-  }
-}
-
 static framework::proto::ProgramDesc LoadProgramDesc(
     const std::string &model_path) {
   std::ifstream fin(model_path, std::ios::in | std::ios::binary);
...
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
...
@@ -45,7 +45,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
     std::unordered_set<std::string> teller_set(
         {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
          "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-         "elementwise_add", "dropout"});
+         "elementwise_add", "dropout", "split"});
     if (!node->IsOp()) return false;

     if (teller_set.count(node->Op()->Type())) {
...
paddle/fluid/inference/api/analysis_predictor.cc
...
@@ -548,4 +548,5 @@ USE_TRT_CONVERTER(batch_norm);
 USE_TRT_CONVERTER(concat);
 USE_TRT_CONVERTER(dropout);
 USE_TRT_CONVERTER(pad);
+USE_TRT_CONVERTER(split);
 #endif
paddle/fluid/inference/api/analysis_predictor_tester.cc
...
@@ -15,7 +15,7 @@
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include <glog/logging.h>
 #include <gtest/gtest.h>
-#include <thread>
+#include <thread>  // NOLINT
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
...
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
...
@@ -23,7 +23,7 @@ limitations under the License. */
 #include <memory>
 #include <thread>  //NOLINT
-#include "utils.h"
+#include "utils.h"  // NOLINT

 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
...
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
...
paddle/fluid/inference/api/helper.h
...
@@ -15,9 +15,14 @@
 #pragma once

 #include <glog/logging.h>
+#if !defined(_WIN32)
 #include <sys/time.h>
+#else
+#endif
 #include <algorithm>
 #include <chrono>  // NOLINT
+#include <iterator>
 #include <numeric>
 #include <sstream>
 #include <string>
...
paddle/fluid/inference/api/paddle_analysis_config.h
...
@@ -49,6 +49,8 @@ struct AnalysisConfig : public NativeConfig {
   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1);
+  bool use_tensorrt() const { return use_tensorrt_; }
+
   // NOTE this is just for internal development, please not use it.
   // NOT stable yet.
   void EnableMKLDNN();
...
paddle/fluid/inference/api/paddle_pass_builder.h
...
@@ -91,7 +91,7 @@ class CpuPassStrategy : public PassStrategy {
   virtual ~CpuPassStrategy() = default;

-  virtual void EnableMKLDNN() override {
+  void EnableMKLDNN() override {
     // TODO(Superjomn) Consider the way to mix CPU with GPU.
 #ifdef PADDLE_WITH_MKLDNN
     passes_.insert(passes_.begin(), "mkldnn_placement_pass");
...
@@ -123,7 +123,7 @@ class GpuPassStrategy : public PassStrategy {
   GpuPassStrategy(const GpuPassStrategy &other)
       : PassStrategy(other.AllPasses()) {}

-  virtual void EnableMKLDNN() override;
+  void EnableMKLDNN() override;
   virtual ~GpuPassStrategy() = default;
 };
...
paddle/fluid/inference/tensorrt/CMakeLists.txt
 nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto device_context)
 nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
 nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
+add_subdirectory(plugin)
 add_subdirectory(convert)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
 # Add TRT tests
 nv_library(tensorrt_converter
   SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
-  batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc pad_op.cc
-  DEPS tensorrt_engine operator scope framework_proto op_registry)
+  batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
+  pad_op.cc split_op.cc
+  DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)

 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
   ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_converter)
...
@@ -28,6 +29,8 @@ nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine concat_op SERIAL)
 nv_test(test_trt_dropout_op SRCS test_dropout_op.cc dropout_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine dropout_op SERIAL)
 nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine pad_op SERIAL)
+nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
+        DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
+        split_op concat_op SERIAL)
paddle/fluid/inference/tensorrt/convert/concat_op.cc
...
@@ -19,7 +19,7 @@ namespace inference {
 namespace tensorrt {

 /*
- * MulOp, IMatrixMultiplyLayer in TRT. This Layer doesn't has weights.
+ * ConcatOp
  */
 class ConcatOpConverter : public OpConverter {
  public:
...
paddle/fluid/inference/tensorrt/convert/split_op.cc
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {

/*
 * SplitOp.
 */
class SplitOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(40) << "convert a fluid split op to tensorrt split layer";

    framework::OpDesc op_desc(op, nullptr);
    // Declare inputs
    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
    auto input_dims = input->getDimensions();
    int input_num = op_desc.Input("X").size();
    size_t output_num = op_desc.Output("Out").size();

    // Get Attrs
    PADDLE_ENFORCE(input_num == 1);
    int axis = boost::get<int>(op_desc.GetAttr("axis"));
    std::vector<int> output_lengths =
        boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
    PADDLE_ENFORCE(axis != 0);
    if (axis < 0) {
      axis += input_dims.nbDims;
    } else {
      axis -= 1;
    }

    PADDLE_ENFORCE(output_lengths.size() == output_num);

    SplitPlugin* plugin = new SplitPlugin(axis, output_lengths);
    nvinfer1::IPluginLayer* layer =
        engine_->AddPlugin(&input, input_num, plugin);

    std::string layer_name = "split (Output: ";
    for (size_t i = 0; i < output_num; i++) {
      auto output_name = op_desc.Output("Out")[i];
      layer->getOutput(i)->setName(output_name.c_str());
      engine_->SetITensor(output_name, layer->getOutput(i));
      layer_name += output_name;
      if (test_mode) {
        engine_->DeclareOutput(output_name);
      }
    }
    layer->setName((layer_name + ")").c_str());
  }
};

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

REGISTER_TRT_OP_CONVERTER(split, SplitOpConverter);
paddle/fluid/inference/tensorrt/convert/test_split_op.cc
new file mode 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace tensorrt {

TEST(split_op, test) {
  std::unordered_set<std::string> parameters({""});
  framework::Scope scope;
  TRTConvertValidation validator(10, parameters, scope, 1000);
  validator.DeclInputVar("split_input", nvinfer1::DimsCHW(3, 2, 2));
  validator.DeclOutputVar("split_out1", nvinfer1::DimsCHW(2, 2, 2));
  validator.DeclOutputVar("split_out2", nvinfer1::DimsCHW(1, 2, 2));

  // Prepare Op description
  framework::OpDesc desc;
  desc.SetType("split");
  desc.SetInput("X", {"split_input"});
  desc.SetOutput("Out", {"split_out1", "split_out2"});

  int num = 0;
  int axis = 1;
  std::vector<int> output_lengths = {2, 1};
  desc.SetAttr("axis", axis);
  desc.SetAttr("num", num);
  desc.SetAttr("sections", output_lengths);

  validator.SetOp(*desc.Proto());

  validator.Execute(1);
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

USE_OP(split);
paddle/fluid/inference/tensorrt/engine.cc
...
@@ -255,6 +255,12 @@ void TensorRTEngine::freshDeviceId() {
   cudaSetDevice(device_);
 }

+nvinfer1::IPluginLayer* TensorRTEngine::AddPlugin(
+    nvinfer1::ITensor* const* inputs, int nbInputs, PluginTensorRT* plugin) {
+  owned_plugin_.emplace_back(plugin);
+  return infer_network_.get()->addPluginExt(inputs, nbInputs, *plugin);
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/tensorrt/engine.h
...
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/inference/engine.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
 #include "paddle/fluid/inference/utils/singleton.h"

 namespace paddle {
...
@@ -125,6 +126,8 @@ class TensorRTEngine : public EngineBase {
   void SetRuntimeBatch(size_t batch_size);
   int GetRuntimeBatch();
   int GetDevice() { return device_; }
+  nvinfer1::IPluginLayer* AddPlugin(nvinfer1::ITensor* const* inputs,
+                                    int nbInputs, PluginTensorRT*);

   // A pointer to CPU memory is needed of the TRT weight.
   // Before TRT runs, fluid loads weight into GPU storage.
...
@@ -164,8 +167,10 @@ class TensorRTEngine : public EngineBase {
   std::unordered_map<std::string /*name*/, size_t /*max size*/> buffer_sizes_;
   std::unordered_map<std::string /*name*/, nvinfer1::ITensor* /*ITensor*/>
       itensor_map_;

   // The specific GPU id that the TensorRTEngine bounded to.
   int device_;
+  std::vector<std::unique_ptr<PluginTensorRT>> owned_plugin_;

   // TensorRT related internal members
   template <typename T>
...
paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
new file mode 100644
nv_library(tensorrt_plugin SRCS trt_plugin.cc split_op_plugin.cu DEPS enforce)
paddle/fluid/inference/tensorrt/plugin/serialize.h
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstring>
#include <type_traits>
#include <vector>
template <typename T>
inline void SerializeValue(void** buffer, T const& value);

template <typename T>
inline void DeserializeValue(void const** buffer, size_t* buffer_size,
                             T* value);

namespace {

template <typename T, class Enable = void>
struct Serializer {};

template <typename T>
struct Serializer<T, typename std::enable_if<std::is_arithmetic<T>::value ||
                                             std::is_enum<T>::value ||
                                             std::is_pod<T>::value>::type> {
  static size_t SerializedSize(T const& value) { return sizeof(T); }
  static void Serialize(void** buffer, T const& value) {
    std::memcpy(*buffer, &value, sizeof(T));
    reinterpret_cast<char*&>(*buffer) += sizeof(T);
  }
  static void Deserialize(void const** buffer, size_t* buffer_size, T* value) {
    assert(*buffer_size >= sizeof(T));
    std::memcpy(value, *buffer, sizeof(T));
    reinterpret_cast<char const*&>(*buffer) += sizeof(T);
    *buffer_size -= sizeof(T);
  }
};

template <>
struct Serializer<const char*> {
  static size_t SerializedSize(const char* value) { return strlen(value) + 1; }
  static void Serialize(void** buffer, const char* value) {
    std::strcpy(static_cast<char*>(*buffer), value);
    reinterpret_cast<char*&>(*buffer) += strlen(value) + 1;
  }
  static void Deserialize(void const** buffer, size_t* buffer_size,
                          const char** value) {
    *value = static_cast<char const*>(*buffer);
    size_t data_size = strnlen(*value, *buffer_size) + 1;
    assert(*buffer_size >= data_size);
    reinterpret_cast<char const*&>(*buffer) += data_size;
    *buffer_size -= data_size;
  }
};

template <typename T>
struct Serializer<std::vector<T>,
                  typename std::enable_if<std::is_arithmetic<T>::value ||
                                          std::is_enum<T>::value ||
                                          std::is_pod<T>::value>::type> {
  static size_t SerializedSize(std::vector<T> const& value) {
    return sizeof(value.size()) + value.size() * sizeof(T);
  }
  static void Serialize(void** buffer, std::vector<T> const& value) {
    SerializeValue(buffer, value.size());
    size_t nbyte = value.size() * sizeof(T);
    std::memcpy(*buffer, value.data(), nbyte);
    reinterpret_cast<char*&>(*buffer) += nbyte;
  }
  static void Deserialize(void const** buffer, size_t* buffer_size,
                          std::vector<T>* value) {
    size_t size;
    DeserializeValue(buffer, buffer_size, &size);
    value->resize(size);
    size_t nbyte = value->size() * sizeof(T);
    assert(*buffer_size >= nbyte);
    std::memcpy(value->data(), *buffer, nbyte);
    reinterpret_cast<char const*&>(*buffer) += nbyte;
    *buffer_size -= nbyte;
  }
};

}  // namespace

template <typename T>
inline size_t SerializedSize(T const& value) {
  return Serializer<T>::SerializedSize(value);
}

template <typename T>
inline void SerializeValue(void** buffer, T const& value) {
  return Serializer<T>::Serialize(buffer, value);
}

template <typename T>
inline void DeserializeValue(void const** buffer, size_t* buffer_size,
                             T* value) {
  return Serializer<T>::Deserialize(buffer, buffer_size, value);
}
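A hedged round-trip sketch of the helpers above, assuming serialize.h is on the include path; the buffer handling mirrors how SplitPlugin::serialize() and its deserializing constructor drive these functions:

#include <cassert>
#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/serialize.h"

int main() {
  int axis = 1;
  std::vector<int> sections = {2, 1};

  // First pass: compute the byte budget, as TRT does via getSerializationSize().
  size_t nbytes = SerializedSize(axis) + SerializedSize(sections);
  std::vector<char> storage(nbytes);

  void* write_ptr = storage.data();
  SerializeValue(&write_ptr, axis);      // advances write_ptr past the int
  SerializeValue(&write_ptr, sections);  // writes size() then the payload

  const void* read_ptr = storage.data();
  size_t remaining = nbytes;
  int axis_out = 0;
  std::vector<int> sections_out;
  DeserializeValue(&read_ptr, &remaining, &axis_out);
  DeserializeValue(&read_ptr, &remaining, &sections_out);
  assert(axis_out == axis && sections_out == sections && remaining == 0);
  return 0;
}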
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <cassert>
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {

nvinfer1::Dims SplitPlugin::getOutputDimensions(int index,
                                                const nvinfer1::Dims* inputDims,
                                                int nbInputs) {
  assert(nbInputs == 1);
  assert(index < this->getNbOutputs());
  nvinfer1::Dims const& input_dims = inputDims[0];
  nvinfer1::Dims output_dims = input_dims;
  output_dims.d[axis_] = output_length_.at(index);
  return output_dims;
}

int SplitPlugin::initialize() {
  std::vector<int> segment_offsets(1, 0);
  for (int i = 0; i < this->getNbOutputs(); ++i) {
    segment_offsets.push_back(segment_offsets.back() + output_length_[i]);
  }
  segment_offsets_ = segment_offsets;
  nvinfer1::Dims dims = this->getInputDims(0);
  nx_ = 1;
  for (int i = dims.nbDims - 1; i > axis_; --i) {
    nx_ *= dims.d[i];
  }
  ny_ = dims.d[axis_];
  nz_ = 1;
  for (int i = axis_ - 1; i >= 0; --i) {
    nz_ *= dims.d[i];
  }
  return 0;
}

int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
                         void** outputs, void* workspace, cudaStream_t stream) {
  auto const& input_dims = this->getInputDims(0);
  int input_size = 0;
  float const* idata = reinterpret_cast<float const*>(inputs[0]);
  float** odatas = reinterpret_cast<float**>(outputs);

  // kernel impl here.
  int inputBatchOffset = nx_ * ny_ * nz_;
  for (size_t i = 0; i < this->getNbOutputs(); i++) {
    for (size_t j = 0; j < batchSize; j++) {
      cudaMemcpyAsync(
          odatas[i] +
              j * (segment_offsets_[i + 1] - segment_offsets_[i]) * nx_ *
                  sizeof(float),
          inputs[0] +
              (inputBatchOffset * j + segment_offsets_[i] * nx_) *
                  sizeof(float),
          (segment_offsets_[i + 1] - segment_offsets_[i]) * nx_ * sizeof(float),
          cudaMemcpyDeviceToDevice, stream);
    }
  }

  return cudaGetLastError() != cudaSuccess;
}

}  // tensorrt
}  // inference
}  // paddle
paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {

class SplitPlugin : public PluginTensorRT {
  int axis_;
  std::vector<int> output_length_;
  int nx_, ny_, nz_;
  std::vector<int> segment_offsets_;

 protected:
  virtual size_t getSerializationSize() override {
    return SerializedSize(axis_) + SerializedSize(output_length_) +
           getBaseSerializationSize();
  }

  // TRT will call this func when we need to serialize the configuration of
  // tensorrt.
  // It should not be called by users.
  virtual void serialize(void *buffer) override {
    serializeBase(buffer);
    SerializeValue(&buffer, axis_);
    SerializeValue(&buffer, output_length_);
  }

 public:
  SplitPlugin(int axis, std::vector<int> const &output_lengths)
      : axis_(axis), output_length_(output_lengths) {
    assert(axis <= nvinfer1::Dims::MAX_DIMS);
  }

  // It was used for tensorrt deserialization.
  // It should not be called by users.
  SplitPlugin(void const *serialData, size_t serialLength) {
    deserializeBase(serialData, serialLength);
    DeserializeValue(&serialData, &serialLength, &axis_);
    DeserializeValue(&serialData, &serialLength, &output_length_);
  }

  SplitPlugin *clone() const override {
    return new SplitPlugin(axis_, output_length_);
  }

  virtual const char *getPluginType() const override { return "split"; }
  virtual int getNbOutputs() const override { return output_length_.size(); }
  virtual nvinfer1::Dims getOutputDimensions(int index,
                                             const nvinfer1::Dims *inputs,
                                             int nbInputDims) override;
  virtual int initialize() override;
  virtual int enqueue(int batchSize, const void *const *inputs, void **outputs,
                      void *workspace, cudaStream_t stream) override;
};

}  // tensorrt
}  // inference
}  // paddle
paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {

void PluginTensorRT::serializeBase(void*& buffer) {
  SerializeValue(&buffer, input_dims_);
  SerializeValue(&buffer, max_batch_size_);
  SerializeValue(&buffer, data_type_);
  SerializeValue(&buffer, data_format_);
}

void PluginTensorRT::deserializeBase(void const*& serialData,
                                     size_t& serialLength) {
  DeserializeValue(&serialData, &serialLength, &input_dims_);
  DeserializeValue(&serialData, &serialLength, &max_batch_size_);
  DeserializeValue(&serialData, &serialLength, &data_type_);
  DeserializeValue(&serialData, &serialLength, &data_format_);
}

size_t PluginTensorRT::getBaseSerializationSize() {
  return (SerializedSize(input_dims_) + SerializedSize(max_batch_size_) +
          SerializedSize(data_type_) + SerializedSize(data_format_));
}

bool PluginTensorRT::supportsFormat(nvinfer1::DataType type,
                                    nvinfer1::PluginFormat format) const {
  return ((type == nvinfer1::DataType::kFLOAT) &&
          (format == nvinfer1::PluginFormat::kNCHW));
}

void PluginTensorRT::configureWithFormat(const nvinfer1::Dims* inputDims,
                                         int nbInputs,
                                         const nvinfer1::Dims* outputDims,
                                         int nbOutputs, nvinfer1::DataType type,
                                         nvinfer1::PluginFormat format,
                                         int maxBatchSize) {
  data_type_ = type;
  data_format_ = format;
  input_dims_.assign(inputDims, inputDims + nbInputs);
  max_batch_size_ = maxBatchSize;
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstring>
#include <iostream>
#include <unordered_map>
#include <vector>
#include "NvInfer.h"
#include "paddle/fluid/inference/tensorrt/plugin/serialize.h"
namespace paddle {
namespace inference {
namespace tensorrt {

class PluginTensorRT : public nvinfer1::IPluginExt {
 public:
  PluginTensorRT() {}
  PluginTensorRT(const void* serialized_data, size_t length) {}

  nvinfer1::Dims const& getInputDims(int index) const {
    return input_dims_.at(index);
  }
  size_t getMaxBatchSize() const { return max_batch_size_; }
  nvinfer1::DataType getDataType() const { return data_type_; }
  nvinfer1::PluginFormat getDataFormat() const { return data_format_; }
  virtual const char* getPluginVersion() const { return "1"; }
  size_t getWorkspaceSize(int) const override { return 0; }
  void terminate() override {}
  virtual ~PluginTensorRT() {}

  // Check format support. The default is FLOAT32 and NCHW.
  bool supportsFormat(nvinfer1::DataType type,
                      nvinfer1::PluginFormat format) const override;
  void configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs,
                           const nvinfer1::Dims* outputDims, int nbOutputs,
                           nvinfer1::DataType type,
                           nvinfer1::PluginFormat format,
                           int maxBatchSize) override;

  // *NOTE* The following functions need to be overridden in the subclass.
  virtual nvinfer1::IPluginExt* clone() const = 0;
  virtual const char* getPluginType() const = 0;
  // Initialize the layer for execution. This is called when the engine is
  // created.
  int initialize() override { return 0; }
  // Serialize the layer config to buffer.
  virtual void serialize(void* buffer) = 0;
  virtual size_t getSerializationSize() = 0;
  virtual int enqueue(int batchSize, const void* const* inputs, void** outputs,
                      void* workspace, cudaStream_t stream) = 0;

 protected:
  // Deserialize input_dims, max_batch_size, data_type, data_format
  void deserializeBase(void const*& serialData, size_t& serialLength);
  size_t getBaseSerializationSize();
  // Serialize input_dims, max_batch_size, data_type, data_format
  void serializeBase(void*& buffer);

  std::vector<nvinfer1::Dims> input_dims_;
  size_t max_batch_size_;
  nvinfer1::DataType data_type_;
  nvinfer1::PluginFormat data_format_;
};

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle
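Concrete plugins subclass PluginTensorRT and supply the pure-virtual clone, getPluginType, serialize, getSerializationSize and enqueue. A standalone sketch of the layered serialization contract this header implies, using simplified stand-in types rather than the real TensorRT interfaces (SerializeValue here is a memcpy-based stand-in for the helper in serialize.h):

#include <cassert>
#include <cstddef>
#include <cstring>
#include <vector>

// Simplified stand-in for the helper in serialize.h.
template <typename T>
void SerializeValue(char** buffer, const T& value) {
  std::memcpy(*buffer, &value, sizeof(T));
  *buffer += sizeof(T);
}

struct BasePlugin {
  int max_batch_size_ = 8;
  size_t getBaseSerializationSize() const { return sizeof(max_batch_size_); }
  void serializeBase(char*& buffer) const {
    SerializeValue(&buffer, max_batch_size_);
  }
};

struct MyPlugin : BasePlugin {  // hypothetical subclass
  float scale_ = 0.5f;
  // Total size = base fields + subclass fields, mirroring the contract above.
  size_t getSerializationSize() const {
    return getBaseSerializationSize() + sizeof(scale_);
  }
  void serialize(char* buffer) const {
    serializeBase(buffer);            // base packs its common state first
    SerializeValue(&buffer, scale_);  // then the subclass appends its own
  }
};

int main() {
  MyPlugin plugin;
  std::vector<char> buf(plugin.getSerializationSize());
  plugin.serialize(buf.data());
  assert(buf.size() == sizeof(int) + sizeof(float));
}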
paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -104,8 +104,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
   if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
     inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz")
   endif()
   inference_analysis_test(test_trt_models SRCS trt_models_tester.cc
     EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor
-    ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL)
+    ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL)
 endif()
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -178,7 +178,8 @@ TEST(Analyzer_dam, profile) {
   std::vector<PaddleTensor> outputs;
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     PADDLE_ENFORCE_GT(outputs.size(), 0);
@@ -213,7 +214,8 @@ TEST(Analyzer_dam, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace inference
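The same mechanical change repeats across the testers below: the concrete config is funneled through the base PaddlePredictor::Config pointer and recovered inside the helpers. A standalone sketch of that cast pattern (the struct names here are stand-ins, not Paddle's real types):

#include <iostream>
#include <string>

// Stand-ins for PaddlePredictor::Config and contrib::AnalysisConfig, just to
// illustrate the cast pattern; these are not Paddle's real types.
struct Config {
  std::string model_dir;
};
struct AnalysisConfig : Config {
  bool enable_ir_optim{true};
};

// Like the updated helpers: accept the base-class pointer, recover the
// concrete config inside when the caller says it is an AnalysisConfig.
void PrintConfig(const Config* config, bool use_analysis) {
  if (use_analysis) {
    auto* analysis = reinterpret_cast<const AnalysisConfig*>(config);
    std::cout << analysis->model_dir
              << " ir_optim=" << analysis->enable_ir_optim << "\n";
    return;
  }
  std::cout << config->model_dir << "\n";
}

int main() {
  AnalysisConfig cfg;
  cfg.model_dir = "/tmp/model";
  PrintConfig(reinterpret_cast<const Config*>(&cfg), true);
}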
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -133,7 +133,8 @@ TEST(Analyzer_LAC, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -175,7 +176,8 @@ TEST(Analyzer_LAC, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace analysis
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -121,7 +121,8 @@ TEST(Analyzer_Chinese_ner, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -160,7 +161,8 @@ TEST(Analyzer_Chinese_ner, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace inference
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
@@ -45,7 +45,8 @@ void profile(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 }
 
 TEST(Analyzer_resnet50, profile) { profile(); }
@@ -74,7 +75,8 @@ void compare(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 TEST(Analyzer_resnet50, compare) { compare(); }
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -233,8 +233,8 @@ TEST(Analyzer_rnn1, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   LOG(INFO) << "to test prediction";
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 }
 
 // Check the fuse status
@@ -261,7 +261,8 @@ TEST(Analyzer_rnn1, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 // Test Multi-Thread.
@@ -272,7 +273,8 @@ TEST(Analyzer_rnn1, multi_thread) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, 4 /* multi_thread */);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, 4 /* multi_thread */);
 }
 
 // Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
@@ -132,7 +132,8 @@ TEST(Analyzer_rnn2, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -153,7 +154,8 @@ TEST(Analyzer_rnn2, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace inference
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
@@ -161,7 +161,8 @@ TEST(Analyzer_seq_conv1, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -198,7 +199,8 @@ TEST(Analyzer_seq_conv1, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace inference
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
@@ -74,7 +74,8 @@ TEST(Analyzer_Text_Classification, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1) {
     // Get output
@@ -101,7 +102,8 @@ TEST(Analyzer_Text_Classification, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
@@ -112,7 +114,8 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 }  // namespace inference
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -91,7 +91,8 @@ void profile(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     const float ocr_result_data[] = {
@@ -133,7 +134,8 @@ void compare(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      input_slots_all);
 }
 
 TEST(Analyzer_vis, compare) { compare(); }
paddle/fluid/inference/tests/api/config_printer.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <ostream>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
namespace inference {

thread_local int num_spaces = 0;

static std::string GenSpaces(int num_spaces) {
  std::ostringstream os;
  for (int i = 0; i < num_spaces; ++i) {
    os << "  ";
  }
  return os.str();
}

std::ostream &operator<<(std::ostream &os,
                         const PaddlePredictor::Config &config) {
  os << GenSpaces(num_spaces) << "PaddlePredictor::Config {\n";
  num_spaces++;
  os << GenSpaces(num_spaces) << "model_dir: " << config.model_dir << "\n";
  num_spaces--;
  os << GenSpaces(num_spaces) << "}\n";
  return os;
}

std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
  os << GenSpaces(num_spaces) << "NativeConfig {\n";
  num_spaces++;
  os << *reinterpret_cast<const PaddlePredictor::Config *>(&config);
  os << GenSpaces(num_spaces) << "use_gpu: " << config.use_gpu << "\n";
  os << GenSpaces(num_spaces) << "device: " << config.device << "\n";
  os << GenSpaces(num_spaces)
     << "fraction_of_gpu_memory: " << config.fraction_of_gpu_memory << "\n";
  os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file << "\n";
  os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
  os << GenSpaces(num_spaces)
     << "specify_input_name: " << config.specify_input_name << "\n";
  num_spaces--;
  os << GenSpaces(num_spaces) << "}\n";
  return os;
}

std::ostream &operator<<(std::ostream &os,
                         const contrib::AnalysisConfig &config) {
  os << GenSpaces(num_spaces) << "contrib::AnalysisConfig {\n";
  num_spaces++;
  os << *reinterpret_cast<const NativeConfig *>(&config);
  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.enable_ir_optim
     << "\n";
  os << GenSpaces(num_spaces)
     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops << "\n";
  os << GenSpaces(num_spaces) << "use_tensorrt: " << config.use_tensorrt()
     << "\n";
  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.use_mkldnn() << "\n";
  num_spaces--;
  os << GenSpaces(num_spaces) << "}\n";
  return os;
}

}  // namespace inference
}  // namespace paddle
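These printers nest by sharing one thread_local indentation counter and delegating to the base-type operator<<. A standalone sketch of the same technique with stand-in types (Base/Derived are illustrative, not Paddle's configs):

#include <iostream>
#include <string>

// Shared per-thread indentation depth, as in config_printer.h.
thread_local int depth = 0;

static std::string Indent() { return std::string(2 * depth, ' '); }

struct Base { std::string model_dir = "/tmp/model"; };
struct Derived : Base { bool enable_ir_optim = true; };

std::ostream &operator<<(std::ostream &os, const Base &b) {
  os << Indent() << "Base {\n";
  ++depth;
  os << Indent() << "model_dir: " << b.model_dir << "\n";
  --depth;
  return os << Indent() << "}\n";
}

std::ostream &operator<<(std::ostream &os, const Derived &d) {
  os << Indent() << "Derived {\n";
  ++depth;
  os << static_cast<const Base &>(d);  // reuse the base printer, indented
  os << Indent() << "enable_ir_optim: " << d.enable_ir_optim << "\n";
  --depth;
  return os << Indent() << "}\n";
}

int main() { std::cout << Derived{}; }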
paddle/fluid/inference/tests/api/tester_helper.h
@@ -19,13 +19,16 @@
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/tests/api/config_printer.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -38,10 +41,18 @@ DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
 DEFINE_bool(use_analysis, true,
             "Running the inference program in analysis mode.");
+DECLARE_bool(profile);
 
 namespace paddle {
 namespace inference {
 
+using contrib::AnalysisConfig;
+
+void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) {
+  if (use_analysis) {
+    LOG(INFO) << *reinterpret_cast<const contrib::AnalysisConfig *>(config);
+    return;
+  }
+  LOG(INFO) << *config;
+}
+
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {
@@ -77,12 +88,13 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
 }
 
 std::unique_ptr<PaddlePredictor> CreateTestPredictor(
-    const AnalysisConfig &config, bool use_analysis = true) {
+    const PaddlePredictor::Config *config, bool use_analysis = true) {
   if (use_analysis) {
-    return CreatePaddlePredictor<contrib::AnalysisConfig>(config);
-  } else {
-    return CreatePaddlePredictor<NativeConfig>(config);
+    return CreatePaddlePredictor<contrib::AnalysisConfig>(
+        *(reinterpret_cast<const contrib::AnalysisConfig *>(config)));
   }
+  return CreatePaddlePredictor<NativeConfig>(
+      *(reinterpret_cast<const NativeConfig *>(config)));
 }
 
 size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
@@ -111,11 +123,23 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
 }
 
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
-                       const std::string &dirname) {
+                       const std::string &dirname, bool is_combined = true,
+                       std::string model_filename = "model",
+                       std::string params_filename = "params") {
   // Set fake_image_data
   PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
-  std::vector<std::vector<int64_t>> feed_target_shapes =
-      GetFeedTargetShapes(dirname, true, "model", "params");
+  std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
+      dirname, is_combined, model_filename, params_filename);
+  std::ostringstream os;
+  for (size_t i = 0; i < feed_target_shapes.size(); ++i) {
+    os << "feed target " << i << ": {" << feed_target_shapes[i][0];
+    for (size_t j = 1; j < feed_target_shapes[i].size(); ++j) {
+      os << ", " << feed_target_shapes[i][j];
+    }
+    os << "}\n";
+  }
+  LOG(INFO) << os.str();
   int dim1 = feed_target_shapes[0][1];
   int dim2 = feed_target_shapes[0][2];
   int dim3 = feed_target_shapes[0][3];
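Note that the new SetFakeImageInput parameters are all defaulted, so existing call sites that pass only the directory keep compiling. A standalone sketch of that style of backward-compatible signature change (names here are stand-ins):

#include <iostream>
#include <string>

// Stand-in for SetFakeImageInput: the new parameters are defaulted, so an
// old-style call that passes only the directory still compiles unchanged.
void SetFakeInput(const std::string& dirname, bool is_combined = true,
                  std::string model_filename = "model",
                  std::string params_filename = "params") {
  std::cout << dirname << " combined=" << is_combined << " "
            << model_filename << "/" << params_filename << "\n";
}

int main() {
  SetFakeInput("/tmp/model");                          // old-style call site
  SetFakeInput("/tmp/model", false, "__model__", "");  // new uncombined form
}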
@@ -139,25 +163,43 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
 }
 
 void TestOneThreadPrediction(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   auto predictor = CreateTestPredictor(config, use_analysis);
-  Timer timer;
-  timer.tic();
-  for (int i = 0; i < num_times; i++) {
-    for (size_t j = 0; j < inputs.size(); j++) {
-      predictor->Run(inputs[j], outputs);
+
+  // warmup run
+  LOG(INFO) << "Warm up run...";
+  {
+    Timer warmup_timer;
+    warmup_timer.tic();
+    predictor->Run(inputs[0], outputs, batch_size);
+    PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1);
+#if !defined(_WIN32)
+    if (FLAGS_profile) {
+      paddle::platform::ResetProfiler();
+    }
+#endif
+  }
+
+  LOG(INFO) << "Run " << num_times << " times...";
+  {
+    Timer run_timer;
+    run_timer.tic();
+    for (int i = 0; i < num_times; i++) {
+      for (size_t j = 0; j < inputs.size(); j++) {
+        predictor->Run(inputs[j], outputs, batch_size);
+      }
     }
+    PrintTime(batch_size, num_times, 1, 0, run_timer.toc() / num_times,
+              inputs.size());
   }
-  PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times,
-            inputs.size());
 }
 
 void TestMultiThreadPrediction(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<PaddleTensor> *outputs, int num_threads,
     bool use_analysis = true) {
@@ -200,12 +242,11 @@ void TestMultiThreadPrediction(
   }
 }
 
-void TestPrediction(const AnalysisConfig &config,
+void TestPrediction(const PaddlePredictor::Config *config,
                     const std::vector<std::vector<PaddleTensor>> &inputs,
                     std::vector<PaddleTensor> *outputs, int num_threads,
                     bool use_analysis = FLAGS_use_analysis) {
-  LOG(INFO) << "use_analysis: " << use_analysis
-            << ", use_mkldnn: " << config.use_mkldnn();
+  PrintConfig(config, use_analysis);
   if (num_threads == 1) {
     TestOneThreadPrediction(config, inputs, outputs, use_analysis);
   } else {
@@ -215,9 +256,9 @@ void TestPrediction(const AnalysisConfig &config,
 }
 
 void CompareNativeAndAnalysis(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs) {
-  LOG(INFO) << "use_mkldnn: " << config.use_mkldnn();
+  PrintConfig(config, true);
   std::vector<PaddleTensor> native_outputs, analysis_outputs;
   TestOneThreadPrediction(config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
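The reworked TestOneThreadPrediction separates an untimed-for-reporting warm-up pass from the measured loop, so lazy initialization does not pollute the average. A standalone sketch of that warm-up-then-measure pattern using std::chrono (Workload() is a placeholder):

#include <chrono>
#include <iostream>

// Placeholder for the real inference call.
static double Workload() {
  double acc = 0;
  for (int i = 0; i < 1 << 20; ++i) acc += i * 1e-9;
  return acc;
}

int main() {
  using clock = std::chrono::steady_clock;

  // One warm-up pass, reported separately, so one-time setup costs
  // (allocations, autotuning, ...) do not skew the measurement.
  auto warm_start = clock::now();
  volatile double sink = Workload();
  std::chrono::duration<double, std::milli> warm = clock::now() - warm_start;
  std::cout << "warmup: " << warm.count() << " ms\n";

  const int num_times = 10;
  auto start = clock::now();
  for (int i = 0; i < num_times; ++i) sink = Workload();
  std::chrono::duration<double, std::milli> total = clock::now() - start;
  std::cout << "avg over " << num_times
            << " runs: " << total.count() / num_times << " ms\n";
  return sink < 0;  // keep the compiler from discarding the work
}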
paddle/fluid/inference/tests/api/trt_models_tester.cc
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
-using paddle::contrib::AnalysisConfig;
-
-DEFINE_string(dirname, "", "Directory of the inference model.");
-
-NativeConfig GetConfigNative() {
-  NativeConfig config;
-  config.model_dir = FLAGS_dirname;
-  // LOG(INFO) << "dirname  " << config.model_dir;
-  config.fraction_of_gpu_memory = 0.15;
-  config.use_gpu = true;
-  config.device = 0;
-  return config;
-}
-
-void PrepareTRTConfig(AnalysisConfig *config) {
-  config->model_dir = FLAGS_dirname + "/" + "mobilenet";
-  config->fraction_of_gpu_memory = 0.15;
-  config->EnableTensorRtEngine(1 << 10, 5);
-  config->pass_builder()->DeletePass("conv_bn_fuse_pass");
-  config->pass_builder()->DeletePass("fc_fuse_pass");
-  config->pass_builder()->TurnOnDebug();
-}
-
-void PrepareInputs(std::vector<PaddleTensor> *tensors, int batch_size) {
-  PADDLE_ENFORCE_EQ(tensors->size(), 1UL);
-  auto &tensor = tensors->front();
-  int height = 224;
-  int width = 224;
-  float *data = new float[batch_size * 3 * height * width];
-  memset(data, 0, sizeof(float) * (batch_size * 3 * height * width));
-  data[0] = 1.0f;
-
-  // Prepare inputs
-  tensor.name = "input_0";
-  tensor.shape = std::vector<int>({batch_size, 3, height, width});
-  tensor.data = PaddleBuf(static_cast<void *>(data),
-                          sizeof(float) * (batch_size * 3 * height * width));
-  tensor.dtype = PaddleDType::FLOAT32;
-}
-
-void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
-  auto config0 = GetConfigNative();
-  config0.model_dir = model_dirname;
-
-  AnalysisConfig config1(true);
-  PrepareTRTConfig(&config1);
-  config1.model_dir = model_dirname;
-
-  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
-  auto predictor1 = CreatePaddlePredictor(config1);
-
-  // Prepare inputs
-  std::vector<PaddleTensor> paddle_tensor_feeds(1);
-  PrepareInputs(&paddle_tensor_feeds, batch_size);
-
-  // Prepare outputs
-  std::vector<PaddleTensor> outputs0;
-  std::vector<PaddleTensor> outputs1;
-  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
-  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));
-
-  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
-  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
-  EXPECT_EQ(num_elements, num_elements1);
-
-  auto *data0 = static_cast<float *>(outputs0.front().data.data());
-  auto *data1 = static_cast<float *>(outputs1.front().data.data());
-
-  ASSERT_GT(num_elements, 0UL);
-  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
-    EXPECT_NEAR(data0[i], data1[i], 1e-3);
-  }
-}
-
-TEST(trt_models_test, mobilenet) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "mobilenet");
-}
-
-TEST(trt_models_test, resnet50) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnet50");
-}
-
-TEST(trt_models_test, resnext50) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnext50");
-}
-
-TEST(trt_models_test, raw_gpu) {
-  std::string model_dir = FLAGS_dirname + "/" + "mobilenet";
-  auto config0 = GetConfigNative();
-  config0.model_dir = model_dir;
-  int batch_size = 2;
-
-  AnalysisConfig config1(true);
-  config1.fraction_of_gpu_memory = 0.1;
-  config1.enable_ir_optim = true;
-  config1.model_dir = model_dir;
-
-  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
-  auto predictor1 = CreatePaddlePredictor(config1);
-
-  std::vector<PaddleTensor> paddle_tensor_feeds(1);
-  PrepareInputs(&paddle_tensor_feeds, batch_size);
-
-  std::vector<PaddleTensor> outputs0;
-  std::vector<PaddleTensor> outputs1;
-  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
-  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));
-
-  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
-  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
-  EXPECT_EQ(num_elements, num_elements1);
-
-  auto *data0 = static_cast<float *>(outputs0.front().data.data());
-  auto *data1 = static_cast<float *>(outputs1.front().data.data());
-
-  ASSERT_GT(num_elements, 0UL);
-  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
-    EXPECT_NEAR(data0[i], data1[i], 1e-3);
-  }
-}
+namespace inference {
+
+DEFINE_bool(use_tensorrt, true, "Test the performance of TensorRT engine.");
+DEFINE_string(prog_filename, "", "Name of model file.");
+DEFINE_string(param_filename, "", "Name of parameters file.");
+
+template <typename ConfigType>
+void SetConfig(ConfigType* config, std::string model_dir, bool use_gpu,
+               bool use_tensorrt = false, int batch_size = -1) {
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
+    config->param_file = model_dir + "/" + FLAGS_param_filename;
+  } else {
+    config->model_dir = model_dir;
+  }
+  if (use_gpu) {
+    config->use_gpu = true;
+    config->device = 0;
+    config->fraction_of_gpu_memory = 0.15;
+  }
+}
+
+template <>
+void SetConfig<contrib::AnalysisConfig>(contrib::AnalysisConfig* config,
+                                        std::string model_dir, bool use_gpu,
+                                        bool use_tensorrt, int batch_size) {
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
+    config->param_file = model_dir + "/" + FLAGS_param_filename;
+  } else {
+    config->model_dir = model_dir;
+  }
+  if (use_gpu) {
+    config->use_gpu = true;
+    config->device = 0;
+    config->fraction_of_gpu_memory = 0.15;
+    if (use_tensorrt) {
+      config->EnableTensorRtEngine(1 << 10, batch_size);
+      config->pass_builder()->DeletePass("conv_bn_fuse_pass");
+      config->pass_builder()->DeletePass("fc_fuse_pass");
+      config->pass_builder()->TurnOnDebug();
+    } else {
+      config->enable_ir_optim = true;
+    }
+  }
+}
+
+void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
+  std::vector<std::vector<PaddleTensor>> inputs_all;
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
+                      FLAGS_param_filename);
+  } else {
+    SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
+  }
+
+  std::vector<PaddleTensor> outputs;
+  if (use_analysis || use_tensorrt) {
+    contrib::AnalysisConfig config(true);
+    SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
+                                       FLAGS_batch_size);
+    TestPrediction(reinterpret_cast<PaddlePredictor::Config*>(&config),
+                   inputs_all, &outputs, FLAGS_num_threads, true);
+  } else {
+    NativeConfig config;
+    SetConfig<NativeConfig>(&config, model_dir, true, false);
+    TestPrediction(reinterpret_cast<PaddlePredictor::Config*>(&config),
+                   inputs_all, &outputs, FLAGS_num_threads, false);
+  }
+}
+
+void compare(std::string model_dir, bool use_tensorrt) {
+  std::vector<std::vector<PaddleTensor>> inputs_all;
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
+                      FLAGS_param_filename);
+  } else {
+    SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
+  }
+
+  std::vector<PaddleTensor> native_outputs;
+  NativeConfig native_config;
+  SetConfig<NativeConfig>(&native_config, model_dir, true, false,
+                          FLAGS_batch_size);
+  TestOneThreadPrediction(
+      reinterpret_cast<PaddlePredictor::Config*>(&native_config), inputs_all,
+      &native_outputs, false);
+
+  std::vector<PaddleTensor> analysis_outputs;
+  contrib::AnalysisConfig analysis_config(true);
+  SetConfig<contrib::AnalysisConfig>(&analysis_config, model_dir, true,
+                                     use_tensorrt, FLAGS_batch_size);
+  TestOneThreadPrediction(
+      reinterpret_cast<PaddlePredictor::Config*>(&analysis_config), inputs_all,
+      &analysis_outputs, true);
+
+  CompareResult(native_outputs, analysis_outputs);
+}
+
+TEST(TensorRT_mobilenet, compare) {
+  std::string model_dir = FLAGS_infer_model + "/mobilenet";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnet50, compare) {
+  std::string model_dir = FLAGS_infer_model + "/resnet50";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnext50, compare) {
+  std::string model_dir = FLAGS_infer_model + "/resnext50";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnext50, profile) {
+  std::string model_dir = FLAGS_infer_model + "/resnext50";
+  profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
+}
+
+TEST(TensorRT_mobilenet, analysis) {
+  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
+  compare(model_dir, /* use_tensorrt */ false);
+}
+
+}  // namespace inference
 }  // namespace paddle
 
 USE_PASS(tensorrt_subgraph_pass);
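The SetConfig pair above uses a primary template for the common fields plus a full specialization that layers the analysis/TensorRT setup on top. A standalone sketch of that pattern with stand-in config types:

#include <iostream>
#include <string>

// Illustrative stand-ins, not Paddle's real config types.
struct NativeCfg { std::string model_dir; bool use_gpu = false; };
struct AnalysisCfg : NativeCfg { bool tensorrt = false; };

// Primary template: handles the fields every config shares.
template <typename ConfigType>
void SetConfig(ConfigType* config, const std::string& model_dir,
               bool use_gpu) {
  config->model_dir = model_dir;
  config->use_gpu = use_gpu;
}

// Full specialization: same common setup, plus the richer config's extras.
template <>
void SetConfig<AnalysisCfg>(AnalysisCfg* config, const std::string& model_dir,
                            bool use_gpu) {
  config->model_dir = model_dir;
  config->use_gpu = use_gpu;
  config->tensorrt = use_gpu;  // only meaningful on GPU in this sketch
}

int main() {
  AnalysisCfg cfg;
  SetConfig<AnalysisCfg>(&cfg, "/tmp/model", true);
  std::cout << cfg.model_dir << " tensorrt=" << cfg.tensorrt << "\n";
}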
paddle/fluid/operators/CMakeLists.txt
@@ -94,7 +94,8 @@ function(op_library TARGET)
     # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
     foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op" "hierarchical_sigmoid_op"
        "crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op"
-       "fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
+       "fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op"
+       "fusion_seqexpand_concat_fc_op" "attention_lstm_op" "fused_embedding_fc_lstm_op" "fc_op")
       if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
         return()
       endif()
@@ -308,8 +309,10 @@ op_library(flatten_op DEPS reshape_op)
 op_library(sequence_pad_op DEPS sequence_padding)
 op_library(unstack_op DEPS stack_op)
 op_library(fake_quantize_op DEPS memory)
+if (NOT WIN32)
 op_library(crf_decoding_op DEPS jit_kernel)
 op_library(fusion_lstm_op DEPS jit_kernel)
+endif(NOT WIN32)
 if (WITH_GPU)
   op_library(conv_op DEPS vol2col depthwise_conv im2col)
   op_library(layer_norm_op DEPS cub)
@@ -325,8 +328,8 @@ op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
 op_library(save_combine_op DEPS lod_tensor)
 op_library(load_combine_op DEPS lod_tensor)
-op_library(tensor_array_to_tensor_op DEPS concat_op)
 op_library(concat_op DEPS concat_and_split)
+op_library(tensor_array_to_tensor_op DEPS concat_op)
 
 list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -50,12 +50,18 @@ static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache";
 static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
     static_cast<size_t>(1024) * 1024 * 1024;
 
-static constexpr size_t kNUM_CUDNN_FWD_ALGS =
-    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT;
+#if CUDNN_VERSION_MIN(6, 0, 5)
+static constexpr size_t kNUM_CUDNN_FWD_ALGS = CUDNN_CONVOLUTION_FWD_ALGO_COUNT;
 static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS =
     CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT;
 static constexpr size_t kNUM_CUDNN_BWD_DATA_ALGS =
     CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT;
+#else
+// cuDNN v5 has no CUDNN_CONVOLUTION_FWD_ALGO_COUNT etc.
+static constexpr size_t kNUM_CUDNN_FWD_ALGS = 7;
+static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = 4;
+static constexpr size_t kNUM_CUDNN_BWD_DATA_ALGS = 5;
+#endif
 
 template <typename T>
 class CUDNNConvOpKernel : public framework::OpKernel<T> {
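This change guards the *_ALGO_COUNT enum values behind a version check and falls back to hard-coded counts on older cuDNN releases. A standalone sketch of that gating pattern (the MYLIB_* names are hypothetical stand-ins):

#include <cstddef>
#include <cstdio>

// Use the library-provided count when the version macro says it exists,
// otherwise fall back to the known count for older releases.
#define MYLIB_VERSION 6005
#if MYLIB_VERSION >= 6005
#define MYLIB_ALGO_COUNT 8  // stand-in for the real enum count
static constexpr size_t kNumAlgos = MYLIB_ALGO_COUNT;
#else
static constexpr size_t kNumAlgos = 7;  // known count for older versions
#endif

int main() { std::printf("%zu\n", kNumAlgos); }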
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
@@ -15,6 +15,10 @@ limitations under the License. */
 #include <algorithm>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/float16.h"
+
+using paddle::platform::PADDLE_CUDA_NUM_THREADS;
+using paddle::platform::float16;
 
 namespace paddle {
 namespace operators {
paddle/fluid/operators/elementwise_op_function.h
@@ -111,6 +111,17 @@ class RowwiseTransformIterator<T, platform::CPUDeviceContext>
     return *this;
   }
 
+  RowwiseTransformIterator<T, platform::CPUDeviceContext> &operator+(int n) {
+    while (n-- > 0) {
+      ++i_;
+      if (UNLIKELY(i_ == n_)) {
+        i_ = 0;
+      }
+    }
+    return *this;
+  }
+
   bool operator==(const RowwiseTransformIterator<T, platform::CPUDeviceContext>
                       &rhs) const {
     return (ptr_ + i_) == &(*rhs);
@@ -149,6 +160,21 @@ class MidWiseTransformIterator<T, platform::CPUDeviceContext>
     return *this;
   }
 
+  MidWiseTransformIterator<T, platform::CPUDeviceContext> &operator+(int n) {
+    while (n-- > 0) {
+      ++j_;
+      if (UNLIKELY(j_ == post_)) {
+        ++i_;
+        j_ = 0;
+        if (UNLIKELY(i_ == n_)) {
+          i_ = 0;
+        }
+      }
+    }
+    return *this;
+  }
+
   bool operator==(const MidWiseTransformIterator<T, platform::CPUDeviceContext>
                       &rhs) const {
     return (ptr_ + i_) == &(*rhs);
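The new operator+ advances the iterator n steps with wrap-around, and note that it mutates in place rather than returning a copy, just like the Paddle iterators do. A standalone sketch of that cyclic advance:

#include <cassert>

// Minimal sketch of the wrap-around advance: an index cycling modulo n_.
struct CyclicIndex {
  int i_ = 0;
  int n_;
  explicit CyclicIndex(int n) : n_(n) {}
  // Like the Paddle iterators, operator+ mutates *this in place.
  CyclicIndex& operator+(int n) {
    while (n-- > 0) {
      ++i_;
      if (i_ == n_) i_ = 0;  // wrap, as RowwiseTransformIterator does
    }
    return *this;
  }
};

int main() {
  CyclicIndex idx(3);
  idx + 5;  // advance five steps: 1, 2, 0, 1, 2
  assert(idx.i_ == 2);
}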
paddle/fluid/operators/grid_sampler_op.h
@@ -63,7 +63,8 @@ static void CalcGridLocations(const platform::CPUDeviceContext& ctx,
   Tensor ones;
   ones.mutable_data<T>({n, h, w}, ctx.GetPlace());
   auto ones_t = EigenTensor<T, 3>::From(ones).setConstant(1.0);
-  Tensor half_xmax, half_ymax;
+  Tensor half_xmax;
+  Tensor half_ymax;
   half_xmax.mutable_data<T>({n, h, w}, ctx.GetPlace());
   auto half_xmax_t =
       EigenTensor<T, 3>::From(half_xmax).setConstant(0.5 * x_max);
paddle/fluid/operators/math/CMakeLists.txt
@@ -75,12 +75,13 @@ if(WITH_GPU)
 endif()
 cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split)
 cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
+if (NOT WIN32)
 set(JIT_KERNEL_SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc)
 set(JIT_KERNEL_DEPS cpu_info cblas gflags enforce)
 if(WITH_XBYAK)
     list(APPEND JIT_KERNEL_SRCS jit_gen.cc jit_code.cc)
     list(APPEND JIT_KERNEL_DEPS xbyak)
 endif()
 cc_library(jit_kernel SRCS ${JIT_KERNEL_SRCS} DEPS ${JIT_KERNEL_DEPS})
 cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel)
+endif(NOT WIN32)
paddle/fluid/operators/math/softmax.cc
@@ -19,8 +19,10 @@ namespace paddle {
 namespace operators {
 namespace math {
 
-template class SoftmaxFunctor<platform::CPUDeviceContext, float>;
-template class SoftmaxFunctor<platform::CPUDeviceContext, double>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, float, true>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, float, false>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, double, true>;
+template class SoftmaxFunctor<platform::CPUDeviceContext, double, false>;
 template class SoftmaxGradFunctor<platform::CPUDeviceContext, float>;
 template class SoftmaxGradFunctor<platform::CPUDeviceContext, double>;
paddle/fluid/operators/math/softmax.cu
@@ -98,9 +98,14 @@ template class SoftmaxGradCUDNNFunctor<float>;
 template class SoftmaxGradCUDNNFunctor<double>;
 template class SoftmaxGradCUDNNFunctor<platform::float16>;
 
-template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16>;
-template class SoftmaxFunctor<platform::CUDADeviceContext, float>;
-template class SoftmaxFunctor<platform::CUDADeviceContext, double>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16,
+                              false>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16,
+                              true>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, float, false>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, double, false>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, float, true>;
+template class SoftmaxFunctor<platform::CUDADeviceContext, double, true>;
 template class SoftmaxGradFunctor<platform::CUDADeviceContext, float>;
 template class SoftmaxGradFunctor<platform::CUDADeviceContext, double>;
 template class SoftmaxGradFunctor<platform::CUDADeviceContext,
paddle/fluid/operators/math/softmax.h
@@ -19,7 +19,7 @@ namespace paddle {
 namespace operators {
 namespace math {
 
-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, bool is_test>
 class SoftmaxFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor* X,
paddle/fluid/operators/math/softmax_impl.h
@@ -32,10 +32,10 @@ struct ValueClip {
   }
 };
 
-template <typename DeviceContext, typename T>
-void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
-                                                  const framework::Tensor* X,
-                                                  framework::Tensor* Y) {
+template <typename DeviceContext, typename T, bool is_test>
+void SoftmaxFunctor<DeviceContext, T, is_test>::operator()(
+    const DeviceContext& context, const framework::Tensor* X,
+    framework::Tensor* Y) {
   auto logits = EigenMatrix<T>::From(*X);
   auto softmax = EigenMatrix<T>::From(*Y);
 
@@ -65,6 +65,39 @@ void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
       .broadcast(one_by_class));
 }
 
+template <typename DeviceContext, typename T>
+class SoftmaxFunctor<DeviceContext, T, true> {
+  void operator()(const DeviceContext& context, const framework::Tensor* X,
+                  framework::Tensor* Y) {
+    auto logits = EigenMatrix<T>::From(*X);
+    auto softmax = EigenMatrix<T>::From(*Y);
+
+    const int kBatchDim = 0;
+    const int kClassDim = 1;
+
+    const int batch_size = logits.dimension(kBatchDim);
+    const int num_classes = logits.dimension(kClassDim);
+
+    Eigen::DSizes<int, 1> along_class(kClassDim);
+    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
+    Eigen::DSizes<int, 2> one_by_class(1, num_classes);
+
+    auto shifted_logits = (logits -
+                           logits.maximum(along_class)
+                               .eval()
+                               .reshape(batch_by_one)
+                               .broadcast(one_by_class));
+
+    softmax.device(*context.eigen_device()) = shifted_logits.exp();
+    softmax.device(*context.eigen_device()) = (softmax *
+                                               softmax.sum(along_class)
+                                                   .inverse()
+                                                   .eval()
+                                                   .reshape(batch_by_one)
+                                                   .broadcast(one_by_class));
+  }
+};
+
 template <typename DeviceContext, typename T>
 void SoftmaxGradFunctor<DeviceContext, T>::operator()(
     const DeviceContext& context, const framework::Tensor* y,
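The is_test = true specialization computes the usual numerically stable softmax but skips the ValueClip step the general path applies. A standalone sketch of the shift-exp-normalize computation itself:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Numerically stable softmax: shift by the row max so exp() never
// overflows, exponentiate, then normalize by the sum.
std::vector<float> Softmax(const std::vector<float>& logits) {
  float max_logit = *std::max_element(logits.begin(), logits.end());
  std::vector<float> out(logits.size());
  float sum = 0.f;
  for (size_t i = 0; i < logits.size(); ++i) {
    out[i] = std::exp(logits[i] - max_logit);
    sum += out[i];
  }
  for (float& v : out) v /= sum;  // each row now sums to 1
  return out;
}

int main() {
  for (float v : Softmax({1.f, 2.f, 3.f})) std::printf("%f ", v);
  std::printf("\n");  // prints roughly 0.090031 0.244728 0.665241
}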
paddle/fluid/operators/pad_constant_like_op.cc
@@ -74,7 +74,7 @@ PadConstantLikeOp Operator.
 
 Pad input(Y) with a pad_value, the number of values padded to the edges of each
 axis is specified by the difference of the shape of X and Y.
-((0, shape_x_0 - shape_y_0), … (0, shape_x_n - shape_y_n)) unique pad widths for
+((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n)) unique pad widths for
 each axis.
 The input should be a k-D tensor(k > 0 and k < 7). As an example:
paddle/fluid/operators/roi_pool_op.cc
@@ -122,7 +122,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor), "
               "Argmaxes corresponding to indices in X used "
               "for gradient computation. Only output "
-              "if arg “is_test” is false.")
+              "if arg \"is_test\" is false.")
         .AsIntermediate();
     AddAttr<float>("spatial_scale",
                    "(float, default 1.0), "
paddle/fluid/operators/softmax_op.h
@@ -35,8 +35,13 @@ class SoftmaxKernel : public framework::OpKernel<T> {
     Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1);
     Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
 
-    math::SoftmaxFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+#ifdef ON_INFER
+    math::SoftmaxFunctor<DeviceContext, T, true>()(
+        context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+#else
+    math::SoftmaxFunctor<DeviceContext, T, false>()(
+        context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
+#endif
   }
 };
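softmax_op.h now picks the boolean template argument at compile time, so an ON_INFER build instantiates only the inference-mode functor. A standalone sketch of that dispatch (ON_INFER here is whatever the build system defines):

#include <cstdio>

// A macro chooses the boolean template argument, so each build
// instantiates only the path it needs.
template <bool is_test>
void Run() { std::printf(is_test ? "inference path\n" : "training path\n"); }

int main() {
#ifdef ON_INFER
  Run<true>();
#else
  Run<false>();
#endif
}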
paddle/fluid/operators/softmax_with_cross_entropy_op.h
@@ -42,8 +42,8 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
     auto &dev_ctx =
         context.template device_context<platform::CPUDeviceContext>();
-    math::SoftmaxFunctor<platform::CPUDeviceContext, T>()(dev_ctx, logits,
-                                                          softmax);
+    math::SoftmaxFunctor<platform::CPUDeviceContext, T, false>()(
+        dev_ctx, logits, softmax);
     math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()(
         dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"),
         context.Attr<int>("ignore_index"));
paddle/fluid/operators/unpool_op.cc
@@ -57,8 +57,8 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker {
 Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is:
 $(N, C_{out}, H_{out}, W_{out})$, where
 $$
-H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\
-W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1]
+H_{out} = (H_{in}-1) * strides[0] - 2 * paddings[0] + ksize[0] \\
+W_{out} = (W_{in}-1) * strides[1] - 2 * paddings[1] + ksize[1]
 $$
 Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf
 )DOC");
paddle/fluid/platform/init.cc
@@ -112,6 +112,14 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
   }
   places.emplace_back(platform::CPUPlace());
   platform::DeviceContextPool::Init(places);
+
+// windows has no support for openblas multi-thread
+#ifdef _WIN32
+  if (FLAGS_paddle_num_threads > 1) {
+    FLAGS_paddle_num_threads = 1;
+  }
+#endif
+
 #ifndef PADDLE_WITH_MKLDNN
   platform::SetNumThreads(FLAGS_paddle_num_threads);
 #endif
...
@@ -167,7 +175,9 @@ void InitGLOG(const std::string &prog_name) {
   // glog will not hold the ARGV[0] inside.
   // Use strdup to alloc a new string.
   google::InitGoogleLogging(strdup(prog_name.c_str()));
+#ifndef _WIN32
   google::InstallFailureSignalHandler();
+#endif
 }
 }  // namespace framework
...
paddle/fluid/platform/nccl_helper.h
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#ifndef _WIN32
 #pragma once

 #include <stdio.h>
...
@@ -149,3 +150,4 @@ struct NCCLContextMap {
 }  // namespace platform
 }  // namespace paddle
+#endif
paddle/fluid/platform/port.h
@@ -24,21 +24,16 @@
 #include "glog/logging.h"

 #if !defined(_WIN32)
+#define UNUSED __attribute__((unused))
 #include <dlfcn.h>     // dladdr
 #include <execinfo.h>  // backtrace
 #include <sys/stat.h>
 #include <algorithm>   // std::accumulate
 #else
 #include <io.h>  // _popen, _pclose
+#include <stdio.h>
 #include <windows.h>
-#if defined(_WIN32)
 #include <numeric>  // std::accumulate in msvc
-#endif
+// windows version of __attribute__((unused))
+#define UNUSED __pragma(warning(suppress : 4100))

 #ifndef S_ISDIR  // windows port for sys/stat.h
 #define S_ISDIR(mode) (((mode)&S_IFMT) == S_IFDIR)
 #endif  // S_ISDIR
...
paddle/fluid/platform/variant.h
@@ -42,3 +42,11 @@ limitations under the License. */
 #include <boost/mpl/comparison.hpp>
 #include <boost/mpl/less_equal.hpp>
 #include <boost/variant.hpp>
+
+// some platform-independent defintion
+#if defined(_WIN32)
+#define UNUSED
+#define __builtin_expect(EXP, C) (EXP)
+#else
+#define UNUSED __attribute__((unused))
+#endif
paddle/fluid/pybind/CMakeLists.txt
@@ -2,9 +2,9 @@
 set(PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method pass_builder)
 set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc)
 if(NOT WIN32)
   list(APPEND PYBIND_DEPS parallel_executor profiler)
   list(APPEND PYBIND_SRCS recordio.cc)
-endif()
+endif(NOT WIN32)
 if(WITH_PYTHON)
   if(WITH_AMD_GPU)
     hip_library(paddle_pybind SHARED
...
@@ -21,5 +21,13 @@ if(WITH_PYTHON)
     endif(NOT APPLE AND NOT ANDROID AND NOT WIN32)
   endif(WITH_AMD_GPU)

+  if(WIN32)
+    if(WITH_GPU AND NOT WITH_DSO)
+      get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
+      target_link_libraries(paddle_pybind ${cuda_modules})
+    endif(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_pybind shlwapi)
+  endif(WIN32)
+
   cc_test(tensor_py_test SRCS tensor_py_test.cc DEPS python)
 endif(WITH_PYTHON)
paddle/fluid/pybind/pybind.cc
@@ -21,6 +21,13 @@ limitations under the License. */
 #include <utility>
 #include <vector>

+#if defined(_WIN32)
+#define NOMINMAX
+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
+#define GOOGLE_GLOG_DLL_DECL
+#include <Windows.h>
+#endif
+
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/framework.pb.h"
...
@@ -29,7 +36,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/op_registry.h"
+#ifndef _WIN32
 #include "paddle/fluid/framework/parallel_executor.h"
+#endif
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
...
@@ -50,7 +59,9 @@ limitations under the License. */
 #include "paddle/fluid/string/to_string.h"

 #ifdef PADDLE_WITH_CUDA
+#ifndef _WIN32
 #include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
+#endif
 #include "paddle/fluid/platform/cuda_profiler.h"
 #include "paddle/fluid/platform/gpu_info.h"
 #endif
...
@@ -340,22 +351,25 @@ All parameter, weight, gradient are variables in Paddle.
       .def("get_lod_tensor_array",
            [](Variable &self) { return self.GetMutable<LoDTensorArray>(); },
            py::return_value_policy::reference)
-#ifdef PADDLE_WITH_CUDA
+#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32))
       .def("get_communicator",
            [](Variable &self) -> platform::Communicator * {
              return self.GetMutable<platform::Communicator>();
            },
            py::return_value_policy::reference)
+#endif
       .def("get_reader",
            [](Variable &self) -> framework::ReaderHolder * {
              PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
              return self.GetMutable<framework::ReaderHolder>();
            },
-           py::return_value_policy::reference);
-#endif
+           py::return_value_policy::reference)
+      ;

+#if !defined(_WIN32)
   py::class_<framework::ReaderHolder>(m, "Reader", "")
       .def("reset", &framework::ReaderHolder::ResetAll);
+#endif

   using LoDTensorBlockingQueue =
       ::paddle::operators::reader::LoDTensorBlockingQueue;
...
@@ -480,7 +494,7 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
       });;
 // clang-format on
-#ifdef PADDLE_WITH_CUDA
+#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32))
   py::class_<platform::Communicator>(m, "Communicator").def(py::init<>());
 #endif
   py::class_<platform::CUDAPlace>(m, "CUDAPlace")
...
@@ -617,11 +631,14 @@ All parameter, weight, gradient are variables in Paddle.
 #ifdef PADDLE_WITH_CUDA
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
+#ifndef _WIN32
   m.def("nvprof_init", platform::CudaProfilerInit);
   m.def("nvprof_start", platform::CudaProfilerStart);
   m.def("nvprof_stop", platform::CudaProfilerStop);
 #endif
+#endif

+#ifndef _WIN32
   py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
       .value("kDisabled", platform::ProfilerState::kDisabled)
       .value("kCPU", platform::ProfilerState::kCPU)
...
@@ -642,6 +659,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("disable_profiler", platform::DisableProfiler);
   m.def("is_profiler_enabled", platform::IsProfileEnabled);
   m.def("reset_profiler", platform::ResetProfiler);
+#endif

   py::class_<ir::Pass, std::shared_ptr<ir::Pass>> pass(m, "Pass");
   pass.def(py::init())
...
@@ -650,9 +668,9 @@ All parameter, weight, gradient are variables in Paddle.
       .def("set_str", [](ir::Pass &self, const std::string &name,
                          const std::string &attr) {
         self.Set<std::string>(name, new std::string(attr));
       })
-      .def("set_int", [](ir::Pass &self, const std::string &name, int val) {
-        self.Set<const int>(name, new int(val));
-      });
+      .def("set_int", [](ir::Pass &self, const std::string &name,
+                         int val) { self.Set<const int>(name, new int(val)); })
+      .def("type", &ir::Pass::Type);

   py::class_<ir::PassBuilder, std::shared_ptr<ir::PassBuilder>> pb(
       m, "PassBuilder");
...
@@ -670,6 +688,7 @@ All parameter, weight, gradient are variables in Paddle.
       .def("remove_pass",
            [](ir::PassBuilder &self, size_t idx) { self.RemovePass(idx); });

+#ifndef _WIN32
   // -- python binds for parallel executor.
   py::class_<ParallelExecutor> pe(m, "ParallelExecutor");
   py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC(
...
@@ -791,6 +810,7 @@ All parameter, weight, gradient are variables in Paddle.
           "reduce_strategy",
           [](const BuildStrategy &self) { return self.reduce_; },
           [](BuildStrategy &self, BuildStrategy::ReduceStrategy strategy) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.reduce_ = strategy;
           },
           R"DOC(The type is STR, there are two reduce strategies in ParallelExecutor,
...
@@ -804,6 +824,7 @@ All parameter, weight, gradient are variables in Paddle.
           [](const BuildStrategy &self) { return self.gradient_scale_; },
           [](BuildStrategy &self,
              BuildStrategy::GradientScaleStrategy strategy) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.gradient_scale_ = strategy;
           },
           R"DOC(The type is STR, there are three ways of defining :math:`loss@grad` in
...
@@ -815,6 +836,7 @@ All parameter, weight, gradient are variables in Paddle.
           "debug_graphviz_path",
           [](const BuildStrategy &self) { return self.debug_graphviz_path_; },
           [](BuildStrategy &self, const std::string &path) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.debug_graphviz_path_ = path;
           },
           R"DOC(The type is STR, debug_graphviz_path indicate the path that
...
@@ -824,6 +846,7 @@ All parameter, weight, gradient are variables in Paddle.
           "enable_data_balance",
           [](const BuildStrategy &self) { return self.enable_data_balance_; },
           [](BuildStrategy &self, bool b) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.enable_data_balance_ = b;
           })  // FIXME(chengudo): enable_data_balance seems not important
       .def_property(
...
@@ -832,6 +855,7 @@ All parameter, weight, gradient are variables in Paddle.
             return self.enable_sequential_execution_;
           },
           [](BuildStrategy &self, bool b) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.enable_sequential_execution_ = b;
           },
           R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.)DOC")
...
@@ -841,6 +865,7 @@ All parameter, weight, gradient are variables in Paddle.
             return self.remove_unnecessary_lock_;
           },
           [](BuildStrategy &self, bool b) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.remove_unnecessary_lock_ = b;
           },
           R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default False.)DOC")
...
@@ -850,15 +875,19 @@ All parameter, weight, gradient are variables in Paddle.
             return self.fuse_elewise_add_act_ops_;
           },
           [](BuildStrategy &self, bool b) {
+            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.fuse_elewise_add_act_ops_ = b;
           },
           R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
                 to fuse elementwise_add_op and activation_op,
                 it may make the execution faster. Default False)DOC")
-      .def("_create_passes_from_strategy",
+      .def("_finalize_strategy_and_create_passes",
            [](BuildStrategy &self) -> std::shared_ptr<ir::PassBuilder> {
-             return self.CreatePassesFromStrategy();
-           });
+             return self.CreatePassesFromStrategy(true);
+           },
+           R"DOC(Allow user to customized passes. Normally model-specific
+                 optimization passes should be defined in this way. BuildStrategy
+                 cannot be updated after being finalized.)DOC");

   pe.def(py::init<const std::vector<platform::Place> &,
                   const std::unordered_set<std::string> &,
                   const std::unordered_set<std::string> &,
...
@@ -887,6 +916,7 @@ All parameter, weight, gradient are variables in Paddle.
       });

   BindRecordIOWriter(&m);
+#endif
   return m.ptr();
 }
 }  // namespace pybind
...
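The `_finalize_strategy_and_create_passes` binding above replaces `_create_passes_from_strategy` and passes `true` to `CreatePassesFromStrategy`, which is what gives the new `PADDLE_ENFORCE(!self.IsFinalized(), ...)` checks in every property setter something to enforce. A minimal Python sketch of the intended flow (the pass name is only an example):

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_elewise_add_act_ops = True  # must be set before finalizing

    # Finalizing hands back the ir::PassBuilder so model-specific passes can be added.
    pass_builder = build_strategy._finalize_strategy_and_create_passes()
    viz_pass = pass_builder.append_pass("graph_viz_pass")

    # Any further property assignment now trips the PADDLE_ENFORCE above:
    # build_strategy.fuse_elewise_add_act_ops = False  # -> "BuildStrategy is finlaized."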
paddle/scripts/paddle_build.sh
@@ -156,6 +156,8 @@ function cmake_gen() {
         -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
         -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
         -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
+        -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}
+        -DANAKIN_BUILD_CROSS_PLANTFORM=${ANAKIN_BUILD_CROSS_PLANTFORM:ON}
         -DPY_VERSION=${PY_VERSION:-2.7}
         -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
     ========================================
...
@@ -188,6 +190,8 @@ EOF
         -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
        -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
         -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
+        -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF} \
+        -DANAKIN_BUILD_CROSS_PLANTFORM=${ANAKIN_BUILD_CROSS_PLANTFORM:ON} \
        -DPY_VERSION=${PY_VERSION:-2.7} \
        -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
...
@@ -777,6 +781,17 @@ function main() {
         test_fluid_lib
         assert_api_spec_approvals
         ;;
+      assert_api)
+        assert_api_not_changed ${PYTHON_ABI:-""}
+        ;;
+      test_inference)
+        gen_capi_package
+        gen_fluid_lib
+        test_fluid_lib
+        ;;
+      assert_api_approvals)
+        assert_api_spec_approvals
+        ;;
       maccheck)
         cmake_gen ${PYTHON_ABI:-""}
         build_mac
...
python/CMakeLists.txt
@@ -45,23 +45,42 @@ endif()
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)

-set(FLUID_CORE ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so)
-add_custom_command(OUTPUT ${FLUID_CORE}
-        COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-        DEPENDS paddle_pybind)
+IF(WIN32)
+    # Python would use the .pyd by default under Windows series platform
+    set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
+    get_filename_component(openblas_refpath ${CBLAS_LIBRARIES} DIRECTORY)
+    set(FLUID_CORE ${FLUID_DST_DIR}/core.pyd)
+    add_custom_command(OUTPUT ${FLUID_CORE}
+            COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
+            COMMAND cmake -E copy ${openblas_refpath}/openblas.dll ${FLUID_DST_DIR}
+            DEPENDS paddle_pybind)
+ELSE()
+    set(FLUID_CORE ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so)
+    add_custom_command(OUTPUT ${FLUID_CORE}
+            COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
+            DEPENDS paddle_pybind)
+ENDIF()
 add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE})

-add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
-    COMMAND touch stub.cc
-    COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
-    COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
-    COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
-    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
-    COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
-    COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
-    DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
+IF(WIN32)
+    add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
+        COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/
+        COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
+        COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
+        COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+        COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib.* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+        DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
+ELSE(WIN32)
+    add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
+        COMMAND touch stub.cc
+        COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
+        COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
+        COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+        COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+        DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
+ENDIF()

 set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ${MKL_DEPENDS})
 if(NOT WITH_FLUID_ONLY)
...
python/paddle/fluid/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 from __future__ import print_function

+import os
 # import all class inside framework into fluid module
 from . import framework
 from .framework import *
...
@@ -111,12 +112,16 @@ def __bootstrap__():
         os.environ['OMP_NUM_THREADS'] = str(num_threads)

     read_env_flags = [
-        'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
-        'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
+        'use_pinned_memory', 'check_nan_inf', 'benchmark',
+        'eager_delete_scope', 'use_mkldnn', 'use_ngraph',
+        'initial_cpu_memory_in_mb',
         'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
-        'dist_threadpool_size', 'cpu_deterministic', 'eager_delete_tensor_gb',
+        'dist_threadpool_size', 'eager_delete_tensor_gb',
         'reader_queue_speed_test_mode'
     ]
+    if os.name != 'nt':
+        read_env_flags.append('warpctc_dir')
+        read_env_flags.append('cpu_deterministic')
+
     if core.is_compiled_with_dist():
         read_env_flags.append('rpc_deadline')
         read_env_flags.append('rpc_server_profile_path')
...
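With the split above, `warpctc_dir` and `cpu_deterministic` are only consulted on non-Windows hosts (`os.name` is `'nt'` on Windows). These flags are ordinarily fed in through `FLAGS_`-prefixed environment variables read when the module is imported; the sketch below assumes that mechanism and is not part of the diff:

    import os

    # Flags listed in read_env_flags are picked up from FLAGS_<name>
    # environment variables when paddle.fluid is first imported (assumed mechanism).
    os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
    if os.name != 'nt':
        os.environ['FLAGS_cpu_deterministic'] = 'true'  # POSIX-only after this change

    import paddle.fluid  # the import must come after the environment is set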
python/paddle/fluid/contrib/inferencer.py
@@ -15,13 +15,15 @@
 from __future__ import print_function

 import contextlib
+import os

 from .. import core
 from .. import executor
 from .. import framework
 from .. import io
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
 from .. import unique_name
 from .trainer import check_and_get_place
...
python/paddle/fluid/contrib/trainer.py
@@ -28,7 +28,8 @@ from .. import framework
 from .. import io
 # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
 from .. import optimizer as opt_module
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
 from ..transpiler import distribute_transpiler

 __all__ = [
...
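Both contrib modules above now gate the `parallel_executor` import on `os.name`, so downstream code can no longer assume the name is bound on Windows. A minimal sketch of the calling-side pattern this implies (the helper below is illustrative, not part of the diff):

    import os

    if os.name != 'nt':
        from paddle.fluid import parallel_executor

    def choose_executor(use_parallel):
        # parallel_executor is only imported on non-Windows hosts, so any
        # code path that touches it must repeat the same os.name guard.
        if use_parallel and os.name != 'nt':
            return parallel_executor.ParallelExecutor
        from paddle.fluid import executor
        return executor.Executor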
python/paddle/fluid/layers/io.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 import contextlib
 import multiprocessing
+import os
 import six
 import threading
...
@@ -346,70 +347,72 @@ def _copy_reader_create_op_(block, op):
     return new_op


-@templatedoc(op_type='create_recordio_file_reader')
-def open_recordio_file(filename,
-                       shapes,
-                       lod_levels,
-                       dtypes,
-                       pass_num=1,
-                       for_parallel=True):
-    ...
+if os.name != 'nt':
+    @templatedoc(op_type='create_recordio_file_reader')
+    def open_recordio_file(filename,
+                           shapes,
+                           lod_levels,
+                           dtypes,
+                           pass_num=1,
+                           for_parallel=True):
+        """
+        ${comment}

+        Args:
+            filename(${filename_type}): ${filename_comment}.
+            shapes(list): List of tuples which declaring data shapes.
+            lod_levels(${lod_levels_type}): ${lod_levels_comment}.
+            dtypes(list): List of strs which declaring data type.
+            pass_num(int): Number of passes to run.
+            for_parallel(Bool): Set it as True if you are going to run
+                subsequent operators in parallel.

+        Returns:
+            ${out_comment}.

+        Examples:

+            >>> import paddle.fluid as fluid
+            >>> reader = fluid.layers.io.open_recordio_file(
+            >>>                               filename='./data.recordio',
+            >>>                               shapes=[(3,224,224), (1)],
+            >>>                               lod_levels=[0, 0],
+            >>>                               dtypes=['float32', 'int64'])
+            >>> # Via the reader, we can use 'read_file' layer to get data:
+            >>> image, label = fluid.layers.io.read_file(reader)
+        """
+        dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
+        shape_concat = []
+        ranks = []

+        for shape in shapes:
+            shape_concat.extend(shape)
+            ranks.append(len(shape))

+        var_name = unique_name('open_recordio_file')

+        startup_blk = default_startup_program().current_block()
+        startup_var = startup_blk.create_var(name=var_name)
+        startup_blk.append_op(
+            type='create_recordio_file_reader',
+            outputs={'Out': [startup_var]},
+            attrs={
+                'shape_concat': shape_concat,
+                'lod_levels': lod_levels,
+                'filename': filename,
+                'ranks': ranks
+            })

+        startup_var.desc.set_dtypes(dtypes)
+        startup_var.persistable = True
+        main_prog_var = _copy_reader_var_(
+            default_main_program().current_block(), startup_var)

+        if pass_num > 1:
+            main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)

+        return monkey_patch_reader_methods(main_prog_var)


 def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
...
python/paddle/fluid/layers/nn.py
@@ -18,6 +18,7 @@ All layers just related to the neural network.
 from __future__ import print_function

 import numpy as np
+import os
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable, OpProtoHolder
...
@@ -341,126 +342,128 @@ def embedding(input,
     return tmp


-@templatedoc(op_type="lstm")
-def dynamic_lstm(input,
-                 size,
-                 h_0=None,
-                 c_0=None,
-                 param_attr=None,
-                 bias_attr=None,
-                 use_peepholes=True,
-                 is_reverse=False,
-                 gate_activation='sigmoid',
-                 cell_activation='tanh',
-                 candidate_activation='tanh',
-                 dtype='float32',
-                 name=None):
-    ...
+if os.name != 'nt':
+    @templatedoc(op_type="lstm")
+    def dynamic_lstm(input,
+                     size,
+                     h_0=None,
+                     c_0=None,
+                     param_attr=None,
+                     bias_attr=None,
+                     use_peepholes=True,
+                     is_reverse=False,
+                     gate_activation='sigmoid',
+                     cell_activation='tanh',
+                     candidate_activation='tanh',
+                     dtype='float32',
+                     name=None):
+        """
+        ${comment}

+        Args:
+            input (Variable): ${input_comment}
+            size (int): 4 * hidden size.
+            h_0(Variable): The initial hidden state is an optional input, default is zero.
+                           This is a tensor with shape (N x D), where N is the
+                           batch size and D is the hidden size.
+            c_0(Variable): The initial cell state is an optional input, default is zero.
+                           This is a tensor with shape (N x D), where N is the
+                           batch size. `h_0` and `c_0` can be NULL but only at the same time.
+            param_attr(ParamAttr|None): The parameter attribute for the learnable
+                                        hidden-hidden weights.

+                                        - Weights = {:math:`W_{ch}, W_{ih}, \
+                                                     W_{fh}, W_{oh}`}
+                                        - The shape is (D x 4D), where D is the hidden
+                                          size.

+                                        If it is set to None or one attribute of ParamAttr,
+                                        dynamic_lstm will create ParamAttr as param_attr.
+                                        If the Initializer of the param_attr is not set, the
+                                        parameter is initialized with Xavier. Default: None.
+            bias_attr (ParamAttr|None): The bias attribute for the learnable bias
+                                        weights, which contains two parts, input-hidden
+                                        bias weights and peephole connections weights if
+                                        setting `use_peepholes` to `True`.

+                                        1. `use_peepholes = False`
+                                           - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                           - The shape is (1 x 4D).
+                                        2. `use_peepholes = True`
+                                           - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
+                                                        W_{fc}, W_{oc}`}.
+                                           - The shape is (1 x 7D).

+                                        If it is set to None or one attribute of ParamAttr,
+                                        dynamic_lstm will create ParamAttr as bias_attr.
+                                        If the Initializer of the bias_attr is not set,
+                                        the bias is initialized zero. Default: None.
+            use_peepholes (bool): ${use_peepholes_comment}
+            is_reverse (bool): ${is_reverse_comment}
+            gate_activation (str): ${gate_activation_comment}
+            cell_activation (str): ${cell_activation_comment}
+            candidate_activation (str): ${candidate_activation_comment}
+            dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
+            name (str|None): A name for this layer(optional). If set None, the layer
+                             will be named automatically.

+        Returns:
+            tuple: The hidden state, and cell state of LSTM. The shape of both \
+            is (T x D), and lod is the same with the `input`.

+        Examples:
+            .. code-block:: python

+                hidden_dim = 512
+                forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                               bias_attr=False)
+                forward, _ = fluid.layers.dynamic_lstm(
+                    input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
+        """
+        assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
+        helper = LayerHelper('lstm', **locals())
+        size = size // 4
+        weight = helper.create_parameter(
+            attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
+        bias_size = [1, 7 * size]
+        if not use_peepholes:
+            bias_size[1] = 4 * size
+        bias = helper.create_parameter(
+            attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)

+        hidden = helper.create_variable_for_type_inference(dtype)
+        cell = helper.create_variable_for_type_inference(dtype)
+        batch_gate = helper.create_variable_for_type_inference(dtype)
+        batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
+        inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
+        batch_size = input.shape[0]
+        if h_0:
+            assert h_0.shape == (batch_size, size), \
+                'The shape of h0 should be (batch_size, %d)' % size
+            inputs['H0'] = h_0
+        if c_0:
+            assert c_0.shape == (batch_size, size), \
+                'The shape of c0 should be (batch_size, %d)' % size
+            inputs['C0'] = c_0

+        helper.append_op(
+            type='lstm',
+            inputs=inputs,
+            outputs={
+                'Hidden': hidden,
+                'Cell': cell,
+                'BatchGate': batch_gate,
+                'BatchCellPreAct': batch_cell_pre_act
+            },
+            attrs={
+                'use_peepholes': use_peepholes,
+                'is_reverse': is_reverse,
+                'gate_activation': gate_activation,
+                'cell_activation': cell_activation,
+                'candidate_activation': candidate_activation
+            })
+        return hidden, cell


 def dynamic_lstmp(input,
...
@@ -959,39 +962,43 @@ def linear_chain_crf(input, label, param_attr=None):
     return log_likelihood


-@templatedoc()
-def crf_decoding(input, param_attr, label=None):
-    ...
+if os.name != 'nt':
+    @templatedoc()
+    def crf_decoding(input, param_attr, label=None):
+        """
+        ${comment}

+        Args:
+            input(${emission_type}): ${emission_comment}
+            param_attr(ParamAttr): The parameter attribute for training.
+            label(${label_type}): ${label_comment}

+        Returns:
+            Variable: ${viterbi_path_comment}

+        Examples:
+            .. code-block:: python

+               crf_decode = layers.crf_decoding(
+                   input=hidden, param_attr=ParamAttr(name="crfw"))
+        """
+        helper = LayerHelper('crf_decoding', **locals())
+        transition = helper.get_parameter(param_attr.name)
+        viterbi_path = helper.create_variable_for_type_inference(
+            dtype=helper.input_dtype())
+        helper.append_op(
+            type='crf_decoding',
+            inputs={"Emission": [input],
+                    "Transition": transition,
+                    "Label": label},
+            outputs={"ViterbiPath": [viterbi_path]})

+        return viterbi_path


 @templatedoc()
...
@@ -5538,42 +5545,48 @@ def label_smooth(label,
     return smooth_label


-@templatedoc()
-def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
-    ...
+if os.name != 'nt':
+    @templatedoc()
+    def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
+        """
+        ${comment}

+        Args:
+            input (Variable): ${x_comment}
+            rois (Variable): ROIs (Regions of Interest) to pool over.
+            pooled_height (integer): ${pooled_height_comment} Default: 1
+            pooled_width (integer): ${pooled_width_comment} Default: 1
+            spatial_scale (float): ${spatial_scale_comment} Default: 1.0

+        Returns:
+            Variable: ${out_comment}.

+        Examples:
+            .. code-block:: python

+                pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
+        """
+        helper = LayerHelper('roi_pool', **locals())
+        dtype = helper.input_dtype()
+        pool_out = helper.create_variable_for_type_inference(dtype)
+        argmaxes = helper.create_variable_for_type_inference(dtype='int32')
+        helper.append_op(
+            type="roi_pool",
+            inputs={"X": input,
+                    "ROIs": rois},
+            outputs={"Out": pool_out,
+                     "Argmax": argmaxes},
+            attrs={
+                "pooled_height": pooled_height,
+                "pooled_width": pooled_width,
+                "spatial_scale": spatial_scale
+            })
+        return pool_out


 @templatedoc()
...
@@ -6822,7 +6835,7 @@ def prelu(x, mode, param_attr=None, name=None):
     alpha_shape = x.shape
     dtype = helper.input_dtype(input_param_name='x')
     alpha = helper.create_parameter(
-        attr=param_attr,
+        attr=helper.param_attr,
         shape=alpha_shape,
         dtype='float32',
         is_bias=False,
...
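One caveat carried over unchanged above: the `roi_pool` docstring example, `fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)`, places positional arguments after keyword arguments, which is a SyntaxError if pasted as-is. A corrected call with the same values, assuming `x` and `rois` are defined:

    pool_out = fluid.layers.roi_pool(
        input=x, rois=rois, pooled_height=7, pooled_width=7, spatial_scale=1.0)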
python/paddle/fluid/layers/ops.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 from __future__ import print_function
+import os
 from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr
 from .. import core
 from ..framework import convert_np_dtype_to_dtype_
...
@@ -99,27 +100,26 @@ Examples:
     >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """

-__all__ += ['cumsum']
-
-_cum_sum_ = generate_layer_fn('cumsum')
-
-
-def cumsum(x, axis=None, exclusive=None, reverse=None):
-    locals_var = locals().keys()
-    kwargs = dict()
-    for name in locals_var:
-        val = locals()[name]
-        if val is not None:
-            kwargs[name] = val
-    return _cum_sum_(**kwargs)
-
-
-cumsum.__doc__ = _cum_sum_.__doc__ + """
-Examples:
-
-    >>> data = fluid.layers.data(name="input", shape=[32, 784])
-    >>> result = fluid.layers.cumsum(data, axis=0)
-"""
+if os.name != 'nt':
+    __all__ += ['cumsum']
+
+    _cum_sum_ = generate_layer_fn('cumsum')
+
+    def cumsum(x, axis=None, exclusive=None, reverse=None):
+        locals_var = locals().keys()
+        kwargs = dict()
+        for name in locals_var:
+            val = locals()[name]
+            if val is not None:
+                kwargs[name] = val
+        return _cum_sum_(**kwargs)
+
+    cumsum.__doc__ = _cum_sum_.__doc__ + """
+    Examples:
+
+        >>> data = fluid.layers.data(name="input", shape=[32, 784])
+        >>> result = fluid.layers.cumsum(data, axis=0)
+    """

 __all__ += ['thresholded_relu']
...
python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -105,7 +105,7 @@ class TestDistRunnerBase(object):
             build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce

         if args.batch_merge_repeat > 1:
-            pass_builder = build_stra._create_passes_from_strategy()
+            pass_builder = build_stra._finalize_strategy_and_create_passes()
             mypass = pass_builder.insert_pass(
                 len(pass_builder.all_passes()) - 2, "multi_batch_merge_pass")
             mypass.set_int("num_repeats", args.batch_merge_repeat)
...
python/paddle/fluid/tests/unittests/test_pass_builder.py
@@ -94,7 +94,12 @@ class TestPassBuilder(unittest.TestCase):
     def test_parallel_testing_with_new_strategy(self):
         build_strategy = fluid.BuildStrategy()
-        pass_builder = build_strategy._create_passes_from_strategy()
+        self.assertFalse(build_strategy.fuse_elewise_add_act_ops)
+        build_strategy.fuse_elewise_add_act_ops = True
+        pass_builder = build_strategy._finalize_strategy_and_create_passes()
+        self.assertTrue("fuse_elewise_add_act_pass" in
+                        [p.type() for p in pass_builder.all_passes()])
+
         origin_len = len(pass_builder.all_passes())

         viz_pass = pass_builder.append_pass("graph_viz_pass")
...
python/requirements.txt
 requests==2.9.2
 numpy>=1.12,<=1.14 #TODO:change to ">=1.12" when numpy fix bug in 1.15 and higher version
 protobuf==3.1
-recordio>=0.1.0
+recordio>=0.1.0 ; sys_platform != 'win32'
 matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib
 rarfile
 scipy>=0.19.0
...
python/setup.py.in
@@ -9,7 +9,7 @@ class BinaryDistribution(Distribution):
 RC = 0

-
+ext_name = '.dll' if os.name == 'nt' else '.so'

 def git_commit():
     try:
...
@@ -136,10 +136,13 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
                   '${PADDLE_BINARY_DIR}/paddle/legacy/pserver/paddle_pserver_main',
                   '${PADDLE_BINARY_DIR}/paddle/scripts/paddle']

-package_data={'paddle.fluid': ['core.so']}
+package_data={'paddle.fluid': ['core' + (ext_name if os.name != 'nt' else '.pyd')]}
+if os.name == 'nt':
+    package_data['paddle.fluid'] += ['openblas' + ext_name]
+
 if '${WITH_FLUID_ONLY}'== 'OFF':
-    package_data['paddle.v2.master']=['libpaddle_master.so']
-    package_data['py_paddle']=['*.py','_swig_paddle.so']
+    package_data['paddle.v2.master']=['libpaddle_master' + ext_name]
+    package_data['py_paddle']=['*.py','_swig_paddle' + + ext_name]

 package_dir={
     '': '${PADDLE_BINARY_DIR}/python',
...
@@ -153,13 +156,15 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
     package_dir['py_paddle']='${PADDLE_BINARY_DIR}/python/py_paddle'

 # put all thirdparty libraries in paddle.libs
-package_data['paddle.libs']=['libwarpctc.so']
 libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
-shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
+if os.name != 'nt':
+    package_data['paddle.libs']= []
+    package_data['paddle.libs']=['libwarpctc' + ext_name]
+    shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
+
 if '${WITH_MKL}' == 'ON':
     shutil.copy('${MKLML_LIB}', libs_path)
     shutil.copy('${MKLML_IOMP_LIB}', libs_path)
-    package_data['paddle.libs']+=['libmklml_intel.so','libiomp5.so']
+    package_data['paddle.libs']+=['libmklml_intel' + ext_name,'libiomp5' + ext_name]

 if '${CMAKE_BUILD_TYPE}' == 'Release':
     # only change rpath in Release mode.
     if '${WITH_MKLDNN}' == 'ON':
...
@@ -187,36 +192,47 @@ if '${WITH_NGRAPH}' == 'ON':
                   '${NGRAPH_CPU_LIB_NAME}',
                   '${NGRAPH_TBB_LIB_NAME}']

 # remove unused paddle/libs/__init__.py
-os.remove(libs_path+'/__init__.py')
+if os.path.isfile(libs_path+'/__init__.py'):
+    os.remove(libs_path+'/__init__.py')
 package_dir['paddle.libs']=libs_path

-# change rpath of core.so, add $ORIGIN/../libs/ to it.
-# The reason is that libwarpctc.so, libiomp5.so etc are in paddle.libs, and
-# core.so is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
+# change rpath of core.ext, add $ORIGIN/../libs/ to it.
+# The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and
+# core.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
 # This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213
 if '${CMAKE_BUILD_TYPE}' == 'Release':
-    # only change rpath in Release mode, since in Debug mode, core.so is too large to be changed.
-    if "@APPLE@" == "1":
-        command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
-    else:
-        command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
-    if os.system(command) != 0:
-        raise Exception("patch core.so failed, command: %s" % command)
-    if '${WITH_FLUID_ONLY}'== 'OFF':
-        # change rpath of _swig_paddle.so.
-        if "@APPLE@" == "1":
-            command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
-        else:
-            command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
-        if os.system(command) != 0:
-            raise Exception("patch _swig_paddle.so failed, command: %s" % command)
+    if os.name != 'nt':
+        # only change rpath in Release mode, since in Debug mode, core.xx is too large to be changed.
+        if "@APPLE@" == "1":
+            command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/core" + ext_name
+        else:
+            command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core" + ext_name
+        if os.system(command) != 0:
+            raise Exception("patch core.%s failed, command: %s" % (ext_name, command))
+        if '${WITH_FLUID_ONLY}'== 'OFF':
+            # change rpath of _swig_paddle.xx.
+            if "@APPLE@" == "1":
+                command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle" + ext_name
+            else:
+                command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle" + ext_name
+            if os.system(command) != 0:
+                raise Exception("patch _swig_paddle.%s failed, command: %s" % (ext_name, command))

+ext_modules = [Extension('_foo', ['stub.cc'])]
+if os.name == 'nt':
+    # fix the path separator under windows
+    fix_package_dir = {}
+    for k, v in package_dir.items():
+        fix_package_dir[k] = v.replace('/', '\\')
+    package_dir = fix_package_dir
+    ext_modules = []

 setup(name='${PACKAGE_NAME}',
       version='${PADDLE_VERSION}',
       description='Parallel Distributed Deep Learning',
       install_requires=setup_requires,
       packages=packages,
-      ext_modules=[Extension('_foo', ['stub.cc'])],
+      ext_modules=ext_modules,
       package_data=package_data,
       package_dir=package_dir,
       scripts=paddle_bins
...
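Two notes on the setup.py.in hunk above. First, `'_swig_paddle' + + ext_name` parses (the second `+` is a unary plus) but fails at runtime with `TypeError: bad operand type for unary +: 'str'`; the intended expression is presumably `'_swig_paddle' + ext_name`. Second, the rpath patching is what lets the installed `core.so`/`core.pyd` locate the bundled libraries in `paddle/libs` at import time; a standalone sketch of that logic under the same assumptions (the path below is a placeholder):

    import os
    import platform

    ext_name = '.dll' if os.name == 'nt' else '.so'
    core_path = 'build/python/paddle/fluid/core' + ext_name  # placeholder path

    if os.name != 'nt':
        if platform.system() == 'Darwin':  # stand-in for the @APPLE@ template check
            command = 'install_name_tool -id "@loader_path/../libs/" ' + core_path
        else:
            command = "patchelf --set-rpath '$ORIGIN/../libs/' " + core_path
        if os.system(command) != 0:
            raise Exception('patch core%s failed, command: %s' % (ext_name, command))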