Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
7b40f7ce
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7b40f7ce
编写于
4月 08, 2018
作者:
L
Liu Yiqun
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into core_inference_prepare
上级
3a825782
9ed3674c
变更
136
隐藏空白更改
内联
并排
Showing
136 changed file
with
2015 addition
and
1190 deletion
+2015
-1190
.gitignore
.gitignore
+0
-9
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+1
-1
cmake/external/snappystream.cmake
cmake/external/snappystream.cmake
+3
-1
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+2
-1
cmake/external/zlib.cmake
cmake/external/zlib.cmake
+2
-1
cmake/generic.cmake
cmake/generic.cmake
+3
-3
doc/fluid/CMakeLists.txt
doc/fluid/CMakeLists.txt
+2
-2
doc/fluid/api/CMakeLists.txt
doc/fluid/api/CMakeLists.txt
+1
-1
doc/fluid/dev/index_cn.rst
doc/fluid/dev/index_cn.rst
+1
-1
doc/fluid/dev/index_en.rst
doc/fluid/dev/index_en.rst
+1
-1
doc/fluid/dev/releasing_process_cn.md
doc/fluid/dev/releasing_process_cn.md
+30
-24
doc/fluid/dev/releasing_process_en.md
doc/fluid/dev/releasing_process_en.md
+210
-0
doc/templates/conf.py.cn.in
doc/templates/conf.py.cn.in
+1
-1
doc/templates/conf.py.en.in
doc/templates/conf.py.en.in
+1
-1
doc/v2/CMakeLists.txt
doc/v2/CMakeLists.txt
+2
-2
doc/v2/api/CMakeLists.txt
doc/v2/api/CMakeLists.txt
+1
-1
paddle/api/CMakeLists.txt
paddle/api/CMakeLists.txt
+6
-5
paddle/api/test/CMakeLists.txt
paddle/api/test/CMakeLists.txt
+5
-0
paddle/fluid/.clang-format
paddle/fluid/.clang-format
+0
-0
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+2
-2
paddle/fluid/framework/block_desc.h
paddle/fluid/framework/block_desc.h
+3
-0
paddle/fluid/framework/channel.h
paddle/fluid/framework/channel.h
+4
-3
paddle/fluid/framework/channel_impl.h
paddle/fluid/framework/channel_impl.h
+11
-10
paddle/fluid/framework/channel_test.cc
paddle/fluid/framework/channel_test.cc
+80
-80
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+1
-1
paddle/fluid/framework/lod_tensor.h
paddle/fluid/framework/lod_tensor.h
+1
-0
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+11
-0
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+2
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+19
-2
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+5
-1
paddle/fluid/framework/selected_rows.cc
paddle/fluid/framework/selected_rows.cc
+5
-1
paddle/fluid/framework/selected_rows.h
paddle/fluid/framework/selected_rows.h
+13
-1
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+12
-5
paddle/fluid/framework/tuple.h
paddle/fluid/framework/tuple.h
+8
-7
paddle/fluid/inference/io.cc
paddle/fluid/inference/io.cc
+3
-6
paddle/fluid/inference/io.h
paddle/fluid/inference/io.h
+1
-2
paddle/fluid/inference/tests/book/CMakeLists.txt
paddle/fluid/inference/tests/book/CMakeLists.txt
+1
-1
paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc
...e/fluid/inference/tests/book/test_inference_fit_a_line.cc
+3
-3
paddle/fluid/inference/tests/book/test_inference_image_classification.cc
...ference/tests/book/test_inference_image_classification.cc
+7
-9
paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc
...ference/tests/book/test_inference_label_semantic_roles.cc
+9
-25
paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc
...d/inference/tests/book/test_inference_recognize_digits.cc
+7
-9
paddle/fluid/inference/tests/book/test_inference_recommender_system.cc
...inference/tests/book/test_inference_recommender_system.cc
+8
-8
paddle/fluid/inference/tests/book/test_inference_rnn_encoder_decoder.cc
...nference/tests/book/test_inference_rnn_encoder_decoder.cc
+5
-5
paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc
...ference/tests/book/test_inference_understand_sentiment.cc
+2
-4
paddle/fluid/inference/tests/book/test_inference_word2vec.cc
paddle/fluid/inference/tests/book/test_inference_word2vec.cc
+5
-5
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+36
-39
paddle/fluid/memory/.clang-format
paddle/fluid/memory/.clang-format
+0
-5
paddle/fluid/memory/memory.cc
paddle/fluid/memory/memory.cc
+1
-1
paddle/fluid/memory/memory_test.cc
paddle/fluid/memory/memory_test.cc
+4
-4
paddle/fluid/operators/.clang-format
paddle/fluid/operators/.clang-format
+0
-5
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-2
paddle/fluid/operators/conv_cudnn_op.cu.cc
paddle/fluid/operators/conv_cudnn_op.cu.cc
+22
-0
paddle/fluid/operators/fc_mkldnn_op.cc
paddle/fluid/operators/fc_mkldnn_op.cc
+3
-3
paddle/fluid/operators/lookup_table_op.cc
paddle/fluid/operators/lookup_table_op.cc
+4
-18
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+8
-10
paddle/fluid/operators/math/math_function.cu
paddle/fluid/operators/math/math_function.cu
+24
-9
paddle/fluid/operators/math/softmax.cu
paddle/fluid/operators/math/softmax.cu
+3
-0
paddle/fluid/operators/math/softmax_impl.h
paddle/fluid/operators/math/softmax_impl.h
+1
-1
paddle/fluid/operators/prior_box_op.cc
paddle/fluid/operators/prior_box_op.cc
+3
-4
paddle/fluid/operators/prior_box_op.cu
paddle/fluid/operators/prior_box_op.cu
+167
-0
paddle/fluid/operators/prior_box_op.h
paddle/fluid/operators/prior_box_op.h
+10
-35
paddle/fluid/operators/sgd_op.cc
paddle/fluid/operators/sgd_op.cc
+7
-6
paddle/fluid/operators/sgd_op.h
paddle/fluid/operators/sgd_op.h
+80
-43
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+7
-2
paddle/fluid/operators/softmax_op.cu.cc
paddle/fluid/operators/softmax_op.cu.cc
+6
-5
paddle/fluid/platform/.clang-format
paddle/fluid/platform/.clang-format
+0
-5
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+2
-2
paddle/fluid/platform/cpu_info_test.cc
paddle/fluid/platform/cpu_info_test.cc
+1
-1
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+3
-1
paddle/fluid/platform/dynload/cublas.cc
paddle/fluid/platform/dynload/cublas.cc
+4
-0
paddle/fluid/platform/dynload/cublas.h
paddle/fluid/platform/dynload/cublas.h
+32
-24
paddle/fluid/platform/dynload/cudnn.cc
paddle/fluid/platform/dynload/cudnn.cc
+2
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+16
-15
paddle/fluid/platform/dynload/cupti.h
paddle/fluid/platform/dynload/cupti.h
+15
-14
paddle/fluid/platform/dynload/curand.h
paddle/fluid/platform/dynload/curand.h
+15
-14
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+46
-43
paddle/fluid/platform/dynload/dynamic_loader.h
paddle/fluid/platform/dynload/dynamic_loader.h
+7
-49
paddle/fluid/platform/dynload/nccl.cc
paddle/fluid/platform/dynload/nccl.cc
+0
-5
paddle/fluid/platform/dynload/nccl.h
paddle/fluid/platform/dynload/nccl.h
+15
-13
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+15
-14
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+15
-15
paddle/fluid/platform/enforce_test.cc
paddle/fluid/platform/enforce_test.cc
+0
-4
paddle/fluid/platform/float16.h
paddle/fluid/platform/float16.h
+161
-66
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+6
-5
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+1
-5
paddle/fluid/platform/place.h
paddle/fluid/platform/place.h
+2
-1
paddle/fluid/pybind/.clang-format
paddle/fluid/pybind/.clang-format
+0
-5
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+2
-0
paddle/fluid/pybind/const_value.cc
paddle/fluid/pybind/const_value.cc
+6
-6
paddle/fluid/pybind/const_value.h
paddle/fluid/pybind/const_value.h
+5
-4
paddle/fluid/pybind/exception.cc
paddle/fluid/pybind/exception.cc
+4
-3
paddle/fluid/pybind/exception.h
paddle/fluid/pybind/exception.h
+5
-2
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+149
-133
paddle/fluid/pybind/protobuf.h
paddle/fluid/pybind/protobuf.h
+7
-7
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+54
-12
paddle/fluid/pybind/recordio.cc
paddle/fluid/pybind/recordio.cc
+10
-2
paddle/fluid/pybind/recordio.h
paddle/fluid/pybind/recordio.h
+2
-1
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+93
-47
paddle/fluid/pybind/tensor_py_test.cc
paddle/fluid/pybind/tensor_py_test.cc
+44
-0
paddle/fluid/recordio/chunk.cc
paddle/fluid/recordio/chunk.cc
+8
-6
paddle/fluid/recordio/chunk.h
paddle/fluid/recordio/chunk.h
+2
-2
paddle/fluid/recordio/chunk_test.cc
paddle/fluid/recordio/chunk_test.cc
+5
-7
paddle/fluid/recordio/header.h
paddle/fluid/recordio/header.h
+2
-2
paddle/fluid/recordio/header_test.cc
paddle/fluid/recordio/header_test.cc
+2
-4
paddle/fluid/recordio/scanner.cc
paddle/fluid/recordio/scanner.cc
+4
-0
paddle/fluid/recordio/scanner.h
paddle/fluid/recordio/scanner.h
+5
-2
paddle/fluid/recordio/writer.cc
paddle/fluid/recordio/writer.cc
+5
-0
paddle/fluid/recordio/writer.h
paddle/fluid/recordio/writer.h
+6
-5
paddle/fluid/recordio/writer_scanner_test.cc
paddle/fluid/recordio/writer_scanner_test.cc
+4
-3
paddle/fluid/string/.clang-format
paddle/fluid/string/.clang-format
+0
-1
paddle/fluid/string/piece.cc
paddle/fluid/string/piece.cc
+1
-1
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+2
-0
paddle/fluid/string/printf_test.cc
paddle/fluid/string/printf_test.cc
+3
-2
paddle/fluid/string/to_string_test.cc
paddle/fluid/string/to_string_test.cc
+3
-4
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+12
-7
paddle/gserver/tests/test_Upsample.cpp
paddle/gserver/tests/test_Upsample.cpp
+43
-42
paddle/trainer/tests/CMakeLists.txt
paddle/trainer/tests/CMakeLists.txt
+9
-4
paddle/utils/CMakeLists.txt
paddle/utils/CMakeLists.txt
+2
-2
proto/CMakeLists.txt
proto/CMakeLists.txt
+3
-2
python/CMakeLists.txt
python/CMakeLists.txt
+5
-3
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
python/paddle/fluid/distribute_transpiler.py
python/paddle/fluid/distribute_transpiler.py
+8
-16
python/paddle/fluid/distributed_splitter.py
python/paddle/fluid/distributed_splitter.py
+11
-4
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+15
-8
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+31
-10
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-2
python/paddle/fluid/tests/unittests/test_conv2d_op.py
python/paddle/fluid/tests/unittests/test_conv2d_op.py
+7
-4
python/paddle/fluid/tests/unittests/test_lookup_table_op.py
python/paddle/fluid/tests/unittests/test_lookup_table_op.py
+4
-4
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+49
-29
python/paddle/fluid/tests/unittests/test_prior_box_op.py
python/paddle/fluid/tests/unittests/test_prior_box_op.py
+27
-29
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
+21
-21
python/paddle/fluid/tests/unittests/test_sgd_op.py
python/paddle/fluid/tests/unittests/test_sgd_op.py
+67
-0
python/paddle/fluid/tests/unittests/test_softmax_op.py
python/paddle/fluid/tests/unittests/test_softmax_op.py
+11
-0
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
+4
-4
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
...trainer_config_helpers/tests/configs/generate_protostr.sh
+0
-1
python/setup.py.in
python/setup.py.in
+3
-2
未找到文件。
.gitignore
浏览文件 @
7b40f7ce
...
@@ -25,12 +25,3 @@ third_party/
...
@@ -25,12 +25,3 @@ third_party/
# clion workspace.
# clion workspace.
cmake-build-*
cmake-build-*
# generated while compiling
paddle/pybind/pybind.h
CMakeFiles
cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
paddle/fluid/pybind/pybind.h
python/paddle/version.py
cmake/external/mklml.cmake
浏览文件 @
7b40f7ce
...
@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
...
@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.1.20171007"
)
SET
(
MKLML_VER
"mklml_lnx_2018.0.1.20171007"
)
SET
(
MKLML_URL
"http
s://github.com/01org/mkl-dnn/releases/download/v0.11
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_URL
"http
://paddlepaddledeps.bj.bcebos.com
/
${
MKLML_VER
}
.tgz"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_DST_DIR
"mklml"
)
...
...
cmake/external/snappystream.cmake
浏览文件 @
7b40f7ce
...
@@ -54,5 +54,7 @@ add_library(snappystream STATIC IMPORTED GLOBAL)
...
@@ -54,5 +54,7 @@ add_library(snappystream STATIC IMPORTED GLOBAL)
set_property
(
TARGET snappystream PROPERTY IMPORTED_LOCATION
set_property
(
TARGET snappystream PROPERTY IMPORTED_LOCATION
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib/libsnappystream.a"
)
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib/libsnappystream.a"
)
include_directories
(
${
SNAPPYSTREAM_INCLUDE_DIR
}
)
include_directories
(
${
SNAPPYSTREAM_INCLUDE_DIR
}
)
# For snappystream to include its own headers.
include_directories
(
${
THIRD_PARTY_PATH
}
/install
)
# For Paddle to include snappy stream headers.
add_dependencies
(
snappystream extern_snappystream
)
add_dependencies
(
snappystream extern_snappystream
)
cmake/external/warpctc.cmake
浏览文件 @
7b40f7ce
...
@@ -62,7 +62,8 @@ ExternalProject_Add(
...
@@ -62,7 +62,8 @@ ExternalProject_Add(
)
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
INCLUDE_DIRECTORIES
(
${
THIRD_PARTY_PATH
}
/install
)
# For Paddle code to include warpctc headers.
ADD_LIBRARY
(
warpctc SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
warpctc SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET warpctc PROPERTY IMPORTED_LOCATION
${
WARPCTC_LIBRARIES
}
)
SET_PROPERTY
(
TARGET warpctc PROPERTY IMPORTED_LOCATION
${
WARPCTC_LIBRARIES
}
)
...
...
cmake/external/zlib.cmake
浏览文件 @
7b40f7ce
...
@@ -25,7 +25,8 @@ ELSE(WIN32)
...
@@ -25,7 +25,8 @@ ELSE(WIN32)
SET
(
ZLIB_LIBRARIES
"
${
ZLIB_INSTALL_DIR
}
/lib/libz.a"
CACHE FILEPATH
"zlib library."
FORCE
)
SET
(
ZLIB_LIBRARIES
"
${
ZLIB_INSTALL_DIR
}
/lib/libz.a"
CACHE FILEPATH
"zlib library."
FORCE
)
ENDIF
(
WIN32
)
ENDIF
(
WIN32
)
INCLUDE_DIRECTORIES
(
${
ZLIB_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
ZLIB_INCLUDE_DIR
}
)
# For zlib code to include its own headers.
INCLUDE_DIRECTORIES
(
${
THIRD_PARTY_PATH
}
/install
)
# For Paddle code to include zlib.h.
ExternalProject_Add
(
ExternalProject_Add
(
extern_zlib
extern_zlib
...
...
cmake/generic.cmake
浏览文件 @
7b40f7ce
...
@@ -251,7 +251,7 @@ function(cc_test TARGET_NAME)
...
@@ -251,7 +251,7 @@ function(cc_test TARGET_NAME)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main paddle_memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main paddle_memory gtest gflags glog
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_
SOURCE
_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_
BINARY
_DIR
}
)
endif
()
endif
()
endfunction
(
cc_test
)
endfunction
(
cc_test
)
...
@@ -561,9 +561,9 @@ function(py_test TARGET_NAME)
...
@@ -561,9 +561,9 @@ function(py_test TARGET_NAME)
set
(
multiValueArgs SRCS DEPS ARGS ENVS
)
set
(
multiValueArgs SRCS DEPS ARGS ENVS
)
cmake_parse_arguments
(
py_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
py_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
COMMAND env PYTHONPATH=
${
PADDLE_
PYTHON_BUILD_DIR
}
/lib-
python
${
py_test_ENVS
}
COMMAND env PYTHONPATH=
${
PADDLE_
BINARY_DIR
}
/
python
${
py_test_ENVS
}
${
PYTHON_EXECUTABLE
}
-u
${
py_test_SRCS
}
${
py_test_ARGS
}
${
PYTHON_EXECUTABLE
}
-u
${
py_test_SRCS
}
${
py_test_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_
SOURCE
_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_
BINARY
_DIR
}
)
endif
()
endif
()
endfunction
()
endfunction
()
...
...
doc/fluid/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -27,7 +27,7 @@ sphinx_add_target(paddle_fluid_docs
...
@@ -27,7 +27,7 @@ sphinx_add_target(paddle_fluid_docs
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_EN
}
)
${
SPHINX_HTML_DIR_EN
}
)
add_dependencies
(
paddle_fluid_docs gen_proto_py
)
add_dependencies
(
paddle_fluid_docs gen_proto_py
paddle_python
)
# configured documentation tools and intermediate build results
# configured documentation tools and intermediate build results
set
(
BINARY_BUILD_DIR_CN
"
${
CMAKE_CURRENT_BINARY_DIR
}
/cn/_build"
)
set
(
BINARY_BUILD_DIR_CN
"
${
CMAKE_CURRENT_BINARY_DIR
}
/cn/_build"
)
...
@@ -50,6 +50,6 @@ sphinx_add_target(paddle_fluid_docs_cn
...
@@ -50,6 +50,6 @@ sphinx_add_target(paddle_fluid_docs_cn
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_CN
}
)
${
SPHINX_HTML_DIR_CN
}
)
add_dependencies
(
paddle_fluid_docs_cn gen_proto_py
)
add_dependencies
(
paddle_fluid_docs_cn gen_proto_py
paddle_python
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
doc/fluid/api/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -19,4 +19,4 @@ sphinx_add_target(paddle_fluid_apis
...
@@ -19,4 +19,4 @@ sphinx_add_target(paddle_fluid_apis
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_EN
}
)
${
SPHINX_HTML_DIR_EN
}
)
add_dependencies
(
paddle_fluid_apis gen_proto_py framework_py_proto copy_paddle_pybind
)
add_dependencies
(
paddle_fluid_apis gen_proto_py framework_py_proto copy_paddle_pybind
paddle_python
)
doc/fluid/dev/index_cn.rst
浏览文件 @
7b40f7ce
...
@@ -9,5 +9,5 @@
...
@@ -9,5 +9,5 @@
use_eigen_cn.md
use_eigen_cn.md
name_convention.md
name_convention.md
support_new_device.md
support_new_device.md
releasing_process.md
releasing_process
_cn
.md
op_markdown_format.md
op_markdown_format.md
doc/fluid/dev/index_en.rst
浏览文件 @
7b40f7ce
...
@@ -9,5 +9,5 @@ Development
...
@@ -9,5 +9,5 @@ Development
use_eigen_en.md
use_eigen_en.md
name_convention.md
name_convention.md
support_new_device.md
support_new_device.md
releasing_process.md
releasing_process
_en
.md
op_markdown_format.md
op_markdown_format.md
doc/fluid/dev/releasing_process.md
→
doc/fluid/dev/releasing_process
_cn
.md
浏览文件 @
7b40f7ce
...
@@ -10,19 +10,10 @@ PaddlePaddle每次发新的版本,遵循以下流程:
...
@@ -10,19 +10,10 @@ PaddlePaddle每次发新的版本,遵循以下流程:
*
使用Regression Test List作为检查列表,测试本次release的正确性。
*
使用Regression Test List作为检查列表,测试本次release的正确性。
*
如果失败,记录下所有失败的例子,在这个
`release/版本号`
分支中,修复所有bug后,Patch号加一,到第二步
*
如果失败,记录下所有失败的例子,在这个
`release/版本号`
分支中,修复所有bug后,Patch号加一,到第二步
*
修改
`python/setup.py.in`
中的版本信息,并将
`istaged`
字段设为
`True`
。
*
修改
`python/setup.py.in`
中的版本信息,并将
`istaged`
字段设为
`True`
。
*
编译这个版本的python wheel包,并发布到pypi。
*
将这个版本的python wheel包发布到pypi。
*
由于pypi.python.org目前遵循
[
严格的命名规范PEP 513
](
https://www.python.org/dev/peps/pep-0513
)
,在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将
`linux_x86_64`
修改成
`manylinux1_x86_64`
。
*
更新Docker镜像(参考后面的操作细节)。
*
pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:
`python setup.py bdist_wheel`
。
1.
第三步完成后,将
`release/版本号`
分支合入master分支,将master分支的合入commit打上tag,tag为
`版本号`
。同时再将
`master`
分支合入
`develop`
分支。
*
上传方法:
1.
协同完成Release Note的书写。
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
*
编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步
1.
第三步完成后,将
`release/版本号`
分支合入master分支,并删除
`release/版本号`
分支。将master分支的合入commit打上tag,tag为
`版本号`
。同时再将
`master`
分支合入
`develop`
分支。最后删除
`release/版本号`
分支。
1.
协同完成Release Note的书写
需要注意的是:
需要注意的是:
...
@@ -31,13 +22,18 @@ PaddlePaddle每次发新的版本,遵循以下流程:
...
@@ -31,13 +22,18 @@ PaddlePaddle每次发新的版本,遵循以下流程:
## 发布wheel包到pypi
## 发布wheel包到pypi
使用
[
PaddlePaddle CI
](
https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview
)
1.
使用
[
PaddlePaddle CI
](
https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview
)
完成自动化二进制编译,参考下图,选择需要发布的版本(通常包含一个CPU版本和一个GPU版本),点击"run"右侧的"..."按钮,可以
完成自动化二进制编译,参考下图,选择需要发布的版本(通常包含一个CPU版本和一个GPU版本),点击"run"右侧的"..."按钮,可以
弹出下面的选择框,在第二个tab (Changes)里选择需要发布的分支,这里选择0.11.0,然后点击"Run Build"按钮。等待编译完成后
弹出下面的选择框,在第二个tab (Changes)里选择需要发布的分支,这里选择0.11.0,然后点击"Run Build"按钮。
可以在此页面的"Artifacts"下拉框中找到生成的3个二进制文件,分别对应CAPI,
`cp27m`
和
`cp27mu`
的版本。然后按照上述的方法
<img
src=
"https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ci_build_whl.png"
>
使用
`twine`
工具上传即可。
1.
等待编译完成后可以在此页面的"Artifacts"下拉框中找到生成的3个二进制文件,分别对应CAPI,
`cp27m`
和
`cp27mu`
的版本。
1.
由于pypi.python.org目前遵循
[
严格的命名规范PEP 513
](
https://www.python.org/dev/peps/pep-0513
)
,在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将
`linux_x86_64`
修改成
`manylinux1_x86_64`
。
<img
src=
"https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ci_build_whl.png"
>
1.
上传:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
*
注:CI环境使用 https://github.com/PaddlePaddle/buildtools 这里的DockerImage作为编译环境以支持更多的Linux
*
注:CI环境使用 https://github.com/PaddlePaddle/buildtools 这里的DockerImage作为编译环境以支持更多的Linux
发行版,如果需要手动编译,也可以使用这些镜像。这些镜像也可以从 https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ 下载得到。
发行版,如果需要手动编译,也可以使用这些镜像。这些镜像也可以从 https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ 下载得到。
...
@@ -48,10 +44,20 @@ PaddlePaddle每次发新的版本,遵循以下流程:
...
@@ -48,10 +44,20 @@ PaddlePaddle每次发新的版本,遵循以下流程:
上述PaddlePaddle CI编译wheel完成后会自动将Docker镜像push到DockerHub,所以,发布Docker镜像只需要对自动push的镜像打上
上述PaddlePaddle CI编译wheel完成后会自动将Docker镜像push到DockerHub,所以,发布Docker镜像只需要对自动push的镜像打上
版本号对应的tag即可:
版本号对应的tag即可:
1.
进入 https://hub.docker.com/r/paddlepaddle/paddle/tags/ 查看latest tag的更新时间是否在上述编译wheel包完成后是否最新。
```
1.
执行
`docker pull paddlepaddle/paddle:[latest tag]`
,latest tag可以是latest或latest-gpu等。
docker pull [镜像]:latest
1.
执行
`docker tag paddlepaddle/paddle:[latest tag] paddlepaddle/paddle:[version]`
docker tag [镜像]:latest [镜像]:[version]
1.
执行
`docker push paddlepaddle/paddle:[version]`
docker push [镜像]:[version]
```
需要更新的镜像tag包括:
*
`[version]`
: CPU版本
*
`[version]-openblas`
: openblas版本
*
`[version]-gpu`
: GPU版本(CUDA 8.0 cudnn 5)
*
`[version]-gpu-[cudaver]-[cudnnver]`
: 不同cuda, cudnn版本的镜像
之后可进入 https://hub.docker.com/r/paddlepaddle/paddle/tags/ 查看是否发布成功。
## PaddlePaddle 分支规范
## PaddlePaddle 分支规范
...
@@ -76,7 +82,7 @@ PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-
...
@@ -76,7 +82,7 @@ PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-
### PaddlePaddle Book中所有章节
### PaddlePaddle Book中所有章节
PaddlePaddle每次发版本首先要保证PaddlePaddle Book中所有章节功能的正确性。功能的正确性包括验证PaddlePaddle目前的
`paddle_trainer`
训练和纯使用
`Python`
训练模型正确性。
PaddlePaddle每次发版本首先要保证PaddlePaddle Book中所有章节功能的正确性。功能的正确性包括验证PaddlePaddle目前的
`paddle_trainer`
训练和纯使用
`Python`
训练
(V2和Fluid)
模型正确性。
<table>
<table>
<thead>
<thead>
...
...
doc/fluid/dev/releasing_process_en.md
0 → 100644
浏览文件 @
7b40f7ce
# PaddlePaddle Releasing Process
PaddlePaddle manages its branches using "git-flow branching model", and
[
Semantic Versioning
](
http://semver.org/
)
as its version number semantics.
Each time we release a new PaddlePaddle version, we should follow the below steps:
1.
Fork a new branch from
`develop`
named
`release/[version]`
, e.g.
`release/0.10.0`
.
1.
Push a new tag on the release branch, the tag name should be like
`[version]rc.patch`
. The
first tag should be
`0.10.0rc1`
, and the second should be
`0.10.0rc2`
and so on.
1.
After that, we should do:
*
Run all regression tests on the Regression Test List (see PaddlePaddle TeamCity CI), to confirm
that this release has no major bugs.
*
If regression test fails, we must fix those bugs and create a new
`release/[version]`
branch from previous release branch.
*
Modify
`python/setup.py.in`
, change the version number and change
`ISTAGED`
to
`True`
.
*
Publish PaddlePaddle release wheel packages to pypi (see below instructions for detail).
*
Update the Docker images (see below instructions for detail).
1.
After above step, merge
`release/[version]`
branch to master and push a tag on the master commit,
then merge
`master`
to
`develop`
.
1.
Update the Release Note.
***NOTE:**
*
*
Do
***NOT**
*
merge commits from the develop branch into release branches, so that each release branch contains
only the features for the current release and we can test on that version.
*
If we want to fix bugs on release branches, we must merge the fix to master, develop and release branch.
## Publish Wheel Packages to pypi
1.
Use our
[
CI tool
](
https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview
)
to build all wheel packages needed to publish. As shown in the following picture, choose a build
version, click "..." button on the right side of "Run" button, and switch to the second tab in the
pop-up box, choose the current release branch and click "Run Build" button. You may repeat this
step to start different versions of builds.
<img
src=
"https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ci_build_whl.png"
>
1.
After the build succeeds, download the outputs under "Artifacts" including capi,
`cp27m`
and
`cp27mu`
.
1.
Since pypi.python.org follows
[
PEP 513
](
https://www.python.org/dev/peps/pep-0513
)
, before we
upload the package using
`twine`
, we need to rename the package from
`linux_x86_64`
to
`manylinux1_x86_64`
.
1.
Start the upload:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
*
NOTE: We use a special Docker image to build our releases to support more Linux distributions. You can
download it from https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/, or build it using
scripts under
`tools/manylinux1`
.
*
pypi does not allow overwriting an already uploaded version of a wheel package, even if you delete the
old version. You must change the version number before uploading a new one.
## Publish Docker Images
Our CI tool will push latest images to DockerHub, so we only need to push a version tag like:
```
docker pull [image]:latest
docker tag [image]:latest [image]:[version]
docker push [image]:[version]
```
Tags that need to be updated are:
*
`[version]`
: CPU only version image
*
`[version]-openblas`
: openblas version image
*
`[version]-gpu`
: GPU version(using CUDA 8.0 cudnn 5)
*
`[version]-gpu-[cudaver]-[cudnnver]`
: tag for different cuda, cudnn versions
You can then check out the latest pushed tags at https://hub.docker.com/r/paddlepaddle/paddle/tags/.
## Branching Model
We use
[
git-flow
](
http://nvie.com/posts/a-successful-git-branching-model/
)
as our branching model,
with some modifications:
*
`master`
branch is the stable branch. Each version on the master branch is tested and guaranteed.
*
`develop`
branch is for development. Each commit on develop branch has passed CI unit test, but no
regression tests are run.
*
`release/[version]`
branch is used to publish each release. Latest release version branches have
bugfix only for that version, but no feature updates.
*
Developer forks are not required to follow
[
git-flow
](
http://nvie.com/posts/a-successful-git-branching-model/
)
branching model; every fork is treated like a feature branch.
*
Advice: a developer fork's develop branch is used to sync up with the main repo's develop branch.
*
Advice: developers fork a new branch from their fork's develop branch to start developing.
*
Use that branch on developer's fork to create pull requests and start reviews.
*
Developers can push new commits to that branch while the pull request is open.
*
Bug fixes also start from the developer's forked repo, and a bug-fix branch can be merged into
`master`
,
`develop`
and
`releases`
.
## PaddlePaddle Regression Test List
### All Chapters of PaddlePaddle Book
We need to guarantee that all the chapters of PaddlePaddle Book run correctly, including
V1 (
`paddle_trainer`
training), V2 training, and Fluid training.
<table>
<thead>
<tr>
<th></th>
<th>
Linear Regression
</th>
<th>
Recognize Digits
</th>
<th>
Image Classification
</th>
<th>
Word2Vec
</th>
<th>
Personalized Recommendation
</th>
<th>
Sentiment Analysis
</th>
<th>
Semantic Role Labeling
</th>
<th>
Machine Translation
</th>
</tr>
</thead>
<tbody>
<tr>
<td>
API.V2 + Docker + GPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
API.V2 + Docker + CPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
`paddle_trainer`
+ Docker + GPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
`paddle_trainer`
+ Docker + CPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
API.V2 + Ubuntu + GPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
API.V2 + Ubuntu + CPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
`paddle_trainer`
+ Ubuntu + GPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
<tr>
<td>
`paddle_trainer`
+ Ubuntu + CPU
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
<td>
</td>
</tr>
</tbody>
</table>
doc/templates/conf.py.cn.in
浏览文件 @
7b40f7ce
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# serve to show the default.
# serve to show the default.
import sys
import sys
import os, subprocess
import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_
SOURCE
_DIR@/python'))
sys.path.insert(0, os.path.abspath('@PADDLE_
BINARY
_DIR@/python'))
import shlex
import shlex
from recommonmark import parser, transform
from recommonmark import parser, transform
import paddle
import paddle
...
...
doc/templates/conf.py.en.in
浏览文件 @
7b40f7ce
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# serve to show the default.
# serve to show the default.
import sys
import sys
import os, subprocess
import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_
SOURCE
_DIR@/python'))
sys.path.insert(0, os.path.abspath('@PADDLE_
BINARY
_DIR@/python'))
import shlex
import shlex
from recommonmark import parser, transform
from recommonmark import parser, transform
import paddle
import paddle
...
...
doc/v2/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -27,7 +27,7 @@ sphinx_add_target(paddle_v2_docs
...
@@ -27,7 +27,7 @@ sphinx_add_target(paddle_v2_docs
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_EN
}
)
${
SPHINX_HTML_DIR_EN
}
)
add_dependencies
(
paddle_v2_docs gen_proto_py
)
add_dependencies
(
paddle_v2_docs gen_proto_py
paddle_python
)
# configured documentation tools and intermediate build results
# configured documentation tools and intermediate build results
set
(
BINARY_BUILD_DIR_CN
"
${
CMAKE_CURRENT_BINARY_DIR
}
/cn/_build"
)
set
(
BINARY_BUILD_DIR_CN
"
${
CMAKE_CURRENT_BINARY_DIR
}
/cn/_build"
)
...
@@ -50,6 +50,6 @@ sphinx_add_target(paddle_v2_docs_cn
...
@@ -50,6 +50,6 @@ sphinx_add_target(paddle_v2_docs_cn
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_CN
}
)
${
SPHINX_HTML_DIR_CN
}
)
add_dependencies
(
paddle_v2_docs_cn gen_proto_py
)
add_dependencies
(
paddle_v2_docs_cn gen_proto_py
paddle_python
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
doc/v2/api/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -19,4 +19,4 @@ sphinx_add_target(paddle_v2_apis
...
@@ -19,4 +19,4 @@ sphinx_add_target(paddle_v2_apis
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
${
SPHINX_HTML_DIR_EN
}
)
${
SPHINX_HTML_DIR_EN
}
)
add_dependencies
(
paddle_v2_apis gen_proto_py framework_py_proto copy_paddle_pybind
)
add_dependencies
(
paddle_v2_apis gen_proto_py framework_py_proto copy_paddle_pybind
paddle_python
)
paddle/api/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -89,16 +89,17 @@ SWIG_LINK_LIBRARIES(swig_paddle
...
@@ -89,16 +89,17 @@ SWIG_LINK_LIBRARIES(swig_paddle
${
START_END
}
${
START_END
}
)
)
add_custom_command
(
OUTPUT
${
PADDLE_SOURCE_DIR
}
/paddle/py_paddle/_swig_paddle.so
add_custom_command
(
OUTPUT
${
PADDLE_BINARY_DIR
}
/python/py_paddle/_swig_paddle.so
COMMAND cp
${
CMAKE_CURRENT_BINARY_DIR
}
/swig_paddle.py
${
PADDLE_SOURCE_DIR
}
/paddle/py_paddle
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_BINARY_DIR
}
/python/py_paddle
COMMAND cp
${
CMAKE_CURRENT_BINARY_DIR
}
/_swig_paddle.so
${
PADDLE_SOURCE_DIR
}
/paddle/py_paddle
COMMAND cp
${
CMAKE_CURRENT_BINARY_DIR
}
/swig_paddle.py
${
PADDLE_BINARY_DIR
}
/python/py_paddle
COMMAND
${
CMAKE_COMMAND
}
-E touch .timestamp
COMMAND cp
${
CMAKE_CURRENT_BINARY_DIR
}
/_swig_paddle.so
${
PADDLE_BINARY_DIR
}
/python/py_paddle
COMMAND
${
CMAKE_COMMAND
}
-E touch
${
PADDLE_BINARY_DIR
}
/.timestamp
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/paddle
DEPENDS _swig_paddle
DEPENDS _swig_paddle
)
)
# TODO(yuyang18) : make wheel name calculated by cmake
# TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target
(
python_api_wheel ALL DEPENDS
${
PADDLE_
SOURCE_DIR
}
/paddle
/py_paddle/_swig_paddle.so
)
add_custom_target
(
python_api_wheel ALL DEPENDS
${
PADDLE_
BINARY_DIR
}
/python
/py_paddle/_swig_paddle.so
)
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
IF
(
NOT PY_PIP_FOUND
)
IF
(
NOT PY_PIP_FOUND
)
...
...
paddle/api/test/CMakeLists.txt
浏览文件 @
7b40f7ce
add_custom_command
(
OUTPUT
${
CMAKE_CURRENT_BINARY_DIR
}
/testTrain.py
COMMAND cp -r
${
CMAKE_CURRENT_SOURCE_DIR
}
/*.py
${
CMAKE_CURRENT_BINARY_DIR
}
)
add_custom_target
(
copy_api_test ALL DEPENDS testTrain.py
)
py_test
(
testTrain SRCS testTrain.py
)
py_test
(
testTrain SRCS testTrain.py
)
py_test
(
testMatrix SRCS testMatrix.py
)
py_test
(
testMatrix SRCS testMatrix.py
)
py_test
(
testVector SRCS testVector.py
)
py_test
(
testVector SRCS testVector.py
)
...
...
paddle/fluid/
framework/
.clang-format
→
paddle/fluid/.clang-format
浏览文件 @
7b40f7ce
文件已移动
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -74,8 +74,8 @@ py_proto_compile(framework_py_proto SRCS framework.proto)
...
@@ -74,8 +74,8 @@ py_proto_compile(framework_py_proto SRCS framework.proto)
add_custom_target
(
framework_py_proto_init ALL COMMAND
${
CMAKE_COMMAND
}
-E touch __init__.py
)
add_custom_target
(
framework_py_proto_init ALL COMMAND
${
CMAKE_COMMAND
}
-E touch __init__.py
)
add_dependencies
(
framework_py_proto framework_py_proto_init
)
add_dependencies
(
framework_py_proto framework_py_proto_init
)
add_custom_command
(
TARGET framework_py_proto POST_BUILD
add_custom_command
(
TARGET framework_py_proto POST_BUILD
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/proto
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/proto
COMMAND cp *.py
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/proto/
COMMAND cp *.py
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/proto/
COMMENT
"Copy generated python proto into directory paddle/fluid/proto."
COMMENT
"Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
...
...
paddle/fluid/framework/block_desc.h
浏览文件 @
7b40f7ce
...
@@ -17,6 +17,7 @@ limitations under the License. */
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <deque>
#include <deque>
#include <memory>
#include <memory>
#include <set>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <vector>
#include <vector>
...
@@ -96,6 +97,8 @@ class BlockDesc {
...
@@ -96,6 +97,8 @@ class BlockDesc {
*/
*/
void
RemoveOp
(
size_t
s
,
size_t
e
);
void
RemoveOp
(
size_t
s
,
size_t
e
);
void
RemoveVar
(
const
std
::
string
&
name
)
{
vars_
.
erase
(
name
);
}
std
::
vector
<
OpDesc
*>
AllOps
()
const
;
std
::
vector
<
OpDesc
*>
AllOps
()
const
;
size_t
OpSize
()
const
{
return
ops_
.
size
();
}
size_t
OpSize
()
const
{
return
ops_
.
size
();
}
...
...
paddle/fluid/framework/channel.h
浏览文件 @
7b40f7ce
...
@@ -14,8 +14,8 @@ limitations under the License. */
...
@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#pragma once
#include <stddef.h> // for size_t
#include <stddef.h>
// for size_t
#include <condition_variable>
#include <condition_variable>
// NOLINT
#include <typeindex>
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
...
@@ -216,7 +216,8 @@ class ChannelHolder {
...
@@ -216,7 +216,8 @@ class ChannelHolder {
template
<
typename
T
>
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
struct
PlaceholderImpl
:
public
Placeholder
{
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
explicit
PlaceholderImpl
(
size_t
buffer_size
)
:
type_
(
std
::
type_index
(
typeid
(
T
)))
{
channel_
.
reset
(
MakeChannel
<
T
>
(
buffer_size
));
channel_
.
reset
(
MakeChannel
<
T
>
(
buffer_size
));
}
}
...
...
paddle/fluid/framework/channel_impl.h
浏览文件 @
7b40f7ce
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <stddef.h> // for size_t
#include <stddef.h> // for size_t
#include <atomic>
#include <atomic>
#include <condition_variable>
#include <condition_variable>
// NOLINT
#include <deque>
#include <deque>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
...
@@ -38,7 +38,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
...
@@ -38,7 +38,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
virtual
void
Unlock
();
virtual
void
Unlock
();
virtual
bool
IsClosed
();
virtual
bool
IsClosed
();
virtual
void
Close
();
virtual
void
Close
();
ChannelImpl
(
size_t
);
explicit
ChannelImpl
(
size_t
);
virtual
~
ChannelImpl
();
virtual
~
ChannelImpl
();
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
virtual
void
AddToSendQ
(
const
void
*
referrer
,
T
*
data
,
...
@@ -60,7 +60,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
...
@@ -60,7 +60,7 @@ class ChannelImpl : public paddle::framework::Channel<T> {
const
void
*
referrer
;
// TODO(thuan): figure out better way to do this
const
void
*
referrer
;
// TODO(thuan): figure out better way to do this
std
::
function
<
bool
(
ChannelAction
)
>
callback
;
std
::
function
<
bool
(
ChannelAction
)
>
callback
;
QueueMessage
(
T
*
item
)
explicit
QueueMessage
(
T
*
item
)
:
data
(
item
),
cond
(
std
::
make_shared
<
std
::
condition_variable_any
>
())
{}
:
data
(
item
),
cond
(
std
::
make_shared
<
std
::
condition_variable_any
>
())
{}
QueueMessage
(
T
*
item
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
)
QueueMessage
(
T
*
item
,
std
::
shared_ptr
<
std
::
condition_variable_any
>
cond
)
...
@@ -88,15 +88,15 @@ class ChannelImpl : public paddle::framework::Channel<T> {
...
@@ -88,15 +88,15 @@ class ChannelImpl : public paddle::framework::Channel<T> {
}
}
std
::
shared_ptr
<
QueueMessage
>
get_first_message
(
std
::
shared_ptr
<
QueueMessage
>
get_first_message
(
std
::
deque
<
std
::
shared_ptr
<
QueueMessage
>>
&
queue
,
ChannelAction
action
)
{
std
::
deque
<
std
::
shared_ptr
<
QueueMessage
>>
*
queue
,
ChannelAction
action
)
{
while
(
!
queue
.
empty
())
{
while
(
!
queue
->
empty
())
{
// Check whether this message was added by Select
// Check whether this message was added by Select
// If this was added by Select then execute the callback
// If this was added by Select then execute the callback
// to check if you can execute this message. The callback
// to check if you can execute this message. The callback
// can return false if some other case was executed in Select.
// can return false if some other case was executed in Select.
// In that case just discard this QueueMessage and process next.
// In that case just discard this QueueMessage and process next.
std
::
shared_ptr
<
QueueMessage
>
m
=
queue
.
front
();
std
::
shared_ptr
<
QueueMessage
>
m
=
queue
->
front
();
queue
.
pop_front
();
queue
->
pop_front
();
if
(
m
->
callback
==
nullptr
||
m
->
callback
(
action
))
return
m
;
if
(
m
->
callback
==
nullptr
||
m
->
callback
(
action
))
return
m
;
}
}
return
nullptr
;
return
nullptr
;
...
@@ -147,7 +147,7 @@ void ChannelImpl<T>::Send(T *item) {
...
@@ -147,7 +147,7 @@ void ChannelImpl<T>::Send(T *item) {
// to send to the receiver, bypassing the channel buffer if any
// to send to the receiver, bypassing the channel buffer if any
if
(
!
recvq
.
empty
())
{
if
(
!
recvq
.
empty
())
{
std
::
shared_ptr
<
QueueMessage
>
m
=
std
::
shared_ptr
<
QueueMessage
>
m
=
get_first_message
(
recvq
,
ChannelAction
::
SEND
);
get_first_message
(
&
recvq
,
ChannelAction
::
SEND
);
if
(
m
!=
nullptr
)
{
if
(
m
!=
nullptr
)
{
*
(
m
->
data
)
=
std
::
move
(
*
item
);
*
(
m
->
data
)
=
std
::
move
(
*
item
);
...
@@ -198,7 +198,7 @@ bool ChannelImpl<T>::Receive(T *item) {
...
@@ -198,7 +198,7 @@ bool ChannelImpl<T>::Receive(T *item) {
// buffer and move front of send queue to the buffer
// buffer and move front of send queue to the buffer
if
(
!
sendq
.
empty
())
{
if
(
!
sendq
.
empty
())
{
std
::
shared_ptr
<
QueueMessage
>
m
=
std
::
shared_ptr
<
QueueMessage
>
m
=
get_first_message
(
sendq
,
ChannelAction
::
RECEIVE
);
get_first_message
(
&
sendq
,
ChannelAction
::
RECEIVE
);
if
(
buf_
.
size
()
>
0
)
{
if
(
buf_
.
size
()
>
0
)
{
// Case 1 : Channel is Buffered
// Case 1 : Channel is Buffered
// Do Data transfer from front of buffer
// Do Data transfer from front of buffer
...
@@ -219,8 +219,9 @@ bool ChannelImpl<T>::Receive(T *item) {
...
@@ -219,8 +219,9 @@ bool ChannelImpl<T>::Receive(T *item) {
if
(
m
!=
nullptr
)
{
if
(
m
!=
nullptr
)
{
*
item
=
std
::
move
(
*
(
m
->
data
));
*
item
=
std
::
move
(
*
(
m
->
data
));
m
->
Notify
();
m
->
Notify
();
}
else
}
else
{
return
recv_return
(
Receive
(
item
));
return
recv_return
(
Receive
(
item
));
}
}
}
return
recv_return
(
true
);
return
recv_return
(
true
);
}
}
...
...
paddle/fluid/framework/channel_test.cc
浏览文件 @
7b40f7ce
...
@@ -14,8 +14,8 @@ limitations under the License. */
...
@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/channel.h"
#include <chrono>
#include <chrono>
// NOLINT
#include <thread>
#include <thread>
// NOLINT
#include "gtest/gtest.h"
#include "gtest/gtest.h"
using
paddle
::
framework
::
Channel
;
using
paddle
::
framework
::
Channel
;
...
@@ -166,9 +166,9 @@ TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
...
@@ -166,9 +166,9 @@ TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) {
std
::
thread
t
([
&
]()
{
std
::
thread
t
([
&
]()
{
// Try to write more than buffer size.
// Try to write more than buffer size.
for
(
size_t
i
=
0
;
i
<
2
*
buffer_size
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
2
*
buffer_size
;
++
i
)
{
if
(
i
<
buffer_size
)
if
(
i
<
buffer_size
)
{
ch
->
Send
(
&
i
);
// should block after 10 iterations
ch
->
Send
(
&
i
);
// should block after 10 iterations
else
{
}
else
{
bool
is_exception
=
false
;
bool
is_exception
=
false
;
try
{
try
{
ch
->
Send
(
&
i
);
ch
->
Send
(
&
i
);
...
@@ -212,12 +212,12 @@ TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) {
...
@@ -212,12 +212,12 @@ TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) {
}
}
void
ChannelCloseUnblocksReceiversTest
(
Channel
<
int
>
*
ch
)
{
void
ChannelCloseUnblocksReceiversTest
(
Channel
<
int
>
*
ch
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -230,7 +230,7 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
...
@@ -230,7 +230,7 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all the threads are blocked
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
@@ -241,21 +241,21 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
...
@@ -241,21 +241,21 @@ void ChannelCloseUnblocksReceiversTest(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
void
ChannelCloseUnblocksSendersTest
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
void
ChannelCloseUnblocksSendersTest
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
num_t
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
...
@@ -277,13 +277,13 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
...
@@ -277,13 +277,13 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// If ch is Buffered, atleast 4 threads must be blocked.
// If ch is Buffered, atleast 4 threads must be blocked.
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
!
thread_ended
[
i
])
ct
++
;
if
(
!
thread_ended
[
i
])
ct
++
;
}
}
EXPECT_GE
(
ct
,
4
);
EXPECT_GE
(
ct
,
4
);
}
else
{
}
else
{
// If ch is UnBuffered, all the threads should be blocked.
// If ch is UnBuffered, all the threads should be blocked.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
}
}
...
@@ -294,21 +294,21 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
...
@@ -294,21 +294,21 @@ void ChannelCloseUnblocksSendersTest(Channel<int> *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// Verify that only 1 send was successful
// Verify that only 1 send was successful
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
if
(
send_success
[
i
])
ct
++
;
}
}
// Only 1 send must be successful
// Only 1 send must be successful
EXPECT_EQ
(
ct
,
1
);
EXPECT_EQ
(
ct
,
1
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
// This tests that closing a buffered channel also unblocks
// This tests that closing a buffered channel also unblocks
...
@@ -409,13 +409,13 @@ TEST(Channel, UnbufferedMoreReceiveLessSendTest) {
...
@@ -409,13 +409,13 @@ TEST(Channel, UnbufferedMoreReceiveLessSendTest) {
// This tests that destroying a channel unblocks
// This tests that destroying a channel unblocks
// any senders waiting for channel to have write space
// any senders waiting for channel to have write space
void
ChannelDestroyUnblockSenders
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
void
ChannelDestroyUnblockSenders
(
Channel
<
int
>
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
num_t
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
...
@@ -438,14 +438,14 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
...
@@ -438,14 +438,14 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// If channel is buffered, verify that atleast 4 threads are blocked
// If channel is buffered, verify that atleast 4 threads are blocked
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
}
}
// Atleast 4 threads must be blocked
// Atleast 4 threads must be blocked
EXPECT_GE
(
ct
,
4
);
EXPECT_GE
(
ct
,
4
);
}
else
{
}
else
{
// Verify that all the threads are blocked
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
}
}
...
@@ -454,13 +454,13 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
...
@@ -454,13 +454,13 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
// Count number of successful sends
// Count number of successful sends
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
if
(
send_success
[
i
])
ct
++
;
}
}
...
@@ -473,18 +473,18 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
...
@@ -473,18 +473,18 @@ void ChannelDestroyUnblockSenders(Channel<int> *ch, bool isBuffered) {
}
}
// Join all threads
// Join all threads
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
// This tests that destroying a channel also unblocks
// This tests that destroying a channel also unblocks
// any receivers waiting on the channel
// any receivers waiting on the channel
void
ChannelDestroyUnblockReceivers
(
Channel
<
int
>
*
ch
)
{
void
ChannelDestroyUnblockReceivers
(
Channel
<
int
>
*
ch
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -498,18 +498,18 @@ void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
...
@@ -498,18 +498,18 @@ void ChannelDestroyUnblockReceivers(Channel<int> *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
// Verify that all threads are blocked
// Verify that all threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
// delete the channel
// delete the channel
delete
ch
;
delete
ch
;
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
TEST
(
Channel
,
BufferedChannelDestroyUnblocksReceiversTest
)
{
TEST
(
Channel
,
BufferedChannelDestroyUnblocksReceiversTest
)
{
...
@@ -679,12 +679,12 @@ TEST(ChannelHolder, TypeMismatchReceiveTest) {
...
@@ -679,12 +679,12 @@ TEST(ChannelHolder, TypeMismatchReceiveTest) {
}
}
void
ChannelHolderCloseUnblocksReceiversTest
(
ChannelHolder
*
ch
)
{
void
ChannelHolderCloseUnblocksReceiversTest
(
ChannelHolder
*
ch
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -697,7 +697,7 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
...
@@ -697,7 +697,7 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all the threads are blocked
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
...
@@ -708,21 +708,21 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
...
@@ -708,21 +708,21 @@ void ChannelHolderCloseUnblocksReceiversTest(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait 0.2 sec
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
void
ChannelHolderCloseUnblocksSendersTest
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
void
ChannelHolderCloseUnblocksSendersTest
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
num_t
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
...
@@ -744,13 +744,13 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
...
@@ -744,13 +744,13 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// If ch is Buffered, atleast 4 threads must be blocked.
// If ch is Buffered, atleast 4 threads must be blocked.
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
!
thread_ended
[
i
])
ct
++
;
if
(
!
thread_ended
[
i
])
ct
++
;
}
}
EXPECT_GE
(
ct
,
4
);
EXPECT_GE
(
ct
,
4
);
}
else
{
}
else
{
// If ch is UnBuffered, all the threads should be blocked.
// If ch is UnBuffered, all the threads should be blocked.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
}
}
...
@@ -761,21 +761,21 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
...
@@ -761,21 +761,21 @@ void ChannelHolderCloseUnblocksSendersTest(ChannelHolder *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// Verify that only 1 send was successful
// Verify that only 1 send was successful
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
if
(
send_success
[
i
])
ct
++
;
}
}
// Only 1 send must be successful
// Only 1 send must be successful
EXPECT_EQ
(
ct
,
1
);
EXPECT_EQ
(
ct
,
1
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
// This tests that closing a channelholder unblocks
// This tests that closing a channelholder unblocks
...
@@ -813,13 +813,13 @@ TEST(Channel, ChannelHolderCloseUnblocksSendersTest) {
...
@@ -813,13 +813,13 @@ TEST(Channel, ChannelHolderCloseUnblocksSendersTest) {
// This tests that destroying a channelholder unblocks
// This tests that destroying a channelholder unblocks
// any senders waiting for channel
// any senders waiting for channel
void
ChannelHolderDestroyUnblockSenders
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
void
ChannelHolderDestroyUnblockSenders
(
ChannelHolder
*
ch
,
bool
isBuffered
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
bool
send_success
[
num_t
hreads
];
bool
send_success
[
kNumT
hreads
];
// Launches threads that try to write and are blocked because of no readers
// Launches threads that try to write and are blocked because of no readers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
send_success
[
i
]
=
false
;
send_success
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
...
@@ -841,14 +841,14 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
...
@@ -841,14 +841,14 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
if
(
isBuffered
)
{
if
(
isBuffered
)
{
// If channel is buffered, verify that atleast 4 threads are blocked
// If channel is buffered, verify that atleast 4 threads are blocked
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
if
(
thread_ended
[
i
]
==
false
)
ct
++
;
}
}
// Atleast 4 threads must be blocked
// Atleast 4 threads must be blocked
EXPECT_GE
(
ct
,
4
);
EXPECT_GE
(
ct
,
4
);
}
else
{
}
else
{
// Verify that all the threads are blocked
// Verify that all the threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
}
}
...
@@ -857,13 +857,13 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
...
@@ -857,13 +857,13 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
// Count number of successfuld sends
// Count number of successfuld sends
int
ct
=
0
;
int
ct
=
0
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
if
(
send_success
[
i
])
ct
++
;
if
(
send_success
[
i
])
ct
++
;
}
}
...
@@ -876,18 +876,18 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
...
@@ -876,18 +876,18 @@ void ChannelHolderDestroyUnblockSenders(ChannelHolder *ch, bool isBuffered) {
}
}
// Join all threads
// Join all threads
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
// This tests that destroying a channelholder also unblocks
// This tests that destroying a channelholder also unblocks
// any receivers waiting on the channel
// any receivers waiting on the channel
void
ChannelHolderDestroyUnblockReceivers
(
ChannelHolder
*
ch
)
{
void
ChannelHolderDestroyUnblockReceivers
(
ChannelHolder
*
ch
)
{
size_t
num_t
hreads
=
5
;
const
size_t
kNumT
hreads
=
5
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to read and are blocked because of no writers
// Launches threads that try to read and are blocked because of no writers
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -901,18 +901,18 @@ void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
...
@@ -901,18 +901,18 @@ void ChannelHolderDestroyUnblockReceivers(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads are blocked
// Verify that all threads are blocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
false
);
EXPECT_EQ
(
thread_ended
[
i
],
false
);
}
}
// delete the channel
// delete the channel
delete
ch
;
delete
ch
;
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
200
));
// wait
// Verify that all threads got unblocked
// Verify that all threads got unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
TEST
(
ChannelHolder
,
ChannelHolderDestroyUnblocksReceiversTest
)
{
TEST
(
ChannelHolder
,
ChannelHolderDestroyUnblocksReceiversTest
)
{
...
@@ -945,12 +945,12 @@ TEST(ChannelHolder, ChannelHolderDestroyUnblocksSendersTest) {
...
@@ -945,12 +945,12 @@ TEST(ChannelHolder, ChannelHolderDestroyUnblocksSendersTest) {
// This tests that closing a channelholder many times.
// This tests that closing a channelholder many times.
void
ChannelHolderManyTimesClose
(
ChannelHolder
*
ch
)
{
void
ChannelHolderManyTimesClose
(
ChannelHolder
*
ch
)
{
const
int
num_t
hreads
=
15
;
const
int
kNumT
hreads
=
15
;
std
::
thread
t
[
num_t
hreads
];
std
::
thread
t
[
kNumT
hreads
];
bool
thread_ended
[
num_t
hreads
];
bool
thread_ended
[
kNumT
hreads
];
// Launches threads that try to send data to channel.
// Launches threads that try to send data to channel.
for
(
size_t
i
=
0
;
i
<
num_t
hreads
/
3
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
/
3
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
ended
)
{
[
&
](
bool
*
ended
)
{
...
@@ -962,7 +962,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
...
@@ -962,7 +962,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
}
}
// Launches threads that try to receive data to channel.
// Launches threads that try to receive data to channel.
for
(
size_t
i
=
num_threads
/
3
;
i
<
2
*
num_t
hreads
/
3
;
i
++
)
{
for
(
size_t
i
=
kNumThreads
/
3
;
i
<
2
*
kNumT
hreads
/
3
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -976,7 +976,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
...
@@ -976,7 +976,7 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
}
}
// Launches threads that try to close the channel.
// Launches threads that try to close the channel.
for
(
size_t
i
=
2
*
num_threads
/
3
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
2
*
kNumThreads
/
3
;
i
<
kNumT
hreads
;
i
++
)
{
thread_ended
[
i
]
=
false
;
thread_ended
[
i
]
=
false
;
t
[
i
]
=
std
::
thread
(
t
[
i
]
=
std
::
thread
(
[
&
](
bool
*
p
)
{
[
&
](
bool
*
p
)
{
...
@@ -991,13 +991,13 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
...
@@ -991,13 +991,13 @@ void ChannelHolderManyTimesClose(ChannelHolder *ch) {
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
100
));
// wait
// Verify that all threads are unblocked
// Verify that all threads are unblocked
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
{
EXPECT_EQ
(
thread_ended
[
i
],
true
);
EXPECT_EQ
(
thread_ended
[
i
],
true
);
}
}
EXPECT_TRUE
(
ch
->
IsClosed
());
EXPECT_TRUE
(
ch
->
IsClosed
());
// delete the channel
// delete the channel
delete
ch
;
delete
ch
;
for
(
size_t
i
=
0
;
i
<
num_t
hreads
;
i
++
)
t
[
i
].
join
();
for
(
size_t
i
=
0
;
i
<
kNumT
hreads
;
i
++
)
t
[
i
].
join
();
}
}
TEST
(
ChannelHolder
,
ChannelHolderManyTimesCloseTest
)
{
TEST
(
ChannelHolder
,
ChannelHolderManyTimesCloseTest
)
{
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -16,6 +16,6 @@ else()
...
@@ -16,6 +16,6 @@ else()
endif
()
endif
()
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle
${
multi_devices_graph_builder_deps
}
)
scale_loss_grad_op_handle
${
multi_devices_graph_builder_deps
}
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph
framework_proto
)
cc_library
(
threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
cc_library
(
threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context
)
simple_threadpool device_context
)
paddle/fluid/framework/lod_tensor.h
浏览文件 @
7b40f7ce
...
@@ -142,6 +142,7 @@ class LoDTensor : public Tensor {
...
@@ -142,6 +142,7 @@ class LoDTensor : public Tensor {
return
(
lod_
)[
level
].
size
()
-
1
;
return
(
lod_
)[
level
].
size
()
-
1
;
}
}
// Split LoDTensor and copy to each place specified in places.
std
::
vector
<
LoDTensor
>
SplitLoDTensor
(
std
::
vector
<
LoDTensor
>
SplitLoDTensor
(
const
std
::
vector
<
platform
::
Place
>
places
)
const
;
const
std
::
vector
<
platform
::
Place
>
places
)
const
;
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
7b40f7ce
...
@@ -35,6 +35,17 @@ std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
...
@@ -35,6 +35,17 @@ std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kPlain
),
std
::
make_tuple
(
platform
::
CPUPlace
(),
LibraryType
::
kPlain
),
};
};
proto
::
VarType
::
Type
GetDataTypeOfVar
(
const
Variable
*
var
)
{
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
return
framework
::
ToDataType
(
var
->
Get
<
framework
::
LoDTensor
>
().
type
());
}
else
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
return
framework
::
ToDataType
(
var
->
Get
<
framework
::
SelectedRows
>
().
value
().
type
());
}
else
{
PADDLE_THROW
(
"Var should be LoDTensor or SelectedRows"
);
}
}
static
DDim
GetDims
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
static
DDim
GetDims
(
const
Scope
&
scope
,
const
std
::
string
&
name
)
{
Variable
*
var
=
scope
.
FindVar
(
name
);
Variable
*
var
=
scope
.
FindVar
(
name
);
if
(
var
==
nullptr
)
{
if
(
var
==
nullptr
)
{
...
...
paddle/fluid/framework/operator.h
浏览文件 @
7b40f7ce
...
@@ -61,6 +61,8 @@ inline std::string GradVarName(const std::string& var_name) {
...
@@ -61,6 +61,8 @@ inline std::string GradVarName(const std::string& var_name) {
return
var_name
+
kGradVarSuffix
;
return
var_name
+
kGradVarSuffix
;
}
}
proto
::
VarType
::
Type
GetDataTypeOfVar
(
const
Variable
*
var
);
class
OperatorBase
;
class
OperatorBase
;
class
ExecutionContext
;
class
ExecutionContext
;
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
7b40f7ce
...
@@ -150,13 +150,30 @@ void ParallelExecutor::BCastParamsToGPUs(
...
@@ -150,13 +150,30 @@ void ParallelExecutor::BCastParamsToGPUs(
#endif
#endif
}
}
void
ParallelExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
void
ParallelExecutor
::
Run
(
const
std
::
string
&
fetched_var_name
)
{
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
platform
::
RecordBlock
b
(
0
);
platform
::
RecordBlock
b
(
0
);
SplitTensorToPlaces
(
feed_tensors
);
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
auto
fetch_data
=
member_
->
executor_
->
Run
(
fetch_tensors
);
*
member_
->
global_scope_
->
Var
(
fetched_var_name
)
->
GetMutable
<
FeedFetchList
>
()
=
*
member_
->
global_scope_
->
Var
(
fetched_var_name
)
->
GetMutable
<
FeedFetchList
>
()
=
fetch_data
;
fetch_data
;
}
}
void
ParallelExecutor
::
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>
&
feed_tensors
)
{
for
(
auto
it
:
feed_tensors
)
{
auto
lod_tensors
=
it
.
second
.
SplitLoDTensor
(
member_
->
places_
);
for
(
size_t
j
=
0
;
j
<
member_
->
places_
.
size
();
++
j
)
{
// TODO(panxy0718): Do I need to delete this var?
member_
->
local_scopes_
[
j
]
->
Var
(
it
.
first
)
->
GetMutable
<
LoDTensor
>
()
->
ShareDataWith
(
lod_tensors
[
j
]);
}
}
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/parallel_executor.h
浏览文件 @
7b40f7ce
...
@@ -42,9 +42,13 @@ class ParallelExecutor {
...
@@ -42,9 +42,13 @@ class ParallelExecutor {
bool
allow_op_delay
);
bool
allow_op_delay
);
void
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
,
void
Run
(
const
std
::
vector
<
std
::
string
>&
fetch_tensors
,
const
std
::
string
&
fetched_var_name
=
"fetched_var"
);
const
std
::
string
&
fetched_var_name
,
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
private:
private:
void
SplitTensorToPlaces
(
const
std
::
unordered_map
<
std
::
string
,
LoDTensor
>&
feed_tensors
);
ParallelExecutorPrivate
*
member_
;
ParallelExecutorPrivate
*
member_
;
void
BCastParamsToGPUs
(
const
ProgramDesc
&
startup_program
)
const
;
void
BCastParamsToGPUs
(
const
ProgramDesc
&
startup_program
)
const
;
...
...
paddle/fluid/framework/selected_rows.cc
浏览文件 @
7b40f7ce
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -13,6 +16,7 @@ limitations under the License. */
...
@@ -13,6 +16,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
const
platform
::
DeviceContext
&
dev_ctx
)
{
{
// the 1st field, uint32_t version
{
// the 1st field, uint32_t version
...
...
paddle/fluid/framework/selected_rows.h
浏览文件 @
7b40f7ce
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -47,6 +50,15 @@ class SelectedRows {
...
@@ -47,6 +50,15 @@ class SelectedRows {
void
set_rows
(
const
Vector
<
int64_t
>&
rows
)
{
rows_
=
rows
;
}
void
set_rows
(
const
Vector
<
int64_t
>&
rows
)
{
rows_
=
rows
;
}
/**
* get the index of id in rows
*/
int64_t
index
(
int64_t
id
)
const
{
auto
it
=
std
::
find
(
rows_
.
begin
(),
rows_
.
end
(),
id
);
PADDLE_ENFORCE
(
it
!=
rows_
.
end
(),
"id should be in rows"
);
return
static_cast
<
int64_t
>
(
std
::
distance
(
rows_
.
begin
(),
it
));
}
DDim
GetCompleteDims
()
const
{
DDim
GetCompleteDims
()
const
{
std
::
vector
<
int64_t
>
dims
=
vectorize
(
value_
->
dims
());
std
::
vector
<
int64_t
>
dims
=
vectorize
(
value_
->
dims
());
dims
[
0
]
=
height_
;
dims
[
0
]
=
height_
;
...
...
paddle/fluid/framework/tensor_impl.h
浏览文件 @
7b40f7ce
...
@@ -128,13 +128,20 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
...
@@ -128,13 +128,20 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
if
(
platform
::
is_cpu_place
(
place
))
{
if
(
platform
::
is_cpu_place
(
place
))
{
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CPUPlace
>
(
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
,
type
));
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
,
type
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
}
else
if
(
platform
::
is_gpu_place
(
place
)
||
platform
::
is_cuda_pinned_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
PADDLE_THROW
(
"CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode."
);
}
}
#else
#else
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CUDAPlace
>
(
if
(
platform
::
is_gpu_place
(
place
))
{
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
size
,
type
));
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CUDAPlace
>
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
size
,
type
));
}
else
if
(
platform
::
is_cuda_pinned_place
(
place
))
{
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CUDAPinnedPlace
>
(
boost
::
get
<
platform
::
CUDAPinnedPlace
>
(
place
),
size
,
type
));
}
}
}
#endif
#endif
offset_
=
0
;
offset_
=
0
;
...
@@ -145,7 +152,7 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
...
@@ -145,7 +152,7 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
)
{
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
)
{
PADDLE_ENFORCE
(
this
->
holder_
!=
nullptr
,
PADDLE_ENFORCE
(
this
->
holder_
!=
nullptr
,
"Cannot invoke mutable data if current hold nothing"
);
"Cannot invoke mutable data if current hold nothing
.
"
);
return
mutable_data
(
place
,
holder_
->
type
());
return
mutable_data
(
place
,
holder_
->
type
());
}
}
...
...
paddle/fluid/framework/tuple.h
浏览文件 @
7b40f7ce
...
@@ -35,24 +35,25 @@ class Tuple {
...
@@ -35,24 +35,25 @@ class Tuple {
public:
public:
using
ElementVars
=
std
::
vector
<
ElementVar
>
;
using
ElementVars
=
std
::
vector
<
ElementVar
>
;
Tuple
(
std
::
vector
<
ElementVar
>&
var
,
std
::
vector
<
VarDesc
>&
var_desc
)
Tuple
(
const
std
::
vector
<
ElementVar
>&
var
,
const
std
::
vector
<
VarDesc
>&
var_desc
)
:
var_
(
var
),
var_desc_
(
var_desc
)
{}
:
var_
(
var
),
var_desc_
(
var_desc
)
{}
Tuple
(
std
::
vector
<
ElementVar
>&
var
)
:
var_
(
var
)
{}
explicit
Tuple
(
std
::
vector
<
ElementVar
>&
var
)
:
var_
(
var
)
{}
ElementVar
get
(
int
idx
)
const
{
return
var_
[
idx
];
}
;
ElementVar
get
(
int
idx
)
const
{
return
var_
[
idx
];
}
ElementVar
&
get
(
int
idx
)
{
return
var_
[
idx
];
}
;
ElementVar
&
get
(
int
idx
)
{
return
var_
[
idx
];
}
bool
isSameType
(
Tuple
&
t
)
const
;
bool
isSameType
(
const
Tuple
&
t
)
const
;
size_t
getSize
()
const
{
return
var_
.
size
();
}
;
size_t
getSize
()
const
{
return
var_
.
size
();
}
private:
private:
ElementVars
var_
;
ElementVars
var_
;
std
::
vector
<
VarDesc
>
var_desc_
;
std
::
vector
<
VarDesc
>
var_desc_
;
};
};
bool
Tuple
::
isSameType
(
Tuple
&
t
)
const
{
bool
Tuple
::
isSameType
(
const
Tuple
&
t
)
const
{
size_t
tuple_size
=
getSize
();
size_t
tuple_size
=
getSize
();
if
(
tuple_size
!=
t
.
getSize
())
{
if
(
tuple_size
!=
t
.
getSize
())
{
return
false
;
return
false
;
...
...
paddle/fluid/inference/io.cc
浏览文件 @
7b40f7ce
...
@@ -41,8 +41,7 @@ bool IsPersistable(const framework::VarDesc* var) {
...
@@ -41,8 +41,7 @@ bool IsPersistable(const framework::VarDesc* var) {
return
false
;
return
false
;
}
}
void
LoadPersistables
(
framework
::
Executor
&
executor
,
void
LoadPersistables
(
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
&
scope
,
const
framework
::
ProgramDesc
&
main_program
,
const
framework
::
ProgramDesc
&
main_program
,
const
std
::
string
&
dirname
,
const
std
::
string
&
dirname
,
const
std
::
string
&
param_filename
)
{
const
std
::
string
&
param_filename
)
{
...
@@ -108,10 +107,8 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
...
@@ -108,10 +107,8 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor& executor,
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
std
::
unique_ptr
<
framework
::
ProgramDesc
>
Load
(
framework
::
Executor
&
executor
,
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
&
scope
,
const
std
::
string
&
prog_filename
,
const
std
::
string
&
param_filename
)
{
const
std
::
string
&
prog_filename
,
const
std
::
string
&
param_filename
)
{
std
::
string
model_filename
=
prog_filename
;
std
::
string
model_filename
=
prog_filename
;
std
::
string
program_desc_str
;
std
::
string
program_desc_str
;
ReadBinaryFile
(
model_filename
,
program_desc_str
);
ReadBinaryFile
(
model_filename
,
program_desc_str
);
...
...
paddle/fluid/inference/io.h
浏览文件 @
7b40f7ce
...
@@ -24,8 +24,7 @@ limitations under the License. */
...
@@ -24,8 +24,7 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
void
LoadPersistables
(
framework
::
Executor
&
executor
,
void
LoadPersistables
(
framework
::
Executor
&
executor
,
framework
::
Scope
&
scope
,
framework
::
Scope
&
scope
,
const
framework
::
ProgramDesc
&
main_program
,
const
framework
::
ProgramDesc
&
main_program
,
const
std
::
string
&
dirname
,
const
std
::
string
&
dirname
,
const
std
::
string
&
param_filename
);
const
std
::
string
&
param_filename
);
...
...
paddle/fluid/inference/tests/book/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -4,7 +4,7 @@ function(inference_test TARGET_NAME)
...
@@ -4,7 +4,7 @@ function(inference_test TARGET_NAME)
set
(
multiValueArgs ARGS
)
set
(
multiValueArgs ARGS
)
cmake_parse_arguments
(
inference_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
inference_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/tests
)
set
(
PYTHON_TESTS_DIR
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/tests
)
set
(
arg_list
""
)
set
(
arg_list
""
)
if
(
inference_test_ARGS
)
if
(
inference_test_ARGS
)
foreach
(
arg
${
inference_test_ARGS
}
)
foreach
(
arg
${
inference_test_ARGS
}
)
...
...
paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc
浏览文件 @
7b40f7ce
...
@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -30,8 +30,8 @@ TEST(inference, fit_a_line) {
...
@@ -30,8 +30,8 @@ TEST(inference, fit_a_line) {
// The second dim of the input tensor should be 13
// The second dim of the input tensor should be 13
// The input data should be >= 0
// The input data should be >= 0
int64_t
batch_size
=
10
;
int64_t
batch_size
=
10
;
SetupTensor
<
float
>
(
SetupTensor
<
float
>
(
&
input
,
{
batch_size
,
13
},
static_cast
<
float
>
(
0
),
input
,
{
batch_size
,
13
},
static_cast
<
float
>
(
0
),
static_cast
<
float
>
(
10
));
static_cast
<
float
>
(
10
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
input
);
cpu_feeds
.
push_back
(
&
input
);
...
...
paddle/fluid/inference/tests/book/test_inference_image_classification.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -35,10 +35,8 @@ TEST(inference, image_classification) {
...
@@ -35,10 +35,8 @@ TEST(inference, image_classification) {
paddle
::
framework
::
LoDTensor
input
;
paddle
::
framework
::
LoDTensor
input
;
// Use normilized image pixels as input data,
// Use normilized image pixels as input data,
// which should be in the range [0.0, 1.0].
// which should be in the range [0.0, 1.0].
SetupTensor
<
float
>
(
input
,
SetupTensor
<
float
>
(
&
input
,
{
FLAGS_batch_size
,
3
,
32
,
32
},
{
FLAGS_batch_size
,
3
,
32
,
32
},
static_cast
<
float
>
(
0
),
static_cast
<
float
>
(
1
));
static_cast
<
float
>
(
0
),
static_cast
<
float
>
(
1
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
input
);
cpu_feeds
.
push_back
(
&
input
);
...
@@ -48,8 +46,8 @@ TEST(inference, image_classification) {
...
@@ -48,8 +46,8 @@ TEST(inference, image_classification) {
// Run inference on CPU
// Run inference on CPU
LOG
(
INFO
)
<<
"--- CPU Runs: ---"
;
LOG
(
INFO
)
<<
"--- CPU Runs: ---"
;
TestInference
<
paddle
::
platform
::
CPUPlace
,
true
>
(
TestInference
<
paddle
::
platform
::
CPUPlace
,
true
>
(
dirname
,
cpu_feeds
,
dirname
,
cpu_feeds
,
cpu_fetchs1
,
FLAGS_repeat
);
cpu_fetchs1
,
FLAGS_repeat
);
LOG
(
INFO
)
<<
output1
.
dims
();
LOG
(
INFO
)
<<
output1
.
dims
();
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -59,8 +57,8 @@ TEST(inference, image_classification) {
...
@@ -59,8 +57,8 @@ TEST(inference, image_classification) {
// Run inference on CUDA GPU
// Run inference on CUDA GPU
LOG
(
INFO
)
<<
"--- GPU Runs: ---"
;
LOG
(
INFO
)
<<
"--- GPU Runs: ---"
;
TestInference
<
paddle
::
platform
::
CUDAPlace
,
true
>
(
TestInference
<
paddle
::
platform
::
CUDAPlace
,
true
>
(
dirname
,
cpu_feeds
,
dirname
,
cpu_feeds
,
cpu_fetchs2
,
FLAGS_repeat
);
cpu_fetchs2
,
FLAGS_repeat
);
LOG
(
INFO
)
<<
output2
.
dims
();
LOG
(
INFO
)
<<
output2
.
dims
();
CheckError
<
float
>
(
output1
,
output2
);
CheckError
<
float
>
(
output1
,
output2
);
...
...
paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -36,37 +36,21 @@ TEST(inference, label_semantic_roles) {
...
@@ -36,37 +36,21 @@ TEST(inference, label_semantic_roles) {
int64_t
predicate_dict_len
=
3162
;
int64_t
predicate_dict_len
=
3162
;
int64_t
mark_dict_len
=
2
;
int64_t
mark_dict_len
=
2
;
SetupLoDTensor
(
word
,
SetupLoDTensor
(
&
word
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
predicate
,
SetupLoDTensor
(
&
predicate
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
predicate_dict_len
-
1
));
static_cast
<
int64_t
>
(
predicate_dict_len
-
1
));
SetupLoDTensor
(
ctx_n2
,
SetupLoDTensor
(
&
ctx_n2
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
ctx_n1
,
SetupLoDTensor
(
&
ctx_n1
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
ctx_0
,
SetupLoDTensor
(
&
ctx_0
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
ctx_p1
,
SetupLoDTensor
(
&
ctx_p1
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
ctx_p2
,
SetupLoDTensor
(
&
ctx_p2
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
SetupLoDTensor
(
mark
,
SetupLoDTensor
(
&
mark
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
mark_dict_len
-
1
));
static_cast
<
int64_t
>
(
mark_dict_len
-
1
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
...
...
paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -35,10 +35,8 @@ TEST(inference, recognize_digits) {
...
@@ -35,10 +35,8 @@ TEST(inference, recognize_digits) {
paddle
::
framework
::
LoDTensor
input
;
paddle
::
framework
::
LoDTensor
input
;
// Use normilized image pixels as input data,
// Use normilized image pixels as input data,
// which should be in the range [-1.0, 1.0].
// which should be in the range [-1.0, 1.0].
SetupTensor
<
float
>
(
input
,
SetupTensor
<
float
>
(
&
input
,
{
FLAGS_batch_size
,
1
,
28
,
28
},
{
FLAGS_batch_size
,
1
,
28
,
28
},
static_cast
<
float
>
(
-
1
),
static_cast
<
float
>
(
1
));
static_cast
<
float
>
(
-
1
),
static_cast
<
float
>
(
1
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
input
);
cpu_feeds
.
push_back
(
&
input
);
...
@@ -49,8 +47,8 @@ TEST(inference, recognize_digits) {
...
@@ -49,8 +47,8 @@ TEST(inference, recognize_digits) {
// Run inference on CPU
// Run inference on CPU
LOG
(
INFO
)
<<
"--- CPU Runs: is_combined="
<<
is_combined
<<
" ---"
;
LOG
(
INFO
)
<<
"--- CPU Runs: is_combined="
<<
is_combined
<<
" ---"
;
TestInference
<
paddle
::
platform
::
CPUPlace
>
(
TestInference
<
paddle
::
platform
::
CPUPlace
>
(
dirname
,
cpu_feeds
,
cpu_fetchs1
,
dirname
,
cpu_feeds
,
cpu_fetchs1
,
FLAGS_repeat
,
is_combined
);
FLAGS_repeat
,
is_combined
);
LOG
(
INFO
)
<<
output1
.
dims
();
LOG
(
INFO
)
<<
output1
.
dims
();
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -60,8 +58,8 @@ TEST(inference, recognize_digits) {
...
@@ -60,8 +58,8 @@ TEST(inference, recognize_digits) {
// Run inference on CUDA GPU
// Run inference on CUDA GPU
LOG
(
INFO
)
<<
"--- GPU Runs: is_combined="
<<
is_combined
<<
" ---"
;
LOG
(
INFO
)
<<
"--- GPU Runs: is_combined="
<<
is_combined
<<
" ---"
;
TestInference
<
paddle
::
platform
::
CUDAPlace
>
(
TestInference
<
paddle
::
platform
::
CUDAPlace
>
(
dirname
,
cpu_feeds
,
cpu_fetchs2
,
dirname
,
cpu_feeds
,
cpu_fetchs2
,
FLAGS_repeat
,
is_combined
);
FLAGS_repeat
,
is_combined
);
LOG
(
INFO
)
<<
output2
.
dims
();
LOG
(
INFO
)
<<
output2
.
dims
();
CheckError
<
float
>
(
output1
,
output2
);
CheckError
<
float
>
(
output1
,
output2
);
...
...
paddle/fluid/inference/tests/book/test_inference_recommender_system.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -36,25 +36,25 @@ TEST(inference, recommender_system) {
...
@@ -36,25 +36,25 @@ TEST(inference, recommender_system) {
// Use the first data from paddle.dataset.movielens.test() as input
// Use the first data from paddle.dataset.movielens.test() as input
std
::
vector
<
int64_t
>
user_id_data
=
{
1
};
std
::
vector
<
int64_t
>
user_id_data
=
{
1
};
SetupTensor
<
int64_t
>
(
user_id
,
{
batch_size
,
1
},
user_id_data
);
SetupTensor
<
int64_t
>
(
&
user_id
,
{
batch_size
,
1
},
user_id_data
);
std
::
vector
<
int64_t
>
gender_id_data
=
{
1
};
std
::
vector
<
int64_t
>
gender_id_data
=
{
1
};
SetupTensor
<
int64_t
>
(
gender_id
,
{
batch_size
,
1
},
gender_id_data
);
SetupTensor
<
int64_t
>
(
&
gender_id
,
{
batch_size
,
1
},
gender_id_data
);
std
::
vector
<
int64_t
>
age_id_data
=
{
0
};
std
::
vector
<
int64_t
>
age_id_data
=
{
0
};
SetupTensor
<
int64_t
>
(
age_id
,
{
batch_size
,
1
},
age_id_data
);
SetupTensor
<
int64_t
>
(
&
age_id
,
{
batch_size
,
1
},
age_id_data
);
std
::
vector
<
int64_t
>
job_id_data
=
{
10
};
std
::
vector
<
int64_t
>
job_id_data
=
{
10
};
SetupTensor
<
int64_t
>
(
job_id
,
{
batch_size
,
1
},
job_id_data
);
SetupTensor
<
int64_t
>
(
&
job_id
,
{
batch_size
,
1
},
job_id_data
);
std
::
vector
<
int64_t
>
movie_id_data
=
{
783
};
std
::
vector
<
int64_t
>
movie_id_data
=
{
783
};
SetupTensor
<
int64_t
>
(
movie_id
,
{
batch_size
,
1
},
movie_id_data
);
SetupTensor
<
int64_t
>
(
&
movie_id
,
{
batch_size
,
1
},
movie_id_data
);
std
::
vector
<
int64_t
>
category_id_data
=
{
10
,
8
,
9
};
std
::
vector
<
int64_t
>
category_id_data
=
{
10
,
8
,
9
};
SetupLoDTensor
<
int64_t
>
(
category_id
,
{
3
,
1
},
{{
0
,
3
}},
category_id_data
);
SetupLoDTensor
<
int64_t
>
(
&
category_id
,
{
3
,
1
},
{{
0
,
3
}},
category_id_data
);
std
::
vector
<
int64_t
>
movie_title_data
=
{
1069
,
4140
,
2923
,
710
,
988
};
std
::
vector
<
int64_t
>
movie_title_data
=
{
1069
,
4140
,
2923
,
710
,
988
};
SetupLoDTensor
<
int64_t
>
(
movie_title
,
{
5
,
1
},
{{
0
,
5
}},
movie_title_data
);
SetupLoDTensor
<
int64_t
>
(
&
movie_title
,
{
5
,
1
},
{{
0
,
5
}},
movie_title_data
);
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
user_id
);
cpu_feeds
.
push_back
(
&
user_id
);
...
...
paddle/fluid/inference/tests/book/test_inference_rnn_encoder_decoder.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -32,10 +32,10 @@ TEST(inference, rnn_encoder_decoder) {
...
@@ -32,10 +32,10 @@ TEST(inference, rnn_encoder_decoder) {
paddle
::
framework
::
LoDTensor
word_data
,
trg_word
;
paddle
::
framework
::
LoDTensor
word_data
,
trg_word
;
paddle
::
framework
::
LoD
lod
{{
0
,
4
,
10
}};
paddle
::
framework
::
LoD
lod
{{
0
,
4
,
10
}};
SetupLoDTensor
(
SetupLoDTensor
(
&
word_data
,
lod
,
static_cast
<
int64_t
>
(
0
),
word_data
,
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
1
));
static_cast
<
int64_t
>
(
1
));
SetupLoDTensor
(
SetupLoDTensor
(
&
trg_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
trg_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
1
));
static_cast
<
int64_t
>
(
1
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
word_data
);
cpu_feeds
.
push_back
(
&
word_data
);
...
...
paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -33,9 +33,7 @@ TEST(inference, understand_sentiment) {
...
@@ -33,9 +33,7 @@ TEST(inference, understand_sentiment) {
paddle
::
framework
::
LoD
lod
{{
0
,
4
,
10
}};
paddle
::
framework
::
LoD
lod
{{
0
,
4
,
10
}};
int64_t
word_dict_len
=
5147
;
int64_t
word_dict_len
=
5147
;
SetupLoDTensor
(
words
,
SetupLoDTensor
(
&
words
,
lod
,
static_cast
<
int64_t
>
(
0
),
lod
,
static_cast
<
int64_t
>
(
0
),
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
static_cast
<
int64_t
>
(
word_dict_len
-
1
));
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
...
...
paddle/fluid/inference/tests/book/test_inference_word2vec.cc
浏览文件 @
7b40f7ce
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -33,10 +33,10 @@ TEST(inference, word2vec) {
...
@@ -33,10 +33,10 @@ TEST(inference, word2vec) {
paddle
::
framework
::
LoD
lod
{{
0
,
1
}};
paddle
::
framework
::
LoD
lod
{{
0
,
1
}};
int64_t
dict_size
=
2073
;
// The size of dictionary
int64_t
dict_size
=
2073
;
// The size of dictionary
SetupLoDTensor
(
first_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
&
first_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
second_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
&
second_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
third_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
&
third_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
fourth_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
SetupLoDTensor
(
&
fourth_word
,
lod
,
static_cast
<
int64_t
>
(
0
),
dict_size
-
1
);
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>
cpu_feeds
;
cpu_feeds
.
push_back
(
&
first_word
);
cpu_feeds
.
push_back
(
&
first_word
);
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
7b40f7ce
...
@@ -11,59 +11,59 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,59 +11,59 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#include <map>
#include <random>
#include <string>
#include <vector>
#include <time.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
template
<
typename
T
>
template
<
typename
T
>
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
&
input
,
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
paddle
::
framework
::
DDim
dims
,
paddle
::
framework
::
DDim
dims
,
T
lower
,
T
upper
)
{
T
lower
,
std
::
mt19937
rng
(
100
);
// An arbitrarily chosen but fixed seed.
T
upper
)
{
std
::
uniform_real_distribution
<
double
>
uniform_dist
(
0
,
1
);
srand
(
time
(
0
));
T
*
input_ptr
=
input
.
mutable_data
<
T
>
(
dims
,
paddle
::
platform
::
CPUPlace
());
T
*
input_ptr
=
input
->
mutable_data
<
T
>
(
dims
,
paddle
::
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
input
.
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
input
->
numel
();
++
i
)
{
input_ptr
[
i
]
=
input_ptr
[
i
]
=
static_cast
<
T
>
(
uniform_dist
(
rng
)
*
(
upper
-
lower
)
+
lower
);
(
static_cast
<
T
>
(
rand
())
/
static_cast
<
T
>
(
RAND_MAX
))
*
(
upper
-
lower
)
+
lower
;
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
&
input
,
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
paddle
::
framework
::
DDim
dims
,
paddle
::
framework
::
DDim
dims
,
const
std
::
vector
<
T
>&
data
)
{
std
::
vector
<
T
>&
data
)
{
CHECK_EQ
(
paddle
::
framework
::
product
(
dims
),
static_cast
<
int64_t
>
(
data
.
size
()));
CHECK_EQ
(
paddle
::
framework
::
product
(
dims
),
static_cast
<
int64_t
>
(
data
.
size
()));
T
*
input_ptr
=
input
.
mutable_data
<
T
>
(
dims
,
paddle
::
platform
::
CPUPlace
());
T
*
input_ptr
=
input
->
mutable_data
<
T
>
(
dims
,
paddle
::
platform
::
CPUPlace
());
memcpy
(
input_ptr
,
data
.
data
(),
input
.
numel
()
*
sizeof
(
T
));
memcpy
(
input_ptr
,
data
.
data
(),
input
->
numel
()
*
sizeof
(
T
));
}
}
template
<
typename
T
>
template
<
typename
T
>
void
SetupLoDTensor
(
paddle
::
framework
::
LoDTensor
&
input
,
void
SetupLoDTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
paddle
::
framework
::
LoD
&
lod
,
const
paddle
::
framework
::
LoD
&
lod
,
T
lower
,
T
upper
)
{
T
lower
,
input
->
set_lod
(
lod
);
T
upper
)
{
input
.
set_lod
(
lod
);
int
dim
=
lod
[
0
][
lod
[
0
].
size
()
-
1
];
int
dim
=
lod
[
0
][
lod
[
0
].
size
()
-
1
];
SetupTensor
<
T
>
(
input
,
{
dim
,
1
},
lower
,
upper
);
SetupTensor
<
T
>
(
input
,
{
dim
,
1
},
lower
,
upper
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
SetupLoDTensor
(
paddle
::
framework
::
LoDTensor
&
input
,
void
SetupLoDTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
paddle
::
framework
::
DDim
dims
,
paddle
::
framework
::
DDim
dims
,
paddle
::
framework
::
LoD
lod
,
const
paddle
::
framework
::
LoD
lod
,
std
::
vector
<
T
>&
data
)
{
const
std
::
vector
<
T
>&
data
)
{
const
size_t
level
=
lod
.
size
()
-
1
;
const
size_t
level
=
lod
.
size
()
-
1
;
CHECK_EQ
(
dims
[
0
],
static_cast
<
int64_t
>
((
lod
[
level
]).
back
()));
CHECK_EQ
(
dims
[
0
],
static_cast
<
int64_t
>
((
lod
[
level
]).
back
()));
input
.
set_lod
(
lod
);
input
->
set_lod
(
lod
);
SetupTensor
<
T
>
(
input
,
dims
,
data
);
SetupTensor
<
T
>
(
input
,
dims
,
data
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
CheckError
(
paddle
::
framework
::
LoDTensor
&
output1
,
void
CheckError
(
const
paddle
::
framework
::
LoDTensor
&
output1
,
paddle
::
framework
::
LoDTensor
&
output2
)
{
const
paddle
::
framework
::
LoDTensor
&
output2
)
{
// Check lod information
// Check lod information
EXPECT_EQ
(
output1
.
lod
(),
output2
.
lod
());
EXPECT_EQ
(
output1
.
lod
(),
output2
.
lod
());
...
@@ -91,9 +91,8 @@ void CheckError(paddle::framework::LoDTensor& output1,
...
@@ -91,9 +91,8 @@ void CheckError(paddle::framework::LoDTensor& output1,
template
<
typename
Place
,
bool
PrepareContext
=
false
>
template
<
typename
Place
,
bool
PrepareContext
=
false
>
void
TestInference
(
const
std
::
string
&
dirname
,
void
TestInference
(
const
std
::
string
&
dirname
,
const
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>&
cpu_feeds
,
const
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>&
cpu_feeds
,
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>&
cpu_fetchs
,
const
std
::
vector
<
paddle
::
framework
::
LoDTensor
*>&
cpu_fetchs
,
const
int
repeat
=
1
,
const
int
repeat
=
1
,
const
bool
is_combined
=
false
)
{
const
bool
is_combined
=
false
)
{
// 1. Define place, executor, scope
// 1. Define place, executor, scope
auto
place
=
Place
();
auto
place
=
Place
();
auto
executor
=
paddle
::
framework
::
Executor
(
place
);
auto
executor
=
paddle
::
framework
::
Executor
(
place
);
...
@@ -132,11 +131,9 @@ void TestInference(const std::string& dirname,
...
@@ -132,11 +131,9 @@ void TestInference(const std::string& dirname,
// `fluid.io.save_inference_model`.
// `fluid.io.save_inference_model`.
std
::
string
prog_filename
=
"__model_combined__"
;
std
::
string
prog_filename
=
"__model_combined__"
;
std
::
string
param_filename
=
"__params_combined__"
;
std
::
string
param_filename
=
"__params_combined__"
;
inference_program
=
inference_program
=
paddle
::
inference
::
Load
(
paddle
::
inference
::
Load
(
executor
,
executor
,
*
scope
,
dirname
+
"/"
+
prog_filename
,
*
scope
,
dirname
+
"/"
+
param_filename
);
dirname
+
"/"
+
prog_filename
,
dirname
+
"/"
+
param_filename
);
}
else
{
}
else
{
// Parameters are saved in separate files sited in the specified
// Parameters are saved in separate files sited in the specified
// `dirname`.
// `dirname`.
...
@@ -173,8 +170,8 @@ void TestInference(const std::string& dirname,
...
@@ -173,8 +170,8 @@ void TestInference(const std::string& dirname,
std
::
unique_ptr
<
paddle
::
framework
::
ExecutorPrepareContext
>
ctx
;
std
::
unique_ptr
<
paddle
::
framework
::
ExecutorPrepareContext
>
ctx
;
if
(
PrepareContext
)
{
if
(
PrepareContext
)
{
ctx
=
executor
.
Prepare
(
*
inference_program
,
0
);
ctx
=
executor
.
Prepare
(
*
inference_program
,
0
);
executor
.
RunPreparedContext
(
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
feed_targets
,
ctx
.
get
(),
scope
,
feed_targets
,
fetch_targets
);
fetch_targets
);
}
else
{
}
else
{
executor
.
Run
(
*
inference_program
,
scope
,
feed_targets
,
fetch_targets
);
executor
.
Run
(
*
inference_program
,
scope
,
feed_targets
,
fetch_targets
);
}
}
...
@@ -191,8 +188,8 @@ void TestInference(const std::string& dirname,
...
@@ -191,8 +188,8 @@ void TestInference(const std::string& dirname,
if
(
PrepareContext
)
{
if
(
PrepareContext
)
{
// Note: if you changed the inference_program, you need to call
// Note: if you changed the inference_program, you need to call
// executor.Prepare() again to get a new ExecutorPrepareContext.
// executor.Prepare() again to get a new ExecutorPrepareContext.
executor
.
RunPreparedContext
(
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
feed_targets
,
ctx
.
get
(),
scope
,
feed_targets
,
fetch_targets
);
fetch_targets
);
}
else
{
}
else
{
executor
.
Run
(
*
inference_program
,
scope
,
feed_targets
,
fetch_targets
);
executor
.
Run
(
*
inference_program
,
scope
,
feed_targets
,
fetch_targets
);
}
}
...
...
paddle/fluid/memory/.clang-format
已删除
100644 → 0
浏览文件 @
3a825782
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
paddle/fluid/memory/memory.cc
浏览文件 @
7b40f7ce
...
@@ -95,7 +95,7 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
...
@@ -95,7 +95,7 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
size_t
avail
,
total
;
platform
::
GpuMemoryUsage
(
avail
,
total
);
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
size
<<
" bytes in GPU "
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
size
<<
" bytes in GPU "
<<
place
.
device
<<
", available "
<<
avail
<<
" bytes"
;
<<
place
.
device
<<
", available "
<<
avail
<<
" bytes"
;
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"total "
<<
total
;
...
...
paddle/fluid/memory/memory_test.cc
浏览文件 @
7b40f7ce
...
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
...
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/memory/memory.h"
#include <unordered_map>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include <gtest/gtest.h>
#include <unordered_map>
inline
bool
is_aligned
(
void
const
*
p
)
{
inline
bool
is_aligned
(
void
const
*
p
)
{
return
0
==
(
reinterpret_cast
<
uintptr_t
>
(
p
)
&
0x3
);
return
0
==
(
reinterpret_cast
<
uintptr_t
>
(
p
)
&
0x3
);
}
}
...
...
paddle/fluid/operators/.clang-format
已删除
100644 → 0
浏览文件 @
3a825782
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -3,8 +3,8 @@ string(REPLACE "_mkldnn" "" GENERAL_OPS "${GENERAL_OPS}")
...
@@ -3,8 +3,8 @@ string(REPLACE "_mkldnn" "" GENERAL_OPS "${GENERAL_OPS}")
string
(
REPLACE
".cc"
""
GENERAL_OPS
"
${
GENERAL_OPS
}
"
)
string
(
REPLACE
".cc"
""
GENERAL_OPS
"
${
GENERAL_OPS
}
"
)
list
(
REMOVE_DUPLICATES GENERAL_OPS
)
list
(
REMOVE_DUPLICATES GENERAL_OPS
)
set
(
DEPS_OPS
""
)
set
(
DEPS_OPS
""
)
set
(
pybind_file
${
PADDLE_
SOURCE
_DIR
}
/paddle/fluid/pybind/pybind.h
)
set
(
pybind_file
${
PADDLE_
BINARY
_DIR
}
/paddle/fluid/pybind/pybind.h
)
file
(
WRITE
${
pybind_file
}
"// Generated by the paddle/operator/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
WRITE
${
pybind_file
}
"// Generated by the paddle/
fluid/
operator/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
function
(
op_library TARGET
)
function
(
op_library TARGET
)
# op_library is a function to create op library. The interface is same as
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# cc_library. But it handle split GPU/CPU code and link some common library
...
...
paddle/fluid/operators/conv_cudnn_op.cu.cc
浏览文件 @
7b40f7ce
...
@@ -128,10 +128,32 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
...
@@ -128,10 +128,32 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
cudnn_output_desc
,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
cudnn_output_desc
,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
,
workspace_size_limit
,
&
algo
));
workspace_size_limit
,
&
algo
));
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
// Tensor core is supported since the volta GPU and
// is only enabled when input and filter data are float16
if
(
dev_ctx
.
GetComputeCapability
()
>=
70
&&
std
::
type_index
(
typeid
(
T
))
==
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnSetConvolutionMathType
(
cudnn_conv_desc
,
CUDNN_TENSOR_OP_MATH
));
// Currently tensor core is only enabled using this algo
algo
=
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
;
}
else
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnSetConvolutionMathType
(
cudnn_conv_desc
,
CUDNN_DEFAULT_MATH
));
}
#endif
// get workspace size able to allocate
// get workspace size able to allocate
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnGetConvolutionForwardWorkspaceSize
(
PADDLE_ENFORCE
(
platform
::
dynload
::
cudnnGetConvolutionForwardWorkspaceSize
(
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
handle
,
cudnn_input_desc
,
cudnn_filter_desc
,
cudnn_conv_desc
,
cudnn_output_desc
,
algo
,
&
workspace_size_in_bytes
));
cudnn_output_desc
,
algo
,
&
workspace_size_in_bytes
));
// It is possible for float16 on Volta GPU to allocate more memory than
// the limit because the algo is overrided to use tensor core.
PADDLE_ENFORCE_LE
(
workspace_size_in_bytes
,
workspace_size_limit
,
"workspace_size to be allocated exceeds the limit"
);
// Allocate on GPU memory
// Allocate on GPU memory
platform
::
CUDAPlace
gpu
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
platform
::
CUDAPlace
gpu
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
cudnn_workspace
=
paddle
::
memory
::
Alloc
(
gpu
,
workspace_size_in_bytes
);
cudnn_workspace
=
paddle
::
memory
::
Alloc
(
gpu
,
workspace_size_in_bytes
);
...
...
paddle/fluid/operators/fc_mkldnn_op.cc
浏览文件 @
7b40f7ce
...
@@ -27,8 +27,8 @@ template <typename T>
...
@@ -27,8 +27,8 @@ template <typename T>
class
MKLDNNMD
{
class
MKLDNNMD
{
public:
public:
explicit
MKLDNNMD
(
const
T
*
in
,
const
T
*
w
,
bool
bias
)
explicit
MKLDNNMD
(
const
T
*
in
,
const
T
*
w
,
bool
bias
)
:
in
{
paddle
::
framework
::
vectorize2int
(
in
->
dims
())}
,
:
in
(
paddle
::
framework
::
vectorize2int
(
in
->
dims
()))
,
w
{
paddle
::
framework
::
vectorize2int
(
w
->
dims
())}
{
w
(
paddle
::
framework
::
vectorize2int
(
w
->
dims
()))
{
with_bias_
=
bias
;
with_bias_
=
bias
;
}
}
...
@@ -78,7 +78,7 @@ class MKLDNNMD {
...
@@ -78,7 +78,7 @@ class MKLDNNMD {
class
MKLDNNMemory
{
class
MKLDNNMemory
{
public:
public:
MKLDNNMemory
(
MKLDNNMD
<
Tensor
>*
t
,
const
mkldnn
::
engine
&
e
)
MKLDNNMemory
(
MKLDNNMD
<
Tensor
>*
t
,
const
mkldnn
::
engine
&
e
)
:
md_
{
t
},
engine_
{
e
}
{}
:
md_
(
t
),
engine_
(
e
)
{}
virtual
~
MKLDNNMemory
()
=
default
;
virtual
~
MKLDNNMemory
()
=
default
;
template
<
typename
Output
>
template
<
typename
Output
>
...
...
paddle/fluid/operators/lookup_table_op.cc
浏览文件 @
7b40f7ce
...
@@ -18,22 +18,6 @@ limitations under the License. */
...
@@ -18,22 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
static
inline
framework
::
OpKernelType
ExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
{
auto
*
table_var
=
ctx
.
InputVar
(
"W"
);
if
(
table_var
->
IsType
<
LoDTensor
>
())
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
table_var
->
Get
<
LoDTensor
>
().
type
()),
ctx
.
device_context
());
}
else
if
(
table_var
->
IsType
<
SelectedRows
>
())
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
table_var
->
Get
<
SelectedRows
>
().
value
().
type
()),
ctx
.
device_context
());
}
else
{
PADDLE_THROW
(
"W should be LoDTensor or SelectedRows"
);
}
}
class
LookupTableOp
:
public
framework
::
OperatorWithKernel
{
class
LookupTableOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -67,7 +51,8 @@ class LookupTableOp : public framework::OperatorWithKernel {
...
@@ -67,7 +51,8 @@ class LookupTableOp : public framework::OperatorWithKernel {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
ExpectedKernelType
(
ctx
);
auto
data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"W"
));
return
framework
::
OpKernelType
(
data_type
,
ctx
.
device_context
());
}
}
};
};
...
@@ -138,7 +123,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
...
@@ -138,7 +123,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
ExpectedKernelType
(
ctx
);
auto
data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"W"
));
return
framework
::
OpKernelType
(
data_type
,
ctx
.
device_context
());
}
}
};
};
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
7b40f7ce
...
@@ -30,13 +30,7 @@ using LoDTensor = framework::LoDTensor;
...
@@ -30,13 +30,7 @@ using LoDTensor = framework::LoDTensor;
using
SelectedRows
=
framework
::
SelectedRows
;
using
SelectedRows
=
framework
::
SelectedRows
;
using
DDim
=
framework
::
DDim
;
using
DDim
=
framework
::
DDim
;
static
constexpr
int64_t
kNoPadding
=
-
1
;
constexpr
int64_t
kNoPadding
=
-
1
;
inline
size_t
getIndex
(
const
std
::
vector
<
int64_t
>
&
rows
,
int64_t
value
)
{
auto
it
=
std
::
find
(
rows
.
begin
(),
rows
.
end
(),
value
);
PADDLE_ENFORCE
(
it
!=
rows
.
end
(),
"id should be in rows"
);
return
static_cast
<
size_t
>
(
std
::
distance
(
rows
.
begin
(),
it
));
}
template
<
typename
T
>
template
<
typename
T
>
class
LookupTableKernel
:
public
framework
::
OpKernel
<
T
>
{
class
LookupTableKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -55,7 +49,9 @@ class LookupTableKernel : public framework::OpKernel<T> {
...
@@ -55,7 +49,9 @@ class LookupTableKernel : public framework::OpKernel<T> {
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"W"
);
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"W"
);
table_dim
=
table_t
->
value
().
dims
();
table_dim
=
table_t
->
value
().
dims
();
}
else
{
}
else
{
PADDLE_THROW
(
"table only support LoDTensor and SelectedRows"
);
PADDLE_THROW
(
"The parameter W of a LookupTable "
"must be either LoDTensor or SelectedRows"
);
}
}
int64_t
*
ids
;
int64_t
*
ids
;
...
@@ -107,7 +103,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
...
@@ -107,7 +103,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
}
else
{
}
else
{
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
auto
id_index
=
getIndex
(
table_t
.
rows
(),
ids
[
i
]);
auto
id_index
=
table_t
.
index
(
ids
[
i
]);
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
row_width
*
sizeof
(
T
));
row_width
*
sizeof
(
T
));
}
}
...
@@ -128,7 +124,9 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
...
@@ -128,7 +124,9 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"W"
);
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"W"
);
table_dim
=
table_t
->
value
().
dims
();
table_dim
=
table_t
->
value
().
dims
();
}
else
{
}
else
{
PADDLE_THROW
(
"table only support LoDTensor and SelectedRows"
);
PADDLE_THROW
(
"The parameter W of a LookupTable "
"must be either LoDTensor or SelectedRows"
);
}
}
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
...
...
paddle/fluid/operators/math/math_function.cu
浏览文件 @
7b40f7ce
...
@@ -39,18 +39,33 @@ void gemm<platform::CUDADeviceContext, float16>(
...
@@ -39,18 +39,33 @@ void gemm<platform::CUDADeviceContext, float16>(
cublasOperation_t
cuTransB
=
cublasOperation_t
cuTransB
=
(
transB
==
CblasNoTrans
)
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
(
transB
==
CblasNoTrans
)
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
const
half
h_alpha
=
static_cast
<
const
half
>
(
alpha
);
float
h_alpha
=
static_cast
<
float
>
(
alpha
);
const
half
h_beta
=
static_cast
<
const
half
>
(
beta
);
float
h_beta
=
static_cast
<
float
>
(
beta
);
const
half
*
h_A
=
reinterpret_cast
<
const
half
*>
(
A
);
const
half
*
h_B
=
reinterpret_cast
<
const
half
*>
(
B
);
half
*
h_C
=
reinterpret_cast
<
half
*>
(
C
);
// TODO(kexinzhao): add processing code for compute capability < 53 case
// TODO(kexinzhao): add processing code for compute capability < 53 case
PADDLE_ENFORCE_GE
(
context
.
GetComputeCapability
(),
53
,
PADDLE_ENFORCE_GE
(
context
.
GetComputeCapability
(),
53
,
"cublas Hgemm requires GPU compute capability >= 53"
);
"cublas fp16 gemm requires GPU compute capability >= 53"
);
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasHgemm
(
context
.
cublas_handle
(),
cuTransB
,
cuTransA
,
N
,
M
,
K
,
&
h_alpha
,
h_B
,
ldb
,
cublasGemmAlgo_t
algo
=
CUBLAS_GEMM_DFALT
;
h_A
,
lda
,
&
h_beta
,
h_C
,
N
));
#if CUDA_VERSION >= 9000
if
(
context
.
GetComputeCapability
()
>=
70
)
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasSetMathMode
(
context
.
cublas_handle
(),
CUBLAS_TENSOR_OP_MATH
));
algo
=
CUBLAS_GEMM_DFALT_TENSOR_OP
;
}
else
{
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasSetMathMode
(
context
.
cublas_handle
(),
CUBLAS_DEFAULT_MATH
));
}
#endif
// cublasHgemm does true FP16 computation which is slow for non-Volta
// GPUs. So use cublasGemmEx instead which does pesudo FP16 computation:
// input/output in fp16, computation in fp32, which can also be accelerated
// using tensor cores in volta GPUs.
PADDLE_ENFORCE
(
platform
::
dynload
::
cublasGemmEx
(
context
.
cublas_handle
(),
cuTransB
,
cuTransA
,
N
,
M
,
K
,
&
h_alpha
,
B
,
CUDA_R_16F
,
ldb
,
A
,
CUDA_R_16F
,
lda
,
&
h_beta
,
C
,
CUDA_R_16F
,
N
,
CUDA_R_32F
,
algo
));
}
}
template
<
>
template
<
>
...
...
paddle/fluid/operators/math/softmax.cu
浏览文件 @
7b40f7ce
...
@@ -14,6 +14,8 @@ limitations under the License. */
...
@@ -14,6 +14,8 @@ limitations under the License. */
#define EIGEN_USE_GPU
#define EIGEN_USE_GPU
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
...
@@ -95,6 +97,7 @@ template class SoftmaxCUDNNFunctor<double>;
...
@@ -95,6 +97,7 @@ template class SoftmaxCUDNNFunctor<double>;
template
class
SoftmaxGradCUDNNFunctor
<
float
>;
template
class
SoftmaxGradCUDNNFunctor
<
float
>;
template
class
SoftmaxGradCUDNNFunctor
<
double
>;
template
class
SoftmaxGradCUDNNFunctor
<
double
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
platform
::
float16
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxFunctor
<
platform
::
CUDADeviceContext
,
double
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
class
SoftmaxGradFunctor
<
platform
::
CUDADeviceContext
,
float
>;
...
...
paddle/fluid/operators/math/softmax_impl.h
浏览文件 @
7b40f7ce
...
@@ -27,7 +27,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
...
@@ -27,7 +27,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template
<
typename
T
>
template
<
typename
T
>
struct
ValueClip
{
struct
ValueClip
{
HOSTDEVICE
T
operator
()(
const
T
&
x
)
const
{
HOSTDEVICE
T
operator
()(
const
T
&
x
)
const
{
const
T
kThreshold
=
-
64.
;
const
T
kThreshold
=
static_cast
<
T
>
(
-
64.
)
;
return
x
<
kThreshold
?
kThreshold
:
x
;
return
x
<
kThreshold
?
kThreshold
:
x
;
}
}
};
};
...
...
paddle/fluid/operators/prior_box_op.cc
浏览文件 @
7b40f7ce
...
@@ -73,7 +73,7 @@ class PriorBoxOp : public framework::OperatorWithKernel {
...
@@ -73,7 +73,7 @@ class PriorBoxOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"Input"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"Input"
)
->
type
()),
platform
::
CPUPlace
());
ctx
.
device_context
());
}
}
};
};
...
@@ -171,6 +171,5 @@ namespace ops = paddle::operators;
...
@@ -171,6 +171,5 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
prior_box
,
ops
::
PriorBoxOp
,
ops
::
PriorBoxOpMaker
,
REGISTER_OPERATOR
(
prior_box
,
ops
::
PriorBoxOp
,
ops
::
PriorBoxOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
);
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
prior_box
,
ops
::
PriorBoxOpKernel
<
float
>
,
prior_box
,
ops
::
PriorBoxOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
PriorBoxOpKernel
<
double
>
);
ops
::
PriorBoxOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/fluid/operators/prior_box_op.cu
0 → 100644
浏览文件 @
7b40f7ce
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/prior_box_op.h"
namespace
paddle
{
namespace
operators
{
// Clamps `in` to the closed interval [0, 1].
//
// The bounds are cast to T so that the float instantiation stays in single
// precision; with bare `0.` / `1.` literals the comparison is carried out on
// a mixed float/double pair and the result converted back to T.
template <typename T>
__device__ inline T clip(T in) {
  const T lower = static_cast<T>(0.);
  const T upper = static_cast<T>(1.);
  return min(max(in, lower), upper);
}
// Fills `out` with prior boxes, four values per box in the order
// xmin, ymin, xmax, ymax, each divided by the image width or height.
//
// The flat box index i is decomposed as (h * width + w) * num_priors + p:
// h and w select the feature-map cell and p the prior inside that cell.
// Priors are grouped per min_size entry: `as_num` aspect-ratio boxes
// followed, only when `max_sizes` is non-null, by one square box whose
// half-side is sqrt(min_size * max_size) / 2.
//
// Boxes are visited in a grid-stride loop, so every box below
// height * width * num_priors is written regardless of the 1-D launch size.
//
//   out                      output buffer, 4 values per box
//   aspect_ratios            `as_num` aspect ratios
//   height, width            feature-map extent used to split the box index
//   im_height, im_width      divisors that normalise the coordinates
//   as_num                   number of aspect ratios
//   offset                   added to the cell index before scaling
//   step_width, step_height  scale from cell index to box centre
//   min_sizes                `min_num` minimum box sizes
//   max_sizes                maximum box sizes indexed like min_sizes, or null
//   min_num                  number of min_sizes entries
//   is_clip                  when true every coordinate goes through clip<T>()
template <typename T>
__global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
                            const int width, const int im_height,
                            const int im_width, const int as_num,
                            const T offset, const T step_width,
                            const T step_height, const T* min_sizes,
                            const T* max_sizes, const int min_num,
                            bool is_clip) {
  // Number of priors produced for each min_size entry.
  const int per_min = max_sizes ? as_num + 1 : as_num;
  const int num_priors = per_min * min_num;
  const int box_num = height * width * num_priors;
  const int stride = blockDim.x * gridDim.x;

  int i = blockIdx.x * blockDim.x + threadIdx.x;
  while (i < box_num) {
    // Split the flat index into (row, column, prior-in-cell).
    const int cell = i / num_priors;
    const int h = cell / width;
    const int w = cell % width;
    const int p = i % num_priors;

    // Which min_size group the prior belongs to, and its slot in the group.
    const int m = p / per_min;
    const int s = p % per_min;

    const T cx = (w + offset) * step_width;
    const T cy = (h + offset) * step_height;
    const T min_size = min_sizes[m];

    T bw, bh;
    if (s < as_num) {
      // Aspect-ratio prior built from min_size.
      const T root = sqrt(aspect_ratios[s]);
      bw = min_size * root / 2.;
      bh = min_size / root / 2.;
    } else {
      // Extra square prior; this slot only exists when max_sizes is non-null
      // (otherwise s is always below as_num).
      const T max_size = max_sizes[m];
      bw = sqrt(min_size * max_size) / 2.;
      bh = bw;
    }

    const T xmin = (cx - bw) / im_width;
    const T ymin = (cy - bh) / im_height;
    const T xmax = (cx + bw) / im_width;
    const T ymax = (cy + bh) / im_height;

    T* box = out + i * 4;
    if (is_clip) {
      box[0] = clip<T>(xmin);
      box[1] = clip<T>(ymin);
      box[2] = clip<T>(xmax);
      box[3] = clip<T>(ymax);
    } else {
      box[0] = xmin;
      box[1] = ymin;
      box[2] = xmax;
      box[3] = ymax;
    }

    i += stride;
  }
}
// Tiles the `vnum` values of `var` across `out`: out[i] = var[i % vnum] for
// every i in [0, num). Elements are visited in a grid-stride loop.
template <typename T>
__global__ void SetVariance(T* out, const T* var, const int vnum,
                            const int num) {
  const int stride = blockDim.x * gridDim.x;
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  while (idx < num) {
    out[idx] = var[idx % vnum];
    idx += stride;
  }
}
// CUDA kernel class of the prior_box operator.
//
// Reads the "Input" and "Image" tensors plus the box attributes, then
// launches GenPriorBox to fill "Boxes" and SetVariance to fill "Variances".
template <typename T>
class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
 public:
  // Computes the "Boxes" and "Variances" outputs on the CUDA stream of the
  // execution context's CUDADeviceContext.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<paddle::framework::Tensor>("Input");
    auto* image = ctx.Input<paddle::framework::Tensor>("Image");
    auto* boxes = ctx.Output<paddle::framework::Tensor>("Boxes");
    auto* vars = ctx.Output<paddle::framework::Tensor>("Variances");

    auto min_sizes = ctx.Attr<std::vector<float>>("min_sizes");
    auto max_sizes = ctx.Attr<std::vector<float>>("max_sizes");
    auto input_aspect_ratio = ctx.Attr<std::vector<float>>("aspect_ratios");
    auto variances = ctx.Attr<std::vector<float>>("variances");
    auto flip = ctx.Attr<bool>("flip");
    // Named is_clip so the local does not shadow the clip<T>() device helper.
    auto is_clip = ctx.Attr<bool>("clip");

    std::vector<float> aspect_ratios;
    ExpandAspectRatios(input_aspect_ratio, flip, aspect_ratios);

    T step_w = static_cast<T>(ctx.Attr<float>("step_w"));
    T step_h = static_cast<T>(ctx.Attr<float>("step_h"));
    T offset = static_cast<T>(ctx.Attr<float>("offset"));

    auto im_width = image->dims()[3];
    auto im_height = image->dims()[2];

    auto width = input->dims()[3];
    auto height = input->dims()[2];

    // A zero step attribute means "derive the step from the image and
    // feature-map extents".
    T step_width, step_height;
    if (step_w == 0 || step_h == 0) {
      step_width = static_cast<T>(im_width) / width;
      step_height = static_cast<T>(im_height) / height;
    } else {
      step_width = step_w;
      step_height = step_h;
    }

    int num_priors = aspect_ratios.size() * min_sizes.size();
    if (max_sizes.size() > 0) {
      num_priors += max_sizes.size();
    }
    int min_num = static_cast<int>(min_sizes.size());
    int box_num = width * height * num_priors;

    int block = 512;
    int grid = (box_num + block - 1) / block;

    auto stream =
        ctx.template device_context<platform::CUDADeviceContext>().stream();

    boxes->mutable_data<T>(ctx.GetPlace());
    vars->mutable_data<T>(ctx.GetPlace());

    // The attributes arrive as std::vector<float>, but the kernels read them
    // through `const T*`. Convert to T on the host first so the uploaded
    // tensors really hold T elements; uploading the float vectors directly
    // and then calling data<T>() is an element-type mismatch when T is
    // double. For T = float the conversion is a plain copy.
    // NOTE(review): Tensor::data<T>() is expected to reject the mismatched
    // type at run time - confirm against the framework's Tensor
    // implementation.
    std::vector<T> aspect_ratios_t(aspect_ratios.begin(), aspect_ratios.end());
    std::vector<T> min_sizes_t(min_sizes.begin(), min_sizes.end());
    std::vector<T> max_sizes_t(max_sizes.begin(), max_sizes.end());
    std::vector<T> variances_t(variances.begin(), variances.end());

    framework::Tensor ratio_tensor;
    framework::TensorFromVector(aspect_ratios_t, ctx.device_context(),
                                &ratio_tensor);

    framework::Tensor min_tensor;
    framework::TensorFromVector(min_sizes_t, ctx.device_context(),
                                &min_tensor);

    // max_data stays null when no max sizes were given; GenPriorBox uses the
    // null pointer to skip the extra square prior.
    T* max_data = nullptr;
    framework::Tensor max_tensor;
    if (max_sizes_t.size() > 0) {
      framework::TensorFromVector(max_sizes_t, ctx.device_context(),
                                  &max_tensor);
      max_data = max_tensor.data<T>();
    }

    GenPriorBox<T><<<grid, block, 0, stream>>>(
        boxes->data<T>(), ratio_tensor.data<T>(), height, width, im_height,
        im_width, aspect_ratios.size(), offset, step_width, step_height,
        min_tensor.data<T>(), max_data, min_num, is_clip);

    framework::Tensor var_tensor;
    framework::TensorFromVector(variances_t, ctx.device_context(),
                                &var_tensor);
    // One thread-loop element per output value: 4 values per box.
    grid = (box_num * 4 + block - 1) / block;
    SetVariance<T><<<grid, block, 0, stream>>>(vars->data<T>(),
                                               var_tensor.data<T>(),
                                               variances.size(), box_num * 4);
  }
};
// class PriorBoxOpCUDAKernel
}
// namespace operators
}
// namespace paddle
// Short alias for paddle::operators, then registration of the CUDA kernels
// of the prior_box operator for the float and double element types.
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
prior_box
,
ops
::
PriorBoxOpCUDAKernel
<
float
>
,
ops
::
PriorBoxOpCUDAKernel
<
double
>
);
paddle/fluid/operators/prior_box_op.h
浏览文件 @
7b40f7ce
...
@@ -51,7 +51,7 @@ struct ClipFunctor {
...
@@ -51,7 +51,7 @@ struct ClipFunctor {
}
}
};
};
template
<
typename
Place
,
typename
T
>
template
<
typename
T
>
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
PriorBoxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
...
@@ -106,49 +106,24 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -106,49 +106,24 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
int
idx
=
0
;
int
idx
=
0
;
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
auto
min_size
=
min_sizes
[
s
];
auto
min_size
=
min_sizes
[
s
];
// first prior: aspect_ratio = 1, size = min_size
// priors with different aspect ratios
box_width
=
box_height
=
min_size
/
2.
;
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
// xmin
float
ar
=
aspect_ratios
[
r
];
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
// ymin
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
// xmax
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
// ymax
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// second prior: aspect_ratio = 1,
// size = sqrt(min_size * max_size)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
// xmin
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
// ymin
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
// xmax
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
// ymax
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
idx
++
;
}
}
if
(
max_sizes
.
size
()
>
0
)
{
// rest of priors
auto
max_size
=
max_sizes
[
s
];
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
// square prior with size sqrt(minSize * maxSize)
float
ar
=
aspect_ratios
[
r
];
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
if
(
fabs
(
ar
-
1.
)
<
1e-6
)
{
continue
;
}
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
// xmin
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
// ymin
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
// xmax
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
// ymax
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
idx
++
;
}
}
...
...
paddle/fluid/operators/sgd_op.cc
浏览文件 @
7b40f7ce
...
@@ -43,9 +43,8 @@ class SGDOp : public framework::OperatorWithKernel {
...
@@ -43,9 +43,8 @@ class SGDOp : public framework::OperatorWithKernel {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
auto
data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"Param"
));
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Param"
)
->
type
()),
return
framework
::
OpKernelType
(
data_type
,
ctx
.
device_context
());
ctx
.
GetPlace
());
}
}
};
};
...
@@ -53,10 +52,12 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -53,10 +52,12 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
SGDOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
SGDOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Param"
,
"(Tensor) Input parameter"
);
AddInput
(
"Param"
,
"(Tensor
or SelectedRows
) Input parameter"
);
AddInput
(
"LearningRate"
,
"(Tensor) Learning rate of SGD"
);
AddInput
(
"LearningRate"
,
"(Tensor) Learning rate of SGD"
);
AddInput
(
"Grad"
,
"(Tensor) Input gradient"
);
AddInput
(
"Grad"
,
"(Tensor or SelectedRows) Input gradient"
);
AddOutput
(
"ParamOut"
,
"(Tensor) Output parameter"
);
AddOutput
(
"ParamOut"
,
"(Tensor or SelectedRows, same with Param) "
"Output parameter, should share the same memory with Param"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
SGD operator
SGD operator
...
...
paddle/fluid/operators/sgd_op.h
浏览文件 @
7b40f7ce
...
@@ -23,60 +23,97 @@ namespace operators {
...
@@ -23,60 +23,97 @@ namespace operators {
template
<
typename
T
>
template
<
typename
T
>
class
SGDOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
SGDOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
const
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
// Actually, all tensors are LoDTensor except SelectedRows.
if
(
param_var
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
const
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
// Actually, all tensors are LoDTensor except SelectedRows.
auto
p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
o
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
const
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
auto
*
lr
=
learning_rate
->
data
<
T
>
();
auto
p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
o
=
p
-
lr
[
0
]
*
g
;
auto
g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
}
else
if
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
o
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
auto
*
lr
=
learning_rate
->
data
<
T
>
();
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
o
=
p
-
lr
[
0
]
*
g
;
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
}
else
if
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
const
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
// for distributed training, a sparse var may be empty,
// just skip updating.
if
(
grad
->
rows
().
size
()
==
0
)
{
return
;
}
auto
grad_height
=
grad
->
height
();
auto
out_dims
=
param_out
->
dims
();
PADDLE_ENFORCE_EQ
(
grad_height
,
out_dims
[
0
]);
auto
&
grad_value
=
grad
->
value
();
auto
&
grad_rows
=
grad
->
rows
();
size_t
grad_row_numel
=
grad_value
.
numel
()
/
grad_rows
.
size
();
PADDLE_ENFORCE_EQ
(
grad_row_numel
,
param_out
->
numel
()
/
grad_height
);
auto
*
grad_data
=
grad_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
auto
*
lr
=
learning_rate
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
grad_rows
.
size
();
i
++
)
{
PADDLE_ENFORCE
(
grad_rows
[
i
]
<
grad_height
,
"Input rows index should less than height"
);
for
(
int64_t
j
=
0
;
j
<
grad_row_numel
;
j
++
)
{
out_data
[
grad_rows
[
i
]
*
grad_row_numel
+
j
]
-=
lr
[
0
]
*
grad_data
[
i
*
grad_row_numel
+
j
];
}
}
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
}
}
else
if
(
param_var
->
IsType
<
framework
::
SelectedRows
>
())
{
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
(),
"when param "
"is SelectedRows, gradient should also be SelectedRows"
);
const
auto
&
param
=
param_var
->
Get
<
framework
::
SelectedRows
>
();
auto
*
param_out
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"ParamOut"
);
const
auto
&
grad
=
grad_var
->
Get
<
framework
::
SelectedRows
>
();
// for distributed training, a sparse var may be empty,
// for distributed training, a sparse var may be empty,
// just skip updating.
// just skip updating.
if
(
grad
->
rows
().
size
()
==
0
)
{
if
(
grad
.
rows
().
size
()
==
0
)
{
return
;
return
;
}
}
auto
in_height
=
grad
->
height
();
size_t
param_row_width
=
param
.
value
().
numel
()
/
param
.
rows
().
size
();
auto
out_dims
=
param_out
->
dims
();
size_t
grad_row_width
=
grad
.
value
().
numel
()
/
grad
.
rows
().
size
();
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
param_row_width
,
grad_row_width
,
"param_row should have the same size with grad_row"
);
auto
&
in_value
=
grad
->
value
();
auto
&
in_rows
=
grad
->
rows
();
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
const
auto
*
lr
=
learning_rate
->
data
<
T
>
();
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
const
auto
*
grad_data
=
grad
.
value
().
data
<
T
>
();
auto
*
out_data
=
param_out
->
mutable_value
()
->
data
<
T
>
();
auto
*
in_data
=
in_value
.
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
grad
.
rows
().
size
();
i
++
)
{
auto
*
out_data
=
param_out
->
data
<
T
>
();
PADDLE_ENFORCE
(
grad
.
rows
()[
i
]
<
grad
.
height
(),
auto
*
lr
=
learning_rate
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
in_rows
.
size
();
i
++
)
{
PADDLE_ENFORCE
(
in_rows
[
i
]
<
in_height
,
"Input rows index should less than height"
);
"Input rows index should less than height"
);
for
(
int64_t
j
=
0
;
j
<
in_row_numel
;
j
++
)
{
int64_t
id_index
=
param
.
index
(
grad
.
rows
()[
i
]);
out_data
[
in_rows
[
i
]
*
in_row_numel
+
j
]
-=
for
(
int64_t
j
=
0
;
j
<
grad_row_width
;
j
++
)
{
lr
[
0
]
*
in_data
[
i
*
in_row_numel
+
j
];
out_data
[
id_index
*
grad_row_width
+
j
]
-=
lr
[
0
]
*
grad_data
[
i
*
grad_row_width
+
j
];
}
}
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of
Grad
"
);
PADDLE_THROW
(
"Unsupported Variable Type of
Parameter
"
);
}
}
}
}
};
};
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
7b40f7ce
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/operators/softmax_op.h"
#include <string>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
#endif
...
@@ -20,6 +23,7 @@ limitations under the License. */
...
@@ -20,6 +23,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -60,8 +64,8 @@ class SoftmaxOp : public framework::OperatorWithKernel {
...
@@ -60,8 +64,8 @@ class SoftmaxOp : public framework::OperatorWithKernel {
auto
input_data_type
=
auto
input_data_type
=
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
());
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
());
if
(
input_data_type
==
framework
::
proto
::
VarType
::
FP16
)
{
if
(
input_data_type
==
framework
::
proto
::
VarType
::
FP16
)
{
PADDLE_ENFORCE
_EQ
(
library_
,
framework
::
LibraryType
::
kCUDNN
,
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
())
,
"float16 can only be used when CUDNN is used
"
);
"float16 can only be used on GPU place
"
);
}
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
...
@@ -70,6 +74,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
...
@@ -70,6 +74,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
library_
);
library_
);
}
}
};
};
class
SoftmaxOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
SoftmaxOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
SoftmaxOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
SoftmaxOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
...
...
paddle/fluid/operators/softmax_op.cu.cc
浏览文件 @
7b40f7ce
...
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
...
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
softmax
,
ops
::
SoftmaxKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
softmax_grad
,
softmax
,
ops
::
SoftmaxKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
SoftmaxGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
ops
::
SoftmaxKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
softmax_grad
,
ops
::
SoftmaxGradKernel
<
plat
::
CUDADeviceContext
,
float
>
);
paddle/fluid/platform/.clang-format
已删除
100644 → 0
浏览文件 @
3a825782
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -6,8 +6,8 @@ add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch _
...
@@ -6,8 +6,8 @@ add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch _
add_dependencies
(
profiler_py_proto profiler_py_proto_init
)
add_dependencies
(
profiler_py_proto profiler_py_proto_init
)
add_custom_command
(
TARGET profiler_py_proto POST_BUILD
add_custom_command
(
TARGET profiler_py_proto POST_BUILD
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND cp *.py
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND cp *.py
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/proto/profiler
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
...
...
paddle/fluid/platform/cpu_info_test.cc
浏览文件 @
7b40f7ce
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/string/printf.h"
#include <ostream>
#include <ostream>
#include <sstream>
#include <sstream>
...
@@ -20,6 +19,7 @@
...
@@ -20,6 +19,7 @@
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/string/printf.h"
DECLARE_double
(
fraction_of_cpu_memory_to_use
);
DECLARE_double
(
fraction_of_cpu_memory_to_use
);
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
7b40f7ce
...
@@ -257,9 +257,11 @@ class ScopedConvolutionDescriptor {
...
@@ -257,9 +257,11 @@ class ScopedConvolutionDescriptor {
}
}
#endif
#endif
cudnnDataType_t
compute_type
=
(
type
==
CUDNN_DATA_DOUBLE
)
?
CUDNN_DATA_DOUBLE
:
CUDNN_DATA_FLOAT
;
PADDLE_ENFORCE
(
dynload
::
cudnnSetConvolutionNdDescriptor
(
PADDLE_ENFORCE
(
dynload
::
cudnnSetConvolutionNdDescriptor
(
desc_
,
pads
.
size
(),
pads
.
data
(),
strides
.
data
(),
dilations
.
data
(),
desc_
,
pads
.
size
(),
pads
.
data
(),
strides
.
data
(),
dilations
.
data
(),
CUDNN_CROSS_CORRELATION
,
type
));
CUDNN_CROSS_CORRELATION
,
compute_
type
));
return
desc_
;
return
desc_
;
}
}
...
...
paddle/fluid/platform/dynload/cublas.cc
浏览文件 @
7b40f7ce
...
@@ -24,6 +24,10 @@ void *cublas_dso_handle = nullptr;
...
@@ -24,6 +24,10 @@ void *cublas_dso_handle = nullptr;
CUBLAS_BLAS_ROUTINE_EACH
(
DEFINE_WRAP
);
CUBLAS_BLAS_ROUTINE_EACH
(
DEFINE_WRAP
);
#ifdef CUBLAS_BLAS_ROUTINE_EACH_R2
CUBLAS_BLAS_ROUTINE_EACH_R2
(
DEFINE_WRAP
);
#endif
}
// namespace dynload
}
// namespace dynload
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/dynload/cublas.h
浏览文件 @
7b40f7ce
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <cublas_v2.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <dlfcn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex>
// NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -34,18 +35,18 @@ extern void *cublas_dso_handle;
...
@@ -34,18 +35,18 @@ extern void *cublas_dso_handle;
* note: default dynamic linked libs
* note: default dynamic linked libs
*/
*/
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name)
\
struct DynLoad__##__name { \
struct DynLoad__##__name {
\
template <typename... Args> \
template <typename... Args>
\
inline cublasStatus_t operator()(Args... args) { \
inline cublasStatus_t operator()(Args... args) {
\
typedef cublasStatus_t (*cublasFunc)(Args...); \
typedef cublasStatus_t (*cublasFunc)(Args...);
\
std::call_once(cublas_dso_flag, \
std::call_once(cublas_dso_flag,
[]() {
\
paddle::platform::dynload::GetCublasDsoHandle,
\
cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle();
\
&cublas_dso_handle);
\
});
\
void *p_##__name = dlsym(cublas_dso_handle, #__name); \
void *p_##__name = dlsym(cublas_dso_handle, #__name);
\
return reinterpret_cast<cublasFunc>(p_##__name)(args...); \
return reinterpret_cast<cublasFunc>(p_##__name)(args...);
\
} \
}
\
}; \
};
\
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#else
#else
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
...
@@ -70,6 +71,7 @@ extern void *cublas_dso_handle;
...
@@ -70,6 +71,7 @@ extern void *cublas_dso_handle;
__macro(cublasDgemm_v2); \
__macro(cublasDgemm_v2); \
__macro(cublasHgemm); \
__macro(cublasHgemm); \
__macro(cublasSgemmEx); \
__macro(cublasSgemmEx); \
__macro(cublasGemmEx); \
__macro(cublasSgeam_v2); \
__macro(cublasSgeam_v2); \
__macro(cublasDgeam_v2); \
__macro(cublasDgeam_v2); \
__macro(cublasCreate_v2); \
__macro(cublasCreate_v2); \
...
@@ -89,9 +91,15 @@ extern void *cublas_dso_handle;
...
@@ -89,9 +91,15 @@ extern void *cublas_dso_handle;
__macro(cublasSgetrfBatched); \
__macro(cublasSgetrfBatched); \
__macro(cublasSgetriBatched); \
__macro(cublasSgetriBatched); \
__macro(cublasDgetrfBatched); \
__macro(cublasDgetrfBatched); \
__macro(cublasDgetriBatched)
__macro(cublasDgetriBatched)
;
CUBLAS_BLAS_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
);
CUBLAS_BLAS_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
)
// APIs available after CUDA 9.0
#if CUDA_VERSION >= 9000
#define CUBLAS_BLAS_ROUTINE_EACH_R2(__macro) __macro(cublasSetMathMode);
CUBLAS_BLAS_ROUTINE_EACH_R2
(
DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
)
#endif
#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
#undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
}
// namespace dynload
}
// namespace dynload
...
...
paddle/fluid/platform/dynload/cudnn.cc
浏览文件 @
7b40f7ce
...
@@ -44,7 +44,8 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
...
@@ -44,7 +44,8 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
bool
HasCUDNN
()
{
bool
HasCUDNN
()
{
std
::
call_once
(
cudnn_dso_flag
,
GetCUDNNDsoHandle
,
&
cudnn_dso_handle
);
std
::
call_once
(
cudnn_dso_flag
,
[]()
{
cudnn_dso_handle
=
GetCUDNNDsoHandle
();
});
return
cudnn_dso_handle
!=
nullptr
;
return
cudnn_dso_handle
!=
nullptr
;
}
}
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
7b40f7ce
...
@@ -16,7 +16,7 @@ limitations under the License. */
...
@@ -16,7 +16,7 @@ limitations under the License. */
#include <cudnn.h>
#include <cudnn.h>
#include <dlfcn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex>
// NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -30,19 +30,19 @@ extern bool HasCUDNN();
...
@@ -30,19 +30,19 @@ extern bool HasCUDNN();
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
extern
void
EnforceCUDNNLoaded
(
const
char
*
fn_name
);
extern
void
EnforceCUDNNLoaded
(
const
char
*
fn_name
);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)
\
struct DynLoad__##__name { \
struct DynLoad__##__name {
\
template <typename... Args> \
template <typename... Args>
\
auto operator()(Args... args) -> decltype(__name(args...)) { \
auto operator()(Args... args) -> decltype(__name(args...)) {
\
using cudnn_func = decltype(__name(args...)) (*)(Args...); \
using cudnn_func = decltype(__name(args...)) (*)(Args...);
\
std::call_once(cudnn_dso_flag, \
std::call_once(cudnn_dso_flag,
[]() {
\
paddle::platform::dynload::GetCUDNNDsoHandle,
\
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle();
\
&cudnn_dso_handle);
\
});
\
EnforceCUDNNLoaded(#__name); \
EnforceCUDNNLoaded(#__name);
\
void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
void* p_##__name = dlsym(cudnn_dso_handle, #__name);
\
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
return reinterpret_cast<cudnn_func>(p_##__name)(args...);
\
} \
}
\
}; \
};
\
extern struct DynLoad__##__name __name
extern struct DynLoad__##__name __name
#else
#else
...
@@ -140,7 +140,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
...
@@ -140,7 +140,8 @@ CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#if CUDNN_VERSION >= 7001
#if CUDNN_VERSION >= 7001
#define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \
#define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \
__macro(cudnnSetConvolutionGroupCount);
__macro(cudnnSetConvolutionGroupCount); \
__macro(cudnnSetConvolutionMathType);
CUDNN_DNN_ROUTINE_EACH_R7
(
DECLARE_DYNAMIC_LOAD_CUDNN_WRAP
)
CUDNN_DNN_ROUTINE_EACH_R7
(
DECLARE_DYNAMIC_LOAD_CUDNN_WRAP
)
#endif
#endif
...
...
paddle/fluid/platform/dynload/cupti.h
浏览文件 @
7b40f7ce
...
@@ -11,14 +11,15 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,14 +11,15 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_CUPTI
#include <cuda.h>
#include <cuda.h>
#include <cupti.h>
#include <cupti.h>
#include <dlfcn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -36,18 +37,18 @@ extern void *cupti_dso_handle;
...
@@ -36,18 +37,18 @@ extern void *cupti_dso_handle;
* note: default dynamic linked libs
* note: default dynamic linked libs
*/
*/
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)
\
struct DynLoad__##__name { \
struct DynLoad__##__name {
\
template <typename... Args> \
template <typename... Args>
\
inline CUptiResult CUPTIAPI operator()(Args... args) { \
inline CUptiResult CUPTIAPI operator()(Args... args) {
\
typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \
typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...);
\
std::call_once(cupti_dso_flag, \
std::call_once(cupti_dso_flag,
[]() {
\
paddle::platform::dynload::GetCUPTIDsoHandle,
\
cupti_dso_handle = paddle::platform::dynload::GetCUPTIDsoHandle();
\
&cupti_dso_handle);
\
});
\
void *p_##__name = dlsym(cupti_dso_handle, #__name); \
void *p_##__name = dlsym(cupti_dso_handle, #__name);
\
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
return reinterpret_cast<cuptiFunc>(p_##__name)(args...);
\
} \
}
\
}; \
};
\
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#else
#else
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
...
...
paddle/fluid/platform/dynload/curand.h
浏览文件 @
7b40f7ce
...
@@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <curand.h>
#include <curand.h>
#include <dlfcn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -25,18 +26,18 @@ namespace dynload {
...
@@ -25,18 +26,18 @@ namespace dynload {
extern
std
::
once_flag
curand_dso_flag
;
extern
std
::
once_flag
curand_dso_flag
;
extern
void
*
curand_dso_handle
;
extern
void
*
curand_dso_handle
;
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name)
\
struct DynLoad__##__name { \
struct DynLoad__##__name {
\
template <typename... Args> \
template <typename... Args>
\
curandStatus_t operator()(Args... args) { \
curandStatus_t operator()(Args... args) {
\
typedef curandStatus_t (*curandFunc)(Args...); \
typedef curandStatus_t (*curandFunc)(Args...);
\
std::call_once(curand_dso_flag, \
std::call_once(curand_dso_flag,
[]() {
\
paddle::platform::dynload::GetCurandDsoHandle,
\
curand_dso_handle = paddle::platform::dynload::GetCurandDsoHandle();
\
&curand_dso_handle);
\
});
\
void *p_##__name = dlsym(curand_dso_handle, #__name); \
void *p_##__name = dlsym(curand_dso_handle, #__name);
\
return reinterpret_cast<curandFunc>(p_##__name)(args...); \
return reinterpret_cast<curandFunc>(p_##__name)(args...);
\
} \
}
\
}; \
};
\
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#else
#else
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
7b40f7ce
...
@@ -11,12 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,12 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include <dlfcn.h>
#include <dlfcn.h>
#include <memory>
#include <memory>
#include <mutex>
#include <mutex>
// NOLINT
#include <string>
#include <string>
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/dynload/cupti_lib_path.h"
#include "paddle/fluid/platform/dynload/cupti_lib_path.h"
...
@@ -65,22 +67,21 @@ static inline std::string join(const std::string& part1,
...
@@ -65,22 +67,21 @@ static inline std::string join(const std::string& part1,
return
ret
;
return
ret
;
}
}
static
inline
void
GetDsoHandleFromDefaultPath
(
std
::
string
&
dso_path
,
static
inline
void
*
GetDsoHandleFromDefaultPath
(
const
std
::
string
&
dso_path
,
void
**
dso_handle
,
int
dynload_flags
)
{
int
dynload_flags
)
{
VLOG
(
3
)
<<
"Try to find library: "
<<
dso_path
VLOG
(
3
)
<<
"Try to find library: "
<<
dso_path
<<
" from default system path."
;
<<
" from default system path."
;
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
void
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to
// bring System Integrity Projection (SIP), if dso_handle
// bring System Integrity Projection (SIP), if dso_handle
// is null, search from default package path in Mac OS.
// is null, search from default package path in Mac OS.
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
if
(
nullptr
==
*
dso_handle
)
{
if
(
nullptr
==
dso_handle
)
{
dso_
path
=
join
(
"/usr/local/cuda/lib/"
,
dso_path
);
dso_
handle
=
*
dso_handle
=
dlopen
(
dso_path
.
c_str
(),
dynload_flags
);
dlopen
(
join
(
"/usr/local/cuda/lib/"
,
dso_path
)
.
c_str
(),
dynload_flags
);
if
(
nullptr
==
*
dso_handle
)
{
if
(
nullptr
==
dso_handle
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
if
(
dso_path
==
"libcudnn.dylib"
)
{
LOG
(
WARNING
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
LOG
(
WARNING
)
<<
"Note: [Recommend] copy cudnn into /usr/local/cuda/
\n
"
"For instance, sudo tar -xzf "
"For instance, sudo tar -xzf "
...
@@ -91,28 +92,29 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path,
...
@@ -91,28 +92,29 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path,
}
}
}
}
#endif
#endif
return
dso_handle
;
}
}
static
inline
void
GetDsoHandleFromSearchPath
(
const
std
::
string
&
search_root
,
static
inline
void
*
GetDsoHandleFromSearchPath
(
const
std
::
string
&
search_root
,
const
std
::
string
&
dso_name
,
const
std
::
string
&
dso_name
,
void
**
dso_handle
,
bool
throw_on_error
=
true
)
{
bool
throw_on_error
=
true
)
{
int
dynload_flags
=
RTLD_LAZY
|
RTLD_LOCAL
;
int
dynload_flags
=
RTLD_LAZY
|
RTLD_LOCAL
;
*
dso_handle
=
nullptr
;
void
*
dso_handle
=
nullptr
;
std
::
string
dlPath
=
dso_name
;
std
::
string
dlPath
=
dso_name
;
if
(
search_root
.
empty
())
{
if
(
search_root
.
empty
())
{
GetDsoHandleFromDefaultPath
(
dlPath
,
dso_handle
,
dynload_flags
);
dso_handle
=
GetDsoHandleFromDefaultPath
(
dlPath
,
dynload_flags
);
}
else
{
}
else
{
// search xxx.so from custom path
// search xxx.so from custom path
dlPath
=
join
(
search_root
,
dso_name
);
dlPath
=
join
(
search_root
,
dso_name
);
*
dso_handle
=
dlopen
(
dlPath
.
c_str
(),
dynload_flags
);
dso_handle
=
dlopen
(
dlPath
.
c_str
(),
dynload_flags
);
// if not found, search from default path
// if not found, search from default path
if
(
nullptr
==
*
dso_handle
)
{
if
(
nullptr
==
dso_handle
)
{
LOG
(
WARNING
)
<<
"Failed to find dynamic library: "
<<
dlPath
<<
" ("
LOG
(
WARNING
)
<<
"Failed to find dynamic library: "
<<
dlPath
<<
" ("
<<
dlerror
()
<<
")"
;
<<
dlerror
()
<<
")"
;
dlPath
=
dso_name
;
dlPath
=
dso_name
;
GetDsoHandleFromDefaultPath
(
dlPath
,
dso_handle
,
dynload_flags
);
dso_handle
=
GetDsoHandleFromDefaultPath
(
dlPath
,
dynload_flags
);
}
}
}
}
auto
error_msg
=
auto
error_msg
=
...
@@ -124,70 +126,71 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root,
...
@@ -124,70 +126,71 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root,
"using the DYLD_LIBRARY_PATH is impossible unless System "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled."
;
"Integrity Protection (SIP) is disabled."
;
if
(
throw_on_error
)
{
if
(
throw_on_error
)
{
PADDLE_ENFORCE
(
nullptr
!=
*
dso_handle
,
error_msg
,
dlPath
,
dlerror
());
PADDLE_ENFORCE
(
nullptr
!=
dso_handle
,
error_msg
,
dlPath
,
dlerror
());
}
else
if
(
nullptr
==
*
dso_handle
)
{
}
else
if
(
nullptr
==
dso_handle
)
{
LOG
(
WARNING
)
<<
string
::
Sprintf
(
error_msg
,
dlPath
,
dlerror
());
LOG
(
WARNING
)
<<
string
::
Sprintf
(
error_msg
,
dlPath
,
dlerror
());
}
}
return
dso_handle
;
}
}
void
GetCublasDsoHandle
(
void
**
dso_handle
)
{
void
*
GetCublasDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
);
#endif
#endif
}
}
void
GetCUDNNDsoHandle
(
void
**
dso_handle
)
{
void
*
GetCUDNNDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
dso_handle
,
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
false
);
false
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
dso_handle
,
false
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
false
);
#endif
#endif
}
}
void
GetCUPTIDsoHandle
(
void
**
dso_handle
)
{
void
*
GetCUPTIDsoHandle
(
)
{
std
::
string
cupti_path
=
cupti_lib_path
;
std
::
string
cupti_path
=
cupti_lib_path
;
if
(
!
FLAGS_cupti_dir
.
empty
())
{
if
(
!
FLAGS_cupti_dir
.
empty
())
{
cupti_path
=
FLAGS_cupti_dir
;
cupti_path
=
FLAGS_cupti_dir
;
}
}
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
cupti_path
,
"libcupti.dylib"
,
dso_handle
,
false
);
return
GetDsoHandleFromSearchPath
(
cupti_path
,
"libcupti.dylib"
,
false
);
#else
#else
GetDsoHandleFromSearchPath
(
cupti_path
,
"libcupti.so"
,
dso_handle
,
false
);
return
GetDsoHandleFromSearchPath
(
cupti_path
,
"libcupti.so"
,
false
);
#endif
#endif
}
}
void
GetCurandDsoHandle
(
void
**
dso_handle
)
{
void
*
GetCurandDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
);
#endif
#endif
}
}
void
GetWarpCTCDsoHandle
(
void
**
dso_handle
)
{
void
*
GetWarpCTCDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
#endif
}
}
void
GetLapackDsoHandle
(
void
**
dso_handle
)
{
void
*
GetLapackDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_lapack_dir
,
"liblapacke.dylib"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_lapack_dir
,
"liblapacke.dylib"
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_lapack_dir
,
"liblapacke.so"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_lapack_dir
,
"liblapacke.so"
);
#endif
#endif
}
}
void
GetNCCLDsoHandle
(
void
**
dso_handle
)
{
void
*
GetNCCLDsoHandle
(
)
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath
(
FLAGS_nccl_dir
,
"libnccl.dylib"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_nccl_dir
,
"libnccl.dylib"
);
#else
#else
GetDsoHandleFromSearchPath
(
FLAGS_nccl_dir
,
"libnccl.so"
,
dso_handle
);
return
GetDsoHandleFromSearchPath
(
FLAGS_nccl_dir
,
"libnccl.so"
);
#endif
#endif
}
}
...
...
paddle/fluid/platform/dynload/dynamic_loader.h
浏览文件 @
7b40f7ce
...
@@ -18,55 +18,13 @@ namespace paddle {
...
@@ -18,55 +18,13 @@ namespace paddle {
namespace
platform
{
namespace
platform
{
namespace
dynload
{
namespace
dynload
{
/**
void
*
GetCublasDsoHandle
();
* @brief load the DSO of CUBLAS
void
*
GetCUDNNDsoHandle
();
*
void
*
GetCUPTIDsoHandle
();
* @param **dso_handle dso handler
void
*
GetCurandDsoHandle
();
*
void
*
GetWarpCTCDsoHandle
();
*/
void
*
GetLapackDsoHandle
();
void
GetCublasDsoHandle
(
void
**
dso_handle
);
void
*
GetNCCLDsoHandle
();
/**
* @brief load the DSO of CUDNN
*
* @param **dso_handle dso handler
*
*/
void
GetCUDNNDsoHandle
(
void
**
dso_handle
);
void
GetCUPTIDsoHandle
(
void
**
dso_handle
);
/**
* @brief load the DSO of CURAND
*
* @param **dso_handle dso handler
*
*/
void
GetCurandDsoHandle
(
void
**
dso_handle
);
/**
* @brief load the DSO of warp-ctc
*
* @param **dso_handle dso handler
*
*/
void
GetWarpCTCDsoHandle
(
void
**
dso_handle
);
/**
* @brief load the DSO of lapack
*
* @param **dso_handle dso handler
*
*/
void
GetLapackDsoHandle
(
void
**
dso_handle
);
/**
* @brief load the DSO of NVIDIA nccl
*
* @param **dso_handle dso handler
*
*/
void
GetNCCLDsoHandle
(
void
**
dso_handle
);
}
// namespace dynload
}
// namespace dynload
}
// namespace platform
}
// namespace platform
...
...
paddle/fluid/platform/dynload/nccl.cc
浏览文件 @
7b40f7ce
...
@@ -25,11 +25,6 @@ void *nccl_dso_handle;
...
@@ -25,11 +25,6 @@ void *nccl_dso_handle;
NCCL_RAND_ROUTINE_EACH
(
DEFINE_WRAP
);
NCCL_RAND_ROUTINE_EACH
(
DEFINE_WRAP
);
void
LoadNCCLDSO
()
{
platform
::
call_once
(
nccl_dso_flag
,
[]
{
GetNCCLDsoHandle
(
&
nccl_dso_handle
);
});
}
}
// namespace dynload
}
// namespace dynload
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/fluid/platform/dynload/nccl.h
浏览文件 @
7b40f7ce
...
@@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,12 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <dlfcn.h>
#include <dlfcn.h>
#include <nccl.h>
#include <nccl.h>
#include <mutex>
#include <mutex> // NOLINT
#include "paddle/fluid/platform/call_once.h"
#include "paddle/fluid/platform/call_once.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
...
@@ -28,18 +29,19 @@ extern std::once_flag nccl_dso_flag;
...
@@ -28,18 +29,19 @@ extern std::once_flag nccl_dso_flag;
extern
void
*
nccl_dso_handle
;
extern
void
*
nccl_dso_handle
;
#ifdef PADDLE_USE_DSO
#ifdef PADDLE_USE_DSO
extern
void
LoadNCCLDSO
();
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \
auto operator()(Args... args) -> decltype(__name(args...)) { \
using nccl_func = decltype(__name(args...)) (*)(Args...); \
using nccl_func = decltype(__name(args...)) (*)(Args...); \
paddle::platform::dynload::LoadNCCLDSO(); \
std::call_once(nccl_dso_flag, []() { \
void* p_##__name = dlsym(nccl_dso_handle, #__name); \
nccl_dso_handle = paddle::platform::dynload::GetNCCLDsoHandle(); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
}); \
} \
void* p_##__name = dlsym(nccl_dso_handle, #__name); \
}; \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#else
#else
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
7b40f7ce
...
@@ -15,9 +15,10 @@ limitations under the License. */
...
@@ -15,9 +15,10 @@ limitations under the License. */
#pragma once
#pragma once
#include <dlfcn.h>
#include <dlfcn.h>
#include <mutex>
#include <mutex>
// NOLINT
#include "ctc.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "warpctc/include/ctc.h"
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
...
@@ -31,18 +32,18 @@ extern void* warpctc_dso_handle;
...
@@ -31,18 +32,18 @@ extern void* warpctc_dso_handle;
* (for each function) to dynamic load warpctc routine
* (for each function) to dynamic load warpctc routine
* via operator overloading.
* via operator overloading.
*/
*/
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name)
\
struct DynLoad__##__name { \
struct DynLoad__##__name {
\
template <typename... Args> \
template <typename... Args>
\
auto operator()(Args... args) -> decltype(__name(args...)) { \
auto operator()(Args... args) -> decltype(__name(args...)) {
\
using warpctcFunc = decltype(__name(args...)) (*)(Args...); \
using warpctcFunc = decltype(__name(args...)) (*)(Args...);
\
std::call_once(warpctc_dso_flag, \
std::call_once(warpctc_dso_flag,
[]() {
\
paddle::platform::dynload::GetWarpCTCDsoHandle,
\
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle();
\
&warpctc_dso_handle);
\
});
\
void* p_##_name = dlsym(warpctc_dso_handle, #__name); \
void* p_##_name = dlsym(warpctc_dso_handle, #__name);
\
return reinterpret_cast<warpctcFunc>(p_##_name)(args...); \
return reinterpret_cast<warpctcFunc>(p_##_name)(args...);
\
} \
}
\
}; \
};
\
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
7b40f7ce
...
@@ -16,35 +16,35 @@ limitations under the License. */
...
@@ -16,35 +16,35 @@ limitations under the License. */
#include <dlfcn.h> // for dladdr
#include <dlfcn.h> // for dladdr
#include <execinfo.h> // for backtrace
#include <execinfo.h> // for backtrace
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle
#endif // __GNUC__
#ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h>
#include <cudnn.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif // PADDLE_WITH_CUDA
#include <iomanip>
#include <iomanip>
#include <memory>
#include <memory>
#include <sstream>
#include <sstream>
#include <stdexcept>
#include <stdexcept>
#include <string>
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/to_string.h"
#include "paddle/fluid/string/to_string.h"
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle
#endif
#include <glog/logging.h>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#include "paddle/fluid/platform/dynload/curand.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include <cublas_v2.h>
#include <cudnn.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif
#endif
namespace
paddle
{
namespace
paddle
{
...
@@ -185,7 +185,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
...
@@ -185,7 +185,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
}
}
}
}
#endif // PADDLE_
ONLY_CPU
#endif // PADDLE_
WITH_CUDA
template
<
typename
T
>
template
<
typename
T
>
inline
void
throw_on_error
(
T
e
)
{
inline
void
throw_on_error
(
T
e
)
{
...
...
paddle/fluid/platform/enforce_test.cc
浏览文件 @
7b40f7ce
...
@@ -96,7 +96,6 @@ TEST(ENFORCE_GT, FAIL) {
...
@@ -96,7 +96,6 @@ TEST(ENFORCE_GT, FAIL) {
bool
caught_exception
=
false
;
bool
caught_exception
=
false
;
try
{
try
{
PADDLE_ENFORCE_GT
(
1
,
2UL
);
PADDLE_ENFORCE_GT
(
1
,
2UL
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
caught_exception
=
true
;
EXPECT_TRUE
(
EXPECT_TRUE
(
...
@@ -115,7 +114,6 @@ TEST(ENFORCE_GE, FAIL) {
...
@@ -115,7 +114,6 @@ TEST(ENFORCE_GE, FAIL) {
bool
caught_exception
=
false
;
bool
caught_exception
=
false
;
try
{
try
{
PADDLE_ENFORCE_GE
(
1
,
2UL
);
PADDLE_ENFORCE_GE
(
1
,
2UL
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
caught_exception
=
true
;
EXPECT_TRUE
(
EXPECT_TRUE
(
...
@@ -135,7 +133,6 @@ TEST(ENFORCE_LE, FAIL) {
...
@@ -135,7 +133,6 @@ TEST(ENFORCE_LE, FAIL) {
bool
caught_exception
=
false
;
bool
caught_exception
=
false
;
try
{
try
{
PADDLE_ENFORCE_GT
(
1
,
2UL
);
PADDLE_ENFORCE_GT
(
1
,
2UL
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
caught_exception
=
true
;
EXPECT_TRUE
(
EXPECT_TRUE
(
...
@@ -171,7 +168,6 @@ TEST(ENFORCE_NOT_NULL, FAIL) {
...
@@ -171,7 +168,6 @@ TEST(ENFORCE_NOT_NULL, FAIL) {
try
{
try
{
int
*
a
=
nullptr
;
int
*
a
=
nullptr
;
PADDLE_ENFORCE_NOT_NULL
(
a
);
PADDLE_ENFORCE_NOT_NULL
(
a
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
}
catch
(
paddle
::
platform
::
EnforceNotMet
error
)
{
caught_exception
=
true
;
caught_exception
=
true
;
EXPECT_TRUE
(
HasPrefix
(
StringPiece
(
error
.
what
()),
"a should not be null"
));
EXPECT_TRUE
(
HasPrefix
(
StringPiece
(
error
.
what
()),
"a should not be null"
));
...
...
paddle/fluid/platform/float16.h
浏览文件 @
7b40f7ce
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <stdint.h>
#include <stdint.h>
#include <limits>
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#include <cuda.h>
...
@@ -293,39 +294,39 @@ struct PADDLE_ALIGN(2) float16 {
...
@@ -293,39 +294,39 @@ struct PADDLE_ALIGN(2) float16 {
HOSTDEVICE
inline
explicit
operator
bool
()
const
{
return
(
x
&
0x7fff
)
!=
0
;
}
HOSTDEVICE
inline
explicit
operator
bool
()
const
{
return
(
x
&
0x7fff
)
!=
0
;
}
HOSTDEVICE
inline
explicit
operator
int8_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int8_t
()
const
{
return
static_cast
<
int8_t
>
(
float
(
*
this
));
return
static_cast
<
int8_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint8_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint8_t
()
const
{
return
static_cast
<
uint8_t
>
(
float
(
*
this
));
return
static_cast
<
uint8_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int16_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int16_t
()
const
{
return
static_cast
<
int16_t
>
(
float
(
*
this
));
return
static_cast
<
int16_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint16_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint16_t
()
const
{
return
static_cast
<
uint16_t
>
(
float
(
*
this
));
return
static_cast
<
uint16_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int32_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int32_t
()
const
{
return
static_cast
<
int32_t
>
(
float
(
*
this
));
return
static_cast
<
int32_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint32_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint32_t
()
const
{
return
static_cast
<
uint32_t
>
(
float
(
*
this
));
return
static_cast
<
uint32_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
int64_t
()
const
{
HOSTDEVICE
inline
explicit
operator
int64_t
()
const
{
return
static_cast
<
int64_t
>
(
float
(
*
this
));
return
static_cast
<
int64_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
uint64_t
()
const
{
HOSTDEVICE
inline
explicit
operator
uint64_t
()
const
{
return
static_cast
<
uint64_t
>
(
float
(
*
this
));
return
static_cast
<
uint64_t
>
(
static_cast
<
float
>
(
*
this
));
}
}
HOSTDEVICE
inline
explicit
operator
double
()
const
{
HOSTDEVICE
inline
explicit
operator
double
()
const
{
return
static_cast
<
double
>
(
float
(
*
this
));
return
static_cast
<
double
>
(
static_cast
<
float
>
(
*
this
));
}
}
private:
private:
...
@@ -370,7 +371,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
...
@@ -370,7 +371,7 @@ DEVICE inline half operator+(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hadd
(
a
,
b
);
return
__hadd
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
+
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
+
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -379,7 +380,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
...
@@ -379,7 +380,7 @@ DEVICE inline half operator-(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hsub
(
a
,
b
);
return
__hsub
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
-
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
-
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -388,7 +389,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
...
@@ -388,7 +389,7 @@ DEVICE inline half operator*(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hmul
(
a
,
b
);
return
__hmul
(
a
,
b
);
#else
#else
float
res
=
float
(
float16
(
a
))
*
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
*
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -399,7 +400,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
...
@@ -399,7 +400,7 @@ DEVICE inline half operator/(const half& a, const half& b) {
float
denom
=
__half2float
(
b
);
float
denom
=
__half2float
(
b
);
return
__float2half
(
num
/
denom
);
return
__float2half
(
num
/
denom
);
#else
#else
float
res
=
float
(
float16
(
a
))
/
float
(
float16
(
b
));
float
res
=
static_cast
<
float
>
(
float16
(
a
))
/
static_cast
<
float
>
(
float16
(
b
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
...
@@ -408,27 +409,27 @@ DEVICE inline half operator-(const half& a) {
...
@@ -408,27 +409,27 @@ DEVICE inline half operator-(const half& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hneg
(
a
);
return
__hneg
(
a
);
#else
#else
float
res
=
-
float
(
float16
(
a
));
float
res
=
-
static_cast
<
float
>
(
float16
(
a
));
return
half
(
float16
(
res
));
return
half
(
float16
(
res
));
#endif
#endif
}
}
DEVICE
inline
half
&
operator
+=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
+=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
-=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
-=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
*=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
*=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
DEVICE
inline
half
&
operator
/=
(
half
&
a
,
const
half
&
b
)
{
DEVICE
inline
half
&
operator
/=
(
half
&
a
,
const
half
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -437,7 +438,7 @@ DEVICE inline bool operator==(const half& a, const half& b) {
...
@@ -437,7 +438,7 @@ DEVICE inline bool operator==(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__heq
(
a
,
b
);
return
__heq
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
==
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
==
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -445,7 +446,7 @@ DEVICE inline bool operator!=(const half& a, const half& b) {
...
@@ -445,7 +446,7 @@ DEVICE inline bool operator!=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hne
(
a
,
b
);
return
__hne
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
!=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
!=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -453,7 +454,7 @@ DEVICE inline bool operator<(const half& a, const half& b) {
...
@@ -453,7 +454,7 @@ DEVICE inline bool operator<(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hlt
(
a
,
b
);
return
__hlt
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
<
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
<
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -461,7 +462,7 @@ DEVICE inline bool operator<=(const half& a, const half& b) {
...
@@ -461,7 +462,7 @@ DEVICE inline bool operator<=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hle
(
a
,
b
);
return
__hle
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
<=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
<=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -469,7 +470,7 @@ DEVICE inline bool operator>(const half& a, const half& b) {
...
@@ -469,7 +470,7 @@ DEVICE inline bool operator>(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hgt
(
a
,
b
);
return
__hgt
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
>
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
>
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -477,7 +478,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
...
@@ -477,7 +478,7 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hge
(
a
,
b
);
return
__hge
(
a
,
b
);
#else
#else
return
float
(
float16
(
a
))
>=
float
(
float16
(
b
));
return
static_cast
<
float
>
(
float16
(
a
))
>=
static_cast
<
float
>
(
float16
(
b
));
#endif
#endif
}
}
...
@@ -489,7 +490,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
...
@@ -489,7 +490,7 @@ HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
return
float16
(
__hadd
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
+
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -497,7 +498,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
...
@@ -497,7 +498,7 @@ HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
return
float16
(
__hsub
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
-
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -505,7 +506,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
...
@@ -505,7 +506,7 @@ HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
return
float16
(
__hmul
(
half
(
a
),
half
(
b
)));
#else
#else
return
float16
(
float
(
a
)
*
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -516,7 +517,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
...
@@ -516,7 +517,7 @@ HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
float
denom
=
__half2float
(
half
(
b
));
float
denom
=
__half2float
(
half
(
b
));
return
float16
(
num
/
denom
);
return
float16
(
num
/
denom
);
#else
#else
return
float16
(
float
(
a
)
/
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
#endif
#endif
}
}
...
@@ -530,22 +531,22 @@ HOSTDEVICE inline float16 operator-(const float16& a) {
...
@@ -530,22 +531,22 @@ HOSTDEVICE inline float16 operator-(const float16& a) {
#endif
#endif
}
}
HOSTDEVICE
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
HOSTDEVICE
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
HOSTDEVICE
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -554,7 +555,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
...
@@ -554,7 +555,7 @@ HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__heq
(
half
(
a
),
half
(
b
));
return
__heq
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
==
float
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -562,7 +563,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
...
@@ -562,7 +563,7 @@ HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hne
(
half
(
a
),
half
(
b
));
return
__hne
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
!=
float
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -570,7 +571,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
...
@@ -570,7 +571,7 @@ HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hlt
(
half
(
a
),
half
(
b
));
return
__hlt
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
<
float
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -578,7 +579,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
...
@@ -578,7 +579,7 @@ HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hle
(
half
(
a
),
half
(
b
));
return
__hle
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
<=
float
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -586,7 +587,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
...
@@ -586,7 +587,7 @@ HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hgt
(
half
(
a
),
half
(
b
));
return
__hgt
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
>
float
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -594,7 +595,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
...
@@ -594,7 +595,7 @@ HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hge
(
half
(
a
),
half
(
b
));
return
__hge
(
half
(
a
),
half
(
b
));
#else
#else
return
float
(
a
)
>=
float
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
#endif
#endif
}
}
...
@@ -679,22 +680,22 @@ inline float16 operator-(const float16& a) {
...
@@ -679,22 +680,22 @@ inline float16 operator-(const float16& a) {
return
res
;
return
res
;
}
}
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
+
b
;
a
=
a
+
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
-
b
;
a
=
a
-
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
*
b
;
a
=
a
*
b
;
return
a
;
return
a
;
}
}
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
a
/
b
;
a
=
a
/
b
;
return
a
;
return
a
;
}
}
...
@@ -784,19 +785,19 @@ inline bool operator>=(const float16& a, const float16& b) {
...
@@ -784,19 +785,19 @@ inline bool operator>=(const float16& a, const float16& b) {
// Arithmetic operators for float16, software emulated on other CPU
// Arithmetic operators for float16, software emulated on other CPU
#else
#else
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
+
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
+
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
-
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
-
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
*
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
*
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
float16
operator
/
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float16
(
float
(
a
)
/
float
(
b
));
return
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
}
}
inline
float16
operator
-
(
const
float16
&
a
)
{
inline
float16
operator
-
(
const
float16
&
a
)
{
...
@@ -805,51 +806,57 @@ inline float16 operator-(const float16& a) {
...
@@ -805,51 +806,57 @@ inline float16 operator-(const float16& a) {
return
res
;
return
res
;
}
}
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
+=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
+
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
+
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
-=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
-
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
-
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
*=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
*
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
*
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
inline
float16
&
operator
/=
(
float16
&
a
,
const
float16
&
b
)
{
// NOLINT
a
=
float16
(
float
(
a
)
/
float
(
b
));
a
=
float16
(
static_cast
<
float
>
(
a
)
/
static_cast
<
float
>
(
b
));
return
a
;
return
a
;
}
}
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
==
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
==
float
(
b
);
return
static_cast
<
float
>
(
a
)
==
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
!=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
!=
float
(
b
);
return
static_cast
<
float
>
(
a
)
!=
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
<
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
<
float
(
b
);
return
static_cast
<
float
>
(
a
)
<
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
<=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
<=
float
(
b
);
return
static_cast
<
float
>
(
a
)
<=
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
>
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
>
float
(
b
);
return
static_cast
<
float
>
(
a
)
>
static_cast
<
float
>
(
b
);
}
}
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
inline
bool
operator
>=
(
const
float16
&
a
,
const
float16
&
b
)
{
return
float
(
a
)
>=
float
(
b
);
return
static_cast
<
float
>
(
a
)
>=
static_cast
<
float
>
(
b
);
}
}
#endif
#endif
HOSTDEVICE
inline
float16
raw_uint16_to_float16
(
uint16_t
a
)
{
float16
res
;
res
.
x
=
a
;
return
res
;
}
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
return
__hisnan
(
half
(
a
));
return
__hisnan
(
half
(
a
));
...
@@ -886,28 +893,116 @@ struct is_pod<paddle::platform::float16> {
...
@@ -886,28 +893,116 @@ struct is_pod<paddle::platform::float16> {
is_standard_layout
<
paddle
::
platform
::
float16
>::
value
;
is_standard_layout
<
paddle
::
platform
::
float16
>::
value
;
};
};
template
<
>
struct
numeric_limits
<
paddle
::
platform
::
float16
>
{
static
const
bool
is_specialized
=
true
;
static
const
bool
is_signed
=
true
;
static
const
bool
is_integer
=
false
;
static
const
bool
is_exact
=
false
;
static
const
bool
has_infinity
=
true
;
static
const
bool
has_quiet_NaN
=
true
;
static
const
bool
has_signaling_NaN
=
true
;
static
const
float_denorm_style
has_denorm
=
denorm_present
;
static
const
bool
has_denorm_loss
=
false
;
static
const
std
::
float_round_style
round_style
=
std
::
round_to_nearest
;
static
const
bool
is_iec559
=
false
;
static
const
bool
is_bounded
=
false
;
static
const
bool
is_modulo
=
false
;
static
const
int
digits
=
11
;
static
const
int
digits10
=
3
;
static
const
int
max_digits10
=
5
;
static
const
int
radix
=
2
;
static
const
int
min_exponent
=
-
13
;
static
const
int
min_exponent10
=
-
4
;
static
const
int
max_exponent
=
16
;
static
const
int
max_exponent10
=
4
;
static
const
bool
traps
=
true
;
static
const
bool
tinyness_before
=
false
;
static
paddle
::
platform
::
float16
(
min
)()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x400
);
}
static
paddle
::
platform
::
float16
lowest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0xfbff
);
}
static
paddle
::
platform
::
float16
(
max
)()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7bff
);
}
static
paddle
::
platform
::
float16
epsilon
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x0800
);
}
static
paddle
::
platform
::
float16
round_error
()
{
return
paddle
::
platform
::
float16
(
0.5
);
}
static
paddle
::
platform
::
float16
infinity
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c00
);
}
static
paddle
::
platform
::
float16
quiet_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7e00
);
}
static
paddle
::
platform
::
float16
signaling_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7e00
);
}
static
paddle
::
platform
::
float16
denorm_min
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x1
);
}
};
}
// namespace std
}
// namespace std
namespace
Eigen
{
namespace
Eigen
{
using
float16
=
paddle
::
platform
::
float16
;
template
<
>
struct
NumTraits
<
float16
>
:
GenericNumTraits
<
float16
>
{
enum
{
IsSigned
=
true
,
IsInteger
=
false
,
IsComplex
=
false
,
RequireInitialization
=
false
};
HOSTDEVICE
static
inline
float16
epsilon
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x0800
);
}
HOSTDEVICE
static
inline
float16
dummy_precision
()
{
return
float16
(
1e-2
f
);
}
HOSTDEVICE
static
inline
float16
highest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7bff
);
}
HOSTDEVICE
static
inline
float16
lowest
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0xfbff
);
}
HOSTDEVICE
static
inline
float16
infinity
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c00
);
}
HOSTDEVICE
static
inline
float16
quiet_NaN
()
{
return
paddle
::
platform
::
raw_uint16_to_float16
(
0x7c01
);
}
};
namespace
numext
{
namespace
numext
{
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isnan
)(
HOSTDEVICE
inline
bool
(
isnan
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isnan
)(
a
);
return
(
paddle
::
platform
::
isnan
)(
a
);
}
}
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isinf
)(
HOSTDEVICE
inline
bool
(
isinf
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isinf
)(
a
);
return
(
paddle
::
platform
::
isinf
)(
a
);
}
}
template
<
>
template
<
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
bool
(
isfinite
)(
HOSTDEVICE
inline
bool
(
isfinite
)(
const
float16
&
a
)
{
const
paddle
::
platform
::
float16
&
a
)
{
return
(
paddle
::
platform
::
isfinite
)(
a
);
return
(
paddle
::
platform
::
isfinite
)(
a
);
}
}
template
<
>
HOSTDEVICE
inline
float16
exp
(
const
float16
&
a
)
{
return
float16
(
::
expf
(
static_cast
<
float
>
(
a
)));
}
}
// namespace numext
}
// namespace numext
}
// namespace Eigen
}
// namespace Eigen
paddle/fluid/platform/gpu_info.cc
浏览文件 @
7b40f7ce
...
@@ -14,8 +14,9 @@ limitations under the License. */
...
@@ -14,8 +14,9 @@ limitations under the License. */
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include
"gflags/gflags.h"
#include
<algorithm>
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
0.92
,
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
0.92
,
...
@@ -77,8 +78,8 @@ void SetDeviceId(int id) {
...
@@ -77,8 +78,8 @@ void SetDeviceId(int id) {
"cudaSetDevice failed in paddle::platform::SetDeviceId"
);
"cudaSetDevice failed in paddle::platform::SetDeviceId"
);
}
}
void
GpuMemoryUsage
(
size_t
&
available
,
size_t
&
total
)
{
void
GpuMemoryUsage
(
size_t
*
available
,
size_t
*
total
)
{
PADDLE_ENFORCE
(
cudaMemGetInfo
(
&
available
,
&
total
),
PADDLE_ENFORCE
(
cudaMemGetInfo
(
available
,
total
),
"cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"
);
"cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"
);
}
}
...
@@ -86,7 +87,7 @@ size_t GpuMaxAllocSize() {
...
@@ -86,7 +87,7 @@ size_t GpuMaxAllocSize() {
size_t
total
=
0
;
size_t
total
=
0
;
size_t
available
=
0
;
size_t
available
=
0
;
GpuMemoryUsage
(
available
,
total
);
GpuMemoryUsage
(
&
available
,
&
total
);
// Reserve the rest for page tables, etc.
// Reserve the rest for page tables, etc.
return
static_cast
<
size_t
>
(
total
*
FLAGS_fraction_of_gpu_memory_to_use
);
return
static_cast
<
size_t
>
(
total
*
FLAGS_fraction_of_gpu_memory_to_use
);
...
@@ -101,7 +102,7 @@ size_t GpuMaxChunkSize() {
...
@@ -101,7 +102,7 @@ size_t GpuMaxChunkSize() {
size_t
total
=
0
;
size_t
total
=
0
;
size_t
available
=
0
;
size_t
available
=
0
;
GpuMemoryUsage
(
available
,
total
);
GpuMemoryUsage
(
&
available
,
&
total
);
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
<<
total
/
1024
/
1024
<<
"M"
;
<<
total
/
1024
/
1024
<<
"M"
;
size_t
reserving
=
static_cast
<
size_t
>
(
0.05
*
total
);
size_t
reserving
=
static_cast
<
size_t
>
(
0.05
*
total
);
...
...
paddle/fluid/platform/gpu_info.h
浏览文件 @
7b40f7ce
...
@@ -23,10 +23,6 @@ limitations under the License. */
...
@@ -23,10 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
//! Environment variable: fraction of GPU memory to use on each device.
const
std
::
string
kEnvFractionGpuMemoryToUse
=
"PADDLE_FRACTION_GPU_MEMORY_TO_USE"
;
//! Get the total number of GPU devices in system.
//! Get the total number of GPU devices in system.
int
GetCUDADeviceCount
();
int
GetCUDADeviceCount
();
...
@@ -46,7 +42,7 @@ int GetCurrentDeviceId();
...
@@ -46,7 +42,7 @@ int GetCurrentDeviceId();
void
SetDeviceId
(
int
device_id
);
void
SetDeviceId
(
int
device_id
);
//! Get the memory usage of current GPU device.
//! Get the memory usage of current GPU device.
void
GpuMemoryUsage
(
size_t
&
available
,
size_t
&
total
);
void
GpuMemoryUsage
(
size_t
*
available
,
size_t
*
total
);
//! Get the maximum allocation size of current GPU device.
//! Get the maximum allocation size of current GPU device.
size_t
GpuMaxAllocSize
();
size_t
GpuMaxAllocSize
();
...
...
paddle/fluid/platform/place.h
浏览文件 @
7b40f7ce
...
@@ -11,10 +11,11 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,10 +11,11 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <iostream>
#include <iostream>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/platform/variant.h"
...
...
paddle/fluid/pybind/.clang-format
已删除
100644 → 0
浏览文件 @
3a825782
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -15,4 +15,6 @@ if(WITH_PYTHON)
...
@@ -15,4 +15,6 @@ if(WITH_PYTHON)
target_link_libraries
(
paddle_pybind rt
)
target_link_libraries
(
paddle_pybind rt
)
endif
(
NOT APPLE AND NOT ANDROID
)
endif
(
NOT APPLE AND NOT ANDROID
)
endif
(
WITH_AMD_GPU
)
endif
(
WITH_AMD_GPU
)
cc_test
(
tensor_py_test SRCS tensor_py_test.cc DEPS python
)
endif
(
WITH_PYTHON
)
endif
(
WITH_PYTHON
)
paddle/fluid/pybind/const_value.cc
浏览文件 @
7b40f7ce
...
@@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "const_value.h"
#include "
paddle/fluid/pybind/
const_value.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
void
BindConstValue
(
pybind11
::
module
&
m
)
{
void
BindConstValue
(
pybind11
::
module
*
m
)
{
m
.
def
(
"kEmptyVarName"
,
[]
{
return
framework
::
kEmptyVarName
;
});
m
->
def
(
"kEmptyVarName"
,
[]
{
return
framework
::
kEmptyVarName
;
});
m
.
def
(
"kTempVarName"
,
[]
{
return
framework
::
kTempVarName
;
});
m
->
def
(
"kTempVarName"
,
[]
{
return
framework
::
kTempVarName
;
});
m
.
def
(
"kGradVarSuffix"
,
[]
{
return
framework
::
kGradVarSuffix
;
});
m
->
def
(
"kGradVarSuffix"
,
[]
{
return
framework
::
kGradVarSuffix
;
});
m
.
def
(
"kZeroVarSuffix"
,
[]
{
return
framework
::
kZeroVarSuffix
;
});
m
->
def
(
"kZeroVarSuffix"
,
[]
{
return
framework
::
kZeroVarSuffix
;
});
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/const_value.h
浏览文件 @
7b40f7ce
...
@@ -11,16 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,16 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <Python.h>
#include <Python.h>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
namespace
py
=
pybind11
;
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
extern
void
BindConstValue
(
pybind11
::
module
&
m
);
void
BindConstValue
(
pybind11
::
module
*
m
);
}
// namespace pybind
}
// namespace pybind
}
// namespace paddle
}
// namespace paddle
paddle/fluid/pybind/exception.cc
浏览文件 @
7b40f7ce
...
@@ -17,8 +17,8 @@ limitations under the License. */
...
@@ -17,8 +17,8 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
void
BindException
(
pybind11
::
module
&
m
)
{
void
BindException
(
pybind11
::
module
*
m
)
{
static
pybind11
::
exception
<
platform
::
EnforceNotMet
>
exc
(
m
,
"EnforceNotMet"
);
static
pybind11
::
exception
<
platform
::
EnforceNotMet
>
exc
(
*
m
,
"EnforceNotMet"
);
pybind11
::
register_exception_translator
([](
std
::
exception_ptr
p
)
{
pybind11
::
register_exception_translator
([](
std
::
exception_ptr
p
)
{
try
{
try
{
if
(
p
)
std
::
rethrow_exception
(
p
);
if
(
p
)
std
::
rethrow_exception
(
p
);
...
@@ -27,7 +27,8 @@ void BindException(pybind11::module& m) {
...
@@ -27,7 +27,8 @@ void BindException(pybind11::module& m) {
}
}
});
});
m
.
def
(
"__unittest_throw_exception__"
,
[]
{
PADDLE_THROW
(
"test exception"
);
});
m
->
def
(
"__unittest_throw_exception__"
,
[]
{
PADDLE_THROW
(
"test exception"
);
});
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/exception.h
浏览文件 @
7b40f7ce
...
@@ -11,14 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,14 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <Python.h>
#include <Python.h>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
extern
void
BindException
(
pybind11
::
module
&
m
);
void
BindException
(
pybind11
::
module
*
m
);
}
// namespace pybind
}
// namespace pybind
}
// namespace paddle
}
// namespace paddle
paddle/fluid/pybind/protobuf.cc
浏览文件 @
7b40f7ce
...
@@ -11,10 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,10 +11,13 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/pybind/protobuf.h"
#include <deque>
#include <deque>
#include <iostream>
#include <iostream>
#include <string>
#include <tuple>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_desc.h"
...
@@ -95,10 +98,11 @@ struct type_caster<boost::variant<Args...>>
...
@@ -95,10 +98,11 @@ struct type_caster<boost::variant<Args...>>
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
using
namespace
paddle
::
framework
;
// NOLINT
namespace
pd
=
paddle
::
framework
;
template
<
typename
T
>
template
<
typename
T
>
static
py
::
bytes
SerializeMessage
(
T
&
self
)
{
static
pybind11
::
bytes
SerializeMessage
(
T
&
self
)
{
// NOLINT due to pybind11 convention.
// Check IsInitialized in Python
// Check IsInitialized in Python
std
::
string
retv
;
std
::
string
retv
;
PADDLE_ENFORCE
(
self
.
Proto
()
->
SerializePartialToString
(
&
retv
),
PADDLE_ENFORCE
(
self
.
Proto
()
->
SerializePartialToString
(
&
retv
),
...
@@ -107,24 +111,24 @@ static py::bytes SerializeMessage(T &self) {
...
@@ -107,24 +111,24 @@ static py::bytes SerializeMessage(T &self) {
}
}
// Bind Methods
// Bind Methods
void
BindProgramDesc
(
py
::
module
&
m
)
{
void
BindProgramDesc
(
py
bind11
::
module
*
m
)
{
py
::
class_
<
ProgramDesc
>
(
m
,
"ProgramDesc"
,
""
)
py
bind11
::
class_
<
pd
::
ProgramDesc
>
(
*
m
,
"ProgramDesc"
,
""
)
.
def
(
py
::
init
<>
())
.
def
(
py
bind11
::
init
<>
())
.
def
(
"__init__"
,
.
def
(
"__init__"
,
[](
ProgramDesc
&
self
,
const
ProgramDesc
&
other
)
{
[](
pd
::
ProgramDesc
&
self
,
const
pd
::
ProgramDesc
&
other
)
{
new
(
&
self
)
ProgramDesc
(
other
);
new
(
&
self
)
pd
::
ProgramDesc
(
other
);
})
})
.
def
(
"__init__"
,
.
def
(
"__init__"
,
[](
ProgramDesc
&
self
,
const
py
::
bytes
&
binary_str
)
{
[](
pd
::
ProgramDesc
&
self
,
const
pybind11
::
bytes
&
binary_str
)
{
std
::
string
str
(
binary_str
);
std
::
string
str
(
binary_str
);
new
(
&
self
)
ProgramDesc
(
str
);
new
(
&
self
)
pd
::
ProgramDesc
(
str
);
})
})
.
def
(
"append_block"
,
&
ProgramDesc
::
AppendBlock
,
.
def
(
"append_block"
,
&
pd
::
ProgramDesc
::
AppendBlock
,
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"append_backward"
,
.
def
(
"append_backward"
,
[](
ProgramDesc
&
program_desc
,
const
VarDesc
&
target
,
[](
pd
::
ProgramDesc
&
program_desc
,
const
pd
::
VarDesc
&
target
,
const
std
::
unordered_set
<
std
::
string
>
&
no_grad_vars
)
{
const
std
::
unordered_set
<
std
::
string
>
&
no_grad_vars
)
{
ParamGradInfoMap
param_grad_map
=
pd
::
ParamGradInfoMap
param_grad_map
=
AppendBackward
(
program_desc
,
target
,
no_grad_vars
);
AppendBackward
(
program_desc
,
target
,
no_grad_vars
);
std
::
unordered_map
<
std
::
unordered_map
<
std
::
string
,
std
::
tuple
<
std
::
string
/* grad_var_name */
,
std
::
string
,
std
::
tuple
<
std
::
string
/* grad_var_name */
,
...
@@ -138,172 +142,184 @@ void BindProgramDesc(py::module &m) {
...
@@ -138,172 +142,184 @@ void BindProgramDesc(py::module &m) {
}
}
return
retv
;
return
retv
;
})
})
.
def
(
"block"
,
&
ProgramDesc
::
MutableBlock
,
.
def
(
"block"
,
&
pd
::
ProgramDesc
::
MutableBlock
,
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"num_blocks"
,
&
ProgramDesc
::
Size
)
.
def
(
"num_blocks"
,
&
pd
::
ProgramDesc
::
Size
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
ProgramDesc
>
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
ProgramDesc
>
)
.
def
(
"parse_from_string"
,
.
def
(
"parse_from_string"
,
[](
ProgramDesc
&
program_desc
,
const
std
::
string
&
data
)
{
[](
pd
::
ProgramDesc
&
program_desc
,
const
std
::
string
&
data
)
{
proto
::
ProgramDesc
*
desc
=
program_desc
.
Proto
();
p
d
::
p
roto
::
ProgramDesc
*
desc
=
program_desc
.
Proto
();
PADDLE_ENFORCE
(
desc
->
ParseFromString
(
data
),
PADDLE_ENFORCE
(
desc
->
ParseFromString
(
data
),
"Fail to parse ProgramDesc from string. This could "
"Fail to parse ProgramDesc from string. This could "
"be a bug of Paddle."
);
"be a bug of Paddle."
);
});
});
}
}
void
BindBlockDesc
(
py
::
module
&
m
)
{
void
BindBlockDesc
(
py
bind11
::
module
*
m
)
{
py
::
class_
<
BlockDesc
>
(
m
,
"BlockDesc"
,
""
)
py
bind11
::
class_
<
pd
::
BlockDesc
>
(
*
m
,
"BlockDesc"
,
""
)
.
def_property_readonly
(
"id"
,
&
BlockDesc
::
ID
)
.
def_property_readonly
(
"id"
,
&
pd
::
BlockDesc
::
ID
)
.
def_property_readonly
(
"parent"
,
&
BlockDesc
::
Parent
)
.
def_property_readonly
(
"parent"
,
&
pd
::
BlockDesc
::
Parent
)
.
def
(
"get_forward_block_idx"
,
&
BlockDesc
::
ForwardBlockID
)
.
def
(
"get_forward_block_idx"
,
&
pd
::
BlockDesc
::
ForwardBlockID
)
.
def
(
"set_forward_block_idx"
,
&
BlockDesc
::
SetForwardBlockID
)
.
def
(
"set_forward_block_idx"
,
&
pd
::
BlockDesc
::
SetForwardBlockID
)
.
def
(
"append_op"
,
&
BlockDesc
::
AppendOp
,
.
def
(
"append_op"
,
&
pd
::
BlockDesc
::
AppendOp
,
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"prepend_op"
,
&
BlockDesc
::
PrependOp
,
.
def
(
"prepend_op"
,
&
pd
::
BlockDesc
::
PrependOp
,
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"insert_op"
,
&
BlockDesc
::
InsertOp
,
.
def
(
"insert_op"
,
&
pd
::
BlockDesc
::
InsertOp
,
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"remove_op"
,
&
BlockDesc
::
RemoveOp
)
.
def
(
"remove_op"
,
&
pd
::
BlockDesc
::
RemoveOp
)
.
def
(
"var"
,
.
def
(
"var"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
return
self
.
Var
(
name
);
return
self
.
Var
(
name
);
},
},
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"has_var"
,
.
def
(
"has_var"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
return
self
.
HasVar
(
name
);
return
self
.
HasVar
(
name
);
},
},
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"rename_var"
,
.
def
(
"rename_var"
,
[](
BlockDesc
&
self
,
const
py
::
bytes
&
byte_name
,
[](
pd
::
BlockDesc
&
self
,
const
pybind11
::
bytes
&
byte_name
,
const
py
::
bytes
&
byte_name_new
)
{
const
py
bind11
::
bytes
&
byte_name_new
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
std
::
string
new_name
=
byte_name_new
;
std
::
string
new_name
=
byte_name_new
;
self
.
RenameVar
(
name
,
new_name
);
self
.
RenameVar
(
name
,
new_name
);
})
})
.
def
(
"has_var_recursive"
,
.
def
(
"has_var_recursive"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
return
self
.
HasVarRecursive
(
name
);
return
self
.
HasVarRecursive
(
name
);
})
})
.
def
(
"find_var"
,
.
def
(
"find_var"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
return
self
.
FindVar
(
name
);
return
self
.
FindVar
(
name
);
},
},
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"find_var_recursive"
,
.
def
(
"find_var_recursive"
,
[](
BlockDesc
&
self
,
py
::
bytes
byte_name
)
{
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
std
::
string
name
=
byte_name
;
std
::
string
name
=
byte_name
;
return
self
.
FindVarRecursive
(
name
);
return
self
.
FindVarRecursive
(
name
);
},
},
py
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"all_vars"
,
&
BlockDesc
::
AllVars
,
py
::
return_value_policy
::
reference
)
.
def
(
"remove_var"
,
.
def
(
"op_size"
,
&
BlockDesc
::
OpSize
)
[](
pd
::
BlockDesc
&
self
,
pybind11
::
bytes
byte_name
)
{
.
def
(
"op"
,
&
BlockDesc
::
Op
,
py
::
return_value_policy
::
reference
)
std
::
string
name
=
byte_name
;
.
def
(
"serialize_to_string"
,
SerializeMessage
<
BlockDesc
>
);
return
self
.
RemoveVar
(
name
);
},
pybind11
::
return_value_policy
::
reference
)
.
def
(
"all_vars"
,
&
pd
::
BlockDesc
::
AllVars
,
pybind11
::
return_value_policy
::
reference
)
.
def
(
"op_size"
,
&
pd
::
BlockDesc
::
OpSize
)
.
def
(
"op"
,
&
pd
::
BlockDesc
::
Op
,
pybind11
::
return_value_policy
::
reference
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
BlockDesc
>
);
}
}
void
BindVarDsec
(
py
::
module
&
m
)
{
void
BindVarDsec
(
py
bind11
::
module
*
m
)
{
py
::
class_
<
VarDesc
>
var_desc
(
m
,
"VarDesc"
,
""
);
py
bind11
::
class_
<
pd
::
VarDesc
>
var_desc
(
*
m
,
"VarDesc"
,
""
);
var_desc
var_desc
.
def
(
"name"
,
.
def
(
"name"
,
[](
VarDesc
&
self
)
{
[](
pd
::
VarDesc
&
self
)
{
py
::
bytes
name
=
self
.
Name
();
py
bind11
::
bytes
name
=
self
.
Name
();
return
name
;
return
name
;
},
},
py
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"set_name"
,
&
VarDesc
::
SetName
)
.
def
(
"set_name"
,
&
pd
::
VarDesc
::
SetName
)
.
def
(
"set_shape"
,
&
VarDesc
::
SetShape
)
.
def
(
"set_shape"
,
&
pd
::
VarDesc
::
SetShape
)
.
def
(
"set_shapes"
,
&
VarDesc
::
SetShapes
)
.
def
(
"set_shapes"
,
&
pd
::
VarDesc
::
SetShapes
)
.
def
(
"set_dtype"
,
&
VarDesc
::
SetDataType
)
.
def
(
"set_dtype"
,
&
pd
::
VarDesc
::
SetDataType
)
.
def
(
"set_dtypes"
,
&
VarDesc
::
SetDataTypes
)
.
def
(
"set_dtypes"
,
&
pd
::
VarDesc
::
SetDataTypes
)
.
def
(
"set_capacity"
,
&
VarDesc
::
SetCapacity
)
.
def
(
"set_capacity"
,
&
pd
::
VarDesc
::
SetCapacity
)
.
def
(
"shape"
,
&
VarDesc
::
GetShape
,
py
::
return_value_policy
::
reference
)
.
def
(
"shape"
,
&
pd
::
VarDesc
::
GetShape
,
.
def
(
"shapes"
,
&
VarDesc
::
GetShapes
,
py
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"dtype"
,
&
VarDesc
::
GetDataType
,
py
::
return_value_policy
::
reference
)
.
def
(
"shapes"
,
&
pd
::
VarDesc
::
GetShapes
,
.
def
(
"dtypes"
,
&
VarDesc
::
GetDataTypes
,
py
::
return_value_policy
::
reference
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"lod_level"
,
&
VarDesc
::
GetLoDLevel
)
.
def
(
"dtype"
,
&
pd
::
VarDesc
::
GetDataType
,
.
def
(
"lod_levels"
,
&
VarDesc
::
GetLoDLevels
,
pybind11
::
return_value_policy
::
reference
)
py
::
return_value_policy
::
reference
)
.
def
(
"dtypes"
,
&
pd
::
VarDesc
::
GetDataTypes
,
.
def
(
"set_lod_level"
,
&
VarDesc
::
SetLoDLevel
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"set_lod_levels"
,
&
VarDesc
::
SetLoDLevels
)
.
def
(
"lod_level"
,
&
pd
::
VarDesc
::
GetLoDLevel
)
.
def
(
"type"
,
&
VarDesc
::
GetType
)
.
def
(
"lod_levels"
,
&
pd
::
VarDesc
::
GetLoDLevels
,
.
def
(
"set_type"
,
&
VarDesc
::
SetType
)
pybind11
::
return_value_policy
::
reference
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
VarDesc
>
)
.
def
(
"set_lod_level"
,
&
pd
::
VarDesc
::
SetLoDLevel
)
.
def
(
"persistable"
,
&
VarDesc
::
Persistable
)
.
def
(
"set_lod_levels"
,
&
pd
::
VarDesc
::
SetLoDLevels
)
.
def
(
"set_persistable"
,
&
VarDesc
::
SetPersistable
);
.
def
(
"type"
,
&
pd
::
VarDesc
::
GetType
)
.
def
(
"set_type"
,
&
pd
::
VarDesc
::
SetType
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
VarDesc
>
)
.
def
(
"persistable"
,
&
pd
::
VarDesc
::
Persistable
)
.
def
(
"set_persistable"
,
&
pd
::
VarDesc
::
SetPersistable
);
py
::
enum_
<
proto
::
VarType
::
Type
>
(
var_desc
,
"VarType"
,
""
)
py
bind11
::
enum_
<
pd
::
proto
::
VarType
::
Type
>
(
var_desc
,
"VarType"
,
""
)
.
value
(
"BOOL"
,
proto
::
VarType
::
BOOL
)
.
value
(
"BOOL"
,
p
d
::
p
roto
::
VarType
::
BOOL
)
.
value
(
"INT16"
,
proto
::
VarType
::
INT16
)
.
value
(
"INT16"
,
p
d
::
p
roto
::
VarType
::
INT16
)
.
value
(
"INT32"
,
proto
::
VarType
::
INT32
)
.
value
(
"INT32"
,
p
d
::
p
roto
::
VarType
::
INT32
)
.
value
(
"INT64"
,
proto
::
VarType
::
INT64
)
.
value
(
"INT64"
,
p
d
::
p
roto
::
VarType
::
INT64
)
.
value
(
"FP16"
,
proto
::
VarType
::
FP16
)
.
value
(
"FP16"
,
p
d
::
p
roto
::
VarType
::
FP16
)
.
value
(
"FP32"
,
proto
::
VarType
::
FP32
)
.
value
(
"FP32"
,
p
d
::
p
roto
::
VarType
::
FP32
)
.
value
(
"FP64"
,
proto
::
VarType
::
FP64
)
.
value
(
"FP64"
,
p
d
::
p
roto
::
VarType
::
FP64
)
.
value
(
"LOD_TENSOR"
,
proto
::
VarType
::
LOD_TENSOR
)
.
value
(
"LOD_TENSOR"
,
p
d
::
p
roto
::
VarType
::
LOD_TENSOR
)
.
value
(
"SELECTED_ROWS"
,
proto
::
VarType
::
SELECTED_ROWS
)
.
value
(
"SELECTED_ROWS"
,
p
d
::
p
roto
::
VarType
::
SELECTED_ROWS
)
.
value
(
"FEED_MINIBATCH"
,
proto
::
VarType
::
FEED_MINIBATCH
)
.
value
(
"FEED_MINIBATCH"
,
p
d
::
p
roto
::
VarType
::
FEED_MINIBATCH
)
.
value
(
"FETCH_LIST"
,
proto
::
VarType
::
FETCH_LIST
)
.
value
(
"FETCH_LIST"
,
p
d
::
p
roto
::
VarType
::
FETCH_LIST
)
.
value
(
"STEP_SCOPES"
,
proto
::
VarType
::
STEP_SCOPES
)
.
value
(
"STEP_SCOPES"
,
p
d
::
p
roto
::
VarType
::
STEP_SCOPES
)
.
value
(
"LOD_RANK_TABLE"
,
proto
::
VarType
::
LOD_RANK_TABLE
)
.
value
(
"LOD_RANK_TABLE"
,
p
d
::
p
roto
::
VarType
::
LOD_RANK_TABLE
)
.
value
(
"LOD_TENSOR_ARRAY"
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
.
value
(
"LOD_TENSOR_ARRAY"
,
p
d
::
p
roto
::
VarType
::
LOD_TENSOR_ARRAY
)
.
value
(
"CHANNEL"
,
proto
::
VarType
::
CHANNEL
)
.
value
(
"CHANNEL"
,
p
d
::
p
roto
::
VarType
::
CHANNEL
)
.
value
(
"PLACE_LIST"
,
proto
::
VarType
::
PLACE_LIST
)
.
value
(
"PLACE_LIST"
,
p
d
::
p
roto
::
VarType
::
PLACE_LIST
)
.
value
(
"READER"
,
proto
::
VarType
::
READER
)
.
value
(
"READER"
,
p
d
::
p
roto
::
VarType
::
READER
)
.
value
(
"RAW"
,
proto
::
VarType
::
RAW
);
.
value
(
"RAW"
,
p
d
::
p
roto
::
VarType
::
RAW
);
}
}
void
BindOpDesc
(
py
::
module
&
m
)
{
void
BindOpDesc
(
py
bind11
::
module
*
m
)
{
py
::
enum_
<
proto
::
AttrType
>
(
m
,
"AttrType"
,
""
)
py
bind11
::
enum_
<
pd
::
proto
::
AttrType
>
(
*
m
,
"AttrType"
,
""
)
.
value
(
"INT"
,
proto
::
AttrType
::
INT
)
.
value
(
"INT"
,
p
d
::
p
roto
::
AttrType
::
INT
)
.
value
(
"INTS"
,
proto
::
AttrType
::
INTS
)
.
value
(
"INTS"
,
p
d
::
p
roto
::
AttrType
::
INTS
)
.
value
(
"FLOAT"
,
proto
::
AttrType
::
FLOAT
)
.
value
(
"FLOAT"
,
p
d
::
p
roto
::
AttrType
::
FLOAT
)
.
value
(
"FLOATS"
,
proto
::
AttrType
::
FLOATS
)
.
value
(
"FLOATS"
,
p
d
::
p
roto
::
AttrType
::
FLOATS
)
.
value
(
"STRING"
,
proto
::
AttrType
::
STRING
)
.
value
(
"STRING"
,
p
d
::
p
roto
::
AttrType
::
STRING
)
.
value
(
"STRINGS"
,
proto
::
AttrType
::
STRINGS
)
.
value
(
"STRINGS"
,
p
d
::
p
roto
::
AttrType
::
STRINGS
)
.
value
(
"BOOL"
,
proto
::
AttrType
::
BOOLEAN
)
.
value
(
"BOOL"
,
p
d
::
p
roto
::
AttrType
::
BOOLEAN
)
.
value
(
"BOOLS"
,
proto
::
AttrType
::
BOOLEANS
)
.
value
(
"BOOLS"
,
p
d
::
p
roto
::
AttrType
::
BOOLEANS
)
.
value
(
"BLOCK"
,
proto
::
AttrType
::
BLOCK
);
.
value
(
"BLOCK"
,
p
d
::
p
roto
::
AttrType
::
BLOCK
);
py
::
class_
<
OpDesc
>
op_desc
(
m
,
"OpDesc"
,
""
);
py
bind11
::
class_
<
pd
::
OpDesc
>
op_desc
(
*
m
,
"OpDesc"
,
""
);
op_desc
op_desc
.
def
(
"__init__"
,
[](
OpDesc
&
self
)
{
new
(
&
self
)
OpDesc
();
},
.
def
(
"__init__"
,
[](
pd
::
OpDesc
&
self
)
{
new
(
&
self
)
pd
::
OpDesc
();
},
py
::
return_value_policy
::
reference
)
py
bind11
::
return_value_policy
::
reference
)
.
def
(
"copy_from"
,
&
OpDesc
::
CopyFrom
)
.
def
(
"copy_from"
,
&
pd
::
OpDesc
::
CopyFrom
)
.
def
(
"type"
,
&
OpDesc
::
Type
)
.
def
(
"type"
,
&
pd
::
OpDesc
::
Type
)
.
def
(
"set_type"
,
&
OpDesc
::
SetType
)
.
def
(
"set_type"
,
&
pd
::
OpDesc
::
SetType
)
.
def
(
"input"
,
&
OpDesc
::
Input
)
.
def
(
"input"
,
&
pd
::
OpDesc
::
Input
)
.
def
(
"input_names"
,
&
OpDesc
::
InputNames
)
.
def
(
"input_names"
,
&
pd
::
OpDesc
::
InputNames
)
.
def
(
"output"
,
&
OpDesc
::
Output
)
.
def
(
"output"
,
&
pd
::
OpDesc
::
Output
)
.
def
(
"output_names"
,
&
OpDesc
::
OutputNames
)
.
def
(
"output_names"
,
&
pd
::
OpDesc
::
OutputNames
)
.
def
(
"set_input"
,
&
OpDesc
::
SetInput
)
.
def
(
"set_input"
,
&
pd
::
OpDesc
::
SetInput
)
.
def
(
"set_output"
,
&
OpDesc
::
SetOutput
)
.
def
(
"set_output"
,
&
pd
::
OpDesc
::
SetOutput
)
.
def
(
"input_arg_names"
,
&
OpDesc
::
InputArgumentNames
)
.
def
(
"input_arg_names"
,
&
pd
::
OpDesc
::
InputArgumentNames
)
.
def
(
"output_arg_names"
,
&
OpDesc
::
OutputArgumentNames
)
.
def
(
"output_arg_names"
,
&
pd
::
OpDesc
::
OutputArgumentNames
)
.
def
(
"rename_input"
,
&
OpDesc
::
RenameInput
)
.
def
(
"rename_input"
,
&
pd
::
OpDesc
::
RenameInput
)
.
def
(
"rename_output"
,
&
OpDesc
::
RenameOutput
)
.
def
(
"rename_output"
,
&
pd
::
OpDesc
::
RenameOutput
)
.
def
(
"has_attr"
,
&
OpDesc
::
HasAttr
)
.
def
(
"has_attr"
,
&
pd
::
OpDesc
::
HasAttr
)
.
def
(
"attr_type"
,
&
OpDesc
::
GetAttrType
)
.
def
(
"attr_type"
,
&
pd
::
OpDesc
::
GetAttrType
)
.
def
(
"attr_names"
,
&
OpDesc
::
AttrNames
)
.
def
(
"attr_names"
,
&
pd
::
OpDesc
::
AttrNames
)
.
def
(
"set_attr"
,
&
OpDesc
::
SetAttr
)
.
def
(
"set_attr"
,
&
pd
::
OpDesc
::
SetAttr
)
.
def
(
"attr"
,
&
OpDesc
::
GetAttr
)
.
def
(
"attr"
,
&
pd
::
OpDesc
::
GetAttr
)
.
def
(
"set_block_attr"
,
&
OpDesc
::
SetBlockAttr
)
.
def
(
"set_block_attr"
,
&
pd
::
OpDesc
::
SetBlockAttr
)
.
def
(
"set_serialized_attr"
,
.
def
(
"set_serialized_attr"
,
[](
OpDesc
&
self
,
const
std
::
string
&
name
,
[](
pd
::
OpDesc
&
self
,
const
std
::
string
&
name
,
const
py
::
bytes
&
seriralized
)
{
const
py
bind11
::
bytes
&
seriralized
)
{
std
::
string
ser
(
seriralized
);
std
::
string
ser
(
seriralized
);
self
.
SetAttr
(
name
,
ser
);
self
.
SetAttr
(
name
,
ser
);
})
})
.
def
(
"block_attr"
,
&
OpDesc
::
GetBlockAttr
)
.
def
(
"block_attr"
,
&
pd
::
OpDesc
::
GetBlockAttr
)
.
def
(
"check_attrs"
,
&
OpDesc
::
CheckAttrs
)
.
def
(
"check_attrs"
,
&
pd
::
OpDesc
::
CheckAttrs
)
.
def
(
"infer_shape"
,
&
OpDesc
::
InferShape
)
.
def
(
"infer_shape"
,
&
pd
::
OpDesc
::
InferShape
)
.
def
(
"infer_var_type"
,
&
OpDesc
::
InferVarType
)
.
def
(
"infer_var_type"
,
&
pd
::
OpDesc
::
InferVarType
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
OpDesc
>
)
.
def
(
"serialize_to_string"
,
SerializeMessage
<
pd
::
OpDesc
>
)
.
def
(
"block"
,
&
OpDesc
::
Block
,
py
::
return_value_policy
::
reference
);
.
def
(
"block"
,
&
pd
::
OpDesc
::
Block
,
pybind11
::
return_value_policy
::
reference
);
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/protobuf.h
浏览文件 @
7b40f7ce
...
@@ -11,25 +11,25 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,25 +11,25 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <Python.h>
#include <Python.h>
#include <fstream>
#include <fstream>
#include <vector>
#include <vector>
#include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/platform/variant.h"
#include "pybind11/numpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "pybind11/stl.h"
namespace
py
=
pybind11
;
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
void
BindProgramDesc
(
py
::
module
&
m
);
void
BindProgramDesc
(
pybind11
::
module
*
m
);
void
BindBlockDesc
(
py
::
module
&
m
);
void
BindBlockDesc
(
pybind11
::
module
*
m
);
void
BindVarDsec
(
py
::
module
&
m
);
void
BindVarDsec
(
pybind11
::
module
*
m
);
void
BindOpDesc
(
py
::
module
&
m
);
void
BindOpDesc
(
pybind11
::
module
*
m
);
}
// namespace pybind
}
// namespace pybind
}
// namespace paddle
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
7b40f7ce
...
@@ -11,11 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -11,11 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <Python.h>
#include <algorithm>
#include <map>
#include <mutex> // NOLINT // for call_once
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/pybind/protobuf.h"
#include <mutex> // for call_once
#include <unordered_map>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
...
@@ -32,7 +38,6 @@ limitations under the License. */
...
@@ -32,7 +38,6 @@ limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/const_value.h"
...
@@ -69,7 +74,7 @@ PYBIND11_PLUGIN(core) {
...
@@ -69,7 +74,7 @@ PYBIND11_PLUGIN(core) {
// not cause namespace pollution.
// not cause namespace pollution.
using
namespace
paddle
::
framework
;
// NOLINT
using
namespace
paddle
::
framework
;
// NOLINT
BindException
(
m
);
BindException
(
&
m
);
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
.
def_buffer
(
.
def_buffer
(
...
@@ -100,6 +105,14 @@ PYBIND11_PLUGIN(core) {
...
@@ -100,6 +105,14 @@ PYBIND11_PLUGIN(core) {
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPlace
&
place
)
{
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
place
);
self
.
mutable_data
<
int
>
(
place
);
})
})
.
def
(
"alloc_int"
,
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
place
);
})
.
def
(
"alloc_float"
,
[](
Tensor
&
self
,
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCPUTensorSetFromArray
<
double
>
)
...
@@ -113,6 +126,12 @@ PYBIND11_PLUGIN(core) {
...
@@ -113,6 +126,12 @@ PYBIND11_PLUGIN(core) {
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
uint16_t
>
)
.
def
(
"set"
,
PyCUDATensorSetFromArray
<
uint16_t
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
double
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
int64_t
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
bool
>
)
.
def
(
"set"
,
PyCUDAPinnedTensorSetFromArray
<
uint16_t
>
)
#endif
#endif
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"shape"
,
[](
Tensor
&
self
)
{
return
vectorize
(
self
.
dims
());
})
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
.
def
(
"set_float_element"
,
TensorSetElement
<
float
>
)
...
@@ -317,7 +336,17 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -317,7 +336,17 @@ All parameter, weight, gradient are variables in Paddle.
#else
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
#endif
#endif
});
})
.
def_static
(
"create"
,
[](
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"CUDAPinnedPlace is not supported in CPU device."
);
#else
return
new
paddle
::
platform
::
CUDAPinnedDeviceContext
(
place
);
#endif
});;
// clang-format on
// clang-format on
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
py
::
class_
<
platform
::
Communicator
>
(
m
,
"Communicator"
).
def
(
py
::
init
<>
());
py
::
class_
<
platform
::
Communicator
>
(
m
,
"Communicator"
).
def
(
py
::
init
<>
());
...
@@ -330,6 +359,10 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -330,6 +359,10 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<>
())
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
CPUPlace
&>
);
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
CPUPlace
&>
);
py
::
class_
<
paddle
::
platform
::
CUDAPinnedPlace
>
(
m
,
"CUDAPinnedPlace"
)
.
def
(
py
::
init
<>
())
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
CUDAPinnedPlace
&>
);
py
::
class_
<
platform
::
Place
>
(
m
,
"Place"
)
py
::
class_
<
platform
::
Place
>
(
m
,
"Place"
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<>
())
.
def
(
"set_place"
,
.
def
(
"set_place"
,
...
@@ -339,7 +372,11 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -339,7 +372,11 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"set_place"
,
.
def
(
"set_place"
,
[](
platform
::
Place
&
self
,
const
platform
::
CUDAPlace
&
gpu_place
)
{
[](
platform
::
Place
&
self
,
const
platform
::
CUDAPlace
&
gpu_place
)
{
self
=
gpu_place
;
self
=
gpu_place
;
});
})
.
def
(
"set_place"
,
[](
platform
::
Place
&
self
,
const
platform
::
CUDAPinnedPlace
&
cuda_pinned_place
)
{
self
=
cuda_pinned_place
;
});
py
::
class_
<
OperatorBase
>
(
m
,
"Operator"
)
py
::
class_
<
OperatorBase
>
(
m
,
"Operator"
)
.
def_static
(
"create"
,
.
def_static
(
"create"
,
...
@@ -363,6 +400,11 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -363,6 +400,11 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"run"
,
.
def
(
"run"
,
[](
OperatorBase
&
self
,
const
Scope
&
scope
,
[](
OperatorBase
&
self
,
const
Scope
&
scope
,
const
platform
::
CUDAPlace
&
place
)
{
self
.
Run
(
scope
,
place
);
})
const
platform
::
CUDAPlace
&
place
)
{
self
.
Run
(
scope
,
place
);
})
.
def
(
"run"
,
[](
OperatorBase
&
self
,
const
Scope
&
scope
,
const
platform
::
CUDAPinnedPlace
&
place
)
{
self
.
Run
(
scope
,
place
);
})
.
def
(
"type"
,
.
def
(
"type"
,
[](
const
OperatorBase
&
op
)
->
std
::
string
{
return
op
.
Type
();
})
[](
const
OperatorBase
&
op
)
->
std
::
string
{
return
op
.
Type
();
})
.
def
(
"outputs"
,
.
def
(
"outputs"
,
...
@@ -436,11 +478,11 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -436,11 +478,11 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
BindProgramDesc
(
m
);
BindProgramDesc
(
&
m
);
BindBlockDesc
(
m
);
BindBlockDesc
(
&
m
);
BindVarDsec
(
m
);
BindVarDsec
(
&
m
);
BindOpDesc
(
m
);
BindOpDesc
(
&
m
);
BindConstValue
(
m
);
BindConstValue
(
&
m
);
py
::
class_
<
framework
::
LoDRankTable
>
(
m
,
"LodRankTable"
)
py
::
class_
<
framework
::
LoDRankTable
>
(
m
,
"LodRankTable"
)
.
def
(
"items"
,
[](
framework
::
LoDRankTable
&
table
)
{
.
def
(
"items"
,
[](
framework
::
LoDRankTable
&
table
)
{
...
@@ -511,7 +553,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -511,7 +553,7 @@ All parameter, weight, gradient are variables in Paddle.
})
})
.
def
(
"run"
,
&
ParallelExecutor
::
Run
);
.
def
(
"run"
,
&
ParallelExecutor
::
Run
);
BindRecordIOWriter
(
m
);
BindRecordIOWriter
(
&
m
);
return
m
.
ptr
();
return
m
.
ptr
();
}
}
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/recordio.cc
浏览文件 @
7b40f7ce
...
@@ -13,13 +13,19 @@
...
@@ -13,13 +13,19 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/pybind/recordio.h"
#include "paddle/fluid/pybind/recordio.h"
#include <fstream>
#include <fstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/recordio/writer.h"
#include "paddle/fluid/recordio/writer.h"
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
namespace
{
class
RecordIOWriter
{
class
RecordIOWriter
{
public:
public:
RecordIOWriter
(
const
std
::
string
&
filename
,
recordio
::
Compressor
compressor
,
RecordIOWriter
(
const
std
::
string
&
filename
,
recordio
::
Compressor
compressor
,
...
@@ -49,8 +55,10 @@ class RecordIOWriter {
...
@@ -49,8 +55,10 @@ class RecordIOWriter {
recordio
::
Writer
writer_
;
recordio
::
Writer
writer_
;
};
};
void
BindRecordIOWriter
(
py
::
module
&
m
)
{
}
// namespace
py
::
class_
<
RecordIOWriter
>
writer
(
m
,
"RecordIOWriter"
,
""
);
void
BindRecordIOWriter
(
py
::
module
*
m
)
{
py
::
class_
<
RecordIOWriter
>
writer
(
*
m
,
"RecordIOWriter"
,
""
);
py
::
enum_
<
recordio
::
Compressor
>
(
writer
,
"Compressor"
,
""
)
py
::
enum_
<
recordio
::
Compressor
>
(
writer
,
"Compressor"
,
""
)
.
value
(
"Snappy"
,
recordio
::
Compressor
::
kSnappy
)
.
value
(
"Snappy"
,
recordio
::
Compressor
::
kSnappy
)
.
value
(
"NoCompress"
,
recordio
::
Compressor
::
kNoCompress
);
.
value
(
"NoCompress"
,
recordio
::
Compressor
::
kNoCompress
);
...
...
paddle/fluid/pybind/recordio.h
浏览文件 @
7b40f7ce
...
@@ -21,6 +21,7 @@ namespace py = pybind11;
...
@@ -21,6 +21,7 @@ namespace py = pybind11;
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
extern
void
BindRecordIOWriter
(
py
::
module
&
m
);
void
BindRecordIOWriter
(
py
::
module
*
m
);
}
// namespace pybind
}
// namespace pybind
}
// namespace paddle
}
// namespace paddle
paddle/fluid/pybind/tensor_py.h
浏览文件 @
7b40f7ce
...
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <Python.h>
#include <string>
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
...
@@ -21,12 +24,8 @@ limitations under the License. */
...
@@ -21,12 +24,8 @@ limitations under the License. */
#include "pybind11/numpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/pybind11.h"
namespace
py
=
pybind11
;
namespace
paddle
{
namespace
paddle
{
namespace
pybind
{
namespace
pybind
{
namespace
details
{
namespace
details
{
template
<
bool
less
,
size_t
I
,
typename
...
ARGS
>
template
<
bool
less
,
size_t
I
,
typename
...
ARGS
>
...
@@ -34,16 +33,16 @@ struct CastToPyBufferImpl;
...
@@ -34,16 +33,16 @@ struct CastToPyBufferImpl;
template
<
size_t
I
,
typename
...
ARGS
>
template
<
size_t
I
,
typename
...
ARGS
>
struct
CastToPyBufferImpl
<
false
,
I
,
ARGS
...
>
{
struct
CastToPyBufferImpl
<
false
,
I
,
ARGS
...
>
{
py
::
buffer_info
operator
()(
framework
::
Tensor
&
tensor
)
{
py
bind11
::
buffer_info
operator
()(
const
framework
::
Tensor
&
tensor
)
{
PADDLE_THROW
(
"This type of tensor cannot be expose to Python"
);
PADDLE_THROW
(
"This type of tensor cannot be expose to Python"
);
return
py
::
buffer_info
();
return
py
bind11
::
buffer_info
();
}
}
};
};
template
<
size_t
I
,
typename
...
ARGS
>
template
<
size_t
I
,
typename
...
ARGS
>
struct
CastToPyBufferImpl
<
true
,
I
,
ARGS
...
>
{
struct
CastToPyBufferImpl
<
true
,
I
,
ARGS
...
>
{
using
CUR_TYPE
=
typename
std
::
tuple_element
<
I
,
std
::
tuple
<
ARGS
...
>>::
type
;
using
CUR_TYPE
=
typename
std
::
tuple_element
<
I
,
std
::
tuple
<
ARGS
...
>>::
type
;
py
::
buffer_info
operator
()(
framework
::
Tensor
&
tensor
)
{
py
bind11
::
buffer_info
operator
()(
const
framework
::
Tensor
&
tensor
)
{
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
tensor
.
type
())
{
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
tensor
.
type
())
{
auto
dim_vec
=
framework
::
vectorize
(
tensor
.
dims
());
auto
dim_vec
=
framework
::
vectorize
(
tensor
.
dims
());
std
::
vector
<
size_t
>
dims_outside
;
std
::
vector
<
size_t
>
dims_outside
;
...
@@ -82,15 +81,15 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
...
@@ -82,15 +81,15 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
std
::
type_index
(
typeid
(
platform
::
float16
)))
{
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
return
py
bind11
::
buffer_info
(
"e"
,
/* np.dtype('e') == np.float16 */
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
"e"
,
/* np.dtype('e') == np.float16 */
dims_outside
,
strides
);
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
else
{
}
else
{
return
py
::
buffer_info
(
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
return
py
bind11
::
buffer_info
(
py
::
format_descriptor
<
CUR_TYPE
>::
format
(
),
dst_tensor
.
data
<
CUR_TYPE
>
(),
sizeof
(
CUR_TYPE
),
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()
),
pybind11
::
format_descriptor
<
CUR_TYPE
>::
format
(
),
dims_outside
,
strides
);
(
size_t
)
framework
::
arity
(
dst_tensor
.
dims
()),
dims_outside
,
strides
);
}
}
}
else
{
}
else
{
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
constexpr
bool
less
=
I
+
1
<
std
::
tuple_size
<
std
::
tuple
<
ARGS
...
>>::
value
;
...
@@ -101,7 +100,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
...
@@ -101,7 +100,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
}
// namespace details
}
// namespace details
inline
py
::
buffer_info
CastToPyBuffer
(
framework
::
Tensor
&
tensor
)
{
inline
py
bind11
::
buffer_info
CastToPyBuffer
(
const
framework
::
Tensor
&
tensor
)
{
auto
buffer_info
=
auto
buffer_info
=
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
,
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
,
platform
::
float16
>
()(
tensor
);
platform
::
float16
>
()(
tensor
);
...
@@ -109,7 +108,7 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
...
@@ -109,7 +108,7 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
}
}
template
<
typename
T
>
template
<
typename
T
>
T
TensorGetElement
(
framework
::
Tensor
&
self
,
size_t
offset
)
{
T
TensorGetElement
(
const
framework
::
Tensor
&
self
,
size_t
offset
)
{
if
(
platform
::
is_cpu_place
(
self
.
place
()))
{
if
(
platform
::
is_cpu_place
(
self
.
place
()))
{
return
self
.
data
<
T
>
()[
offset
];
return
self
.
data
<
T
>
()[
offset
];
}
else
{
}
else
{
...
@@ -121,64 +120,70 @@ T TensorGetElement(framework::Tensor &self, size_t offset) {
...
@@ -121,64 +120,70 @@ T TensorGetElement(framework::Tensor &self, size_t offset) {
// TODO(dzhwinter) : fix the redundent Tensor allocate and free
// TODO(dzhwinter) : fix the redundent Tensor allocate and free
template
<
typename
T
>
template
<
typename
T
>
void
TensorSetElement
(
framework
::
Tensor
&
self
,
size_t
offset
,
T
elem
)
{
void
TensorSetElement
(
framework
::
Tensor
*
self
,
size_t
offset
,
T
elem
)
{
if
(
platform
::
is_gpu_place
(
self
.
place
()))
{
if
(
platform
::
is_gpu_place
(
self
->
place
()))
{
std
::
shared_ptr
<
framework
::
Tensor
>
dst
(
new
framework
::
Tensor
);
std
::
shared_ptr
<
framework
::
Tensor
>
dst
(
new
framework
::
Tensor
);
framework
::
TensorCopy
(
self
,
platform
::
CPUPlace
(),
dst
.
get
());
framework
::
TensorCopy
(
*
self
,
platform
::
CPUPlace
(),
dst
.
get
());
dst
->
data
<
T
>
()[
offset
]
=
elem
;
dst
->
data
<
T
>
()[
offset
]
=
elem
;
framework
::
TensorCopy
(
*
dst
.
get
(),
self
.
place
(),
&
self
);
framework
::
TensorCopy
(
*
dst
.
get
(),
self
->
place
(),
self
);
}
else
if
(
platform
::
is_cpu_place
(
self
.
place
()))
{
}
else
if
(
platform
::
is_cpu_place
(
self
->
place
()))
{
self
.
data
<
T
>
()[
offset
]
=
elem
;
self
->
data
<
T
>
()[
offset
]
=
elem
;
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
PyCPUTensorSetFromArray
(
void
PyCPUTensorSetFromArray
(
framework
::
Tensor
&
self
,
framework
::
Tensor
*
self
,
py
::
array_t
<
T
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
pybind11
::
array_t
<
T
,
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
paddle
::
platform
::
CPUPlace
&
place
)
{
array
,
paddle
::
platform
::
CPUPlace
place
)
{
std
::
vector
<
int64_t
>
dims
;
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
(
int
)
array
.
shape
()[
i
]
);
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
])
);
}
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
T
>
(
place
);
auto
*
dst
=
self
->
mutable_data
<
T
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
}
template
<
>
template
<
>
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
void
PyCPUTensorSetFromArray
(
void
PyCPUTensorSetFromArray
(
framework
::
Tensor
&
self
,
framework
::
Tensor
*
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
pybind11
::
array_t
<
uint16_t
,
paddle
::
platform
::
CPUPlace
&
place
)
{
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CPUPlace
place
)
{
std
::
vector
<
int64_t
>
dims
;
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
(
int
)
array
.
shape
()[
i
]
);
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
])
);
}
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
auto
*
dst
=
self
->
mutable_data
<
platform
::
float16
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
());
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
());
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
template
<
typename
T
>
template
<
typename
T
>
void
PyCUDATensorSetFromArray
(
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
framework
::
Tensor
*
self
,
py
::
array_t
<
T
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
pybind11
::
array_t
<
T
,
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
paddle
::
platform
::
CUDAPlace
&
place
)
{
array
,
paddle
::
platform
::
CUDAPlace
place
)
{
std
::
vector
<
int64_t
>
dims
;
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
(
int
)
array
.
shape
()[
i
]
);
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
])
);
}
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
T
>
(
place
);
auto
*
dst
=
self
->
mutable_data
<
T
>
(
place
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
auto
dev_ctx
=
...
@@ -188,18 +193,22 @@ void PyCUDATensorSetFromArray(
...
@@ -188,18 +193,22 @@ void PyCUDATensorSetFromArray(
}
}
template
<
>
template
<
>
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
void
PyCUDATensorSetFromArray
(
void
PyCUDATensorSetFromArray
(
framework
::
Tensor
&
self
,
framework
::
Tensor
*
self
,
py
::
array_t
<
uint16_t
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
pybind11
::
array_t
<
uint16_t
,
paddle
::
platform
::
CUDAPlace
&
place
)
{
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CUDAPlace
place
)
{
std
::
vector
<
int64_t
>
dims
;
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
(
int
)
array
.
shape
()[
i
]
);
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
])
);
}
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
platform
::
float16
>
(
place
);
auto
*
dst
=
self
->
mutable_data
<
platform
::
float16
>
(
place
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
auto
dev_ctx
=
...
@@ -208,6 +217,43 @@ void PyCUDATensorSetFromArray(
...
@@ -208,6 +217,43 @@ void PyCUDATensorSetFromArray(
sizeof
(
uint16_t
)
*
array
.
size
(),
sizeof
(
uint16_t
)
*
array
.
size
(),
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
cudaMemcpyHostToDevice
,
dev_ctx
->
stream
());
}
}
template
<
typename
T
>
void
PyCUDAPinnedTensorSetFromArray
(
framework
::
Tensor
*
self
,
pybind11
::
array_t
<
T
,
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
array
,
const
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
]));
}
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
->
mutable_data
<
T
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
template
<
>
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
void
PyCUDAPinnedTensorSetFromArray
(
framework
::
Tensor
*
self
,
pybind11
::
array_t
<
uint16_t
,
pybind11
::
array
::
c_style
|
pybind11
::
array
::
forcecast
>
array
,
const
paddle
::
platform
::
CUDAPinnedPlace
&
place
)
{
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
dims
.
push_back
(
static_cast
<
int
>
(
array
.
shape
()[
i
]));
}
self
->
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
->
mutable_data
<
platform
::
float16
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
uint16_t
)
*
array
.
size
());
}
#endif
#endif
}
// namespace pybind
}
// namespace pybind
...
...
paddle/fluid/pybind/tensor_py_test.cc
0 → 100644
浏览文件 @
7b40f7ce
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/pybind/tensor_py.h"
#include <iostream>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
TEST
(
TensorPy
,
CastToPyBufferImpl
)
{
typedef
int
ElemType
;
paddle
::
framework
::
Tensor
t
;
auto
d
=
paddle
::
framework
::
make_ddim
({
1
,
2
,
3
});
int
*
p
=
t
.
mutable_data
<
ElemType
>
(
d
,
paddle
::
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
paddle
::
framework
::
product
(
d
);
++
i
)
{
p
[
i
]
=
i
;
}
pybind11
::
buffer_info
bi
=
paddle
::
pybind
::
CastToPyBuffer
(
t
);
EXPECT_EQ
(
bi
.
itemsize
,
static_cast
<
size_t
>
(
sizeof
(
ElemType
)));
EXPECT_EQ
(
bi
.
size
,
static_cast
<
size_t
>
(
paddle
::
framework
::
product
(
d
)));
EXPECT_EQ
(
bi
.
ndim
,
static_cast
<
size_t
>
(
3
));
// 3-dimensional as d.
EXPECT_EQ
(
bi
.
shape
.
size
(),
3U
);
// as Dim d.
EXPECT_EQ
(
bi
.
shape
[
0
],
static_cast
<
size_t
>
(
1
));
EXPECT_EQ
(
bi
.
shape
[
1
],
static_cast
<
size_t
>
(
2
));
EXPECT_EQ
(
bi
.
shape
[
2
],
static_cast
<
size_t
>
(
3
));
EXPECT_EQ
(
bi
.
strides
.
size
(),
3U
);
// 3-dimensional as d.
EXPECT_EQ
(
bi
.
strides
[
2
],
static_cast
<
size_t
>
(
sizeof
(
ElemType
)));
EXPECT_EQ
(
bi
.
strides
[
1
],
static_cast
<
size_t
>
(
sizeof
(
ElemType
)
*
3
));
EXPECT_EQ
(
bi
.
strides
[
0
],
static_cast
<
size_t
>
(
sizeof
(
ElemType
)
*
2
*
3
));
}
paddle/fluid/recordio/chunk.cc
浏览文件 @
7b40f7ce
...
@@ -14,11 +14,13 @@
...
@@ -14,11 +14,13 @@
#include "paddle/fluid/recordio/chunk.h"
#include "paddle/fluid/recordio/chunk.h"
#include <algorithm>
#include <memory>
#include <memory>
#include <sstream>
#include <sstream>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "snappystream.hpp"
#include "snappy
_stream/include/snappy
stream.hpp"
#include "zlib.h"
#include "zlib
/include/zlib
.h"
namespace
paddle
{
namespace
paddle
{
namespace
recordio
{
namespace
recordio
{
...
@@ -58,8 +60,8 @@ static void ReadStreamByBuf(std::istream& in, size_t limit, Callback callback) {
...
@@ -58,8 +60,8 @@ static void ReadStreamByBuf(std::istream& in, size_t limit, Callback callback) {
* Copy stream in to another stream
* Copy stream in to another stream
*/
*/
static
void
PipeStream
(
std
::
istream
&
in
,
std
::
ostream
&
os
)
{
static
void
PipeStream
(
std
::
istream
&
in
,
std
::
ostream
&
os
)
{
ReadStreamByBuf
(
ReadStreamByBuf
(
in
,
0
,
in
,
0
,
[
&
os
](
const
char
*
buf
,
size_t
len
)
{
os
.
write
(
buf
,
len
);
});
[
&
os
](
const
char
*
buf
,
size_t
len
)
{
os
.
write
(
buf
,
len
);
});
}
}
/**
/**
...
@@ -68,8 +70,8 @@ static void PipeStream(std::istream& in, std::ostream& os) {
...
@@ -68,8 +70,8 @@ static void PipeStream(std::istream& in, std::ostream& os) {
static
uint32_t
Crc32Stream
(
std
::
istream
&
in
,
size_t
limit
=
0
)
{
static
uint32_t
Crc32Stream
(
std
::
istream
&
in
,
size_t
limit
=
0
)
{
uint32_t
crc
=
static_cast
<
uint32_t
>
(
crc32
(
0
,
nullptr
,
0
));
uint32_t
crc
=
static_cast
<
uint32_t
>
(
crc32
(
0
,
nullptr
,
0
));
ReadStreamByBuf
(
in
,
limit
,
[
&
crc
](
const
char
*
buf
,
size_t
len
)
{
ReadStreamByBuf
(
in
,
limit
,
[
&
crc
](
const
char
*
buf
,
size_t
len
)
{
crc
=
static_cast
<
uint32_t
>
(
crc32
(
crc
=
static_cast
<
uint32_t
>
(
crc32
(
crc
,
reinterpret_cast
<
const
Bytef
*>
(
buf
),
crc
,
reinterpret_cast
<
const
Bytef
*>
(
buf
),
static_cast
<
uInt
>
(
len
)));
static_cast
<
uInt
>
(
len
)));
});
});
return
crc
;
return
crc
;
}
}
...
...
paddle/fluid/recordio/chunk.h
浏览文件 @
7b40f7ce
...
@@ -24,7 +24,7 @@ namespace recordio {
...
@@ -24,7 +24,7 @@ namespace recordio {
// A Chunk contains the Header and optionally compressed records.
// A Chunk contains the Header and optionally compressed records.
class
Chunk
{
class
Chunk
{
public:
public:
Chunk
()
:
num_bytes_
(
0
)
{}
Chunk
()
:
num_bytes_
(
0
)
{}
void
Add
(
const
std
::
string
&
buf
)
{
void
Add
(
const
std
::
string
&
buf
)
{
num_bytes_
+=
buf
.
size
();
num_bytes_
+=
buf
.
size
();
...
@@ -46,7 +46,7 @@ public:
...
@@ -46,7 +46,7 @@ public:
bool
Empty
()
const
{
return
records_
.
empty
();
}
bool
Empty
()
const
{
return
records_
.
empty
();
}
private:
private:
std
::
vector
<
std
::
string
>
records_
;
std
::
vector
<
std
::
string
>
records_
;
// sum of record lengths in bytes.
// sum of record lengths in bytes.
size_t
num_bytes_
;
size_t
num_bytes_
;
...
...
paddle/fluid/recordio/chunk_test.cc
浏览文件 @
7b40f7ce
...
@@ -18,29 +18,27 @@
...
@@ -18,29 +18,27 @@
#include "gtest/gtest.h"
#include "gtest/gtest.h"
using
namespace
paddle
::
recordio
;
TEST
(
Chunk
,
SaveLoad
)
{
TEST
(
Chunk
,
SaveLoad
)
{
Chunk
ch
;
paddle
::
recordio
::
Chunk
ch
;
ch
.
Add
(
std
::
string
(
"12345"
,
6
));
ch
.
Add
(
std
::
string
(
"12345"
,
6
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
std
::
stringstream
ss
;
std
::
stringstream
ss
;
ch
.
Write
(
ss
,
Compressor
::
kNoCompress
);
ch
.
Write
(
ss
,
paddle
::
recordio
::
Compressor
::
kNoCompress
);
ss
.
seekg
(
0
);
ss
.
seekg
(
0
);
ch
.
Parse
(
ss
);
ch
.
Parse
(
ss
);
ASSERT_EQ
(
ch
.
NumBytes
(),
10U
);
ASSERT_EQ
(
ch
.
NumBytes
(),
10U
);
}
}
TEST
(
Chunk
,
Compressor
)
{
TEST
(
Chunk
,
Compressor
)
{
Chunk
ch
;
paddle
::
recordio
::
Chunk
ch
;
ch
.
Add
(
std
::
string
(
"12345"
,
6
));
ch
.
Add
(
std
::
string
(
"12345"
,
6
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
ch
.
Add
(
std
::
string
(
"123"
,
4
));
std
::
stringstream
ss
;
std
::
stringstream
ss
;
ch
.
Write
(
ss
,
Compressor
::
kSnappy
);
ch
.
Write
(
ss
,
paddle
::
recordio
::
Compressor
::
kSnappy
);
std
::
stringstream
ss2
;
std
::
stringstream
ss2
;
ch
.
Write
(
ss2
,
Compressor
::
kNoCompress
);
ch
.
Write
(
ss2
,
paddle
::
recordio
::
Compressor
::
kNoCompress
);
ASSERT_LE
(
ss
.
tellp
(),
ss2
.
tellp
());
// Compress should contain less data;
ASSERT_LE
(
ss
.
tellp
(),
ss2
.
tellp
());
// Compress should contain less data;
ch
.
Clear
();
ch
.
Clear
();
...
...
paddle/fluid/recordio/header.h
浏览文件 @
7b40f7ce
...
@@ -37,7 +37,7 @@ enum class Compressor : uint32_t {
...
@@ -37,7 +37,7 @@ enum class Compressor : uint32_t {
// Header is the metadata of Chunk
// Header is the metadata of Chunk
class
Header
{
class
Header
{
public:
public:
Header
();
Header
();
Header
(
uint32_t
num
,
uint32_t
sum
,
Compressor
ct
,
uint32_t
cs
);
Header
(
uint32_t
num
,
uint32_t
sum
,
Compressor
ct
,
uint32_t
cs
);
...
@@ -51,7 +51,7 @@ public:
...
@@ -51,7 +51,7 @@ public:
Compressor
CompressType
()
const
{
return
compressor_
;
}
Compressor
CompressType
()
const
{
return
compressor_
;
}
uint32_t
CompressSize
()
const
{
return
compress_size_
;
}
uint32_t
CompressSize
()
const
{
return
compress_size_
;
}
private:
private:
uint32_t
num_records_
;
uint32_t
num_records_
;
uint32_t
checksum_
;
uint32_t
checksum_
;
Compressor
compressor_
;
Compressor
compressor_
;
...
...
paddle/fluid/recordio/header_test.cc
浏览文件 @
7b40f7ce
...
@@ -18,14 +18,12 @@
...
@@ -18,14 +18,12 @@
#include "gtest/gtest.h"
#include "gtest/gtest.h"
using
namespace
paddle
::
recordio
;
TEST
(
Recordio
,
ChunkHead
)
{
TEST
(
Recordio
,
ChunkHead
)
{
Header
hdr
(
0
,
1
,
Compressor
::
kGzip
,
3
);
paddle
::
recordio
::
Header
hdr
(
0
,
1
,
paddle
::
recordio
::
Compressor
::
kGzip
,
3
);
std
::
stringstream
ss
;
std
::
stringstream
ss
;
hdr
.
Write
(
ss
);
hdr
.
Write
(
ss
);
ss
.
seekg
(
0
,
std
::
ios
::
beg
);
ss
.
seekg
(
0
,
std
::
ios
::
beg
);
Header
hdr2
;
paddle
::
recordio
::
Header
hdr2
;
hdr2
.
Parse
(
ss
);
hdr2
.
Parse
(
ss
);
EXPECT_TRUE
(
hdr
==
hdr2
);
EXPECT_TRUE
(
hdr
==
hdr2
);
}
}
paddle/fluid/recordio/scanner.cc
浏览文件 @
7b40f7ce
...
@@ -13,10 +13,14 @@
...
@@ -13,10 +13,14 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/scanner.h"
#include <string>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
namespace
recordio
{
namespace
recordio
{
Scanner
::
Scanner
(
std
::
unique_ptr
<
std
::
istream
>
&&
stream
)
Scanner
::
Scanner
(
std
::
unique_ptr
<
std
::
istream
>
&&
stream
)
:
stream_
(
std
::
move
(
stream
))
{
:
stream_
(
std
::
move
(
stream
))
{
Reset
();
Reset
();
...
...
paddle/fluid/recordio/scanner.h
浏览文件 @
7b40f7ce
...
@@ -16,12 +16,15 @@
...
@@ -16,12 +16,15 @@
#include <fstream>
#include <fstream>
#include <memory>
#include <memory>
#include <string>
#include "paddle/fluid/recordio/chunk.h"
#include "paddle/fluid/recordio/chunk.h"
namespace
paddle
{
namespace
paddle
{
namespace
recordio
{
namespace
recordio
{
class
Scanner
{
class
Scanner
{
public:
public:
explicit
Scanner
(
std
::
unique_ptr
<
std
::
istream
>&&
stream
);
explicit
Scanner
(
std
::
unique_ptr
<
std
::
istream
>&&
stream
);
explicit
Scanner
(
const
std
::
string
&
filename
);
explicit
Scanner
(
const
std
::
string
&
filename
);
...
@@ -32,7 +35,7 @@ public:
...
@@ -32,7 +35,7 @@ public:
bool
HasNext
()
const
;
bool
HasNext
()
const
;
private:
private:
std
::
unique_ptr
<
std
::
istream
>
stream_
;
std
::
unique_ptr
<
std
::
istream
>
stream_
;
Chunk
cur_chunk_
;
Chunk
cur_chunk_
;
size_t
offset_
;
size_t
offset_
;
...
...
paddle/fluid/recordio/writer.cc
浏览文件 @
7b40f7ce
...
@@ -12,9 +12,14 @@
...
@@ -12,9 +12,14 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/recordio/writer.h"
#include "paddle/fluid/recordio/writer.h"
#include <string>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
namespace
recordio
{
namespace
recordio
{
void
Writer
::
Write
(
const
std
::
string
&
record
)
{
void
Writer
::
Write
(
const
std
::
string
&
record
)
{
cur_chunk_
.
Add
(
record
);
cur_chunk_
.
Add
(
record
);
if
(
cur_chunk_
.
NumRecords
()
>=
max_num_records_in_chunk_
)
{
if
(
cur_chunk_
.
NumRecords
()
>=
max_num_records_in_chunk_
)
{
...
...
paddle/fluid/recordio/writer.h
浏览文件 @
7b40f7ce
...
@@ -11,16 +11,17 @@
...
@@ -11,16 +11,17 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <string>
#include "paddle/fluid/recordio/chunk.h"
#include "paddle/fluid/recordio/chunk.h"
namespace
paddle
{
namespace
paddle
{
namespace
recordio
{
namespace
recordio
{
class
Writer
{
class
Writer
{
public:
public:
Writer
(
std
::
ostream
*
sout
,
Writer
(
std
::
ostream
*
sout
,
Compressor
compressor
,
Compressor
compressor
,
size_t
max_num_records_in_chunk
=
1000
)
size_t
max_num_records_in_chunk
=
1000
)
:
stream_
(
*
sout
),
:
stream_
(
*
sout
),
max_num_records_in_chunk_
(
max_num_records_in_chunk
),
max_num_records_in_chunk_
(
max_num_records_in_chunk
),
...
@@ -32,7 +33,7 @@ public:
...
@@ -32,7 +33,7 @@ public:
~
Writer
();
~
Writer
();
private:
private:
std
::
ostream
&
stream_
;
std
::
ostream
&
stream_
;
size_t
max_num_records_in_chunk_
;
size_t
max_num_records_in_chunk_
;
Chunk
cur_chunk_
;
Chunk
cur_chunk_
;
...
...
paddle/fluid/recordio/writer_scanner_test.cc
浏览文件 @
7b40f7ce
...
@@ -12,9 +12,10 @@
...
@@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "gtest/gtest.h"
#include <sstream>
#include <sstream>
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
#include "paddle/fluid/recordio/writer.h"
...
@@ -66,4 +67,4 @@ TEST(WriterScanner, TinyChunk) {
...
@@ -66,4 +67,4 @@ TEST(WriterScanner, TinyChunk) {
ASSERT_EQ
(
scanner
.
Next
(),
"DEFG"
);
ASSERT_EQ
(
scanner
.
Next
(),
"DEFG"
);
ASSERT_FALSE
(
scanner
.
HasNext
());
ASSERT_FALSE
(
scanner
.
HasNext
());
}
}
}
}
\ No newline at end of file
paddle/fluid/string/.clang-format
已删除
120000 → 0
浏览文件 @
3a825782
../framework/.clang-format
\ No newline at end of file
paddle/fluid/string/piece.cc
浏览文件 @
7b40f7ce
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "piece.h"
#include "p
addle/fluid/string/p
iece.h"
#include <string.h>
#include <string.h>
...
...
paddle/fluid/string/printf.h
浏览文件 @
7b40f7ce
...
@@ -71,6 +71,8 @@
...
@@ -71,6 +71,8 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <string>
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/string/printf_test.cc
浏览文件 @
7b40f7ce
...
@@ -11,7 +11,8 @@
...
@@ -11,7 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "printf.h"
#include "paddle/fluid/string/printf.h"
#include <string>
#include <string>
...
@@ -21,7 +22,7 @@ TEST(StringPrintf, StringPrintf) {
...
@@ -21,7 +22,7 @@ TEST(StringPrintf, StringPrintf) {
std
::
string
weekday
=
"Wednesday"
;
std
::
string
weekday
=
"Wednesday"
;
const
char
*
month
=
"July"
;
const
char
*
month
=
"July"
;
size_t
day
=
27
;
size_t
day
=
27
;
long
hour
=
14
;
int
hour
=
14
;
int
min
=
44
;
int
min
=
44
;
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
EXPECT_EQ
(
std
::
string
(
"Wednesday, July 27, 14:44"
),
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
paddle
::
string
::
Sprintf
(
"%s, %s %d, %.2d:%.2d"
,
weekday
,
month
,
day
,
...
...
paddle/fluid/string/to_string_test.cc
浏览文件 @
7b40f7ce
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "to_string.h"
#include "
paddle/fluid/string/
to_string.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
constexpr
char
kOutputString
[]
=
"User Defined Output"
;
...
@@ -26,14 +26,13 @@ std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) {
...
@@ -26,14 +26,13 @@ std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) {
}
}
TEST
(
to_string
,
normal
)
{
TEST
(
to_string
,
normal
)
{
using
namespace
paddle
::
string
;
using
paddle
::
string
::
to_
string
;
ASSERT_EQ
(
"10"
,
to_string
(
10
));
ASSERT_EQ
(
"10"
,
to_string
(
10
));
ASSERT_EQ
(
"abc"
,
to_string
(
"abc"
));
ASSERT_EQ
(
"abc"
,
to_string
(
"abc"
));
ASSERT_EQ
(
"1.2"
,
to_string
(
1.2
));
ASSERT_EQ
(
"1.2"
,
to_string
(
1.2
));
}
}
TEST
(
to_string
,
user_defined
)
{
TEST
(
to_string
,
user_defined
)
{
using
namespace
paddle
::
string
;
UserDefinedClass
instance
;
UserDefinedClass
instance
;
ASSERT_EQ
(
kOutputString
,
to_string
(
instance
));
ASSERT_EQ
(
kOutputString
,
paddle
::
string
::
to_string
(
instance
));
}
}
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -14,6 +14,11 @@ function(gserver_test TARGET)
...
@@ -14,6 +14,11 @@ function(gserver_test TARGET)
COMMAND
${
TARGET
}
)
COMMAND
${
TARGET
}
)
endfunction
()
endfunction
()
add_custom_command
(
OUTPUT
${
CMAKE_CURRENT_BINARY_DIR
}
/concat_dotmul_a.conf
COMMAND cp -r
${
CMAKE_CURRENT_SOURCE_DIR
}
/*
${
CMAKE_CURRENT_BINARY_DIR
}
)
add_custom_target
(
copy_gserver_conf ALL DEPENDS concat_dotmul_a.conf
)
gserver_test
(
test_LayerGrad
)
gserver_test
(
test_LayerGrad
)
gserver_test
(
test_CRFLayerGrad
)
gserver_test
(
test_CRFLayerGrad
)
gserver_test
(
test_CrossEntropyOverBeamGrad
)
gserver_test
(
test_CrossEntropyOverBeamGrad
)
...
@@ -31,12 +36,12 @@ gserver_test(test_Upsample)
...
@@ -31,12 +36,12 @@ gserver_test(test_Upsample)
set
(
PYTHON_PATH
set
(
PYTHON_PATH
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
SOURCE_DIR
}
/python/:
${
PADDLE_SOURCE
_DIR
}
/paddle/gserver/tests
)
${
PADDLE_
BINARY_DIR
}
/python/:
${
PADDLE_BINARY
_DIR
}
/paddle/gserver/tests
)
function
(
gserver_test_with_python TARGET
)
function
(
gserver_test_with_python TARGET
)
add_unittest_without_exec
(
${
TARGET
}
${
TARGET
}
.cpp
)
add_unittest_without_exec
(
${
TARGET
}
${
TARGET
}
.cpp
)
add_test
(
NAME
${
TARGET
}
add_test
(
NAME
${
TARGET
}
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
TARGET
}
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
TARGET
}
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle/
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle/
)
endfunction
()
endfunction
()
gserver_test_with_python
(
test_PyDataProvider2
)
gserver_test_with_python
(
test_PyDataProvider2
)
...
@@ -57,7 +62,7 @@ if(WITH_MKLDNN)
...
@@ -57,7 +62,7 @@ if(WITH_MKLDNN)
LayerGradUtil.cpp
)
LayerGradUtil.cpp
)
add_test
(
NAME test_MKLDNN
add_test
(
NAME test_MKLDNN
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/test_MKLDNN
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/test_MKLDNN
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle
)
endif
()
endif
()
############### test_WarpCTCLayer #######################
############### test_WarpCTCLayer #######################
...
@@ -66,7 +71,7 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
...
@@ -66,7 +71,7 @@ if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
test_WarpCTCLayer.cpp
)
test_WarpCTCLayer.cpp
)
add_test
(
NAME test_WarpCTCLayer
add_test
(
NAME test_WarpCTCLayer
COMMAND
${
CMAKE_CURRENT_BINARY_DIR
}
/test_WarpCTCLayer --warpctc_dir=
${
WARPCTC_LIB_DIR
}
COMMAND
${
CMAKE_CURRENT_BINARY_DIR
}
/test_WarpCTCLayer --warpctc_dir=
${
WARPCTC_LIB_DIR
}
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle
)
endif
()
endif
()
if
(
NOT MOBILE_INFERENCE
)
if
(
NOT MOBILE_INFERENCE
)
...
@@ -84,15 +89,15 @@ if(NOT MOBILE_INFERENCE)
...
@@ -84,15 +89,15 @@ if(NOT MOBILE_INFERENCE)
endif
()
endif
()
add_test
(
NAME test_NetworkCompare
add_test
(
NAME test_NetworkCompare
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/test_NetworkCompare --use_gpu=
${
use_gpu
}
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/test_NetworkCompare --use_gpu=
${
use_gpu
}
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle
)
############ test_CompareSparse ################
############ test_CompareSparse ################
add_unittest_without_exec
(
test_CompareSparse
add_unittest_without_exec
(
test_CompareSparse
test_CompareSparse.cpp
)
test_CompareSparse.cpp
)
if
(
NOT ON_TRAVIS
)
if
(
NOT ON_TRAVIS
)
add_test
(
NAME test_CompareSparse
add_test
(
NAME test_CompareSparse
COMMAND
${
PYTHON_PATH
}
.
/.set_port.sh -p port -n 6
COMMAND
${
PYTHON_PATH
}
${
PADDLE_SOURCE_DIR
}
/paddle
/.set_port.sh -p port -n 6
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareSparse
${
CMAKE_CURRENT_BINARY_DIR
}
/test_CompareSparse
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle/
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle/
)
endif
()
endif
()
endif
()
endif
()
paddle/gserver/tests/test_Upsample.cpp
浏览文件 @
7b40f7ce
...
@@ -20,10 +20,8 @@ limitations under the License. */
...
@@ -20,10 +20,8 @@ limitations under the License. */
#include "paddle/math/MathUtils.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
#include "paddle/testing/TestUtil.h"
using
namespace
paddle
;
void
setPoolConfig
(
paddle
::
TestConfig
*
config
,
paddle
::
PoolConfig
*
pool
,
void
setPoolConfig
(
TestConfig
*
config
,
PoolConfig
*
pool
,
const
string
&
poolType
)
{
const
string
&
poolType
)
{
(
*
config
).
biasSize
=
0
;
(
*
config
).
biasSize
=
0
;
(
*
config
).
layerConfig
.
set_type
(
"pool"
);
(
*
config
).
layerConfig
.
set_type
(
"pool"
);
...
@@ -42,21 +40,23 @@ void setPoolConfig(TestConfig* config,
...
@@ -42,21 +40,23 @@ void setPoolConfig(TestConfig* config,
pool
->
set_stride
(
sw
);
pool
->
set_stride
(
sw
);
pool
->
set_stride_y
(
sh
);
pool
->
set_stride_y
(
sh
);
int
ow
=
outputSize
(
pool
->
img_size
(),
kw
,
pw
,
sw
,
/* caffeMode */
false
);
int
ow
=
int
oh
=
outputSize
(
pool
->
img_size_y
(),
kh
,
ph
,
sh
,
/* caffeMode */
false
);
paddle
::
outputSize
(
pool
->
img_size
(),
kw
,
pw
,
sw
,
/* caffeMode */
false
);
int
oh
=
paddle
::
outputSize
(
pool
->
img_size_y
(),
kh
,
ph
,
sh
,
/* caffeMode */
false
);
pool
->
set_output_x
(
ow
);
pool
->
set_output_x
(
ow
);
pool
->
set_output_y
(
oh
);
pool
->
set_output_y
(
oh
);
}
}
LayerPtr
doOneUpsampleTest
(
MatrixPtr
&
inputMat
,
paddle
::
LayerPtr
doOneUpsampleTest
(
const
paddle
::
MatrixPtr
&
inputMat
,
const
string
&
poolType
,
const
string
&
poolType
,
bool
use_gpu
,
bool
use_gpu
,
real
*
tempGradData
)
{
real
*
tempGradData
)
{
/* prepare maxPoolWithMaskLayer */
/* prepare maxPoolWithMaskLayer */
TestConfig
config
;
paddle
::
TestConfig
config
;
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
128
,
0
});
config
.
inputDefs
.
push_back
({
paddle
::
INPUT_DATA
,
"layer_0"
,
128
,
0
});
LayerInputConfig
*
input
=
config
.
layerConfig
.
add_inputs
();
paddle
::
LayerInputConfig
*
input
=
config
.
layerConfig
.
add_inputs
();
PoolConfig
*
pool
=
input
->
mutable_pool_conf
();
paddle
::
PoolConfig
*
pool
=
input
->
mutable_pool_conf
();
pool
->
set_img_size
(
8
);
pool
->
set_img_size
(
8
);
pool
->
set_img_size_y
(
8
);
pool
->
set_img_size_y
(
8
);
...
@@ -66,9 +66,9 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
...
@@ -66,9 +66,9 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
config
.
layerConfig
.
set_name
(
"MaxPoolWithMask"
);
config
.
layerConfig
.
set_name
(
"MaxPoolWithMask"
);
std
::
vector
<
DataLayerPtr
>
dataLayers
;
std
::
vector
<
paddle
::
DataLayerPtr
>
dataLayers
;
LayerMap
layerMap
;
paddle
::
LayerMap
layerMap
;
vector
<
Argument
>
datas
;
vector
<
paddle
::
Argument
>
datas
;
initDataLayer
(
config
,
initDataLayer
(
config
,
&
dataLayers
,
&
dataLayers
,
...
@@ -82,20 +82,20 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
...
@@ -82,20 +82,20 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
dataLayers
[
0
]
->
getOutputValue
()
->
copyFrom
(
*
inputMat
);
dataLayers
[
0
]
->
getOutputValue
()
->
copyFrom
(
*
inputMat
);
FLAGS_use_gpu
=
use_gpu
;
FLAGS_use_gpu
=
use_gpu
;
std
::
vector
<
ParameterPtr
>
parameters
;
std
::
vector
<
paddle
::
ParameterPtr
>
parameters
;
LayerPtr
maxPoolingWithMaskOutputLayer
;
paddle
::
LayerPtr
maxPoolingWithMaskOutputLayer
;
initTestLayer
(
config
,
&
layerMap
,
&
parameters
,
&
maxPoolingWithMaskOutputLayer
);
initTestLayer
(
config
,
&
layerMap
,
&
parameters
,
&
maxPoolingWithMaskOutputLayer
);
maxPoolingWithMaskOutputLayer
->
forward
(
PASS_GC
);
maxPoolingWithMaskOutputLayer
->
forward
(
paddle
::
PASS_GC
);
/* prepare the upsample layer */
/* prepare the upsample layer */
LayerConfig
upsampleLayerConfig
;
paddle
::
LayerConfig
upsampleLayerConfig
;
upsampleLayerConfig
.
set_type
(
"upsample"
);
upsampleLayerConfig
.
set_type
(
"upsample"
);
LayerInputConfig
*
input1
=
upsampleLayerConfig
.
add_inputs
();
paddle
::
LayerInputConfig
*
input1
=
upsampleLayerConfig
.
add_inputs
();
upsampleLayerConfig
.
add_inputs
();
upsampleLayerConfig
.
add_inputs
();
UpsampleConfig
*
upsampleConfig
=
input1
->
mutable_upsample_conf
();
paddle
::
UpsampleConfig
*
upsampleConfig
=
input1
->
mutable_upsample_conf
();
upsampleConfig
->
set_scale
(
2
);
upsampleConfig
->
set_scale
(
2
);
ImageConfig
*
imageConfig
=
upsampleConfig
->
mutable_image_conf
();
paddle
::
ImageConfig
*
imageConfig
=
upsampleConfig
->
mutable_image_conf
();
imageConfig
->
set_channels
(
2
);
imageConfig
->
set_channels
(
2
);
imageConfig
->
set_img_size
(
4
);
imageConfig
->
set_img_size
(
4
);
imageConfig
->
set_img_size_y
(
4
);
imageConfig
->
set_img_size_y
(
4
);
...
@@ -103,17 +103,18 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
...
@@ -103,17 +103,18 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
upsampleLayerConfig
.
set_name
(
"upsample"
);
upsampleLayerConfig
.
set_name
(
"upsample"
);
for
(
size_t
i
=
0
;
i
<
2
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
2
;
i
++
)
{
LayerInputConfig
&
inputTemp
=
*
(
upsampleLayerConfig
.
mutable_inputs
(
i
));
paddle
::
LayerInputConfig
&
inputTemp
=
*
(
upsampleLayerConfig
.
mutable_inputs
(
i
));
inputTemp
.
set_input_layer_name
(
"MaxPoolWithMask"
);
inputTemp
.
set_input_layer_name
(
"MaxPoolWithMask"
);
}
}
LayerPtr
upsampleLayer
;
paddle
::
LayerPtr
upsampleLayer
;
ParameterMap
parameterMap
;
paddle
::
ParameterMap
parameterMap
;
upsampleLayer
=
Layer
::
create
(
upsampleLayerConfig
);
upsampleLayer
=
paddle
::
Layer
::
create
(
upsampleLayerConfig
);
layerMap
[
upsampleLayerConfig
.
name
()]
=
upsampleLayer
;
layerMap
[
upsampleLayerConfig
.
name
()]
=
upsampleLayer
;
upsampleLayer
->
init
(
layerMap
,
parameterMap
);
upsampleLayer
->
init
(
layerMap
,
parameterMap
);
upsampleLayer
->
setNeedGradient
(
true
);
upsampleLayer
->
setNeedGradient
(
true
);
upsampleLayer
->
forward
(
PASS_GC
);
upsampleLayer
->
forward
(
paddle
::
PASS_GC
);
upsampleLayer
->
getOutputGrad
()
->
copyFrom
(
tempGradData
,
128
);
upsampleLayer
->
getOutputGrad
()
->
copyFrom
(
tempGradData
,
128
);
upsampleLayer
->
backward
();
upsampleLayer
->
backward
();
...
@@ -122,31 +123,31 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
...
@@ -122,31 +123,31 @@ LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
TEST
(
Layer
,
maxPoolingWithMaskOutputLayerFwd
)
{
TEST
(
Layer
,
maxPoolingWithMaskOutputLayerFwd
)
{
bool
useGpu
=
false
;
bool
useGpu
=
false
;
MatrixPtr
inputMat
;
paddle
::
MatrixPtr
inputMat
;
MatrixPtr
inputGPUMat
;
paddle
::
MatrixPtr
inputGPUMat
;
MatrixPtr
tempGradMat
;
paddle
::
MatrixPtr
tempGradMat
;
inputMat
=
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
inputMat
=
paddle
::
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
inputMat
->
randomizeUniform
();
inputMat
->
randomizeUniform
();
tempGradMat
=
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
tempGradMat
=
paddle
::
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
tempGradMat
->
randomizeUniform
();
tempGradMat
->
randomizeUniform
();
real
*
data
=
inputMat
->
getData
();
real
*
tempGradData
=
tempGradMat
->
getData
();
real
*
tempGradData
=
tempGradMat
->
getData
();
LayerPtr
upsampleLayerCPU
=
paddle
::
LayerPtr
upsampleLayerCPU
=
doOneUpsampleTest
(
inputMat
,
"max-pool-with-mask"
,
useGpu
,
tempGradData
);
doOneUpsampleTest
(
inputMat
,
"max-pool-with-mask"
,
useGpu
,
tempGradData
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
useGpu
=
true
;
useGpu
=
true
;
inputGPUMat
=
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
real
*
data
=
inputMat
->
getData
();
inputGPUMat
=
paddle
::
Matrix
::
create
(
1
,
128
,
false
,
useGpu
);
inputGPUMat
->
copyFrom
(
data
,
128
);
inputGPUMat
->
copyFrom
(
data
,
128
);
LayerPtr
upsampleLayerGPU
=
doOneUpsampleTest
(
paddle
::
LayerPtr
upsampleLayerGPU
=
doOneUpsampleTest
(
inputGPUMat
,
"max-pool-with-mask"
,
useGpu
,
tempGradData
);
inputGPUMat
,
"max-pool-with-mask"
,
useGpu
,
tempGradData
);
checkMatrixEqual
(
upsampleLayerCPU
->
getOutput
(
""
).
value
,
paddle
::
checkMatrixEqual
(
upsampleLayerCPU
->
getOutput
(
""
).
value
,
upsampleLayerGPU
->
getOutput
(
""
).
value
);
upsampleLayerGPU
->
getOutput
(
""
).
value
);
checkMatrixEqual
(
upsampleLayerCPU
->
getPrev
(
0
)
->
getOutputGrad
(),
paddle
::
checkMatrixEqual
(
upsampleLayerCPU
->
getPrev
(
0
)
->
getOutputGrad
(),
upsampleLayerGPU
->
getPrev
(
0
)
->
getOutputGrad
());
upsampleLayerGPU
->
getPrev
(
0
)
->
getOutputGrad
());
#endif
#endif
}
}
paddle/trainer/tests/CMakeLists.txt
浏览文件 @
7b40f7ce
add_custom_command
(
OUTPUT
${
CMAKE_CURRENT_BINARY_DIR
}
/sample_trainer_config.conf
COMMAND cp -r
${
CMAKE_CURRENT_SOURCE_DIR
}
/*
${
CMAKE_CURRENT_BINARY_DIR
}
)
add_custom_target
(
copy_trainer_conf ALL DEPENDS sample_trainer_config.conf
)
set
(
PYTHON_PATH
set
(
PYTHON_PATH
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
SOURCE_DIR
}
/python/:
${
PADDLE_SOURCE
_DIR
}
/paddle/trainer/tests
)
${
PADDLE_
BINARY_DIR
}
/python/:
${
PADDLE_BINARY
_DIR
}
/paddle/trainer/tests
)
function
(
trainer_test TARGET
)
function
(
trainer_test TARGET
)
add_unittest_without_exec
(
${
TARGET
}
${
TARGET
}
.cpp
)
add_unittest_without_exec
(
${
TARGET
}
${
TARGET
}
.cpp
)
add_test
(
NAME
${
TARGET
}
add_test
(
NAME
${
TARGET
}
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
TARGET
}
COMMAND
${
PYTHON_PATH
}
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
TARGET
}
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle/
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle/
)
endfunction
()
endfunction
()
trainer_test
(
test_Compare
)
trainer_test
(
test_Compare
)
...
@@ -22,11 +27,11 @@ if(WITH_PYTHON)
...
@@ -22,11 +27,11 @@ if(WITH_PYTHON)
add_test
(
NAME test_TrainerOnePass
add_test
(
NAME test_TrainerOnePass
COMMAND
${
PYTHON_PATH
}
${
PADDLE_SOURCE_DIR
}
/paddle/.set_port.sh -p port
COMMAND
${
PYTHON_PATH
}
${
PADDLE_SOURCE_DIR
}
/paddle/.set_port.sh -p port
${
CMAKE_CURRENT_BINARY_DIR
}
/test_TrainerOnePass
${
CMAKE_CURRENT_BINARY_DIR
}
/test_TrainerOnePass
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle/
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle/
)
endif
()
endif
()
#################### test_config_parser #########################
#################### test_config_parser #########################
add_test
(
NAME test_config_parser
add_test
(
NAME test_config_parser
COMMAND
${
PYTHON_PATH
}
${
PYTHON_EXECUTABLE
}
COMMAND
${
PYTHON_PATH
}
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/paddle/trainer/tests/config_parser_test.py
${
PADDLE_SOURCE_DIR
}
/paddle/trainer/tests/config_parser_test.py
WORKING_DIRECTORY
${
PADDLE_
SOURCE
_DIR
}
/paddle/
)
WORKING_DIRECTORY
${
PADDLE_
BINARY
_DIR
}
/paddle/
)
paddle/utils/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
file
(
GLOB UTIL_HEADERS . *.h
)
file
(
GLOB UTIL_HEADERS . *.h
)
file
(
GLOB UTIL_SOURCES . *.cpp
)
file
(
GLOB UTIL_SOURCES . *.cpp
)
create_resources
(
${
CMAKE_CURRENT_SOURCE_DIR
}
/enable_virtualenv.py
create_resources
(
${
CMAKE_CURRENT_SOURCE_DIR
}
/enable_virtualenv.py
${
CMAKE_CURRENT_
SOURCE
_DIR
}
/enable_virtualenv.c
)
${
CMAKE_CURRENT_
BINARY
_DIR
}
/enable_virtualenv.c
)
set
(
UTIL_RES
${
CMAKE_CURRENT_
SOURCE
_DIR
}
/enable_virtualenv.c
)
set
(
UTIL_RES
${
CMAKE_CURRENT_
BINARY
_DIR
}
/enable_virtualenv.c
)
if
(
APPLE
)
if
(
APPLE
)
file
(
GLOB UTIL_ARCH_SOURCES . arch/osx/*.cpp
)
file
(
GLOB UTIL_ARCH_SOURCES . arch/osx/*.cpp
)
...
...
proto/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -15,13 +15,14 @@ foreach(filename ${proto_filenames})
...
@@ -15,13 +15,14 @@ foreach(filename ${proto_filenames})
get_filename_component
(
ABS_FIL
${
filename
}
ABSOLUTE
)
get_filename_component
(
ABS_FIL
${
filename
}
ABSOLUTE
)
get_filename_component
(
FIL_WE
${
filename
}
NAME_WE
)
get_filename_component
(
FIL_WE
${
filename
}
NAME_WE
)
set
(
CUR_PROTO_GEN_PY
set
(
CUR_PROTO_GEN_PY
${
PADDLE_
SOURCE
_DIR
}
/paddle/python/paddle/proto/
${
FIL_WE
}
_pb2.py
)
${
PADDLE_
BINARY
_DIR
}
/paddle/python/paddle/proto/
${
FIL_WE
}
_pb2.py
)
set
(
PROTO_GEN_PY
set
(
PROTO_GEN_PY
${
CUR_PROTO_GEN_PY
}
${
CUR_PROTO_GEN_PY
}
${
PROTO_GEN_PY
}
)
${
PROTO_GEN_PY
}
)
add_custom_command
(
OUTPUT
${
CUR_PROTO_GEN_PY
}
add_custom_command
(
OUTPUT
${
CUR_PROTO_GEN_PY
}
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_BINARY_DIR
}
/python/paddle/proto
COMMAND
${
PROTOBUF_PROTOC_EXECUTABLE
}
COMMAND
${
PROTOBUF_PROTOC_EXECUTABLE
}
ARGS
"--python_out=
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/proto"
ARGS
"--python_out=
${
PADDLE_
BINARY
_DIR
}
/python/paddle/proto"
"-I"
${
CMAKE_CURRENT_SOURCE_DIR
}
${
ABS_FIL
}
"-I"
${
CMAKE_CURRENT_SOURCE_DIR
}
${
ABS_FIL
}
DEPENDS
${
ABS_FIL
}
protoc
)
DEPENDS
${
ABS_FIL
}
protoc
)
endforeach
()
endforeach
()
...
...
python/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -47,14 +47,16 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
...
@@ -47,14 +47,16 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${
CMAKE_CURRENT_BINARY_DIR
}
/setup.py
)
${
CMAKE_CURRENT_BINARY_DIR
}
/setup.py
)
add_custom_command
(
OUTPUT
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/core.so
add_custom_command
(
OUTPUT
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/core.so
DEPENDS paddle_pybind
)
DEPENDS paddle_pybind
)
add_custom_target
(
copy_paddle_pybind ALL DEPENDS
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/fluid/core.so
)
add_custom_target
(
copy_paddle_pybind ALL DEPENDS
${
PADDLE_
BINARY
_DIR
}
/python/paddle/fluid/core.so
)
add_custom_command
(
OUTPUT
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
add_custom_command
(
OUTPUT
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
COMMAND touch stub.cc
COMMAND touch stub.cc
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
PADDLE_SOURCE_DIR
}
/python/paddle
${
PADDLE_BINARY_DIR
}
/python/paddle
COMMAND cp -r
${
PADDLE_SOURCE_DIR
}
/paddle/py_paddle
${
PADDLE_BINARY_DIR
}
/python/
COMMAND env
${
py_env
}
${
PYTHON_EXECUTABLE
}
setup.py bdist_wheel
COMMAND env
${
py_env
}
${
PYTHON_EXECUTABLE
}
setup.py bdist_wheel
COMMAND
${
CMAKE_COMMAND
}
-E touch
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
COMMAND
${
CMAKE_COMMAND
}
-E touch
${
PADDLE_PYTHON_BUILD_DIR
}
/.timestamp
COMMAND
${
CMAKE_COMMAND
}
-E remove_directory
${
PADDLE_PYTHON_BUILD_DIR
}
/lib-python
COMMAND
${
CMAKE_COMMAND
}
-E remove_directory
${
PADDLE_PYTHON_BUILD_DIR
}
/lib-python
...
...
python/paddle/fluid/__init__.py
浏览文件 @
7b40f7ce
...
@@ -31,7 +31,7 @@ import regularizer
...
@@ -31,7 +31,7 @@ import regularizer
import
average
import
average
from
param_attr
import
ParamAttr
,
WeightNormParamAttr
from
param_attr
import
ParamAttr
,
WeightNormParamAttr
from
data_feeder
import
DataFeeder
from
data_feeder
import
DataFeeder
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
from
distribute_transpiler
import
DistributeTranspiler
from
distribute_transpiler
import
DistributeTranspiler
from
distribute_transpiler_simple
import
SimpleDistributeTranspiler
from
distribute_transpiler_simple
import
SimpleDistributeTranspiler
from
concurrency
import
(
Go
,
make_channel
,
channel_send
,
channel_recv
,
from
concurrency
import
(
Go
,
make_channel
,
channel_send
,
channel_recv
,
...
@@ -57,6 +57,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [
...
@@ -57,6 +57,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [
'LoDTensor'
,
'LoDTensor'
,
'CPUPlace'
,
'CPUPlace'
,
'CUDAPlace'
,
'CUDAPlace'
,
'CUDAPinnedPlace'
,
'Tensor'
,
'Tensor'
,
'ParamAttr'
,
'ParamAttr'
,
'WeightNormParamAttr'
,
'WeightNormParamAttr'
,
...
...
python/paddle/fluid/distribute_transpiler.py
浏览文件 @
7b40f7ce
...
@@ -17,7 +17,7 @@ import framework
...
@@ -17,7 +17,7 @@ import framework
from
framework
import
Program
,
default_main_program
,
default_startup_program
,
Parameter
,
Variable
from
framework
import
Program
,
default_main_program
,
default_startup_program
,
Parameter
,
Variable
import
optimizer
import
optimizer
from
layer_helper
import
LayerHelper
from
layer_helper
import
LayerHelper
from
distributed_spliter
import
*
import
distributed_splitter
as
splitter
import
math
import
math
from
.
import
core
from
.
import
core
import
debuger
import
debuger
...
@@ -36,7 +36,7 @@ class VarBlock:
...
@@ -36,7 +36,7 @@ class VarBlock:
class
UnionFind
(
object
):
class
UnionFind
(
object
):
""" Union-find data struct.
""" Union-find data struct.
Union-find is a data struct that keeps track of a set of elements partitioned
Union-find is a data struct that keeps track of a set of elements partitioned
into a number of disjoint (non-overlapping) subsets.
into a number of disjoint (non-overlapping) subsets.
...
@@ -138,7 +138,7 @@ class DistributeTranspiler:
...
@@ -138,7 +138,7 @@ class DistributeTranspiler:
program
=
None
,
program
=
None
,
pservers
=
"127.0.0.1:6174"
,
pservers
=
"127.0.0.1:6174"
,
trainers
=
1
,
trainers
=
1
,
split_method
=
round_robin
):
split_method
=
splitter
.
round_robin
):
"""
"""
Transpile the program to distributed data-parallelism programs.
Transpile the program to distributed data-parallelism programs.
The main_program will be transformed to use a remote parameter server
The main_program will be transformed to use a remote parameter server
...
@@ -303,7 +303,7 @@ class DistributeTranspiler:
...
@@ -303,7 +303,7 @@ class DistributeTranspiler:
# If two ops are connected, we could add these two ops
# If two ops are connected, we could add these two ops
# into one set.
# into one set.
ufind
=
self
.
_create_ufind
(
self
.
optimize_ops
)
ufind
=
self
.
_create_ufind
(
self
.
optimize_ops
)
# step 4.2
# step 4.2
# Iterate through the ops and append optimize op which
# Iterate through the ops and append optimize op which
# located on current pserver
# located on current pserver
opt_op_on_pserver
=
[]
opt_op_on_pserver
=
[]
...
@@ -312,7 +312,7 @@ class DistributeTranspiler:
...
@@ -312,7 +312,7 @@ class DistributeTranspiler:
opt_op_on_pserver
.
append
(
op
)
opt_op_on_pserver
.
append
(
op
)
# step 4.3
# step 4.3
# Iterate through the ops, and if an op and the optimize ops
# Iterate through the ops, and if an op and the optimize ops
# which located on current pserver are in one set, then
# which located on current pserver are in one set, then
# append it into the sub program.
# append it into the sub program.
# We try to put optimization program run parallelly, assume
# We try to put optimization program run parallelly, assume
...
@@ -408,11 +408,7 @@ class DistributeTranspiler:
...
@@ -408,11 +408,7 @@ class DistributeTranspiler:
pserver_vars
=
pserver_program
.
global_block
().
vars
pserver_vars
=
pserver_program
.
global_block
().
vars
created_var_map
=
dict
()
created_var_map
=
dict
()
for
_
,
var
in
pserver_vars
.
iteritems
():
for
_
,
var
in
pserver_vars
.
iteritems
():
tmpvar
=
s_prog
.
global_block
().
create_var
(
tmpvar
=
s_prog
.
global_block
().
clone_variable
(
var
)
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
created_var_map
[
var
.
name
]
=
tmpvar
created_var_map
[
var
.
name
]
=
tmpvar
# 2. rename op outputs
# 2. rename op outputs
...
@@ -708,11 +704,7 @@ class DistributeTranspiler:
...
@@ -708,11 +704,7 @@ class DistributeTranspiler:
varlist
=
[
varlist
]
varlist
=
[
varlist
]
for
var
in
varlist
:
for
var
in
varlist
:
program
.
global_block
().
create_var
(
program
.
global_block
().
clone_variable
(
var
)
name
=
var
.
name
,
persistable
=
var
.
persistable
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
)
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
opt_op
.
type
,
type
=
opt_op
.
type
,
...
@@ -760,7 +752,7 @@ class DistributeTranspiler:
...
@@ -760,7 +752,7 @@ class DistributeTranspiler:
def
_is_opt_op
(
self
,
op
):
def
_is_opt_op
(
self
,
op
):
# NOTE: It's a HACK implement.
# NOTE: It's a HACK implement.
# optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc...
# optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc...
if
"Param"
in
op
.
input_names
and
\
if
"Param"
in
op
.
input_names
and
\
"LearningRate"
in
op
.
input_names
:
"LearningRate"
in
op
.
input_names
:
return
True
return
True
...
...
python/paddle/fluid/distributed_spliter.py
→
python/paddle/fluid/distributed_split
t
er.py
浏览文件 @
7b40f7ce
...
@@ -17,8 +17,10 @@ def hash_name(varlist, pserver_endpoints):
...
@@ -17,8 +17,10 @@ def hash_name(varlist, pserver_endpoints):
"""
"""
hash variable names to several endpoints.
hash variable names to several endpoints.
:param varlist: a list of Variables
Args:
:return: a map of pserver endpoint -> varname
varlist(list): a list of Variables
Returns(dict): a map of pserver endpoint -> varname
"""
"""
def
_hash_block
(
block_str
,
total
):
def
_hash_block
(
block_str
,
total
):
...
@@ -34,9 +36,14 @@ def hash_name(varlist, pserver_endpoints):
...
@@ -34,9 +36,14 @@ def hash_name(varlist, pserver_endpoints):
def
round_robin
(
varlist
,
pserver_endpoints
):
def
round_robin
(
varlist
,
pserver_endpoints
):
"""
"""
distribute variables to several endpoints.
Distribute variables to several endpoints.
Args:
varlist(list): a list of variables
pserver_endpoints(list): a list of pserver endpoints
Returns(list[int]): the endpoint for each variable
"""
"""
assert
(
len
(
varlist
)
>
len
(
pserver_endpoints
))
assert
(
len
(
varlist
)
>
=
len
(
pserver_endpoints
))
eplist
=
[]
eplist
=
[]
pserver_idx
=
0
pserver_idx
=
0
...
...
python/paddle/fluid/framework.py
浏览文件 @
7b40f7ce
...
@@ -838,7 +838,7 @@ class Block(object):
...
@@ -838,7 +838,7 @@ class Block(object):
def
sync_with_cpp
(
self
):
def
sync_with_cpp
(
self
):
"""
"""
Sync
with
the desc on the c++ end.
Sync
from
the desc on the c++ end.
This method is used to synchronize the c++ desc instance generated by backward.
This method is used to synchronize the c++ desc instance generated by backward.
"""
"""
...
@@ -946,13 +946,20 @@ class Block(object):
...
@@ -946,13 +946,20 @@ class Block(object):
The new variable cloned from 'var' in current block.
The new variable cloned from 'var' in current block.
"""
"""
assert
isinstance
(
var
,
Variable
)
assert
isinstance
(
var
,
Variable
)
return
self
.
create_var
(
ret_var
=
None
name
=
var
.
name
,
# make STEP_SCOPES var can be safely cloned.
shape
=
var
.
shape
,
if
var
.
type
==
core
.
VarDesc
.
VarType
.
STEP_SCOPES
:
dtype
=
var
.
dtype
,
ret_var
=
self
.
create_var
(
type
=
var
.
type
,
name
=
var
.
name
,
persistable
=
var
.
persistable
,
type
=
var
.
type
)
lod_level
=
var
.
lod_level
,
else
:
persistable
=
True
)
ret_var
=
self
.
create_var
(
name
=
var
.
name
,
shape
=
var
.
shape
,
dtype
=
var
.
dtype
,
type
=
var
.
type
,
lod_level
=
var
.
lod_level
,
persistable
=
True
)
return
ret_var
class
Program
(
object
):
class
Program
(
object
):
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
7b40f7ce
...
@@ -26,25 +26,29 @@ class ParallelExecutor(object):
...
@@ -26,25 +26,29 @@ class ParallelExecutor(object):
use_cuda
,
use_cuda
,
num_threads
=
None
,
num_threads
=
None
,
allow_op_delay
=
False
):
allow_op_delay
=
False
):
places
=
[]
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
if
use_cuda
:
for
i
in
xrange
(
core
.
get_cuda_device_count
()):
for
i
in
xrange
(
core
.
get_cuda_device_count
()):
p
=
core
.
Place
()
p
=
core
.
Place
()
p
.
set_place
(
core
.
CUDAPlace
(
i
))
self
.
_act_places
.
append
(
core
.
CUDAPlace
(
i
))
places
.
append
(
p
)
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
else
:
else
:
for
i
in
xrange
(
multiprocessing
.
cpu_count
()):
for
i
in
xrange
(
multiprocessing
.
cpu_count
()):
p
=
core
.
Place
()
p
=
core
.
Place
()
p
.
set_place
(
core
.
CPUPlace
())
self
.
_act_places
.
append
(
core
.
CPUPlace
(
i
))
places
.
append
(
p
)
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
assert
self
.
_places
,
"no place for execution"
if
num_threads
is
None
:
if
num_threads
is
None
:
if
use_cuda
:
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
# Experiments on se-resnext shows that too many threads hurt
# performance. Worth tunning for other models in the future.
# performance. Worth tunning for other models in the future.
num_threads
=
len
(
places
)
num_threads
=
len
(
self
.
_
places
)
else
:
else
:
min
(
len
(
places
)
*
2
,
multiprocessing
.
cpu_count
())
min
(
len
(
self
.
_
places
)
*
2
,
multiprocessing
.
cpu_count
())
startup
=
framework
.
default_startup_program
()
startup
=
framework
.
default_startup_program
()
main
=
framework
.
default_main_program
()
main
=
framework
.
default_main_program
()
...
@@ -53,7 +57,7 @@ class ParallelExecutor(object):
...
@@ -53,7 +57,7 @@ class ParallelExecutor(object):
self
.
executor
=
core
.
ParallelExecutor
(
self
.
executor
=
core
.
ParallelExecutor
(
num_threads
,
num_threads
,
True
if
use_cuda
else
False
,
# use_event
True
if
use_cuda
else
False
,
# use_event
places
,
self
.
_
places
,
set
([
set
([
p
.
name
for
p
in
main
.
global_block
().
iter_parameters
()
p
.
name
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
if
not
p
.
stop_gradient
...
@@ -65,8 +69,25 @@ class ParallelExecutor(object):
...
@@ -65,8 +69,25 @@ class ParallelExecutor(object):
allow_op_delay
)
allow_op_delay
)
self
.
scope
=
scope
self
.
scope
=
scope
def
run
(
self
,
fetch_list
):
def
run
(
self
,
fetch_list
,
feed_dict
=
{}):
"""
:param fetch_list: A list of variable names that will be fetched.
:param feed_dict: A dict mapping for feed variable name to LoDTensor
or numpy array.
:return: fetched value list.
"""
if
not
isinstance
(
feed_dict
,
dict
):
raise
TypeError
(
"feed_dict should be a dict"
)
feed_tensor_dict
=
{}
for
i
,
feed_name
in
enumerate
(
feed_dict
):
feed_tensor
=
feed_dict
[
feed_name
]
if
not
isinstance
(
feed_tensor
,
core
.
LoDTensor
):
feed_tensor
=
core
.
LoDTensor
()
feed_tensor
.
set
(
feed_dict
[
feed_name
],
self
.
_act_places
[
0
])
feed_tensor_dict
[
feed_name
]
=
feed_tensor
fetch_var_name
=
'@FETCHED_VAR_NAME@'
fetch_var_name
=
'@FETCHED_VAR_NAME@'
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
)
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
,
feed_tensor_dict
)
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
7b40f7ce
...
@@ -22,9 +22,9 @@ function(py_test_modules TARGET_NAME)
...
@@ -22,9 +22,9 @@ function(py_test_modules TARGET_NAME)
set
(
multiValueArgs MODULES DEPS ARGS ENVS
)
set
(
multiValueArgs MODULES DEPS ARGS ENVS
)
cmake_parse_arguments
(
py_test_modules
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
py_test_modules
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
COMMAND env PYTHONPATH=
${
PADDLE_
PYTHON_BUILD_DIR
}
/lib-
python
${
py_test_modules_ENVS
}
COMMAND env PYTHONPATH=
${
PADDLE_
BINARY_DIR
}
/
python
${
py_test_modules_ENVS
}
${
PYTHON_EXECUTABLE
}
-u -m unittest --verbose
${
py_test_modules_MODULES
}
${
py_test_modules_ARGS
}
${
PYTHON_EXECUTABLE
}
-u -m unittest --verbose
${
py_test_modules_MODULES
}
${
py_test_modules_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_
SOURCE
_DIR
}
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_
BINARY
_DIR
}
)
endif
()
endif
()
endfunction
()
endfunction
()
...
...
python/paddle/fluid/tests/unittests/test_conv2d_op.py
浏览文件 @
7b40f7ce
...
@@ -97,8 +97,11 @@ class TestConv2dOp(OpTest):
...
@@ -97,8 +97,11 @@ class TestConv2dOp(OpTest):
}
}
self
.
outputs
=
{
'Output'
:
output
}
self
.
outputs
=
{
'Output'
:
output
}
def
testcudnn
(
self
):
return
core
.
is_compiled_with_cuda
()
and
self
.
use_cudnn
def
test_check_output
(
self
):
def
test_check_output
(
self
):
if
self
.
use_cudnn
:
if
self
.
testcudnn
()
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
self
.
check_output_with_place
(
place
,
atol
=
1e-5
)
else
:
else
:
...
@@ -107,7 +110,7 @@ class TestConv2dOp(OpTest):
...
@@ -107,7 +110,7 @@ class TestConv2dOp(OpTest):
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
if
self
.
use_cudnn
:
if
self
.
testcudnn
()
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
place
,
...
@@ -121,7 +124,7 @@ class TestConv2dOp(OpTest):
...
@@ -121,7 +124,7 @@ class TestConv2dOp(OpTest):
def
test_check_grad_no_filter
(
self
):
def
test_check_grad_no_filter
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
if
self
.
use_cudnn
:
if
self
.
testcudnn
()
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
[
'Input'
],
place
,
[
'Input'
],
...
@@ -138,7 +141,7 @@ class TestConv2dOp(OpTest):
...
@@ -138,7 +141,7 @@ class TestConv2dOp(OpTest):
def
test_check_grad_no_input
(
self
):
def
test_check_grad_no_input
(
self
):
if
self
.
dtype
==
np
.
float16
:
if
self
.
dtype
==
np
.
float16
:
return
return
if
self
.
use_cudnn
:
if
self
.
testcudnn
()
:
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
self
.
check_grad_with_place
(
self
.
check_grad_with_place
(
place
,
[
'Filter'
],
place
,
[
'Filter'
],
...
...
python/paddle/fluid/tests/unittests/test_lookup_table_op.py
浏览文件 @
7b40f7ce
...
@@ -115,18 +115,18 @@ class TestLookupTableWIsSelectedRows(OpTest):
...
@@ -115,18 +115,18 @@ class TestLookupTableWIsSelectedRows(OpTest):
w_array
=
np
.
ones
((
len
(
rows
),
row_numel
)).
astype
(
"float32"
)
w_array
=
np
.
ones
((
len
(
rows
),
row_numel
)).
astype
(
"float32"
)
for
i
in
range
(
len
(
rows
)):
for
i
in
range
(
len
(
rows
)):
w_array
[
i
]
*=
i
w_array
[
i
]
*=
i
ids
_tensor
=
w_selected_rows
.
get_tensor
()
w
_tensor
=
w_selected_rows
.
get_tensor
()
ids
_tensor
.
set
(
w_array
,
place
)
w
_tensor
.
set
(
w_array
,
place
)
# create Out Variable
# create Out Variable
O
ut_tensor
=
scope
.
var
(
'Out'
).
get_tensor
()
o
ut_tensor
=
scope
.
var
(
'Out'
).
get_tensor
()
# create and run lookup_table operator
# create and run lookup_table operator
lookup_table
=
Operator
(
"lookup_table"
,
W
=
'W'
,
Ids
=
'Ids'
,
Out
=
'Out'
)
lookup_table
=
Operator
(
"lookup_table"
,
W
=
'W'
,
Ids
=
'Ids'
,
Out
=
'Out'
)
lookup_table
.
run
(
scope
,
place
)
lookup_table
.
run
(
scope
,
place
)
# get result from Out
# get result from Out
result_array
=
np
.
array
(
O
ut_tensor
)
result_array
=
np
.
array
(
o
ut_tensor
)
# all(): return True if all elements of the iterable are true (or if the iterable is empty)
# all(): return True if all elements of the iterable are true (or if the iterable is empty)
for
idx
,
row
in
enumerate
(
ids_array
):
for
idx
,
row
in
enumerate
(
ids_array
):
assert
(
row
[
0
]
==
result_array
[
idx
]).
all
()
assert
(
row
[
0
]
==
result_array
[
idx
]).
all
()
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
7b40f7ce
...
@@ -21,13 +21,17 @@ import paddle.dataset.mnist as mnist
...
@@ -21,13 +21,17 @@ import paddle.dataset.mnist as mnist
import
paddle.dataset.wmt16
as
wmt16
import
paddle.dataset.wmt16
as
wmt16
def
simple_fc_net
():
def
simple_fc_net
(
use_feed
):
reader
=
fluid
.
layers
.
open_recordio_file
(
if
use_feed
:
filename
=
'./mnist.recordio'
,
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
lod_levels
=
[
0
,
0
],
else
:
dtypes
=
[
'float32'
,
'int64'
])
reader
=
fluid
.
layers
.
open_recordio_file
(
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
hidden
=
img
hidden
=
img
for
_
in
xrange
(
4
):
for
_
in
xrange
(
4
):
hidden
=
fluid
.
layers
.
fc
(
hidden
=
fluid
.
layers
.
fc
(
...
@@ -42,13 +46,18 @@ def simple_fc_net():
...
@@ -42,13 +46,18 @@ def simple_fc_net():
return
loss
return
loss
def
fc_with_batchnorm
():
def
fc_with_batchnorm
(
use_feed
):
reader
=
fluid
.
layers
.
open_recordio_file
(
if
use_feed
:
filename
=
'./mnist.recordio'
,
img
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
784
],
dtype
=
'float32'
)
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
lod_levels
=
[
0
,
0
],
else
:
dtypes
=
[
'float32'
,
'int64'
])
reader
=
fluid
.
layers
.
open_recordio_file
(
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
filename
=
'./mnist.recordio'
,
shapes
=
[[
-
1
,
784
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
])
img
,
label
=
fluid
.
layers
.
read_file
(
reader
)
hidden
=
img
hidden
=
img
for
_
in
xrange
(
1
):
for
_
in
xrange
(
1
):
hidden
=
fluid
.
layers
.
fc
(
hidden
=
fluid
.
layers
.
fc
(
...
@@ -135,7 +144,9 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
...
@@ -135,7 +144,9 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
return
fluid
.
layers
.
elementwise_add
(
x
=
short
,
y
=
scale
,
act
=
'relu'
)
return
fluid
.
layers
.
elementwise_add
(
x
=
short
,
y
=
scale
,
act
=
'relu'
)
def
SE_ResNeXt152Small
(
batch_size
=
2
):
def
SE_ResNeXt50Small
(
batch_size
=
2
,
use_feed
=
False
):
assert
not
use_feed
,
"SE_ResNeXt doesn't support feed yet"
img
=
fluid
.
layers
.
fill_constant
(
img
=
fluid
.
layers
.
fill_constant
(
shape
=
[
batch_size
,
3
,
224
,
224
],
dtype
=
'float32'
,
value
=
0.0
)
shape
=
[
batch_size
,
3
,
224
,
224
],
dtype
=
'float32'
,
value
=
0.0
)
label
=
fluid
.
layers
.
fill_constant
(
label
=
fluid
.
layers
.
fill_constant
(
...
@@ -150,9 +161,9 @@ def SE_ResNeXt152Small(batch_size=2):
...
@@ -150,9 +161,9 @@ def SE_ResNeXt152Small(batch_size=2):
conv
=
fluid
.
layers
.
pool2d
(
conv
=
fluid
.
layers
.
pool2d
(
input
=
conv
,
pool_size
=
3
,
pool_stride
=
2
,
pool_padding
=
1
,
pool_type
=
'max'
)
input
=
conv
,
pool_size
=
3
,
pool_stride
=
2
,
pool_padding
=
1
,
pool_type
=
'max'
)
cardinality
=
64
cardinality
=
32
reduction_ratio
=
16
reduction_ratio
=
16
depth
=
[
3
,
8
,
3
6
,
3
]
depth
=
[
3
,
4
,
6
,
3
]
num_filters
=
[
128
,
256
,
512
,
1024
]
num_filters
=
[
128
,
256
,
512
,
1024
]
for
block
in
range
(
len
(
depth
)):
for
block
in
range
(
len
(
depth
)):
...
@@ -185,30 +196,28 @@ class TestParallelExecutorBase(unittest.TestCase):
...
@@ -185,30 +196,28 @@ class TestParallelExecutorBase(unittest.TestCase):
memory_opt
=
True
,
memory_opt
=
True
,
iter
=
10
,
iter
=
10
,
batch_size
=
None
,
batch_size
=
None
,
allow_op_delay
=
False
):
allow_op_delay
=
False
,
feed_dict
=
{}):
main
=
fluid
.
Program
()
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
with
fluid
.
program_guard
(
main
,
startup
):
loss
=
method
()
loss
=
method
(
use_feed
=
len
(
feed_dict
)
>
0
)
adam
=
fluid
.
optimizer
.
Adam
()
adam
=
fluid
.
optimizer
.
Adam
()
adam
.
minimize
(
loss
)
adam
.
minimize
(
loss
)
if
memory_opt
:
if
memory_opt
:
fluid
.
memory_optimize
(
main
)
fluid
.
memory_optimize
(
main
)
exe
=
fluid
.
ParallelExecutor
(
exe
=
fluid
.
ParallelExecutor
(
loss_name
=
loss
.
name
,
use_cuda
=
True
)
loss_name
=
loss
.
name
,
use_cuda
=
True
,
allow_op_delay
=
allow_op_delay
)
if
batch_size
is
not
None
:
if
batch_size
is
not
None
:
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
batch_size
*=
fluid
.
core
.
get_cuda_device_count
()
begin
=
time
.
time
()
begin
=
time
.
time
()
first_loss
,
=
exe
.
run
([
loss
.
name
])
first_loss
,
=
exe
.
run
([
loss
.
name
]
,
feed_dict
=
feed_dict
)
first_loss
=
numpy
.
array
(
first_loss
)
first_loss
=
numpy
.
array
(
first_loss
)
for
i
in
xrange
(
iter
):
for
i
in
xrange
(
iter
):
exe
.
run
([])
exe
.
run
([]
,
feed_dict
=
feed_dict
)
last_loss
,
=
exe
.
run
([
loss
.
name
])
last_loss
,
=
exe
.
run
([
loss
.
name
]
,
feed_dict
=
feed_dict
)
end
=
time
.
time
()
end
=
time
.
time
()
if
batch_size
is
not
None
:
if
batch_size
is
not
None
:
...
@@ -242,9 +251,19 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -242,9 +251,19 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
def
test_batchnorm_fc
(
self
):
def
test_batchnorm_fc
(
self
):
self
.
check_network_convergence
(
fc_with_batchnorm
)
self
.
check_network_convergence
(
fc_with_batchnorm
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
allow_op_delay
=
True
)
img
=
numpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
numpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
class
TestResnet
(
TestParallelExecutorBase
):
class
TestResnet
(
TestParallelExecutorBase
):
...
@@ -271,7 +290,7 @@ class TestResnet(TestParallelExecutorBase):
...
@@ -271,7 +290,7 @@ class TestResnet(TestParallelExecutorBase):
batch_size
=
2
batch_size
=
2
self
.
check_network_convergence
(
self
.
check_network_convergence
(
functools
.
partial
(
functools
.
partial
(
SE_ResNeXt
152
Small
,
batch_size
=
batch_size
),
SE_ResNeXt
50
Small
,
batch_size
=
batch_size
),
iter
=
20
,
iter
=
20
,
batch_size
=
batch_size
)
batch_size
=
batch_size
)
...
@@ -400,7 +419,8 @@ def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
...
@@ -400,7 +419,8 @@ def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
import
transformer_model
import
transformer_model
def
transformer
():
def
transformer
(
use_feed
):
assert
not
use_feed
,
"transfomer doesn't support feed yet"
return
transformer_model
.
transformer
(
return
transformer_model
.
transformer
(
ModelHyperParams
.
src_vocab_size
+
1
,
ModelHyperParams
.
src_vocab_size
+
1
,
ModelHyperParams
.
trg_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
trg_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
...
...
python/paddle/fluid/tests/unittests/test_prior_box_op.py
浏览文件 @
7b40f7ce
...
@@ -28,7 +28,6 @@ class TestPriorBoxOp(OpTest):
...
@@ -28,7 +28,6 @@ class TestPriorBoxOp(OpTest):
self
.
attrs
=
{
self
.
attrs
=
{
'min_sizes'
:
self
.
min_sizes
,
'min_sizes'
:
self
.
min_sizes
,
'max_sizes'
:
self
.
max_sizes
,
'aspect_ratios'
:
self
.
aspect_ratios
,
'aspect_ratios'
:
self
.
aspect_ratios
,
'variances'
:
self
.
variances
,
'variances'
:
self
.
variances
,
'flip'
:
self
.
flip
,
'flip'
:
self
.
flip
,
...
@@ -37,25 +36,28 @@ class TestPriorBoxOp(OpTest):
...
@@ -37,25 +36,28 @@ class TestPriorBoxOp(OpTest):
'step_h'
:
self
.
step_h
,
'step_h'
:
self
.
step_h
,
'offset'
:
self
.
offset
'offset'
:
self
.
offset
}
}
if
len
(
self
.
max_sizes
)
>
0
:
self
.
attrs
[
'max_sizes'
]
=
self
.
max_sizes
self
.
outputs
=
{
'Boxes'
:
self
.
out_boxes
,
'Variances'
:
self
.
out_var
}
self
.
outputs
=
{
'Boxes'
:
self
.
out_boxes
,
'Variances'
:
self
.
out_var
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
return
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"prior_box"
self
.
op_type
=
"prior_box"
self
.
set_data
()
self
.
set_data
()
def
set_max_sizes
(
self
):
max_sizes
=
[
5
,
10
]
self
.
max_sizes
=
np
.
array
(
max_sizes
).
astype
(
'float32'
).
tolist
()
def
init_test_params
(
self
):
def
init_test_params
(
self
):
self
.
layer_w
=
4
self
.
layer_w
=
32
self
.
layer_h
=
4
self
.
layer_h
=
32
self
.
image_w
=
2
0
self
.
image_w
=
4
0
self
.
image_h
=
2
0
self
.
image_h
=
4
0
self
.
step_w
=
float
(
self
.
image_w
)
/
float
(
self
.
layer_w
)
self
.
step_w
=
float
(
self
.
image_w
)
/
float
(
self
.
layer_w
)
self
.
step_h
=
float
(
self
.
image_h
)
/
float
(
self
.
layer_h
)
self
.
step_h
=
float
(
self
.
image_h
)
/
float
(
self
.
layer_h
)
...
@@ -66,8 +68,7 @@ class TestPriorBoxOp(OpTest):
...
@@ -66,8 +68,7 @@ class TestPriorBoxOp(OpTest):
self
.
min_sizes
=
[
2
,
4
]
self
.
min_sizes
=
[
2
,
4
]
self
.
min_sizes
=
np
.
array
(
self
.
min_sizes
).
astype
(
'float32'
).
tolist
()
self
.
min_sizes
=
np
.
array
(
self
.
min_sizes
).
astype
(
'float32'
).
tolist
()
self
.
max_sizes
=
[
5
,
10
]
self
.
set_max_sizes
()
self
.
max_sizes
=
np
.
array
(
self
.
max_sizes
).
astype
(
'float32'
).
tolist
()
self
.
aspect_ratios
=
[
2.0
,
3.0
]
self
.
aspect_ratios
=
[
2.0
,
3.0
]
self
.
flip
=
True
self
.
flip
=
True
self
.
real_aspect_ratios
=
[
1
,
2.0
,
1.0
/
2.0
,
3.0
,
1.0
/
3.0
]
self
.
real_aspect_ratios
=
[
1
,
2.0
,
1.0
/
2.0
,
3.0
,
1.0
/
3.0
]
...
@@ -79,7 +80,7 @@ class TestPriorBoxOp(OpTest):
...
@@ -79,7 +80,7 @@ class TestPriorBoxOp(OpTest):
self
.
clip
=
True
self
.
clip
=
True
self
.
num_priors
=
len
(
self
.
real_aspect_ratios
)
*
len
(
self
.
min_sizes
)
self
.
num_priors
=
len
(
self
.
real_aspect_ratios
)
*
len
(
self
.
min_sizes
)
if
len
(
self
.
max_sizes
)
>
1
:
if
len
(
self
.
max_sizes
)
>
0
:
self
.
num_priors
+=
len
(
self
.
max_sizes
)
self
.
num_priors
+=
len
(
self
.
max_sizes
)
self
.
offset
=
0.5
self
.
offset
=
0.5
...
@@ -105,35 +106,27 @@ class TestPriorBoxOp(OpTest):
...
@@ -105,35 +106,27 @@ class TestPriorBoxOp(OpTest):
idx
=
0
idx
=
0
for
s
in
range
(
len
(
self
.
min_sizes
)):
for
s
in
range
(
len
(
self
.
min_sizes
)):
min_size
=
self
.
min_sizes
[
s
]
min_size
=
self
.
min_sizes
[
s
]
c_w
=
c_h
=
min_size
/
2.
# rest of priors
out_boxes
[
h
,
w
,
idx
,
:]
=
[
for
r
in
range
(
len
(
self
.
real_aspect_ratios
)):
(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
ar
=
self
.
real_aspect_ratios
[
r
]
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
c_w
=
min_size
*
math
.
sqrt
(
ar
)
/
2
]
c_h
=
(
min_size
/
math
.
sqrt
(
ar
))
/
2
idx
+=
1
if
len
(
self
.
max_sizes
)
>
0
:
max_size
=
self
.
max_sizes
[
s
]
# second prior: aspect_ratio = 1,
c_w
=
c_h
=
math
.
sqrt
(
min_size
*
max_size
)
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
idx
+=
1
# rest of priors
if
len
(
self
.
max_sizes
)
>
0
:
for
r
in
range
(
len
(
self
.
real_aspect_ratios
)):
max_size
=
self
.
max_sizes
[
s
]
ar
=
self
.
real_aspect_ratios
[
r
]
# second prior: aspect_ratio = 1,
if
math
.
fabs
(
ar
-
1.
)
<
1e-6
:
c_w
=
c_h
=
math
.
sqrt
(
min_size
*
max_size
)
/
2
continue
c_w
=
min_size
*
math
.
sqrt
(
ar
)
/
2
c_h
=
(
min_size
/
math
.
sqrt
(
ar
))
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
idx
+=
1
# clip the prior's coordidate such that it is within[0, 1]
# clip the prior's coordidate such that it is within[0, 1]
if
self
.
clip
:
if
self
.
clip
:
out_boxes
=
np
.
clip
(
out_boxes
,
0.0
,
1.0
)
out_boxes
=
np
.
clip
(
out_boxes
,
0.0
,
1.0
)
...
@@ -144,5 +137,10 @@ class TestPriorBoxOp(OpTest):
...
@@ -144,5 +137,10 @@ class TestPriorBoxOp(OpTest):
self
.
out_var
=
out_var
.
astype
(
'float32'
)
self
.
out_var
=
out_var
.
astype
(
'float32'
)
class
TestPriorBoxOpWithMaxSize
(
TestPriorBoxOp
):
def
set_max_sizes
(
self
):
self
.
max_sizes
=
[]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
浏览文件 @
7b40f7ce
...
@@ -19,9 +19,9 @@ from paddle.fluid.framework import Program
...
@@ -19,9 +19,9 @@ from paddle.fluid.framework import Program
class
TestOpDesc
(
unittest
.
TestCase
):
class
TestOpDesc
(
unittest
.
TestCase
):
def
test_op_desc
(
self
):
def
test_op_desc
(
self
):
prog
=
core
.
ProgramDesc
()
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
.
block
(
0
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
self
.
assertIsNotNone
(
block
)
op
=
block
.
append_op
()
op
=
block
.
append_op
()
self
.
assertIsNotNone
(
op
)
self
.
assertIsNotNone
(
op
)
...
@@ -67,7 +67,7 @@ class TestOpDesc(unittest.TestCase):
...
@@ -67,7 +67,7 @@ class TestOpDesc(unittest.TestCase):
self
.
assertEqual
(
8
,
len
(
op
.
attr_names
()))
self
.
assertEqual
(
8
,
len
(
op
.
attr_names
()))
op
.
set_block_attr
(
"block_attr"
,
prog
.
block
(
0
))
op
.
set_block_attr
(
"block_attr"
,
prog
ram_desc
.
block
(
0
))
self
.
assertEqual
(
0
,
op
.
block_attr
(
"block_attr"
))
self
.
assertEqual
(
0
,
op
.
block_attr
(
"block_attr"
))
mul_op
=
block
.
append_op
()
mul_op
=
block
.
append_op
()
...
@@ -88,20 +88,20 @@ class TestProgramDesc(unittest.TestCase):
...
@@ -88,20 +88,20 @@ class TestProgramDesc(unittest.TestCase):
del
program_desc
del
program_desc
def
test_append_block
(
self
):
def
test_append_block
(
self
):
prog_desc
=
core
.
ProgramDesc
()
prog
ram
_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog_desc
)
self
.
assertIsNotNone
(
prog
ram
_desc
)
block_root
=
prog_desc
.
block
(
0
)
block_root
=
prog
ram
_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block_root
)
self
.
assertIsNotNone
(
block_root
)
self
.
assertEqual
(
block_root
.
id
,
0
)
self
.
assertEqual
(
block_root
.
id
,
0
)
block1
=
prog_desc
.
append_block
(
block_root
)
block1
=
prog
ram
_desc
.
append_block
(
block_root
)
block2
=
prog_desc
.
append_block
(
block1
)
block2
=
prog
ram
_desc
.
append_block
(
block1
)
self
.
assertIsNotNone
(
block1
)
self
.
assertIsNotNone
(
block1
)
self
.
assertEqual
(
block1
.
id
,
block2
.
parent
)
self
.
assertEqual
(
block1
.
id
,
block2
.
parent
)
self
.
assertEqual
(
block_root
.
id
,
block1
.
parent
)
self
.
assertEqual
(
block_root
.
id
,
block1
.
parent
)
block3
=
prog_desc
.
append_block
(
block_root
)
block3
=
prog
ram
_desc
.
append_block
(
block_root
)
self
.
assertEqual
(
block3
.
parent
,
block_root
.
id
)
self
.
assertEqual
(
block3
.
parent
,
block_root
.
id
)
self
.
assertEqual
(
prog_desc
.
block
(
1
).
id
,
1
)
self
.
assertEqual
(
prog
ram
_desc
.
block
(
1
).
id
,
1
)
self
.
assertEqual
(
4
,
prog_desc
.
num_blocks
())
self
.
assertEqual
(
4
,
prog
ram
_desc
.
num_blocks
())
class
TestVarDesc
(
unittest
.
TestCase
):
class
TestVarDesc
(
unittest
.
TestCase
):
...
@@ -162,9 +162,9 @@ class TestVarDesc(unittest.TestCase):
...
@@ -162,9 +162,9 @@ class TestVarDesc(unittest.TestCase):
class
TestBlockDesc
(
unittest
.
TestCase
):
class
TestBlockDesc
(
unittest
.
TestCase
):
def
test_add_var
(
self
):
def
test_add_var
(
self
):
prog
=
core
.
ProgramDesc
()
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
.
block
(
0
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
self
.
assertIsNotNone
(
block
)
var1
=
block
.
var
(
"var1"
)
var1
=
block
.
var
(
"var1"
)
var2
=
block
.
var
(
"var2"
)
var2
=
block
.
var
(
"var2"
)
...
@@ -175,9 +175,9 @@ class TestBlockDesc(unittest.TestCase):
...
@@ -175,9 +175,9 @@ class TestBlockDesc(unittest.TestCase):
self
.
assertEqual
(
var2_re
,
var2
)
self
.
assertEqual
(
var2_re
,
var2
)
def
test_add_op
(
self
):
def
test_add_op
(
self
):
prog
=
core
.
ProgramDesc
()
prog
ram_desc
=
core
.
ProgramDesc
()
self
.
assertIsNotNone
(
prog
)
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
.
block
(
0
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
self
.
assertIsNotNone
(
block
)
op1
=
block
.
append_op
()
op1
=
block
.
append_op
()
op2
=
block
.
append_op
()
op2
=
block
.
append_op
()
...
@@ -189,9 +189,9 @@ class TestBlockDesc(unittest.TestCase):
...
@@ -189,9 +189,9 @@ class TestBlockDesc(unittest.TestCase):
def
test_remove_op
(
self
):
def
test_remove_op
(
self
):
program
=
Program
()
program
=
Program
()
prog
=
program
.
desc
prog
ram_desc
=
program
.
desc
self
.
assertIsNotNone
(
prog
)
self
.
assertIsNotNone
(
prog
ram_desc
)
block
=
prog
.
block
(
0
)
block
=
prog
ram_desc
.
block
(
0
)
self
.
assertIsNotNone
(
block
)
self
.
assertIsNotNone
(
block
)
op0
=
block
.
append_op
()
op0
=
block
.
append_op
()
...
...
python/paddle/fluid/tests/unittests/test_sgd_op.py
浏览文件 @
7b40f7ce
...
@@ -97,5 +97,72 @@ class TestSparseSGDOp(unittest.TestCase):
...
@@ -97,5 +97,72 @@ class TestSparseSGDOp(unittest.TestCase):
self
.
check_with_place
(
place
)
self
.
check_with_place
(
place
)
class
TestSGDOpOptimizeSelectedRows
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
row_width
=
12
# create and initialize Grad Variable
grad_height
=
10
grad_rows
=
[
0
,
4
,
7
]
grad_selected_rows
=
scope
.
var
(
'Grad'
).
get_selected_rows
()
grad_selected_rows
.
set_height
(
grad_height
)
grad_selected_rows
.
set_rows
(
grad_rows
)
grad_array
=
np
.
ones
((
len
(
grad_rows
),
row_width
)).
astype
(
"float32"
)
grad_array
[
0
,
0
]
=
2.0
grad_array
[
2
,
8
]
=
4.0
grad_tensor
=
grad_selected_rows
.
get_tensor
()
grad_tensor
.
set
(
grad_array
,
place
)
# create and initialize Param Variable
# create and initialize W Variable
param_rows
=
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
# init Param
w_selected_rows
=
scope
.
var
(
'Param'
).
get_selected_rows
()
w_selected_rows
.
set_height
(
len
(
param_rows
))
w_selected_rows
.
set_rows
(
param_rows
)
w_array
=
np
.
ones
((
len
(
param_rows
),
row_width
)).
astype
(
"float32"
)
for
i
in
range
(
len
(
param_rows
)):
w_array
[
i
]
*=
i
w_tensor
=
w_selected_rows
.
get_tensor
()
w_tensor
.
set
(
w_array
,
place
)
w_before_optimize
=
np
.
array
(
w_tensor
)
# create and initialize LeraningRate Variable
lr_value
=
0.1
lr
=
scope
.
var
(
'LearningRate'
).
get_tensor
()
lr_array
=
np
.
full
((
1
),
lr_value
).
astype
(
"float32"
)
lr
.
set
(
lr_array
,
place
)
# optimize with Python
w_after_optimize
=
np
.
copy
(
w_before_optimize
)
for
index
,
id
in
enumerate
(
grad_rows
):
w_after_optimize
[
id
]
=
w_before_optimize
[
id
]
-
lr_value
*
grad_array
[
index
]
# create and run sgd operator
sgd_op
=
Operator
(
"sgd"
,
Param
=
'Param'
,
Grad
=
'Grad'
,
ParamOut
=
'Param'
,
LearningRate
=
'LearningRate'
)
sgd_op
.
run
(
scope
,
place
)
# get and compare result
result_array
=
np
.
array
(
w_tensor
)
assert
(
result_array
==
w_after_optimize
).
all
()
def
test_sparse_parameter_sgd
(
self
):
places
=
[
core
.
CPUPlace
()]
# do not support GPU kernel currently
for
place
in
places
:
self
.
check_with_place
(
place
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_softmax_op.py
浏览文件 @
7b40f7ce
...
@@ -68,6 +68,17 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
...
@@ -68,6 +68,17 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
self
.
use_cudnn
=
True
self
.
use_cudnn
=
True
class
TestSoftmaxFP16Op
(
TestSoftmaxOp
):
def
init_kernel_type
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
self
.
check_output_with_place
(
place
,
atol
=
1e-3
)
class
TestSoftmaxFP16CUDNNOp
(
TestSoftmaxOp
):
class
TestSoftmaxFP16CUDNNOp
(
TestSoftmaxOp
):
def
init_kernel_type
(
self
):
def
init_kernel_type
(
self
):
self
.
use_cudnn
=
True
self
.
use_cudnn
=
True
...
...
python/paddle/trainer_config_helpers/tests/CMakeLists.txt
浏览文件 @
7b40f7ce
#################### test_config_parser #########################
#################### test_config_parser #########################
add_test
(
NAME layers_test
add_test
(
NAME layers_test
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
SOURCE
_DIR
}
/python/
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
BINARY
_DIR
}
/python/
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/trainer_config_helpers/tests/layers_test.py
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/python/paddle
)
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/python/paddle
)
add_test
(
NAME test_reset_hook
add_test
(
NAME test_reset_hook
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
SOURCE
_DIR
}
/python/
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_
BINARY
_DIR
}
/python/
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/python/paddle
)
WORKING_DIRECTORY
${
PADDLE_SOURCE_DIR
}
/python/paddle
)
add_paddle_exe
(
protobuf_equal ProtobufEqualMain.cpp
)
add_paddle_exe
(
protobuf_equal ProtobufEqualMain.cpp
)
add_test
(
NAME test_layerHelpers
add_test
(
NAME test_layerHelpers
COMMAND
COMMAND
${
PADDLE_SOURCE_DIR
}
/paddle/.set_python_path.sh -d
${
PADDLE_BINARY_DIR
}
/python/
${
PADDLE_
SOURCE
_DIR
}
/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
${
PYTHON_EXECUTABLE
}
${
PADDLE_
BINARY
_DIR
}
/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
${
PYTHON_EXECUTABLE
}
${
CMAKE_CURRENT_BINARY_DIR
}
/protobuf_equal
${
CMAKE_CURRENT_BINARY_DIR
}
/protobuf_equal
)
)
python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
浏览文件 @
7b40f7ce
...
@@ -2,7 +2,6 @@
...
@@ -2,7 +2,6 @@
set
-e
set
-e
cd
`
dirname
$0
`
cd
`
dirname
$0
`
export
PYTHONPATH
=
$PWD
/../../../../
protostr
=
$PWD
/protostr
protostr
=
$PWD
/protostr
.
file_list.sh
.
file_list.sh
...
...
python/setup.py.in
浏览文件 @
7b40f7ce
...
@@ -58,7 +58,7 @@ def mkl():
...
@@ -58,7 +58,7 @@ def mkl():
'istaged': ISTAGED,
'istaged': ISTAGED,
'with_mkl': '@WITH_MKL@'})
'with_mkl': '@WITH_MKL@'})
write_version_py(filename='@PADDLE_
SOURCE
_DIR@/python/paddle/version.py')
write_version_py(filename='@PADDLE_
BINARY
_DIR@/python/paddle/version.py')
packages=['paddle',
packages=['paddle',
...
@@ -107,9 +107,10 @@ package_dir={
...
@@ -107,9 +107,10 @@ package_dir={
# So that package points to other directory.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'paddle.fluid': '${PADDLE_BINARY_DIR}/python/paddle/fluid',
}
}
if '${WITH_FLUID_ONLY}'== 'OFF':
if '${WITH_FLUID_ONLY}'== 'OFF':
package_dir['py_paddle']='${PADDLE_
SOURCE_DIR}/paddle
/py_paddle'
package_dir['py_paddle']='${PADDLE_
BINARY_DIR}/python
/py_paddle'
paddle_rt_lib_dir = 'lib'
paddle_rt_lib_dir = 'lib'
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录