Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
488610a6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
488610a6
编写于
11月 22, 2018
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
差异文件
merge develop
test=develop
上级
3912545f
de2db117
变更
45
隐藏空白更改
内联
并排
Showing
45 changed file
with
492 addition
and
555 deletion
+492
-555
CMakeLists.txt
CMakeLists.txt
+18
-3
cmake/external/gtest.cmake
cmake/external/gtest.cmake
+4
-0
cmake/external/snappy.cmake
cmake/external/snappy.cmake
+10
-2
cmake/external/snappystream.cmake
cmake/external/snappystream.cmake
+35
-26
cmake/generic.cmake
cmake/generic.cmake
+3
-0
cmake/operators.cmake
cmake/operators.cmake
+1
-3
cmake/simd.cmake
cmake/simd.cmake
+38
-35
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+1
-5
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-14
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
...luid/framework/details/fast_threaded_ssa_graph_executor.h
+1
-1
paddle/fluid/framework/eigen.h
paddle/fluid/framework/eigen.h
+0
-5
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+0
-5
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+0
-2
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+0
-2
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+0
-6
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+6
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+6
-6
paddle/fluid/operators/hierarchical_sigmoid_op.h
paddle/fluid/operators/hierarchical_sigmoid_op.h
+1
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+15
-20
paddle/fluid/operators/math/detail/activation_functions.h
paddle/fluid/operators/math/detail/activation_functions.h
+1
-0
paddle/fluid/operators/math/matrix_bit_code.h
paddle/fluid/operators/math/matrix_bit_code.h
+1
-2
paddle/fluid/operators/reader/create_py_reader_op.cc
paddle/fluid/operators/reader/create_py_reader_op.cc
+1
-1
paddle/fluid/operators/roi_align_op.cc
paddle/fluid/operators/roi_align_op.cc
+3
-3
paddle/fluid/operators/roi_pool_op.cc
paddle/fluid/operators/roi_pool_op.cc
+3
-3
paddle/fluid/operators/space_to_depth_op.cc
paddle/fluid/operators/space_to_depth_op.cc
+1
-1
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+8
-4
paddle/fluid/platform/cpu_helper.cc
paddle/fluid/platform/cpu_helper.cc
+7
-0
paddle/fluid/platform/device_tracer.h
paddle/fluid/platform/device_tracer.h
+1
-11
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+0
-2
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+15
-55
paddle/fluid/platform/init.cc
paddle/fluid/platform/init.cc
+0
-7
paddle/fluid/platform/init.h
paddle/fluid/platform/init.h
+0
-3
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+31
-4
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+1
-1
paddle/fluid/platform/profiler.h
paddle/fluid/platform/profiler.h
+0
-10
paddle/fluid/platform/stream_callback_manager.h
paddle/fluid/platform/stream_callback_manager.h
+6
-7
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+2
-6
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+5
-20
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-3
python/paddle/fluid/contrib/inferencer.py
python/paddle/fluid/contrib/inferencer.py
+1
-3
python/paddle/fluid/contrib/trainer.py
python/paddle/fluid/contrib/trainer.py
+1
-2
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+58
-60
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+178
-190
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+21
-20
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+5
-1
未找到文件。
CMakeLists.txt
浏览文件 @
488610a6
...
...
@@ -130,6 +130,21 @@ if (APPLE OR WIN32)
"Disable MKL for building on mac and windows"
FORCE
)
endif
()
if
(
WIN32
)
set
(
WITH_AVX OFF CACHE STRING
"Disable AVX when compiling for Windows"
FORCE
)
set
(
WITH_DSO OFF CACHE STRING
"Disable DSO when compiling for Windows"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling for Windows"
FORCE
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
set
(
WITH_C_API OFF CACHE STRING
"Disable C_API when compiling for Windows"
FORCE
)
set
(
WITH_FLUID_ONLY ON CACHE STRING
"Enable FLUID_ONLY when compiling for Windows"
FORCE
)
endif
()
set
(
THIRD_PARTY_PATH
"
${
CMAKE_BINARY_DIR
}
/third_party"
CACHE STRING
"A path setting third party libraries download & build directories."
)
...
...
@@ -191,11 +206,11 @@ include(external/cares)
include
(
external/cub
)
include
(
external/xxhash
)
# download xxhash
include
(
external/dlpack
)
if
(
NOT WIN32
)
# there is no official support of snappystream, warpctc, nccl, cupti in windows
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
if
(
NOT WIN32
)
# there is no official support of warpctc, nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
include
(
cupti
)
endif
(
NOT WIN32
)
...
...
cmake/external/gtest.cmake
浏览文件 @
488610a6
...
...
@@ -50,7 +50,11 @@ IF(WITH_TESTING)
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_INSTALL_PREFIX=
${
GTEST_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_GMOCK=ON
...
...
cmake/external/snappy.cmake
浏览文件 @
488610a6
...
...
@@ -24,7 +24,11 @@ set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
set
(
SNAPPY_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/snappy
)
set
(
SNAPPY_INCLUDE_DIR
"
${
SNAPPY_INSTALL_DIR
}
/include"
CACHE PATH
"snappy include directory."
FORCE
)
set
(
SNAPPY_LIBRARIES
"
${
SNAPPY_INSTALL_DIR
}
/lib/libsnappy.a"
)
if
(
WIN32
)
set
(
SNAPPY_LIBRARIES
"
${
SNAPPY_INSTALL_DIR
}
/lib/snappy.lib"
)
else
(
WIN32
)
set
(
SNAPPY_LIBRARIES
"
${
SNAPPY_INSTALL_DIR
}
/lib/libsnappy.a"
)
endif
(
WIN32
)
ExternalProject_Add
(
extern_snappy
...
...
@@ -34,8 +38,12 @@ ExternalProject_Add(
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
SNAPPY_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR=
${
SNAPPY_INSTALL_DIR
}
/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
...
...
cmake/external/snappystream.cmake
浏览文件 @
488610a6
...
...
@@ -18,36 +18,45 @@ ENDIF()
include
(
ExternalProject
)
# NOTE: snappy is needed when linking with recordio
set
(
SNAPPYSTREAM_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/snappy_stream
)
set
(
SNAPPYSTREAM_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/snappy_stream
)
set
(
SNAPPYSTREAM_INCLUDE_DIR
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/include"
CACHE PATH
"snappy stream include directory."
FORCE
)
set
(
SNAPPYSTREAM_LIBRARIES
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib/libsnappystream.a"
)
ExternalProject_Add
(
extern_snappystream
GIT_REPOSITORY
"https://github.com/hoxnox/snappystream.git"
GIT_TAG
"0.2.8"
PREFIX
${
SNAPPYSTREAM_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_INSTALL_PREFIX=
${
SNAPPY_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR=
${
SNAPPY_INSTALL_DIR
}
/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=
${
THIRD_PARTY_BUILD_TYPE
}
-DSNAPPY_ROOT=
${
SNAPPY_INSTALL_DIR
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=
${
SNAPPYSTREAM_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR:PATH=
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib
-DCMAKE_BUILD_TYPE:STRING=
${
THIRD_PARTY_BUILD_TYPE
}
DEPENDS snappy
)
if
(
WIN32
)
# Fix me, VS2015 come without VLA support
set
(
SNAPPYSTREAM_LIBRARIES
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib/snappystream.lib"
)
MESSAGE
(
WARNING,
"In windows, snappystream has no compile support for windows,
please build it manually and put it at "
${
SNAPPYSTREAM_INSTALL_DIR
}
)
else
(
WIN32
)
set
(
SNAPPYSTREAM_LIBRARIES
"
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib/libsnappystream.a"
)
ExternalProject_Add
(
extern_snappystream
GIT_REPOSITORY
"https://github.com/hoxnox/snappystream.git"
GIT_TAG
"0.2.8"
PREFIX
${
SNAPPYSTREAM_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
SNAPPY_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR=
${
SNAPPY_INSTALL_DIR
}
/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=
${
THIRD_PARTY_BUILD_TYPE
}
-DSNAPPY_ROOT=
${
SNAPPY_INSTALL_DIR
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=
${
SNAPPYSTREAM_INSTALL_DIR
}
-DCMAKE_INSTALL_LIBDIR:PATH=
${
SNAPPYSTREAM_INSTALL_DIR
}
/lib
-DCMAKE_BUILD_TYPE:STRING=
${
THIRD_PARTY_BUILD_TYPE
}
DEPENDS snappy
)
endif
(
WIN32
)
add_library
(
snappystream STATIC IMPORTED GLOBAL
)
set_property
(
TARGET snappystream PROPERTY IMPORTED_LOCATION
${
SNAPPYSTREAM_LIBRARIES
}
)
...
...
cmake/generic.cmake
浏览文件 @
488610a6
...
...
@@ -351,6 +351,9 @@ function(cc_test TARGET_NAME)
cmake_parse_arguments
(
cc_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_executable
(
${
TARGET_NAME
}
${
cc_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
if
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
shlwapi
)
endif
(
WIN32
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_test
(
NAME
${
TARGET_NAME
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
...
...
cmake/operators.cmake
浏览文件 @
488610a6
...
...
@@ -84,9 +84,7 @@ function(op_library TARGET)
endif
()
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
"hierarchical_sigmoid_op"
"crf_decoding_op"
"select_op"
"lstmp_op"
"gru_op"
"fusion_gru_op"
"lstm_op"
"fusion_lstm_op"
"cumsum_op"
"fusion_seqconv_eltadd_relu_op"
"channel_send_op"
"channel_create_op"
"channel_close_op"
"channel_recv_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
endif
()
...
...
cmake/simd.cmake
浏览文件 @
488610a6
...
...
@@ -57,43 +57,46 @@ int main()
return 0;
}"
SSE3_FOUND
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
# disable AVX by default on windows
if
(
NOT WIN32
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/fluid/CMakeLists.txt
浏览文件 @
488610a6
...
...
@@ -3,13 +3,9 @@ add_subdirectory(platform)
add_subdirectory
(
framework
)
add_subdirectory
(
operators
)
add_subdirectory
(
string
)
add_subdirectory
(
pybind
)
if
(
NOT WIN32
)
add_subdirectory
(
recordio
)
endif
(
NOT WIN32
)
add_subdirectory
(
pybind
)
# NOTE: please add subdirectory inference at last.
add_subdirectory
(
inference
)
add_subdirectory
(
train
)
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
488610a6
...
...
@@ -31,9 +31,7 @@ function(windows_symbolic TARGET)
endfunction
()
add_subdirectory
(
ir
)
if
(
NOT WIN32
)
add_subdirectory
(
details
)
endif
(
NOT WIN32
)
# ddim lib
proto_library
(
framework_proto SRCS framework.proto
)
...
...
@@ -68,11 +66,7 @@ if(WITH_GPU)
else
()
cc_test
(
mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor
)
endif
()
if
(
NOT WIN32
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
else
()
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version
)
endif
(
NOT WIN32
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
nv_test
(
lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor
)
...
...
@@ -122,13 +116,8 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context
)
if
(
NOT WIN32
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler
)
else
()
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor
)
endif
(
NOT WIN32
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry device_context
)
...
...
@@ -183,12 +172,10 @@ else()
cc_test
(
test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op
)
endif
()
if
(
NOT WIN32
)
cc_library
(
parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph build_strategy
fast_threaded_ssa_graph_executor
)
endif
()
# NOT WIN32
cc_library
(
prune SRCS prune.cc DEPS framework_proto
)
cc_test
(
prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context
)
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
浏览文件 @
488610a6
...
...
@@ -13,9 +13,9 @@
// limitations under the License.
#pragma once
#include <ThreadPool.h>
#include <string>
#include <vector>
#include "ThreadPool.h"
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/details/exception_holder.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
...
...
paddle/fluid/framework/eigen.h
浏览文件 @
488610a6
...
...
@@ -13,11 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
488610a6
...
...
@@ -23,11 +23,6 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#if defined(_WIN32)
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
488610a6
...
...
@@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
...
...
paddle/fluid/framework/operator.h
浏览文件 @
488610a6
...
...
@@ -20,8 +20,6 @@ limitations under the License. */
#include <tuple>
#include <unordered_map>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h"
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
488610a6
...
...
@@ -14,12 +14,6 @@ limitations under the License. */
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <map>
#include <memory>
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
488610a6
...
...
@@ -15,6 +15,12 @@
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#ifdef _WIN32
#define posix_memalign_free _aligned_free
#define posix_memalign(p, a, s) \
(((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
#endif
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
488610a6
...
...
@@ -22,9 +22,7 @@ if(WITH_DISTRIBUTE)
add_subdirectory
(
distributed_ops
)
endif
()
if
(
NOT WIN32
)
add_subdirectory
(
reader
)
endif
()
add_subdirectory
(
reader
)
if
(
NOT WIN32
)
add_subdirectory
(
nccl
)
...
...
@@ -42,7 +40,7 @@ endif()
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
)
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
)
if
(
WITH_GPU
AND NOT WIN32
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
...
...
@@ -59,10 +57,12 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
dynload_warpctc sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
depthwise_conv
)
endif
()
...
...
paddle/fluid/operators/hierarchical_sigmoid_op.h
浏览文件 @
488610a6
...
...
@@ -111,7 +111,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
auto
pre_out_mat
=
EigenMatrix
<
T
>::
From
(
*
pre_out
);
auto
pre_out_grad_mat
=
EigenMatrix
<
T
>::
From
(
pre_out_grad
);
auto
out_grad_mat
=
EigenMatrix
<
T
>::
From
(
*
out_grad
);
Eigen
::
array
<
int
,
2
>
bcast
({{
1
,
static_cast
<
int
>
(
pre_out_grad
.
dims
()[
1
])}})
;
Eigen
::
array
<
int
,
2
>
bcast
{
1
,
static_cast
<
int
>
(
pre_out_grad
.
dims
()[
1
])}
;
// softrelu derivative
pre_out_grad_mat
.
device
(
place
)
=
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
488610a6
if
(
NOT WIN32
)
add_subdirectory
(
detail
)
endif
(
NOT WIN32
)
add_subdirectory
(
detail
)
function
(
math_library TARGET
)
# math_library is a function to create math library.
...
...
@@ -43,10 +41,8 @@ math_library(depthwise_conv)
math_library
(
im2col
)
math_library
(
sampler
)
if
(
NOT WIN32
)
# windows do not support avx functions yet.
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
lstm_compute DEPS activation_functions
)
endif
(
NOT WIN32
)
math_library
(
gru_compute DEPS activation_functions math_function
)
math_library
(
lstm_compute DEPS activation_functions
)
cc_library
(
blas SRCS blas.cc DEPS cblas framework_proto device_context
)
math_library
(
math_function DEPS blas
)
...
...
@@ -58,9 +54,9 @@ math_library(sequence_padding)
math_library
(
sequence_pooling DEPS math_function
)
math_library
(
sequence_scale
)
math_library
(
softmax DEPS math_function
)
if
(
NOT WIN32
)
math_library
(
matrix_bit_code
)
endif
(
NOT WIN32
)
math_library
(
matrix_bit_code
)
math_library
(
unpooling
)
math_library
(
vol2col
)
...
...
@@ -76,13 +72,12 @@ if(WITH_GPU)
endif
()
cc_test
(
concat_test SRCS concat_test.cc DEPS concat_and_split
)
cc_test
(
cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info
)
if
(
NOT WIN32
)
set
(
JIT_KERNEL_SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc jit_kernel_layer_norm.cc
)
set
(
JIT_KERNEL_DEPS cpu_info cblas gflags enforce
)
if
(
WITH_XBYAK
)
list
(
APPEND JIT_KERNEL_SRCS jit_gen.cc jit_code.cc
)
list
(
APPEND JIT_KERNEL_DEPS xbyak
)
endif
()
cc_library
(
jit_kernel SRCS
${
JIT_KERNEL_SRCS
}
DEPS
${
JIT_KERNEL_DEPS
}
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
endif
(
NOT WIN32
)
set
(
JIT_KERNEL_SRCS jit_kernel.cc jit_kernel_blas.cc jit_kernel_exp.cc jit_kernel_rnn.cc jit_kernel_crf_decode.cc jit_kernel_layer_norm.cc
)
set
(
JIT_KERNEL_DEPS cpu_info cblas gflags enforce
)
if
(
WITH_XBYAK
)
list
(
APPEND JIT_KERNEL_SRCS jit_gen.cc jit_code.cc
)
list
(
APPEND JIT_KERNEL_DEPS xbyak
)
endif
()
cc_library
(
jit_kernel SRCS
${
JIT_KERNEL_SRCS
}
DEPS
${
JIT_KERNEL_DEPS
}
)
cc_test
(
jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel
)
paddle/fluid/operators/math/detail/activation_functions.h
浏览文件 @
488610a6
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <math.h>
#include <string>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
...
...
paddle/fluid/operators/math/matrix_bit_code.h
浏览文件 @
488610a6
...
...
@@ -67,7 +67,7 @@ inline constexpr size_t FindLastSet(size_t x) {
:
(
std
::
is_same
<
size_t
,
unsigned
long
>::
value
// NOLINT
?
(
x
?
8
*
sizeof
(
x
)
-
__builtin_clzl
(
x
)
:
0
)
:
(
x
?
8
*
sizeof
(
x
)
-
__builtin_clzll
(
x
)
:
0
));
}
#else
// windows don't have built-in clz, ctz function
template
<
typename
T
>
...
...
@@ -92,7 +92,6 @@ inline int clz(const T& value) {
inline
size_t
FindLastSet
(
size_t
x
)
{
return
sizeof
(
size_t
)
*
8
-
clz
(
x
);
}
#endif // !_WIN32
}
struct
SimpleCode
{
SimpleCode
(
size_t
code
,
size_t
num_classes
)
:
c_
(
code
+
num_classes
)
{}
...
...
paddle/fluid/operators/reader/create_py_reader_op.cc
浏览文件 @
488610a6
...
...
@@ -74,7 +74,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase {
"Name of the `LoDTensorBlockingQueueHolder` variable"
);
AddComment
(
R"DOC(
Create PyReader to support LoDTensor data feeding in Python side.
Create PyReader to support LoDTensor data feeding in Python side.
)DOC"
);
}
};
...
...
paddle/fluid/operators/roi_align_op.cc
浏览文件 @
488610a6
...
...
@@ -35,10 +35,10 @@ class ROIAlignOp : public framework::OperatorWithKernel {
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]."
);
"given as [[x1, y1, x2, y2],
...
]."
);
PADDLE_ENFORCE
(
rois_dims
[
1
]
==
4
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]."
);
"given as [[x1, y1, x2, y2],
...
]."
);
int
pooled_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_height"
);
int
pooled_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_width"
);
float
spatial_scale
=
ctx
->
Attrs
().
Get
<
float
>
(
"spatial_scale"
);
...
...
@@ -103,7 +103,7 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor), "
"ROIs (Regions of Interest) to pool over. "
"should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]. "
"given as [[x1, y1, x2, y2],
...
]. "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."
);
AddOutput
(
"Out"
,
...
...
paddle/fluid/operators/roi_pool_op.cc
浏览文件 @
488610a6
...
...
@@ -40,10 +40,10 @@ class ROIPoolOp : public framework::OperatorWithKernel {
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]."
);
"given as [[x1, y1, x2, y2],
...
]."
);
PADDLE_ENFORCE
(
rois_dims
[
1
]
==
kROISize
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]."
);
"given as [[x1, y1, x2, y2],
...
]."
);
int
pooled_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_height"
);
int
pooled_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"pooled_width"
);
...
...
@@ -110,7 +110,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor), "
"ROIs (Regions of Interest) to pool over. "
"should be a 2-D LoDTensor of shape (num_rois, 4)"
"given as [[x1, y1, x2, y2],
…
]. "
"given as [[x1, y1, x2, y2],
...
]. "
"Where batch_id is the id of the data, "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the bottom right coordinates."
);
...
...
paddle/fluid/operators/space_to_depth_op.cc
浏览文件 @
488610a6
...
...
@@ -86,7 +86,7 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker {
.
GreaterThan
(
1
);
AddComment
(
R"DOC(
reorg operator used in Yolo v2.
The equation is: C2 = C1/blocksize * blocksize, W2 = W1 * blocksize + offset % blocksize, H2 = H1 * blocksize + offset / blocksize,
The equation is: C2 = C1/blocksize * blocksize, W2 = W1 * blocksize + offset % blocksize, H2 = H1 * blocksize + offset / blocksize,
Reshape Input(X) into the shape according to Attr(blocksize). The
data in Input(X) are unchanged.
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
488610a6
if
(
NOT WIN32
)
proto_library
(
profiler_proto SRCS profiler.proto DEPS framework_proto
)
py_proto_compile
(
profiler_py_proto SRCS profiler.proto
)
...
...
@@ -6,11 +5,19 @@ add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch _
add_dependencies
(
profiler_py_proto profiler_py_proto_init
)
if
(
NOT WIN32
)
add_custom_command
(
TARGET profiler_py_proto POST_BUILD
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND cp *.py
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
else
(
NOT WIN32
)
string
(
REPLACE
"/"
"
\\
"
proto_dstpath
"
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler/"
)
add_custom_command
(
TARGET profiler_py_proto POST_BUILD
COMMAND
${
CMAKE_COMMAND
}
-E make_directory
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/proto/profiler
COMMAND copy /Y *.py
${
proto_dstpath
}
COMMENT
"Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
endif
(
NOT WIN32
)
if
(
WITH_GPU
)
...
...
@@ -60,12 +67,9 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
transform_test SRCS transform_test.cu DEPS memory place device_context
)
if
(
NOT WIN32
)
cc_library
(
device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto
${
GPU_CTX_DEPS
}
)
cc_library
(
profiler SRCS profiler.cc DEPS device_context device_tracer
)
cc_test
(
profiler_test SRCS profiler_test.cc DEPS profiler
)
endif
(
NOT WIN32
)
nv_test
(
float16_gpu_test SRCS float16_test.cu DEPS lod_tensor
)
cc_test
(
float16_test SRCS float16_test.cc DEPS lod_tensor
)
...
...
paddle/fluid/platform/cpu_helper.cc
浏览文件 @
488610a6
...
...
@@ -29,6 +29,13 @@ namespace platform {
void
SetNumThreads
(
int
num_threads
)
{
#ifdef PADDLE_USE_OPENBLAS
// windows has no support for openblas multi-thread
// please refer to: https://github.com/PaddlePaddle/Paddle/issues/7234
#ifdef _WIN32
if
(
num_threads
>
1
)
{
num_threads
=
1
;
}
#endif
int
real_num_threads
=
num_threads
>
1
?
num_threads
:
1
;
openblas_set_num_threads
(
real_num_threads
);
#elif defined(PADDLE_WITH_MKLML)
...
...
paddle/fluid/platform/device_tracer.h
浏览文件 @
488610a6
...
...
@@ -13,17 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if !defined(_WIN32)
#include <sys/time.h>
#else
#include <windows.h>
#endif // !_WIN32
#include <time.h>
#include <chrono> // NOLINT
#include <string>
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.pb.h"
namespace
paddle
{
...
...
@@ -32,15 +26,11 @@ namespace platform {
///////////////////////
// WARN: Under Development. Don't depend on it yet.
//////////////////////
#if !defined(_WIN32)
inline
uint64_t
PosixInNsec
()
{
struct
timeval
tv
;
gettimeofday
(
&
tv
,
nullptr
);
return
1000
*
(
static_cast
<
uint64_t
>
(
tv
.
tv_sec
)
*
1000000
+
tv
.
tv_usec
);
}
#else
inline
uint64_t
PosixInNsec
()
{
return
static_cast
<
uint64_t
>
(
0
);
}
#endif // !_WIN32
// DeviceTracer performs the following tasks:
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
488610a6
...
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <cudnn.h>
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
488610a6
...
...
@@ -18,12 +18,6 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif // __GNUC__
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#ifdef PADDLE_WITH_CUDA
#include <cublas_v2.h>
#include <cudnn.h>
...
...
@@ -127,14 +121,14 @@ struct EOFException : public std::exception {
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else
// there is no equivalent intrinsics in msvc.
#define UNLIKELY(condition) (condition
== 0
)
#define UNLIKELY(condition) (condition)
#endif
#if !defined(_WIN32)
#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1)
#else
// there is no equivalent intrinsics in msvc.
#define LIKELY(condition) (condition
!= 0
)
#define LIKELY(condition) (condition)
#endif
template
<
typename
...
Args
>
...
...
@@ -248,7 +242,6 @@ inline void throw_on_error(T e) {
throw_on_error
(
e
,
""
);
}
#if !defined(_WIN32)
#define PADDLE_THROW(...) \
do { \
throw ::paddle::platform::EnforceNotMet( \
...
...
@@ -272,17 +265,6 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
// disable enforce, caused by the varardic macro exception error
#define PADDLE_THROW(x) \
do { \
throw std::make_exception_ptr( \
std::runtime_error("Windows disable the enforce.")); \
} while (false)
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
...
...
@@ -302,20 +284,6 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__)
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__)
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__)
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
...
...
@@ -335,27 +303,19 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
} while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed."); \
} \
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
} \
} while (0)
#endif // !_WIN32
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__)
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__)
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__)
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/init.cc
浏览文件 @
488610a6
...
...
@@ -117,13 +117,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
places
.
emplace_back
(
platform
::
CPUPlace
());
platform
::
DeviceContextPool
::
Init
(
places
);
// windows has no support for openblas multi-thread
#ifdef _WIN32
if
(
FLAGS_paddle_num_threads
>
1
)
{
FLAGS_paddle_num_threads
=
1
;
}
#endif
#ifndef PADDLE_WITH_MKLDNN
platform
::
SetNumThreads
(
FLAGS_paddle_num_threads
);
#endif
...
...
paddle/fluid/platform/init.h
浏览文件 @
488610a6
...
...
@@ -16,9 +16,6 @@ limitations under the License. */
#include <string>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "gflags/gflags.h"
#include "glog/logging.h"
...
...
paddle/fluid/platform/port.h
浏览文件 @
488610a6
...
...
@@ -17,6 +17,7 @@
#include <cstdio>
#include <stdexcept>
#include <time.h>
#include <memory>
#include <string>
...
...
@@ -27,8 +28,13 @@
#include <dlfcn.h> // dladdr
#include <execinfo.h> // backtrace
#include <sys/stat.h>
#include <sys/time.h>
#include <algorithm> // std::accumulate
#else
#define NOMINMAX // msvc max/min macro conflict with std::min/max
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <io.h> // _popen, _pclose
#include <stdio.h>
#include <windows.h>
...
...
@@ -57,6 +63,25 @@ static void *dlopen(const char *filename, int flag) {
return
reinterpret_cast
<
void
*>
(
hModule
);
}
static
int
gettimeofday
(
struct
timeval
*
tp
,
void
*
tzp
)
{
time_t
clock
;
struct
tm
tm
;
SYSTEMTIME
wtm
;
GetLocalTime
(
&
wtm
);
tm
.
tm_year
=
wtm
.
wYear
-
1900
;
tm
.
tm_mon
=
wtm
.
wMonth
-
1
;
tm
.
tm_mday
=
wtm
.
wDay
;
tm
.
tm_hour
=
wtm
.
wHour
;
tm
.
tm_min
=
wtm
.
wMinute
;
tm
.
tm_sec
=
wtm
.
wSecond
;
tm
.
tm_isdst
=
-
1
;
clock
=
mktime
(
&
tm
);
tp
->
tv_sec
=
clock
;
tp
->
tv_usec
=
wtm
.
wMilliseconds
*
1000
;
return
(
0
);
}
#endif // !_WIN32
static
void
ExecShellCommand
(
const
std
::
string
&
cmd
,
std
::
string
*
message
)
{
...
...
@@ -132,10 +157,12 @@ static void MkDir(const char *path) {
}
}
#else
CreateDirectory
(
path
,
NULL
);
auto
errorno
=
GetLastError
();
if
(
errorno
!=
ERROR_ALREADY_EXISTS
)
{
throw
std
::
runtime_error
(
path_error
);
BOOL
return_value
=
CreateDirectory
(
path
,
NULL
);
if
(
!
return_value
)
{
auto
errorno
=
GetLastError
();
if
(
errorno
!=
ERROR_ALREADY_EXISTS
)
{
throw
std
::
runtime_error
(
path_error
);
}
}
#endif // !_WIN32
}
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
488610a6
...
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/port.h"
#include <sys/time.h>
#include <algorithm>
#include <iomanip>
#include <limits>
...
...
paddle/fluid/platform/profiler.h
浏览文件 @
488610a6
...
...
@@ -69,7 +69,6 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
#if !defined(_WIN32)
struct
RecordEvent
{
// dev_ctx can be set to nullptr if device is cpu.
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
...
...
@@ -106,15 +105,6 @@ struct RecordBlock {
std
::
string
name_
;
uint64_t
start_ns_
;
};
#else
// windows do not support profiler temporarily.
struct
RecordEvent
{
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{}
};
struct
RecordBlock
{
explicit
RecordBlock
(
int
block_id
)
{}
};
#endif
// Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
...
...
paddle/fluid/platform/stream_callback_manager.h
浏览文件 @
488610a6
...
...
@@ -45,16 +45,15 @@ class StreamCallbackManager {
inline
void
AddCallback
(
Callback
&&
callback
)
const
{
auto
*
stream_callback_context
=
new
StreamCallbackContext
(
this
,
std
::
forward
<
Callback
>
(
callback
));
PADDLE_ENFORCE
(
#if CUDA_VERSION >= 10000
cudaLaunchHostFunc
(
stream_
,
StreamCallbackManager
::
StreamCallbackFunc
,
stream_callback_context
)
PADDLE_ENFORCE
(
cudaLaunchHostFunc
(
stream_
,
StreamCallbackManager
::
StreamCallbackFunc
,
stream_callback_context
));
// NOLINT
#else
cudaStreamAddCallback
(
stream_
,
StreamCallbackManager
::
StreamCallbackFunc
,
stream_callback_context
,
0
)
PADDLE_ENFORCE
(
cudaStreamAddCallback
(
stream_
,
StreamCallbackManager
::
StreamCallbackFunc
,
stream_callback_context
,
0
));
// NOLINT
#endif
);
// NOLINT
}
void
Wait
()
const
{
thread_pool_
.
reset
(
new
ThreadPool
(
1
));
}
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
488610a6
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method pass_builder
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc
)
if
(
NOT WIN32
)
list
(
APPEND PYBIND_DEPS parallel_executor profiler
)
list
(
APPEND PYBIND_SRCS recordio.cc
)
endif
(
NOT WIN32
)
set
(
PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method pass_builder parallel_executor profiler
)
set
(
PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
)
if
(
WITH_PYTHON
)
if
(
WITH_AMD_GPU
)
hip_library
(
paddle_pybind SHARED
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
488610a6
...
...
@@ -21,13 +21,6 @@ limitations under the License. */
#include <utility>
#include <vector>
#if defined(_WIN32)
#define NOMINMAX
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#include <Windows.h>
#endif
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
...
...
@@ -36,9 +29,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#ifndef _WIN32
#include "paddle/fluid/framework/parallel_executor.h"
#endif
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
...
...
@@ -46,6 +37,7 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -95,6 +87,9 @@ bool IsCompiledWithDIST() {
}
PYBIND11_PLUGIN
(
core
)
{
// Not used, just make sure cpu_info.cc is linked.
paddle
::
platform
::
CpuTotalPhysicalMemory
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
...
...
@@ -359,22 +354,16 @@ All parameter, weight, gradient are variables in Paddle.
return
self
.
GetMutable
<
platform
::
Communicator
>
();
},
py
::
return_value_policy
::
reference
)
#endif
#ifndef _WIN32
.
def
(
"get_reader"
,
[](
Variable
&
self
)
->
framework
::
ReaderHolder
*
{
PADDLE_ENFORCE
(
self
.
IsType
<
framework
::
ReaderHolder
>
());
return
self
.
GetMutable
<
framework
::
ReaderHolder
>
();
},
py
::
return_value_policy
::
reference
)
#endif
;
// NOLINT
py
::
return_value_policy
::
reference
);
#if !defined(_WIN32)
py
::
class_
<
framework
::
ReaderHolder
>
(
m
,
"Reader"
,
""
)
.
def
(
"reset"
,
&
framework
::
ReaderHolder
::
ResetAll
);
#endif
using
LoDTensorBlockingQueue
=
::
paddle
::
operators
::
reader
::
LoDTensorBlockingQueue
;
...
...
@@ -643,7 +632,6 @@ All parameter, weight, gradient are variables in Paddle.
#endif
#endif
#ifndef _WIN32
py
::
enum_
<
platform
::
ProfilerState
>
(
m
,
"ProfilerState"
,
py
::
arithmetic
())
.
value
(
"kDisabled"
,
platform
::
ProfilerState
::
kDisabled
)
.
value
(
"kCPU"
,
platform
::
ProfilerState
::
kCPU
)
...
...
@@ -664,7 +652,6 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"disable_profiler"
,
platform
::
DisableProfiler
);
m
.
def
(
"is_profiler_enabled"
,
platform
::
IsProfileEnabled
);
m
.
def
(
"reset_profiler"
,
platform
::
ResetProfiler
);
#endif
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
...
...
@@ -693,7 +680,6 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"remove_pass"
,
[](
ir
::
PassBuilder
&
self
,
size_t
idx
)
{
self
.
RemovePass
(
idx
);
});
#ifndef _WIN32
// -- python binds for parallel executor.
py
::
class_
<
ParallelExecutor
>
pe
(
m
,
"ParallelExecutor"
);
py
::
class_
<
ExecutionStrategy
>
exec_strategy
(
pe
,
"ExecutionStrategy"
,
R"DOC(
...
...
@@ -921,7 +907,6 @@ All parameter, weight, gradient are variables in Paddle.
});
BindRecordIOWriter
(
&
m
);
#endif
return
m
.
ptr
();
}
}
// namespace pybind
...
...
python/paddle/fluid/__init__.py
浏览文件 @
488610a6
...
...
@@ -115,9 +115,8 @@ def __bootstrap__():
'use_pinned_memory'
,
'check_nan_inf'
,
'benchmark'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'use_ngraph'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'cpu_deterministic'
,
'eager_delete_tensor_gb'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
]
if
os
.
name
!=
'nt'
:
read_env_flags
.
append
(
'warpctc_dir'
)
...
...
python/paddle/fluid/contrib/inferencer.py
浏览文件 @
488610a6
...
...
@@ -15,15 +15,13 @@
from
__future__
import
print_function
import
contextlib
import
os
from
..
import
core
from
..
import
executor
from
..
import
framework
from
..
import
io
if
os
.
name
!=
'nt'
:
from
..
import
parallel_executor
from
..
import
parallel_executor
from
..
import
unique_name
from
.trainer
import
check_and_get_place
...
...
python/paddle/fluid/contrib/trainer.py
浏览文件 @
488610a6
...
...
@@ -28,8 +28,7 @@ from .. import framework
from
..
import
io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from
..
import
optimizer
as
opt_module
if
os
.
name
!=
'nt'
:
from
..
import
parallel_executor
from
..
import
parallel_executor
from
..transpiler
import
distribute_transpiler
__all__
=
[
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
488610a6
...
...
@@ -347,72 +347,70 @@ def _copy_reader_create_op_(block, op):
return
new_op
if
os
.
name
!=
'nt'
:
@
templatedoc
(
op_type
=
'create_recordio_file_reader'
)
def
open_recordio_file
(
filename
,
shapes
,
lod_levels
,
dtypes
,
pass_num
=
1
,
for_parallel
=
True
):
"""
${comment}
Args:
filename(${filename_type}): ${filename_comment}.
shapes(list): List of tuples which declaring data shapes.
lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declaring data type.
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
Returns:
${out_comment}.
Examples:
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
>>> image, label = fluid.layers.io.read_file(reader)
"""
dtypes
=
[
convert_np_dtype_to_dtype_
(
dt
)
for
dt
in
dtypes
]
shape_concat
=
[]
ranks
=
[]
@
templatedoc
(
op_type
=
'create_recordio_file_reader'
)
def
open_recordio_file
(
filename
,
shapes
,
lod_levels
,
dtypes
,
pass_num
=
1
,
for_parallel
=
True
):
"""
${comment}
for
shape
in
shapes
:
shape_concat
.
extend
(
shape
)
ranks
.
append
(
len
(
shape
))
Args:
filename(${filename_type}): ${filename_comment}.
shapes(list): List of tuples which declaring data shapes.
lod_levels(${lod_levels_type}): ${lod_levels_comment}.
dtypes(list): List of strs which declaring data type.
pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
var_name
=
unique_name
(
'open_recordio_file'
)
Returns:
${out_comment}.
startup_blk
=
default_startup_program
().
current_block
()
startup_var
=
startup_blk
.
create_var
(
name
=
var_name
)
startup_blk
.
append_op
(
type
=
'create_recordio_file_reader'
,
outputs
=
{
'Out'
:
[
startup_var
]},
attrs
=
{
'shape_concat'
:
shape_concat
,
'lod_levels'
:
lod_levels
,
'filename'
:
filename
,
'ranks'
:
ranks
})
Examples:
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
startup_var
)
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file(
>>> filename='./data.recordio',
>>> shapes=[(3,224,224), (1)],
>>> lod_levels=[0, 0],
>>> dtypes=['float32', 'int64'])
>>> # Via the reader, we can use 'read_file' layer to get data:
>>> image, label = fluid.layers.io.read_file(reader)
"""
dtypes
=
[
convert_np_dtype_to_dtype_
(
dt
)
for
dt
in
dtypes
]
shape_concat
=
[]
ranks
=
[]
if
pass_num
>
1
:
main_prog_var
=
multi_pass
(
reader
=
main_prog_var
,
pass_num
=
pass_num
)
for
shape
in
shapes
:
shape_concat
.
extend
(
shape
)
ranks
.
append
(
len
(
shape
))
var_name
=
unique_name
(
'open_recordio_file'
)
startup_blk
=
default_startup_program
().
current_block
()
startup_var
=
startup_blk
.
create_var
(
name
=
var_name
)
startup_blk
.
append_op
(
type
=
'create_recordio_file_reader'
,
outputs
=
{
'Out'
:
[
startup_var
]},
attrs
=
{
'shape_concat'
:
shape_concat
,
'lod_levels'
:
lod_levels
,
'filename'
:
filename
,
'ranks'
:
ranks
})
return
monkey_patch_reader_methods
(
main_prog_var
)
startup_var
.
desc
.
set_dtypes
(
dtypes
)
startup_var
.
persistable
=
True
main_prog_var
=
_copy_reader_var_
(
default_main_program
().
current_block
(),
startup_var
)
if
pass_num
>
1
:
main_prog_var
=
multi_pass
(
reader
=
main_prog_var
,
pass_num
=
pass_num
)
return
monkey_patch_reader_methods
(
main_prog_var
)
def
random_data_generator
(
low
,
high
,
shapes
,
lod_levels
,
for_parallel
=
True
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
488610a6
...
...
@@ -343,128 +343,126 @@ def embedding(input,
return
tmp
if
os
.
name
!=
'nt'
:
@
templatedoc
(
op_type
=
"lstm"
)
def
dynamic_lstm
(
input
,
size
,
h_0
=
None
,
c_0
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
use_peepholes
=
True
,
is_reverse
=
False
,
gate_activation
=
'sigmoid'
,
cell_activation
=
'tanh'
,
candidate_activation
=
'tanh'
,
dtype
=
'float32'
,
name
=
None
):
"""
${comment}
@
templatedoc
(
op_type
=
"lstm"
)
def
dynamic_lstm
(
input
,
size
,
h_0
=
None
,
c_0
=
None
,
param_attr
=
None
,
bias_attr
=
None
,
use_peepholes
=
True
,
is_reverse
=
False
,
gate_activation
=
'sigmoid'
,
cell_activation
=
'tanh'
,
candidate_activation
=
'tanh'
,
dtype
=
'float32'
,
name
=
None
):
"""
${comment}
Args:
input (Variable): ${input_comment}
size (int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size. `h_0` and `c_0` can be NULL but only at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
- Weights = {:math:`W_{ch}, W_{ih},
\
W_{fh}, W_{oh}`}
- The shape is (D x 4D), where D is the hidden
size.
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr.
If the Initializer of the param_attr is not set, the
parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden
bias weights and peephole connections weights if
setting `use_peepholes` to `True`.
1. `use_peepholes = False`
- Biases = {:math:`b_c, b_i, b_f, b_o`}.
- The shape is (1 x 4D).
2. `use_peepholes = True`
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic},
\
W_{fc}, W_{oc}`}.
- The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes (bool): ${use_peepholes_comment}
is_reverse (bool): ${is_reverse_comment}
gate_activation (str): ${gate_activation_comment}
cell_activation (str): ${cell_activation_comment}
candidate_activation (str): ${candidate_activation_comment}
dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The hidden state, and cell state of LSTM. The shape of both
\
is (T x D), and lod is the same with the `input`.
Examples:
.. code-block:: python
hidden_dim = 512
forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
"""
assert
bias_attr
is
not
False
,
"bias_attr should not be False in dynamic_lstmp."
helper
=
LayerHelper
(
'lstm'
,
**
locals
())
size
=
size
//
4
weight
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
[
size
,
4
*
size
],
dtype
=
dtype
)
bias_size
=
[
1
,
7
*
size
]
if
not
use_peepholes
:
bias_size
[
1
]
=
4
*
size
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
Args:
input (Variable): ${input_comment}
size (int): 4 * hidden size.
h_0(Variable): The initial hidden state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size and D is the hidden size.
c_0(Variable): The initial cell state is an optional input, default is zero.
This is a tensor with shape (N x D), where N is the
batch size. `h_0` and `c_0` can be NULL but only at the same time.
param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
hidden
=
helper
.
create_variable_for_type_inference
(
dtype
)
cell
=
helper
.
create_variable_for_type_inference
(
dtype
)
batch_gate
=
helper
.
create_variable_for_type_inference
(
dtype
)
batch_cell_pre_act
=
helper
.
create_variable_for_type_inference
(
dtype
)
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
batch_size
=
input
.
shape
[
0
]
if
h_0
:
assert
h_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of h0 should be (batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
if
c_0
:
assert
c_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of c0 should be (batch_size, %d)'
%
size
inputs
[
'C0'
]
=
c_0
- Weights = {:math:`W_{ch}, W_{ih},
\
W_{fh}, W_{oh}`}
- The shape is (D x 4D), where D is the hidden
size.
helper
.
append_op
(
type
=
'lstm'
,
inputs
=
inputs
,
outputs
=
{
'Hidden'
:
hidden
,
'Cell'
:
cell
,
'BatchGate'
:
batch_gate
,
'BatchCellPreAct'
:
batch_cell_pre_act
},
attrs
=
{
'use_peepholes'
:
use_peepholes
,
'is_reverse'
:
is_reverse
,
'gate_activation'
:
gate_activation
,
'cell_activation'
:
cell_activation
,
'candidate_activation'
:
candidate_activation
})
return
hidden
,
cell
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr.
If the Initializer of the param_attr is not set, the
parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The bias attribute for the learnable bias
weights, which contains two parts, input-hidden
bias weights and peephole connections weights if
setting `use_peepholes` to `True`.
1. `use_peepholes = False`
- Biases = {:math:`b_c, b_i, b_f, b_o`}.
- The shape is (1 x 4D).
2. `use_peepholes = True`
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic},
\
W_{fc}, W_{oc}`}.
- The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr.
If the Initializer of the bias_attr is not set,
the bias is initialized zero. Default: None.
use_peepholes (bool): ${use_peepholes_comment}
is_reverse (bool): ${is_reverse_comment}
gate_activation (str): ${gate_activation_comment}
cell_activation (str): ${cell_activation_comment}
candidate_activation (str): ${candidate_activation_comment}
dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The hidden state, and cell state of LSTM. The shape of both
\
is (T x D), and lod is the same with the `input`.
Examples:
.. code-block:: python
hidden_dim = 512
forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
bias_attr=False)
forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
"""
assert
bias_attr
is
not
False
,
"bias_attr should not be False in dynamic_lstmp."
helper
=
LayerHelper
(
'lstm'
,
**
locals
())
size
=
size
//
4
weight
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
[
size
,
4
*
size
],
dtype
=
dtype
)
bias_size
=
[
1
,
7
*
size
]
if
not
use_peepholes
:
bias_size
[
1
]
=
4
*
size
bias
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
shape
=
bias_size
,
dtype
=
dtype
,
is_bias
=
True
)
hidden
=
helper
.
create_variable_for_type_inference
(
dtype
)
cell
=
helper
.
create_variable_for_type_inference
(
dtype
)
batch_gate
=
helper
.
create_variable_for_type_inference
(
dtype
)
batch_cell_pre_act
=
helper
.
create_variable_for_type_inference
(
dtype
)
inputs
=
{
'Input'
:
input
,
'Weight'
:
weight
,
'Bias'
:
bias
}
batch_size
=
input
.
shape
[
0
]
if
h_0
:
assert
h_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of h0 should be (batch_size, %d)'
%
size
inputs
[
'H0'
]
=
h_0
if
c_0
:
assert
c_0
.
shape
==
(
batch_size
,
size
),
\
'The shape of c0 should be (batch_size, %d)'
%
size
inputs
[
'C0'
]
=
c_0
helper
.
append_op
(
type
=
'lstm'
,
inputs
=
inputs
,
outputs
=
{
'Hidden'
:
hidden
,
'Cell'
:
cell
,
'BatchGate'
:
batch_gate
,
'BatchCellPreAct'
:
batch_cell_pre_act
},
attrs
=
{
'use_peepholes'
:
use_peepholes
,
'is_reverse'
:
is_reverse
,
'gate_activation'
:
gate_activation
,
'cell_activation'
:
cell_activation
,
'candidate_activation'
:
candidate_activation
})
return
hidden
,
cell
def
dynamic_lstmp
(
input
,
...
...
@@ -963,43 +961,39 @@ def linear_chain_crf(input, label, param_attr=None):
return
log_likelihood
if
os
.
name
!=
'nt'
:
@
templatedoc
()
def
crf_decoding
(
input
,
param_attr
,
label
=
None
):
"""
${comment}
@
templatedoc
()
def
crf_decoding
(
input
,
param_attr
,
label
=
None
):
"""
${comment}
Args:
input(${emission_type}): ${emission_comment}
Args:
input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training.
param_attr(ParamAttr): The parameter attribute for training.
label(${label_type}): ${label_comment}
label(${label_type}): ${label_comment}
Returns:
Variable: ${viterbi_path_comment}
Returns:
Variable: ${viterbi_path_comment}
Examples:
.. code-block:: python
Examples:
.. code-block:: python
crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw"))
"""
helper
=
LayerHelper
(
'crf_decoding'
,
**
locals
())
transition
=
helper
.
get_parameter
(
param_attr
.
name
)
viterbi_path
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
type
=
'crf_decoding'
,
inputs
=
{
"Emission"
:
[
input
],
crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw"))
"""
helper
=
LayerHelper
(
'crf_decoding'
,
**
locals
())
transition
=
helper
.
get_parameter
(
param_attr
.
name
)
viterbi_path
=
helper
.
create_variable_for_type_inference
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
type
=
'crf_decoding'
,
inputs
=
{
"Emission"
:
[
input
],
"Transition"
:
transition
,
"Label"
:
label
},
outputs
=
{
"ViterbiPath"
:
[
viterbi_path
]})
"Label"
:
label
},
outputs
=
{
"ViterbiPath"
:
[
viterbi_path
]})
return
viterbi_path
return
viterbi_path
@
templatedoc
()
...
...
@@ -5593,48 +5587,42 @@ def label_smooth(label,
return
smooth_label
if
os
.
name
!=
'nt'
:
@
templatedoc
()
def
roi_pool
(
input
,
rois
,
pooled_height
=
1
,
pooled_width
=
1
,
spatial_scale
=
1.0
):
"""
${comment}
Args:
input (Variable): ${x_comment}
rois (Variable): ROIs (Regions of Interest) to pool over.
pooled_height (integer): ${pooled_height_comment} Default: 1
pooled_width (integer): ${pooled_width_comment} Default: 1
spatial_scale (float): ${spatial_scale_comment} Default: 1.0
Returns:
Variable: ${out_comment}.
Examples:
.. code-block:: python
pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
"""
helper
=
LayerHelper
(
'roi_pool'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_variable_for_type_inference
(
dtype
)
argmaxes
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
helper
.
append_op
(
type
=
"roi_pool"
,
inputs
=
{
"X"
:
input
,
"ROIs"
:
rois
},
outputs
=
{
"Out"
:
pool_out
,
"Argmax"
:
argmaxes
},
attrs
=
{
"pooled_height"
:
pooled_height
,
"pooled_width"
:
pooled_width
,
"spatial_scale"
:
spatial_scale
})
return
pool_out
@
templatedoc
()
def
roi_pool
(
input
,
rois
,
pooled_height
=
1
,
pooled_width
=
1
,
spatial_scale
=
1.0
):
"""
${comment}
Args:
input (Variable): ${x_comment}
rois (Variable): ROIs (Regions of Interest) to pool over.
pooled_height (integer): ${pooled_height_comment} Default: 1
pooled_width (integer): ${pooled_width_comment} Default: 1
spatial_scale (float): ${spatial_scale_comment} Default: 1.0
Returns:
Variable: ${out_comment}.
Examples:
.. code-block:: python
pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
"""
helper
=
LayerHelper
(
'roi_pool'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
pool_out
=
helper
.
create_variable_for_type_inference
(
dtype
)
argmaxes
=
helper
.
create_variable_for_type_inference
(
dtype
=
'int32'
)
helper
.
append_op
(
type
=
"roi_pool"
,
inputs
=
{
"X"
:
input
,
"ROIs"
:
rois
},
outputs
=
{
"Out"
:
pool_out
,
"Argmax"
:
argmaxes
},
attrs
=
{
"pooled_height"
:
pooled_height
,
"pooled_width"
:
pooled_width
,
"spatial_scale"
:
spatial_scale
})
return
pool_out
@
templatedoc
()
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
488610a6
...
...
@@ -100,26 +100,27 @@ Examples:
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""
if
os
.
name
!=
'nt'
:
__all__
+=
[
'cumsum'
]
_cum_sum_
=
generate_layer_fn
(
'cumsum'
)
def
cumsum
(
x
,
axis
=
None
,
exclusive
=
None
,
reverse
=
None
):
locals_var
=
locals
().
keys
()
kwargs
=
dict
()
for
name
in
locals_var
:
val
=
locals
()[
name
]
if
val
is
not
None
:
kwargs
[
name
]
=
val
return
_cum_sum_
(
**
kwargs
)
cumsum
.
__doc__
=
_cum_sum_
.
__doc__
+
"""
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__
+=
[
'cumsum'
]
_cum_sum_
=
generate_layer_fn
(
'cumsum'
)
def
cumsum
(
x
,
axis
=
None
,
exclusive
=
None
,
reverse
=
None
):
locals_var
=
locals
().
keys
()
kwargs
=
dict
()
for
name
in
locals_var
:
val
=
locals
()[
name
]
if
val
is
not
None
:
kwargs
[
name
]
=
val
return
_cum_sum_
(
**
kwargs
)
cumsum
.
__doc__
=
_cum_sum_
.
__doc__
+
"""
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__
+=
[
'thresholded_relu'
]
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
488610a6
...
...
@@ -23,7 +23,9 @@ if(NOT WITH_DISTRIBUTE)
LIST
(
REMOVE_ITEM TEST_OPS test_dist_text_classification
)
endif
(
NOT WITH_DISTRIBUTE
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
if
(
NOT
${
WITH_GPU
}
)
LIST
(
REMOVE_ITEM TEST_OPS test_conv2d_fusion_op
)
elseif
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
LIST
(
REMOVE_ITEM TEST_OPS test_conv2d_fusion_op
)
endif
()
...
...
@@ -79,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
list
(
REMOVE_ITEM TEST_OPS test_dist_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_image_classification_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_interpolate_op
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
py_test_modules
(
test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=
${
WARPCTC_LIB_DIR
}
SERIAL
)
py_test_modules
(
test_interpolate_op MODULES test_interpolate_op SERIAL
)
if
(
WITH_DISTRIBUTE
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 20
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录