Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
be04d99f
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
be04d99f
编写于
11月 26, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into revert_vlog
test=develop
上级
53433d7f
05b7ee7e
变更
56
隐藏空白更改
内联
并排
Showing
56 changed file
with
157 addition
and
139 deletion
+157
-139
cmake/cuda.cmake
cmake/cuda.cmake
+4
-1
cmake/external/pybind11.cmake
cmake/external/pybind11.cmake
+1
-1
cmake/generic.cmake
cmake/generic.cmake
+9
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+4
-4
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+0
-5
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+2
-2
paddle/fluid/framework/details/all_reduce_op_handle.h
paddle/fluid/framework/details/all_reduce_op_handle.h
+3
-3
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+1
-1
paddle/fluid/framework/details/broadcast_op_handle.h
paddle/fluid/framework/details/broadcast_op_handle.h
+3
-3
paddle/fluid/framework/details/broadcast_op_handle_test.h
paddle/fluid/framework/details/broadcast_op_handle_test.h
+6
-6
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-2
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+2
-2
paddle/fluid/framework/details/data_balance_op_handle.cc
paddle/fluid/framework/details/data_balance_op_handle.cc
+1
-1
paddle/fluid/framework/details/data_balance_op_handle.h
paddle/fluid/framework/details/data_balance_op_handle.h
+2
-2
paddle/fluid/framework/details/fused_broadcast_op_handle.h
paddle/fluid/framework/details/fused_broadcast_op_handle.h
+2
-2
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
...fluid/framework/details/fused_broadcast_op_handle_test.cc
+2
-2
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+8
-8
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+1
-1
paddle/fluid/framework/details/reduce_op_handle.cc
paddle/fluid/framework/details/reduce_op_handle.cc
+1
-1
paddle/fluid/framework/details/reduce_op_handle.h
paddle/fluid/framework/details/reduce_op_handle.h
+2
-2
paddle/fluid/framework/details/reduce_op_handle_test.cc
paddle/fluid/framework/details/reduce_op_handle_test.cc
+6
-6
paddle/fluid/framework/ir/is_test_pass_tester.cc
paddle/fluid/framework/ir/is_test_pass_tester.cc
+4
-1
paddle/fluid/framework/lod_tensor.cc
paddle/fluid/framework/lod_tensor.cc
+1
-16
paddle/fluid/framework/lod_tensor_test.cc
paddle/fluid/framework/lod_tensor_test.cc
+0
-2
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+4
-7
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+2
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+0
-4
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+0
-4
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+1
-4
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+0
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+0
-4
paddle/fluid/inference/tests/book/test_inference_nlp.cc
paddle/fluid/inference/tests/book/test_inference_nlp.cc
+0
-1
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+1
-0
paddle/fluid/operators/beam_search_op_test.cc
paddle/fluid/operators/beam_search_op_test.cc
+2
-2
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+1
-1
paddle/fluid/operators/distributed/grpc_serde.cc
paddle/fluid/operators/distributed/grpc_serde.cc
+1
-1
paddle/fluid/operators/distributed/grpc_serde.h
paddle/fluid/operators/distributed/grpc_serde.h
+2
-1
paddle/fluid/operators/distributed/sendrecvop_utils.cc
paddle/fluid/operators/distributed/sendrecvop_utils.cc
+1
-1
paddle/fluid/operators/distributed/sendrecvop_utils.h
paddle/fluid/operators/distributed/sendrecvop_utils.h
+1
-1
paddle/fluid/operators/math/cpu_vec_test.cc
paddle/fluid/operators/math/cpu_vec_test.cc
+1
-1
paddle/fluid/operators/math/im2col_test.cc
paddle/fluid/operators/math/im2col_test.cc
+1
-1
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+1
-1
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+1
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+7
-7
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+10
-1
paddle/fluid/platform/stream_callback_manager.h
paddle/fluid/platform/stream_callback_manager.h
+1
-1
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+7
-6
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+2
-3
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+0
-1
paddle/legacy/cuda/include/hl_warpctc_wrap.h
paddle/legacy/cuda/include/hl_warpctc_wrap.h
+2
-1
paddle/legacy/cuda/src/hl_cuda_device.cc
paddle/legacy/cuda/src/hl_cuda_device.cc
+4
-0
paddle/legacy/utils/ThreadLocal.h
paddle/legacy/utils/ThreadLocal.h
+3
-1
paddle/legacy/utils/Util.h
paddle/legacy/utils/Util.h
+27
-0
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+1
-1
paddle/testing/CMakeLists.txt
paddle/testing/CMakeLists.txt
+4
-2
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+2
-2
未找到文件。
cmake/cuda.cmake
浏览文件 @
be04d99f
...
...
@@ -199,8 +199,11 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELEASE
}
)
endif
()
else
(
NOT WIN32
)
list
(
APPEND CUDA_NVCC_FLAGS
"--compiler-options;/bigobj"
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-g -G"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-g -G"
)
# match the cl's _ITERATOR_DEBUG_LEVEL
list
(
APPEND CUDA_NVCC_FLAGS
"-D_DEBUG"
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"Release"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-O3 -DNDEBUG"
)
else
()
...
...
cmake/external/pybind11.cmake
浏览文件 @
be04d99f
...
...
@@ -26,7 +26,7 @@ ExternalProject_Add(
extern_pybind
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/pybind/pybind11.git"
GIT_TAG
"v2.
1.1
"
GIT_TAG
"v2.
2.4
"
PREFIX
${
PYBIND_SOURCE_DIR
}
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
...
...
cmake/generic.cmake
浏览文件 @
be04d99f
...
...
@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS ARGS
)
cmake_parse_arguments
(
cc_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
if
(
WIN32
)
list
(
APPEND win32_deps shlwapi
)
if
(
"
${
cc_test_DEPS
}
;"
MATCHES
"python;"
)
list
(
REMOVE_ITEM cc_test_DEPS python
)
list
(
APPEND win32_deps
${
PYTHON_LIBRARIES
}
)
endif
()
endif
(
WIN32
)
add_executable
(
${
TARGET_NAME
}
${
cc_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
if
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
shlwapi
)
target_link_libraries
(
${
TARGET_NAME
}
${
win32_deps
}
)
endif
(
WIN32
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_test
(
NAME
${
TARGET_NAME
}
...
...
@@ -683,7 +690,7 @@ function(py_test TARGET_NAME)
set
(
multiValueArgs SRCS DEPS ARGS ENVS
)
cmake_parse_arguments
(
py_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_test
(
NAME
${
TARGET_NAME
}
COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
COMMAND
${
CMAKE_COMMAND
}
-E
env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true
PYTHONPATH=
${
PADDLE_BINARY_DIR
}
/python
${
py_test_ENVS
}
${
PYTHON_EXECUTABLE
}
-u
${
py_test_SRCS
}
${
py_test_ARGS
}
...
...
paddle/fluid/API.spec
浏览文件 @
be04d99f
...
...
@@ -26,10 +26,10 @@ paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], vara
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ReduceStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.BuildStrategy) -> None
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.
ParallelExecutor.
ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.
ParallelExecutor.BuildStrategy.
GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.
ParallelExecutor.BuildStrategy.
ReduceStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.
ParallelExecutor.
BuildStrategy) -> None
paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
be04d99f
...
...
@@ -116,14 +116,9 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context
)
if
(
NOT WIN32
)
cc_library
(
transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler transfer_scope_cache
)
else
()
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor
)
endif
(
NOT WIN32
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry device_context
)
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
be04d99f
...
...
@@ -23,7 +23,7 @@ namespace paddle {
namespace
framework
{
namespace
details
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle
::
AllReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
}
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
PADDLE_ENFORCE
(
nccl_ctxs_
,
"nccl_ctxs should not be nullptr."
);
int
dtype
=
-
1
;
size_t
numel
=
0
;
...
...
paddle/fluid/framework/details/all_reduce_op_handle.h
浏览文件 @
be04d99f
...
...
@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -29,7 +29,7 @@ namespace framework {
namespace
details
{
struct
AllReduceOpHandle
:
public
OpHandleBase
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
ctxs
);
...
...
@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
private:
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
};
...
...
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
be04d99f
...
...
@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
});
}
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
VarHandle
*
out_handle
=
nullptr
;
int
root_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
in_tensor
.
place
()).
device
;
std
::
vector
<
std
::
function
<
void
()
>>
broadcast_calls
;
...
...
paddle/fluid/framework/details/broadcast_op_handle.h
浏览文件 @
be04d99f
...
...
@@ -24,7 +24,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -34,7 +34,7 @@ namespace details {
struct
BroadcastOpHandle
:
public
OpHandleBase
{
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
BroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
nccl_ctxs
)
...
...
@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
...
...
paddle/fluid/framework/details/broadcast_op_handle_test.h
浏览文件 @
be04d99f
...
...
@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes_
;
std
::
vector
<
p
::
Place
>
place_list_
;
bool
use_gpu_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
...
...
@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
ctxs_
[
j
]
->
Wait
();
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
)
{
nccl_ctxs_
->
WaitAll
();
}
...
...
@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
void
InitCtxOnGpu
(
bool
use_gpu
)
{
use_gpu_
=
use_gpu
;
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int
count
=
p
::
GetCUDADeviceCount
();
if
(
count
<=
1
)
{
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
...
...
@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
place_list_
.
push_back
(
p
);
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
.
reset
(
nullptr
);
#endif
}
...
...
@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
nodes_
.
emplace_back
(
ir
::
CreateNodeForTest
(
"node0"
,
ir
::
Node
::
Type
::
kOperation
));
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
PADDLE_THROW
(
"CUDA is not support."
);
#endif
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
be04d99f
...
...
@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
const
bool
use_cuda
)
const
{
...
...
@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
pass
->
Erase
(
"nccl_ctxs"
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
be04d99f
...
...
@@ -23,7 +23,7 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -98,7 +98,7 @@ struct BuildStrategy {
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
const
bool
use_cuda
)
const
;
...
...
paddle/fluid/framework/details/data_balance_op_handle.cc
浏览文件 @
be04d99f
...
...
@@ -20,7 +20,7 @@ namespace paddle {
namespace
framework
{
namespace
details
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle
::
DataBalanceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/data_balance_op_handle.h
浏览文件 @
be04d99f
...
...
@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -29,7 +29,7 @@ namespace details {
struct
DataBalanceOpHandle
:
public
OpHandleBase
{
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
ctxs
);
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle.h
浏览文件 @
be04d99f
...
...
@@ -25,7 +25,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -35,7 +35,7 @@ namespace details {
struct
FusedBroadcastOpHandle
:
public
BroadcastOpHandle
{
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
FusedBroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
浏览文件 @
be04d99f
...
...
@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
nodes_
.
emplace_back
(
ir
::
CreateNodeForTest
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
));
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
FusedBroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
PADDLE_THROW
(
"CUDA is not supported."
);
#endif
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
FusedBroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
be04d99f
...
...
@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
places_
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
local_scopes_
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
strategy_
=
Get
<
const
BuildStrategy
>
(
kStrategy
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
#endif
...
...
@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
}
}
bool
use_gpu
=
false
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
use_gpu
=
nccl_ctxs_
!=
nullptr
;
#endif
...
...
@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
void
MultiDevSSAGraphBuilder
::
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
==
nullptr
)
{
op_handle
->
SetDeviceContext
(
p
,
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
));
...
...
@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
void
MultiDevSSAGraphBuilder
::
CreateBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
p_name
,
size_t
src_dev_id
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
op_handle
=
new
BroadcastOpHandle
(
result
->
CreateEmptyNode
(
"broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
);
...
...
@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
CreateFusedBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
bcast_varnames
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
op_handle
=
new
FusedBroadcastOpHandle
(
result
->
CreateEmptyNode
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
);
...
...
@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
AllReduceOpHandle
(
result
->
CreateEmptyNode
(
"allreduce"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
...
...
@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
InsertDataBalanceOp
(
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
string
>
&
datas
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
DataBalanceOpHandle
(
result
->
CreateEmptyNode
(
"data_balance"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
...
...
@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
VarHandle
*
MultiDevSSAGraphBuilder
::
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
ReduceOpHandle
(
result
->
CreateEmptyNode
(
"reduce"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
be04d99f
...
...
@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
size_t
device_id
)
const
;
void
Init
()
const
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
mutable
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
...
...
paddle/fluid/framework/details/reduce_op_handle.cc
浏览文件 @
be04d99f
...
...
@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
}
});
}
else
if
(
paddle
::
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
pre_in
=
pre_in_var
->
Get
<
framework
::
LoDTensor
>
();
VariableVisitor
::
ShareDimsAndLoD
(
*
pre_in_var
,
out_var
);
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
...
...
paddle/fluid/framework/details/reduce_op_handle.h
浏览文件 @
be04d99f
...
...
@@ -23,7 +23,7 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
ReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/reduce_op_handle_test.cc
浏览文件 @
be04d99f
...
...
@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
std
::
vector
<
p
::
Place
>
gpu_list_
;
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
...
...
@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
ctxs_
[
j
]
->
Wait
();
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
)
{
nccl_ctxs_
->
WaitAll
();
}
...
...
@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
void
InitCtxOnGpu
(
bool
use_gpu
)
{
use_gpu_
=
use_gpu
;
if
(
use_gpu
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int
count
=
p
::
GetCUDADeviceCount
();
if
(
count
<=
1
)
{
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
...
...
@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
gpu_list_
.
push_back
(
p
);
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
.
reset
(
nullptr
);
#endif
}
...
...
@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
nodes
.
emplace_back
(
new
ir
::
Node
(
"node"
));
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
gpu_list_
,
nccl_ctxs_
.
get
()));
#else
PADDLE_THROW
(
"CUDA is not support."
);
#endif
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
gpu_list_
,
nccl_ctxs_
.
get
()));
#else
...
...
paddle/fluid/framework/ir/is_test_pass_tester.cc
浏览文件 @
be04d99f
...
...
@@ -15,7 +15,10 @@
#include "paddle/fluid/framework/ir/is_test_pass.h"
#include <gtest/gtest.h>
#ifdef _WIN32
#undef FALSE
#undef TRUE
#endif
namespace
paddle
{
namespace
framework
{
namespace
ir
{
...
...
paddle/fluid/framework/lod_tensor.cc
浏览文件 @
be04d99f
...
...
@@ -26,10 +26,8 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
#if !defined(_WIN32)
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
#endif // _WIN32
namespace
paddle
{
namespace
framework
{
...
...
@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream
(
is
,
static_cast
<
Tensor
*>
(
tensor
),
dev_ctx
);
}
#if !defined(_WIN32)
void
WriteToRecordIO
(
recordio
::
Writer
*
writer
,
const
std
::
vector
<
LoDTensor
>
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
...
...
@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
return
true
;
}
#else
class
Writer
{};
class
Scanner
{};
void
WriteToRecordIO
(
recordio
::
Writer
*
writer
,
const
std
::
vector
<
LoDTensor
>
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{}
bool
ReadFromRecordIO
(
recordio
::
Scanner
*
scanner
,
const
platform
::
DeviceContext
&
dev_ctx
,
std
::
vector
<
LoDTensor
>
*
result_ptr
)
{
PADDLE_ENFORCE
(
"windows didn't supported recordio!."
);
return
true
;
}
#endif // _WIN32
std
::
vector
<
LoDTensor
>
LoDTensor
::
SplitLoDTensor
(
const
std
::
vector
<
platform
::
Place
>
places
)
const
{
check_memory_size
();
...
...
paddle/fluid/framework/lod_tensor_test.cc
浏览文件 @
be04d99f
...
...
@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
EXPECT_EQ
(
offset_lod
,
expected
);
}
#if !defined(_WIN32)
template
<
typename
T
>
static
void
TestRecordIO
()
{
LoDTensor
tensor
;
...
...
@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) {
TestRecordIO
<
float
>
();
TestRecordIO
<
double
>
();
}
#endif // !defined(_WIN32)
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/operator.cc
浏览文件 @
be04d99f
...
...
@@ -149,17 +149,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif
}
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
#ifndef _WIN32
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
}
else
// NOLINT
#endif
{
}
else
{
RunImpl
(
scope
,
place
);
}
VLOG
(
3
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
be04d99f
...
...
@@ -19,6 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
(
size_t
)
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
be04d99f
...
...
@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
...
...
@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init(
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
}
#endif
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
...
...
@@ -520,12 +518,10 @@ bool AnalysisPredictor::LoadParameters() {
}
AnalysisPredictor
::~
AnalysisPredictor
()
{
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
"./profile.log"
);
}
#endif
if
(
sub_scope_
)
{
scope_
->
DeleteScope
(
sub_scope_
);
}
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
be04d99f
...
...
@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool
NativePaddlePredictor
::
Init
(
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
...
...
@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init(
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
}
#endif
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
...
...
@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init(
}
NativePaddlePredictor
::~
NativePaddlePredictor
()
{
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
"./profile.log"
);
}
#endif
if
(
sub_scope_
)
{
scope_
->
DeleteScope
(
sub_scope_
);
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
be04d99f
...
...
@@ -15,10 +15,6 @@
#pragma once
#include <glog/logging.h>
#if !defined(_WIN32)
#include <sys/time.h>
#else
#endif
#include <algorithm>
#include <chrono> // NOLINT
...
...
@@ -28,6 +24,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
...
...
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
浏览文件 @
be04d99f
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <fstream>
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
be04d99f
...
...
@@ -178,11 +178,9 @@ void TestOneThreadPrediction(
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
1
,
0
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
}
#endif
}
LOG
(
INFO
)
<<
"Run "
<<
num_times
<<
" times..."
;
...
...
@@ -232,11 +230,9 @@ void TestMultiThreadPrediction(
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
num_threads
,
tid
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
}
#endif
}
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
...
...
paddle/fluid/inference/tests/book/test_inference_nlp.cc
浏览文件 @
be04d99f
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <time.h>
#include <fstream>
#include <thread> // NOLINT
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
be04d99f
...
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
use_mkldnn
);
...
...
paddle/fluid/operators/beam_search_op_test.cc
浏览文件 @
be04d99f
...
...
@@ -46,7 +46,7 @@ void CreateInput(LoDTensor* ids, LoDTensor* scores) {
auto
*
scores_data
=
scores
->
mutable_data
<
float
>
(
place
);
vector
<
int64_t
>
_ids
({
4
,
2
,
5
,
2
,
1
,
3
,
3
,
5
,
2
,
8
,
2
,
1
});
vector
<
float
>
_scores
(
{
0.5
,
0.3
,
0.2
,
0.6
,
0.3
,
0.1
,
0.9
,
0.5
,
0.1
,
0.7
,
0.5
,
0.1
});
{
0.5
f
,
0.3
f
,
0.2
f
,
0.6
f
,
0.3
f
,
0.1
f
,
0.9
f
,
0.5
f
,
0.1
f
,
0.7
f
,
0.5
f
,
0.1
f
});
for
(
int
i
=
0
;
i
<
12
;
i
++
)
{
ids_data
[
i
]
=
_ids
[
i
];
...
...
@@ -80,7 +80,7 @@ TEST(DISABLED_beam_search_op, run) {
ASSERT_EQ
(
sids
.
lod
(),
sscores
.
lod
());
vector
<
int
>
tids
({
4
,
2
,
3
,
8
});
vector
<
float
>
tscores
({
0.5
,
0.6
,
0.9
,
0.7
});
vector
<
float
>
tscores
({
0.5
f
,
0.6
f
,
0.9
f
,
0.7
f
});
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
ASSERT_EQ
(
tids
[
i
],
sids
.
data
<
int64_t
>
()[
i
]);
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
be04d99f
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <limits>
#include "glog/logging.h" // For VLOG
...
...
@@ -20,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_client.h"
#include "paddle/fluid/operators/distributed/grpc_serde.h"
#include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
rpc_disable_reuse_port
);
...
...
paddle/fluid/operators/distributed/grpc_serde.cc
浏览文件 @
be04d99f
...
...
@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
...
...
@@ -26,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_variable_response.h"
#include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
namespace
paddle
{
...
...
paddle/fluid/operators/distributed/grpc_serde.h
浏览文件 @
be04d99f
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <sys/time.h>
#include <iostream>
#include <string>
#include <vector>
...
...
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.cc
浏览文件 @
be04d99f
...
...
@@ -15,12 +15,12 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/variable_response.h"
#include "paddle/fluid/platform/port.h"
DEFINE_bool
(
rpc_disable_reuse_port
,
false
,
"Disable SO_REUSEPORT or not."
);
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.h
浏览文件 @
be04d99f
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <sys/time.h>
#include <iostream>
#include <string>
#include <vector>
...
...
@@ -24,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
...
...
paddle/fluid/operators/math/cpu_vec_test.cc
浏览文件 @
be04d99f
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <cmath>
#include <cstring>
#include <random>
...
...
@@ -22,6 +21,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/port.h"
inline
double
GetCurrentUS
()
{
struct
timeval
time
;
...
...
paddle/fluid/operators/math/im2col_test.cc
浏览文件 @
be04d99f
...
...
@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h>
#include <sys/time.h>
#include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/platform/port.h"
template
<
typename
DeviceContext
,
typename
Place
>
void
testIm2col
()
{
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
be04d99f
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/jit_kernel.h"
#include <sys/time.h>
#include <cmath> // for exp
#include <cstring> // for memcpy
#include <random>
...
...
@@ -22,6 +21,7 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
be04d99f
...
...
@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
#define CUDNN_ENFORCE(condition) \
do { \
cudnnStatus_t status = condition;
\
auto status = condition;
\
if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
} \
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
be04d99f
...
...
@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#else
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)
\
struct DynLoad__##__name {
\
template <typename... Args>
\
inline
cudnnStatus_t operator()(Args... args) {
\
return ::__name(args...);
\
}
\
};
\
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline
auto operator()(Args... args) {
\
return ::__name(args...); \
} \
}; \
extern DynLoad__##__name __name
#endif
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
be04d99f
...
...
@@ -19,7 +19,16 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
0.92
,
#ifndef _WIN32
const
float
fraction_of_gpu_memory_to_use
=
0.92
f
;
#else
// fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
const
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
#endif
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
"from the trunk. If the trunk doesn't have enough gpu memory, "
...
...
paddle/fluid/platform/stream_callback_manager.h
浏览文件 @
be04d99f
...
...
@@ -14,11 +14,11 @@
#pragma once
#include <ThreadPool.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <functional>
#include <memory>
#include "ThreadPool.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
be04d99f
...
...
@@ -30,11 +30,12 @@ namespace pybind11 {
namespace
detail
{
// Can be replaced by a generic lambda in C++14
struct
variant_caster_visitor
:
public
boost
::
static_visitor
<
handle
>
{
struct
__attribute__
((
visibility
(
"hidden"
)))
paddle_variant_caster_visitor
:
public
boost
::
static_visitor
<
handle
>
{
return_value_policy
policy
;
handle
parent
;
variant_caster_visitor
(
return_value_policy
policy
,
handle
parent
)
paddle_
variant_caster_visitor
(
return_value_policy
policy
,
handle
parent
)
:
policy
(
policy
),
parent
(
parent
)
{}
template
<
class
T
>
...
...
@@ -44,10 +45,10 @@ struct variant_caster_visitor : public boost::static_visitor<handle> {
};
template
<
class
Variant
>
struct
variant_caster
;
struct
paddle_
variant_caster
;
template
<
template
<
class
...
>
class
V
,
class
...
Ts
>
struct
variant_caster
<
V
<
Ts
...
>>
{
struct
paddle_
variant_caster
<
V
<
Ts
...
>>
{
using
Type
=
V
<
Ts
...
>
;
template
<
typename
T
>
...
...
@@ -90,7 +91,7 @@ struct variant_caster<V<Ts...>> {
static
handle
cast
(
Type
const
&
src
,
return_value_policy
policy
,
handle
parent
)
{
variant_caster_visitor
visitor
(
policy
,
parent
);
paddle_
variant_caster_visitor
visitor
(
policy
,
parent
);
return
boost
::
apply_visitor
(
visitor
,
src
);
}
...
...
@@ -101,7 +102,7 @@ struct variant_caster<V<Ts...>> {
// Add specialization for concrete variant type
template
<
class
...
Args
>
struct
type_caster
<
boost
::
variant
<
Args
...
>>
:
variant_caster
<
boost
::
variant
<
Args
...
>>
{};
:
paddle_
variant_caster
<
boost
::
variant
<
Args
...
>>
{};
}
// namespace detail
}
// namespace pybind11
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
be04d99f
...
...
@@ -86,12 +86,12 @@ bool IsCompiledWithDIST() {
#endif
}
PYBIND11_
PLUGIN
(
core
)
{
PYBIND11_
MODULE
(
core
,
m
)
{
// Not used, just make sure cpu_info.cc is linked.
paddle
::
platform
::
CpuTotalPhysicalMemory
();
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
)
;
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
// not cause namespace pollution.
...
...
@@ -907,7 +907,6 @@ All parameter, weight, gradient are variables in Paddle.
});
BindRecordIOWriter
(
&
m
);
return
m
.
ptr
();
}
}
// namespace pybind
}
// namespace paddle
paddle/fluid/pybind/tensor_py.h
浏览文件 @
be04d99f
...
...
@@ -21,7 +21,6 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
#include "pybind11/common.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
...
...
paddle/legacy/cuda/include/hl_warpctc_wrap.h
浏览文件 @
be04d99f
...
...
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#ifndef HL_WARPCTC_WRAP_H_
#define HL_WARPCTC_WRAP_H_
#include "ctc.h"
#include "hl_base.h"
...
...
@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
size_t
*
bytes
);
#endif // HL_WARPCTC_WRAP_H_
#endif
paddle/legacy/cuda/src/hl_cuda_device.cc
浏览文件 @
be04d99f
...
...
@@ -132,11 +132,15 @@ inline pid_t gettid() {
uint64_t
tid
;
pthread_threadid_np
(
NULL
,
&
tid
);
#else
#ifndef _WIN32
#ifndef __NR_gettid
#define __NR_gettid 224
#endif
pid_t
tid
=
syscall
(
__NR_gettid
);
#endif
#else // _WIN32
pid_t
tid
=
_getpid
();
#endif // _WIN32
CHECK_NE
((
int
)
tid
,
-
1
);
return
tid
;
}
...
...
paddle/legacy/utils/ThreadLocal.h
浏览文件 @
be04d99f
...
...
@@ -14,10 +14,12 @@ limitations under the License. */
#pragma once
#ifndef _WIN32
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <sys/types.h>
#include <map>
#include <mutex>
#include <random>
...
...
paddle/legacy/utils/Util.h
浏览文件 @
be04d99f
...
...
@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#ifndef _WIN32
#include <sys/syscall.h> // for syscall()
#endif
#include <sys/types.h>
#include <algorithm>
#include <cmath>
...
...
@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
}
#endif
#ifdef _WIN32
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#include <windows.h>
template
<
typename
T
>
inline
int
__builtin_clz
(
const
T
&
value
)
{
DWORD
leadning_zero
=
0
;
if
(
_BitScanReverse
(
&
leadning_zero
,
value
))
{
return
static_cast
<
int
>
(
sizeof
(
T
)
*
8
-
leadning_zero
);
}
else
{
return
static_cast
<
int
>
(
0
);
}
}
inline
int
__builtin_clzl
(
const
unsigned
long
&
value
)
{
return
__builtin_clz
(
value
);
}
inline
int
__builtin_clzll
(
const
unsigned
long
long
&
value
)
{
return
__builtin_clz
(
value
);
}
#define pid_t int
#endif
/**
* Loop over the elements in a container
* TODO(yuyang18): Is this foreach useful? Why not use C++ 11 foreach,
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
be04d99f
...
...
@@ -149,7 +149,7 @@ function cmake_gen() {
elif
[
"
$1
"
==
"cp37-cp37m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.7.0/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.7.0/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3
.7
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so"
fi
...
...
paddle/testing/CMakeLists.txt
浏览文件 @
be04d99f
...
...
@@ -3,8 +3,10 @@
if
(
WITH_TESTING
)
add_library
(
paddle_test_main STATIC TestMain.cpp
)
add_dependencies
(
paddle_test_main paddle_proto
${
external_project_dependencies
}
)
add_library
(
paddle_test_util STATIC TestUtil.cpp
)
add_dependencies
(
paddle_test_util paddle_proto
${
external_project_dependencies
}
)
if
(
NOT WIN32
)
add_library
(
paddle_test_util STATIC TestUtil.cpp
)
add_dependencies
(
paddle_test_util paddle_proto
${
external_project_dependencies
}
)
endif
(
NOT WIN32
)
if
(
NOT MOBILE_INFERENCE
)
cc_library
(
paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags
)
endif
()
...
...
python/paddle/fluid/metrics.py
浏览文件 @
be04d99f
...
...
@@ -46,8 +46,8 @@ def _is_numpy_(var):
def
_is_number_
(
var
):
return
isinstance
(
var
,
int
)
or
isinstance
(
var
,
float
)
or
(
isinstance
(
var
,
np
.
ndarray
)
and
var
.
shape
==
(
1
,
))
return
isinstance
(
var
,
int
)
or
isinstance
(
var
,
np
.
int64
)
or
isinstance
(
var
,
float
)
or
(
isinstance
(
var
,
np
.
ndarray
)
and
var
.
shape
==
(
1
,
))
def
_is_number_or_matrix_
(
var
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录