Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
05b7ee7e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
05b7ee7e
编写于
11月 26, 2018
作者:
W
wopeizl
提交者:
GitHub
11月 26, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14545 from wopeizl/windows/online
Windows/online
上级
10e3d677
6250be4b
变更
50
隐藏空白更改
内联
并排
Showing
50 changed file
with
142 addition
and
123 deletion
+142
-123
cmake/cuda.cmake
cmake/cuda.cmake
+4
-1
cmake/generic.cmake
cmake/generic.cmake
+9
-2
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+0
-5
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+2
-2
paddle/fluid/framework/details/all_reduce_op_handle.h
paddle/fluid/framework/details/all_reduce_op_handle.h
+3
-3
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+1
-1
paddle/fluid/framework/details/broadcast_op_handle.h
paddle/fluid/framework/details/broadcast_op_handle.h
+3
-3
paddle/fluid/framework/details/broadcast_op_handle_test.h
paddle/fluid/framework/details/broadcast_op_handle_test.h
+6
-6
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-2
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+2
-2
paddle/fluid/framework/details/data_balance_op_handle.cc
paddle/fluid/framework/details/data_balance_op_handle.cc
+1
-1
paddle/fluid/framework/details/data_balance_op_handle.h
paddle/fluid/framework/details/data_balance_op_handle.h
+2
-2
paddle/fluid/framework/details/fused_broadcast_op_handle.h
paddle/fluid/framework/details/fused_broadcast_op_handle.h
+2
-2
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
...fluid/framework/details/fused_broadcast_op_handle_test.cc
+2
-2
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+8
-8
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+1
-1
paddle/fluid/framework/details/reduce_op_handle.cc
paddle/fluid/framework/details/reduce_op_handle.cc
+1
-1
paddle/fluid/framework/details/reduce_op_handle.h
paddle/fluid/framework/details/reduce_op_handle.h
+2
-2
paddle/fluid/framework/details/reduce_op_handle_test.cc
paddle/fluid/framework/details/reduce_op_handle_test.cc
+6
-6
paddle/fluid/framework/ir/is_test_pass_tester.cc
paddle/fluid/framework/ir/is_test_pass_tester.cc
+4
-1
paddle/fluid/framework/lod_tensor.cc
paddle/fluid/framework/lod_tensor.cc
+1
-16
paddle/fluid/framework/lod_tensor_test.cc
paddle/fluid/framework/lod_tensor_test.cc
+0
-2
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+4
-7
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+2
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+0
-4
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+0
-4
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+1
-4
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+0
-1
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+0
-4
paddle/fluid/inference/tests/book/test_inference_nlp.cc
paddle/fluid/inference/tests/book/test_inference_nlp.cc
+0
-1
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+1
-0
paddle/fluid/operators/beam_search_op_test.cc
paddle/fluid/operators/beam_search_op_test.cc
+2
-2
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+1
-1
paddle/fluid/operators/distributed/grpc_serde.cc
paddle/fluid/operators/distributed/grpc_serde.cc
+1
-1
paddle/fluid/operators/distributed/grpc_serde.h
paddle/fluid/operators/distributed/grpc_serde.h
+2
-1
paddle/fluid/operators/distributed/sendrecvop_utils.cc
paddle/fluid/operators/distributed/sendrecvop_utils.cc
+1
-1
paddle/fluid/operators/distributed/sendrecvop_utils.h
paddle/fluid/operators/distributed/sendrecvop_utils.h
+1
-1
paddle/fluid/operators/math/cpu_vec_test.cc
paddle/fluid/operators/math/cpu_vec_test.cc
+1
-1
paddle/fluid/operators/math/im2col_test.cc
paddle/fluid/operators/math/im2col_test.cc
+1
-1
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+1
-1
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+1
-1
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+7
-7
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+10
-1
paddle/fluid/platform/stream_callback_manager.h
paddle/fluid/platform/stream_callback_manager.h
+1
-1
paddle/legacy/cuda/include/hl_warpctc_wrap.h
paddle/legacy/cuda/include/hl_warpctc_wrap.h
+2
-1
paddle/legacy/cuda/src/hl_cuda_device.cc
paddle/legacy/cuda/src/hl_cuda_device.cc
+4
-0
paddle/legacy/utils/ThreadLocal.h
paddle/legacy/utils/ThreadLocal.h
+3
-1
paddle/legacy/utils/Util.h
paddle/legacy/utils/Util.h
+27
-0
paddle/testing/CMakeLists.txt
paddle/testing/CMakeLists.txt
+4
-2
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+2
-2
未找到文件。
cmake/cuda.cmake
浏览文件 @
05b7ee7e
...
@@ -199,8 +199,11 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
...
@@ -199,8 +199,11 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELEASE
}
)
list
(
APPEND CUDA_NVCC_FLAGS
${
CMAKE_CXX_FLAGS_RELEASE
}
)
endif
()
endif
()
else
(
NOT WIN32
)
else
(
NOT WIN32
)
list
(
APPEND CUDA_NVCC_FLAGS
"--compiler-options;/bigobj"
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-g -G"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-g -G"
)
# match the cl's _ITERATOR_DEBUG_LEVEL
list
(
APPEND CUDA_NVCC_FLAGS
"-D_DEBUG"
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"Release"
)
elseif
(
CMAKE_BUILD_TYPE STREQUAL
"Release"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-O3 -DNDEBUG"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-O3 -DNDEBUG"
)
else
()
else
()
...
...
cmake/generic.cmake
浏览文件 @
05b7ee7e
...
@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
...
@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
set
(
oneValueArgs
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS ARGS
)
set
(
multiValueArgs SRCS DEPS ARGS
)
cmake_parse_arguments
(
cc_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
cc_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
if
(
WIN32
)
list
(
APPEND win32_deps shlwapi
)
if
(
"
${
cc_test_DEPS
}
;"
MATCHES
"python;"
)
list
(
REMOVE_ITEM cc_test_DEPS python
)
list
(
APPEND win32_deps
${
PYTHON_LIBRARIES
}
)
endif
()
endif
(
WIN32
)
add_executable
(
${
TARGET_NAME
}
${
cc_test_SRCS
}
)
add_executable
(
${
TARGET_NAME
}
${
cc_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
target_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
if
(
WIN32
)
if
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
shlwapi
)
target_link_libraries
(
${
TARGET_NAME
}
${
win32_deps
}
)
endif
(
WIN32
)
endif
(
WIN32
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main lod_tensor memory gtest gflags glog
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
...
@@ -683,7 +690,7 @@ function(py_test TARGET_NAME)
...
@@ -683,7 +690,7 @@ function(py_test TARGET_NAME)
set
(
multiValueArgs SRCS DEPS ARGS ENVS
)
set
(
multiValueArgs SRCS DEPS ARGS ENVS
)
cmake_parse_arguments
(
py_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
py_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_test
(
NAME
${
TARGET_NAME
}
add_test
(
NAME
${
TARGET_NAME
}
COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
COMMAND
${
CMAKE_COMMAND
}
-E
env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true
FLAGS_cpu_deterministic=true
PYTHONPATH=
${
PADDLE_BINARY_DIR
}
/python
${
py_test_ENVS
}
PYTHONPATH=
${
PADDLE_BINARY_DIR
}
/python
${
py_test_ENVS
}
${
PYTHON_EXECUTABLE
}
-u
${
py_test_SRCS
}
${
py_test_ARGS
}
${
PYTHON_EXECUTABLE
}
-u
${
py_test_SRCS
}
${
py_test_ARGS
}
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
05b7ee7e
...
@@ -116,14 +116,9 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
...
@@ -116,14 +116,9 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
op_info SRCS op_info.cc DEPS attribute framework_proto
)
cc_library
(
shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context
)
cc_library
(
shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context
)
if
(
NOT WIN32
)
cc_library
(
transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context
)
cc_library
(
transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context
)
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler transfer_scope_cache
)
shape_inference data_transform lod_tensor profiler transfer_scope_cache
)
else
()
cc_library
(
operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor
)
endif
(
NOT WIN32
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry device_context
)
cc_test
(
operator_test SRCS operator_test.cc DEPS operator op_registry device_context
)
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
05b7ee7e
...
@@ -23,7 +23,7 @@ namespace paddle {
...
@@ -23,7 +23,7 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle
::
AllReduceOpHandle
(
ir
::
Node
*
node
,
AllReduceOpHandle
::
AllReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
...
@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
}
}
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
PADDLE_ENFORCE
(
nccl_ctxs_
,
"nccl_ctxs should not be nullptr."
);
PADDLE_ENFORCE
(
nccl_ctxs_
,
"nccl_ctxs should not be nullptr."
);
int
dtype
=
-
1
;
int
dtype
=
-
1
;
size_t
numel
=
0
;
size_t
numel
=
0
;
...
...
paddle/fluid/framework/details/all_reduce_op_handle.h
浏览文件 @
05b7ee7e
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -29,7 +29,7 @@ namespace framework {
...
@@ -29,7 +29,7 @@ namespace framework {
namespace
details
{
namespace
details
{
struct
AllReduceOpHandle
:
public
OpHandleBase
{
struct
AllReduceOpHandle
:
public
OpHandleBase
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
AllReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
ctxs
);
const
platform
::
NCCLContextMap
*
ctxs
);
...
@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
...
@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
private:
private:
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
#endif
};
};
...
...
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
05b7ee7e
...
@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
...
@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
});
});
}
}
}
else
{
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
VarHandle
*
out_handle
=
nullptr
;
VarHandle
*
out_handle
=
nullptr
;
int
root_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
in_tensor
.
place
()).
device
;
int
root_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
in_tensor
.
place
()).
device
;
std
::
vector
<
std
::
function
<
void
()
>>
broadcast_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
broadcast_calls
;
...
...
paddle/fluid/framework/details/broadcast_op_handle.h
浏览文件 @
05b7ee7e
...
@@ -24,7 +24,7 @@
...
@@ -24,7 +24,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -34,7 +34,7 @@ namespace details {
...
@@ -34,7 +34,7 @@ namespace details {
struct
BroadcastOpHandle
:
public
OpHandleBase
{
struct
BroadcastOpHandle
:
public
OpHandleBase
{
public:
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
BroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
BroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
platform
::
NCCLContextMap
*
nccl_ctxs
)
...
@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
...
@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
#endif
...
...
paddle/fluid/framework/details/broadcast_op_handle_test.h
浏览文件 @
05b7ee7e
...
@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
...
@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes_
;
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes_
;
std
::
vector
<
p
::
Place
>
place_list_
;
std
::
vector
<
p
::
Place
>
place_list_
;
bool
use_gpu_
;
bool
use_gpu_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
#endif
...
@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
...
@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
ctxs_
[
j
]
->
Wait
();
ctxs_
[
j
]
->
Wait
();
}
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
)
{
if
(
nccl_ctxs_
)
{
nccl_ctxs_
->
WaitAll
();
nccl_ctxs_
->
WaitAll
();
}
}
...
@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
...
@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
void
InitCtxOnGpu
(
bool
use_gpu
)
{
void
InitCtxOnGpu
(
bool
use_gpu
)
{
use_gpu_
=
use_gpu
;
use_gpu_
=
use_gpu
;
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int
count
=
p
::
GetCUDADeviceCount
();
int
count
=
p
::
GetCUDADeviceCount
();
if
(
count
<=
1
)
{
if
(
count
<=
1
)
{
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
...
@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
...
@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
place_list_
.
push_back
(
p
);
place_list_
.
push_back
(
p
);
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
}
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
.
reset
(
nullptr
);
nccl_ctxs_
.
reset
(
nullptr
);
#endif
#endif
}
}
...
@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
...
@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
nodes_
.
emplace_back
(
nodes_
.
emplace_back
(
ir
::
CreateNodeForTest
(
"node0"
,
ir
::
Node
::
Type
::
kOperation
));
ir
::
CreateNodeForTest
(
"node0"
,
ir
::
Node
::
Type
::
kOperation
));
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
"CUDA is not support."
);
#endif
#endif
}
else
{
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
05b7ee7e
...
@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
#else
const
bool
use_cuda
)
const
{
const
bool
use_cuda
)
const
{
...
@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
Erase
(
"local_scopes"
);
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
&
local_scopes
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
platform
::
NCCLContextMap
*
nctx
=
use_cuda
?
nccl_ctxs
:
nullptr
;
pass
->
Erase
(
"nccl_ctxs"
);
pass
->
Erase
(
"nccl_ctxs"
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
05b7ee7e
...
@@ -23,7 +23,7 @@
...
@@ -23,7 +23,7 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -98,7 +98,7 @@ struct BuildStrategy {
...
@@ -98,7 +98,7 @@ struct BuildStrategy {
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
#else
const
bool
use_cuda
)
const
;
const
bool
use_cuda
)
const
;
...
...
paddle/fluid/framework/details/data_balance_op_handle.cc
浏览文件 @
05b7ee7e
...
@@ -20,7 +20,7 @@ namespace paddle {
...
@@ -20,7 +20,7 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle
::
DataBalanceOpHandle
(
DataBalanceOpHandle
::
DataBalanceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/data_balance_op_handle.h
浏览文件 @
05b7ee7e
...
@@ -19,7 +19,7 @@
...
@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -29,7 +29,7 @@ namespace details {
...
@@ -29,7 +29,7 @@ namespace details {
struct
DataBalanceOpHandle
:
public
OpHandleBase
{
struct
DataBalanceOpHandle
:
public
OpHandleBase
{
public:
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
DataBalanceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
ctxs
);
const
platform
::
NCCLContextMap
*
ctxs
);
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle.h
浏览文件 @
05b7ee7e
...
@@ -25,7 +25,7 @@
...
@@ -25,7 +25,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -35,7 +35,7 @@ namespace details {
...
@@ -35,7 +35,7 @@ namespace details {
struct
FusedBroadcastOpHandle
:
public
BroadcastOpHandle
{
struct
FusedBroadcastOpHandle
:
public
BroadcastOpHandle
{
public:
public:
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
FusedBroadcastOpHandle
(
ir
::
Node
*
node
,
FusedBroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
local_scopes
,
const
std
::
vector
<
Scope
*>
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
浏览文件 @
05b7ee7e
...
@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
...
@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
nodes_
.
emplace_back
(
nodes_
.
emplace_back
(
ir
::
CreateNodeForTest
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
));
ir
::
CreateNodeForTest
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
));
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
FusedBroadcastOpHandle
(
op_handle_
=
new
FusedBroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
PADDLE_THROW
(
"CUDA is not supported."
);
PADDLE_THROW
(
"CUDA is not supported."
);
#endif
#endif
}
else
{
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
=
new
FusedBroadcastOpHandle
(
op_handle_
=
new
FusedBroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
05b7ee7e
...
@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
...
@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
places_
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
places_
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
local_scopes_
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
local_scopes_
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
strategy_
=
Get
<
const
BuildStrategy
>
(
kStrategy
);
strategy_
=
Get
<
const
BuildStrategy
>
(
kStrategy
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
#endif
#endif
...
@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
}
}
}
}
bool
use_gpu
=
false
;
bool
use_gpu
=
false
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
use_gpu
=
nccl_ctxs_
!=
nullptr
;
use_gpu
=
nccl_ctxs_
!=
nullptr
;
#endif
#endif
...
@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
...
@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
void
MultiDevSSAGraphBuilder
::
SetCommunicationContext
(
void
MultiDevSSAGraphBuilder
::
SetCommunicationContext
(
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
{
OpHandleBase
*
op_handle
,
const
platform
::
Place
&
p
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
==
nullptr
)
{
if
(
nccl_ctxs_
==
nullptr
)
{
op_handle
->
SetDeviceContext
(
p
,
op_handle
->
SetDeviceContext
(
p
,
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
));
platform
::
DeviceContextPool
::
Instance
().
Get
(
p
));
...
@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
...
@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
void
MultiDevSSAGraphBuilder
::
CreateBroadcastOp
(
ir
::
Graph
*
result
,
void
MultiDevSSAGraphBuilder
::
CreateBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
p_name
,
const
std
::
string
&
p_name
,
size_t
src_dev_id
)
const
{
size_t
src_dev_id
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
op_handle
=
new
BroadcastOpHandle
(
auto
*
op_handle
=
new
BroadcastOpHandle
(
result
->
CreateEmptyNode
(
"broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
);
local_scopes_
,
places_
,
nccl_ctxs_
);
...
@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
...
@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
CreateFusedBroadcastOp
(
void
MultiDevSSAGraphBuilder
::
CreateFusedBroadcastOp
(
ir
::
Graph
*
result
,
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
bcast_varnames
)
const
{
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
bcast_varnames
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
op_handle
=
new
FusedBroadcastOpHandle
(
auto
*
op_handle
=
new
FusedBroadcastOpHandle
(
result
->
CreateEmptyNode
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
);
local_scopes_
,
places_
,
nccl_ctxs_
);
...
@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
...
@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
InsertAllReduceOp
(
ir
::
Graph
*
result
,
void
MultiDevSSAGraphBuilder
::
InsertAllReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
)
const
{
const
std
::
string
&
og
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
AllReduceOpHandle
(
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
AllReduceOpHandle
(
result
->
CreateEmptyNode
(
"allreduce"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"allreduce"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
local_scopes_
,
places_
,
nccl_ctxs_
));
...
@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
...
@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
void
MultiDevSSAGraphBuilder
::
InsertDataBalanceOp
(
void
MultiDevSSAGraphBuilder
::
InsertDataBalanceOp
(
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
string
>
&
datas
)
const
{
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
string
>
&
datas
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
DataBalanceOpHandle
(
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
DataBalanceOpHandle
(
result
->
CreateEmptyNode
(
"data_balance"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"data_balance"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
local_scopes_
,
places_
,
nccl_ctxs_
));
...
@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
...
@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
VarHandle
*
MultiDevSSAGraphBuilder
::
CreateReduceOp
(
ir
::
Graph
*
result
,
VarHandle
*
MultiDevSSAGraphBuilder
::
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
{
int
dst_dev_id
)
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
ReduceOpHandle
(
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
new
ReduceOpHandle
(
result
->
CreateEmptyNode
(
"reduce"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"reduce"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
));
local_scopes_
,
places_
,
nccl_ctxs_
));
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
05b7ee7e
...
@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
size_t
device_id
)
const
;
size_t
device_id
)
const
;
void
Init
()
const
;
void
Init
()
const
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
mutable
platform
::
NCCLContextMap
*
nccl_ctxs_
;
mutable
platform
::
NCCLContextMap
*
nccl_ctxs_
;
#endif
#endif
...
...
paddle/fluid/framework/details/reduce_op_handle.cc
浏览文件 @
05b7ee7e
...
@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
...
@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
}
}
});
});
}
else
if
(
paddle
::
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
}
else
if
(
paddle
::
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
pre_in
=
pre_in_var
->
Get
<
framework
::
LoDTensor
>
();
auto
pre_in
=
pre_in_var
->
Get
<
framework
::
LoDTensor
>
();
VariableVisitor
::
ShareDimsAndLoD
(
*
pre_in_var
,
out_var
);
VariableVisitor
::
ShareDimsAndLoD
(
*
pre_in_var
,
out_var
);
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
...
...
paddle/fluid/framework/details/reduce_op_handle.h
浏览文件 @
05b7ee7e
...
@@ -23,7 +23,7 @@
...
@@ -23,7 +23,7 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#endif
...
@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
...
@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
const
platform
::
NCCLContextMap
*
nccl_ctxs_
;
ReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
ReduceOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
...
paddle/fluid/framework/details/reduce_op_handle_test.cc
浏览文件 @
05b7ee7e
...
@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
...
@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
std
::
vector
<
p
::
Place
>
gpu_list_
;
std
::
vector
<
p
::
Place
>
gpu_list_
;
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
std
::
vector
<
std
::
unique_ptr
<
p
::
DeviceContext
>>
ctxs_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
#endif
...
@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
...
@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
ctxs_
.
size
();
++
j
)
{
ctxs_
[
j
]
->
Wait
();
ctxs_
[
j
]
->
Wait
();
}
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
nccl_ctxs_
)
{
if
(
nccl_ctxs_
)
{
nccl_ctxs_
->
WaitAll
();
nccl_ctxs_
->
WaitAll
();
}
}
...
@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
...
@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
void
InitCtxOnGpu
(
bool
use_gpu
)
{
void
InitCtxOnGpu
(
bool
use_gpu
)
{
use_gpu_
=
use_gpu
;
use_gpu_
=
use_gpu
;
if
(
use_gpu
)
{
if
(
use_gpu
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int
count
=
p
::
GetCUDADeviceCount
();
int
count
=
p
::
GetCUDADeviceCount
();
if
(
count
<=
1
)
{
if
(
count
<=
1
)
{
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
LOG
(
WARNING
)
<<
"Cannot test multi-gpu Broadcast, because the CUDA "
...
@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
...
@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
gpu_list_
.
push_back
(
p
);
gpu_list_
.
push_back
(
p
);
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
ctxs_
.
emplace_back
(
new
p
::
CPUDeviceContext
(
p
));
}
}
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_
.
reset
(
nullptr
);
nccl_ctxs_
.
reset
(
nullptr
);
#endif
#endif
}
}
...
@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
...
@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
nodes
.
emplace_back
(
new
ir
::
Node
(
"node"
));
nodes
.
emplace_back
(
new
ir
::
Node
(
"node"
));
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
gpu_list_
,
nccl_ctxs_
.
get
()));
gpu_list_
,
nccl_ctxs_
.
get
()));
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
"CUDA is not support."
);
#endif
#endif
}
else
{
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
gpu_list_
,
nccl_ctxs_
.
get
()));
gpu_list_
,
nccl_ctxs_
.
get
()));
#else
#else
...
...
paddle/fluid/framework/ir/is_test_pass_tester.cc
浏览文件 @
05b7ee7e
...
@@ -15,7 +15,10 @@
...
@@ -15,7 +15,10 @@
#include "paddle/fluid/framework/ir/is_test_pass.h"
#include "paddle/fluid/framework/ir/is_test_pass.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#ifdef _WIN32
#undef FALSE
#undef TRUE
#endif
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
...
...
paddle/fluid/framework/lod_tensor.cc
浏览文件 @
05b7ee7e
...
@@ -26,10 +26,8 @@ limitations under the License. */
...
@@ -26,10 +26,8 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/memory/memory.h"
#if !defined(_WIN32)
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
#include "paddle/fluid/recordio/writer.h"
#endif // _WIN32
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
...
@@ -305,7 +303,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream
(
is
,
static_cast
<
Tensor
*>
(
tensor
),
dev_ctx
);
TensorFromStream
(
is
,
static_cast
<
Tensor
*>
(
tensor
),
dev_ctx
);
}
}
#if !defined(_WIN32)
void
WriteToRecordIO
(
recordio
::
Writer
*
writer
,
void
WriteToRecordIO
(
recordio
::
Writer
*
writer
,
const
std
::
vector
<
LoDTensor
>
&
tensor
,
const
std
::
vector
<
LoDTensor
>
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
const
platform
::
DeviceContext
&
dev_ctx
)
{
...
@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
...
@@ -335,19 +332,7 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
return
true
;
return
true
;
}
}
#else
class
Writer
{};
class
Scanner
{};
void
WriteToRecordIO
(
recordio
::
Writer
*
writer
,
const
std
::
vector
<
LoDTensor
>
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{}
bool
ReadFromRecordIO
(
recordio
::
Scanner
*
scanner
,
const
platform
::
DeviceContext
&
dev_ctx
,
std
::
vector
<
LoDTensor
>
*
result_ptr
)
{
PADDLE_ENFORCE
(
"windows didn't supported recordio!."
);
return
true
;
}
#endif // _WIN32
std
::
vector
<
LoDTensor
>
LoDTensor
::
SplitLoDTensor
(
std
::
vector
<
LoDTensor
>
LoDTensor
::
SplitLoDTensor
(
const
std
::
vector
<
platform
::
Place
>
places
)
const
{
const
std
::
vector
<
platform
::
Place
>
places
)
const
{
check_memory_size
();
check_memory_size
();
...
...
paddle/fluid/framework/lod_tensor_test.cc
浏览文件 @
05b7ee7e
...
@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
...
@@ -274,7 +274,6 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
EXPECT_EQ
(
offset_lod
,
expected
);
EXPECT_EQ
(
offset_lod
,
expected
);
}
}
#if !defined(_WIN32)
template
<
typename
T
>
template
<
typename
T
>
static
void
TestRecordIO
()
{
static
void
TestRecordIO
()
{
LoDTensor
tensor
;
LoDTensor
tensor
;
...
@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) {
...
@@ -321,7 +320,6 @@ TEST(LoDTensor, RecordIO) {
TestRecordIO
<
float
>
();
TestRecordIO
<
float
>
();
TestRecordIO
<
double
>
();
TestRecordIO
<
double
>
();
}
}
#endif // !defined(_WIN32)
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/operator.cc
浏览文件 @
05b7ee7e
...
@@ -149,17 +149,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
...
@@ -149,17 +149,14 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
#endif
#endif
}
}
// The profile has a process-wide mutex, results in serious performance issue
// The profile has a process-wide mutex, results in serious performance issue
// in concurrency scenerio. Here use an `if` to fix this issue.
// in concurrency scenerio. Here use an `if` to fix this issue.
// Please not remove the `if`, ask @Superjomn if there are any concern.
// Please not remove the `if`, ask @Superjomn if there are any concern.
#ifndef _WIN32
if
(
platform
::
IsProfileEnabled
())
{
if
(
platform
::
IsProfileEnabled
())
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
platform
::
RecordEvent
record_event
(
Type
(),
pool
.
Get
(
place
));
RunImpl
(
scope
,
place
);
RunImpl
(
scope
,
place
);
}
else
}
else
{
#endif
{
RunImpl
(
scope
,
place
);
RunImpl
(
scope
,
place
);
}
}
VLOG
(
30
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
VLOG
(
30
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
05b7ee7e
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
...
@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706
};
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
(
size_t
)
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
"data: "
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
05b7ee7e
...
@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init(
...
@@ -56,7 +56,6 @@ bool AnalysisPredictor::Init(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
VLOG
(
30
)
<<
"Predictor::init()"
;
VLOG
(
30
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
...
@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init(
...
@@ -64,7 +63,6 @@ bool AnalysisPredictor::Init(
:
platform
::
ProfilerState
::
kCPU
;
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
platform
::
EnableProfiler
(
tracking_device
);
}
}
#endif
// no matter with or without MKLDNN
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
...
@@ -520,12 +518,10 @@ bool AnalysisPredictor::LoadParameters() {
...
@@ -520,12 +518,10 @@ bool AnalysisPredictor::LoadParameters() {
}
}
AnalysisPredictor
::~
AnalysisPredictor
()
{
AnalysisPredictor
::~
AnalysisPredictor
()
{
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
"./profile.log"
);
"./profile.log"
);
}
}
#endif
if
(
sub_scope_
)
{
if
(
sub_scope_
)
{
scope_
->
DeleteScope
(
sub_scope_
);
scope_
->
DeleteScope
(
sub_scope_
);
}
}
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
05b7ee7e
...
@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() {
...
@@ -64,7 +64,6 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool
NativePaddlePredictor
::
Init
(
bool
NativePaddlePredictor
::
Init
(
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
)
{
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
VLOG
(
3
)
<<
"Predictor::init()"
;
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
...
@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init(
...
@@ -73,7 +72,6 @@ bool NativePaddlePredictor::Init(
:
platform
::
ProfilerState
::
kCPU
;
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
platform
::
EnableProfiler
(
tracking_device
);
}
}
#endif
// no matter with or without MKLDNN
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
...
@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init(
...
@@ -121,12 +119,10 @@ bool NativePaddlePredictor::Init(
}
}
NativePaddlePredictor
::~
NativePaddlePredictor
()
{
NativePaddlePredictor
::~
NativePaddlePredictor
()
{
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
"./profile.log"
);
"./profile.log"
);
}
}
#endif
if
(
sub_scope_
)
{
if
(
sub_scope_
)
{
scope_
->
DeleteScope
(
sub_scope_
);
scope_
->
DeleteScope
(
sub_scope_
);
}
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
05b7ee7e
...
@@ -15,10 +15,6 @@
...
@@ -15,10 +15,6 @@
#pragma once
#pragma once
#include <glog/logging.h>
#include <glog/logging.h>
#if !defined(_WIN32)
#include <sys/time.h>
#else
#endif
#include <algorithm>
#include <algorithm>
#include <chrono> // NOLINT
#include <chrono> // NOLINT
...
@@ -28,6 +24,7 @@
...
@@ -28,6 +24,7 @@
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
浏览文件 @
05b7ee7e
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <sys/time.h>
#include <time.h>
#include <time.h>
#include <algorithm>
#include <algorithm>
#include <fstream>
#include <fstream>
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
05b7ee7e
...
@@ -178,11 +178,9 @@ void TestOneThreadPrediction(
...
@@ -178,11 +178,9 @@ void TestOneThreadPrediction(
warmup_timer
.
tic
();
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
1
,
0
,
warmup_timer
.
toc
(),
1
);
PrintTime
(
batch_size
,
1
,
1
,
0
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
paddle
::
platform
::
ResetProfiler
();
}
}
#endif
}
}
LOG
(
INFO
)
<<
"Run "
<<
num_times
<<
" times..."
;
LOG
(
INFO
)
<<
"Run "
<<
num_times
<<
" times..."
;
...
@@ -232,11 +230,9 @@ void TestMultiThreadPrediction(
...
@@ -232,11 +230,9 @@ void TestMultiThreadPrediction(
warmup_timer
.
tic
();
warmup_timer
.
tic
();
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
PrintTime
(
batch_size
,
1
,
num_threads
,
tid
,
warmup_timer
.
toc
(),
1
);
PrintTime
(
batch_size
,
1
,
num_threads
,
tid
,
warmup_timer
.
toc
(),
1
);
#if !defined(_WIN32)
if
(
FLAGS_profile
)
{
if
(
FLAGS_profile
)
{
paddle
::
platform
::
ResetProfiler
();
paddle
::
platform
::
ResetProfiler
();
}
}
#endif
}
}
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
...
...
paddle/fluid/inference/tests/book/test_inference_nlp.cc
浏览文件 @
05b7ee7e
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <sys/time.h>
#include <time.h>
#include <time.h>
#include <fstream>
#include <fstream>
#include <thread> // NOLINT
#include <thread> // NOLINT
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
05b7ee7e
...
@@ -20,6 +20,7 @@ limitations under the License. */
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
use_mkldnn
);
DECLARE_bool
(
use_mkldnn
);
...
...
paddle/fluid/operators/beam_search_op_test.cc
浏览文件 @
05b7ee7e
...
@@ -46,7 +46,7 @@ void CreateInput(LoDTensor* ids, LoDTensor* scores) {
...
@@ -46,7 +46,7 @@ void CreateInput(LoDTensor* ids, LoDTensor* scores) {
auto
*
scores_data
=
scores
->
mutable_data
<
float
>
(
place
);
auto
*
scores_data
=
scores
->
mutable_data
<
float
>
(
place
);
vector
<
int64_t
>
_ids
({
4
,
2
,
5
,
2
,
1
,
3
,
3
,
5
,
2
,
8
,
2
,
1
});
vector
<
int64_t
>
_ids
({
4
,
2
,
5
,
2
,
1
,
3
,
3
,
5
,
2
,
8
,
2
,
1
});
vector
<
float
>
_scores
(
vector
<
float
>
_scores
(
{
0.5
,
0.3
,
0.2
,
0.6
,
0.3
,
0.1
,
0.9
,
0.5
,
0.1
,
0.7
,
0.5
,
0.1
});
{
0.5
f
,
0.3
f
,
0.2
f
,
0.6
f
,
0.3
f
,
0.1
f
,
0.9
f
,
0.5
f
,
0.1
f
,
0.7
f
,
0.5
f
,
0.1
f
});
for
(
int
i
=
0
;
i
<
12
;
i
++
)
{
for
(
int
i
=
0
;
i
<
12
;
i
++
)
{
ids_data
[
i
]
=
_ids
[
i
];
ids_data
[
i
]
=
_ids
[
i
];
...
@@ -80,7 +80,7 @@ TEST(DISABLED_beam_search_op, run) {
...
@@ -80,7 +80,7 @@ TEST(DISABLED_beam_search_op, run) {
ASSERT_EQ
(
sids
.
lod
(),
sscores
.
lod
());
ASSERT_EQ
(
sids
.
lod
(),
sscores
.
lod
());
vector
<
int
>
tids
({
4
,
2
,
3
,
8
});
vector
<
int
>
tids
({
4
,
2
,
3
,
8
});
vector
<
float
>
tscores
({
0.5
,
0.6
,
0.9
,
0.7
});
vector
<
float
>
tscores
({
0.5
f
,
0.6
f
,
0.9
f
,
0.7
f
});
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
ASSERT_EQ
(
tids
[
i
],
sids
.
data
<
int64_t
>
()[
i
]);
ASSERT_EQ
(
tids
[
i
],
sids
.
data
<
int64_t
>
()[
i
]);
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
05b7ee7e
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <sys/time.h>
#include <limits>
#include <limits>
#include "glog/logging.h" // For VLOG
#include "glog/logging.h" // For VLOG
...
@@ -20,6 +19,7 @@ limitations under the License. */
...
@@ -20,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_client.h"
#include "paddle/fluid/operators/distributed/grpc_client.h"
#include "paddle/fluid/operators/distributed/grpc_serde.h"
#include "paddle/fluid/operators/distributed/grpc_serde.h"
#include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
rpc_disable_reuse_port
);
DECLARE_bool
(
rpc_disable_reuse_port
);
...
...
paddle/fluid/operators/distributed/grpc_serde.cc
浏览文件 @
05b7ee7e
...
@@ -15,7 +15,6 @@ limitations under the License. */
...
@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#include <nccl.h>
#endif
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/coded_stream.h"
...
@@ -26,6 +25,7 @@ limitations under the License. */
...
@@ -26,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_variable_response.h"
#include "paddle/fluid/operators/distributed/grpc_variable_response.h"
#include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
#include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/distributed/grpc_serde.h
浏览文件 @
05b7ee7e
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <sys/time.h>
#include <iostream>
#include <iostream>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -25,6 +25,7 @@ limitations under the License. */
...
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.cc
浏览文件 @
05b7ee7e
...
@@ -15,12 +15,12 @@ limitations under the License. */
...
@@ -15,12 +15,12 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#include <nccl.h>
#endif
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/variable_response.h"
#include "paddle/fluid/operators/distributed/variable_response.h"
#include "paddle/fluid/platform/port.h"
DEFINE_bool
(
rpc_disable_reuse_port
,
false
,
"Disable SO_REUSEPORT or not."
);
DEFINE_bool
(
rpc_disable_reuse_port
,
false
,
"Disable SO_REUSEPORT or not."
);
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.h
浏览文件 @
05b7ee7e
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <sys/time.h>
#include <iostream>
#include <iostream>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -24,6 +23,7 @@ limitations under the License. */
...
@@ -24,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
...
...
paddle/fluid/operators/math/cpu_vec_test.cc
浏览文件 @
05b7ee7e
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <sys/time.h>
#include <cmath>
#include <cmath>
#include <cstring>
#include <cstring>
#include <random>
#include <random>
...
@@ -22,6 +21,7 @@ limitations under the License. */
...
@@ -22,6 +21,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/port.h"
inline
double
GetCurrentUS
()
{
inline
double
GetCurrentUS
()
{
struct
timeval
time
;
struct
timeval
time
;
...
...
paddle/fluid/operators/math/im2col_test.cc
浏览文件 @
05b7ee7e
...
@@ -14,9 +14,9 @@ limitations under the License. */
...
@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <sys/time.h>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/platform/port.h"
template
<
typename
DeviceContext
,
typename
Place
>
template
<
typename
DeviceContext
,
typename
Place
>
void
testIm2col
()
{
void
testIm2col
()
{
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
05b7ee7e
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/jit_kernel.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#include <sys/time.h>
#include <cmath> // for exp
#include <cmath> // for exp
#include <cstring> // for memcpy
#include <cstring> // for memcpy
#include <random>
#include <random>
...
@@ -22,6 +21,7 @@ limitations under the License. */
...
@@ -22,6 +21,7 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/fluid/platform/dynload/mklml.h"
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
05b7ee7e
...
@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
...
@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
#define CUDNN_ENFORCE(condition) \
#define CUDNN_ENFORCE(condition) \
do { \
do { \
cudnnStatus_t status = condition;
\
auto status = condition;
\
if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \
if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
} \
} \
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
05b7ee7e
...
@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
...
@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#else
#else
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)
\
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name {
\
struct DynLoad__##__name { \
template <typename... Args>
\
template <typename... Args> \
inline
cudnnStatus_t operator()(Args... args) {
\
inline
auto operator()(Args... args) {
\
return ::__name(args...);
\
return ::__name(args...); \
}
\
} \
};
\
}; \
extern DynLoad__##__name __name
extern DynLoad__##__name __name
#endif
#endif
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
05b7ee7e
...
@@ -19,7 +19,16 @@ limitations under the License. */
...
@@ -19,7 +19,16 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
0.92
,
#ifndef _WIN32
const
float
fraction_of_gpu_memory_to_use
=
0.92
f
;
#else
// fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
const
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
#endif
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
"Allocate a trunk of gpu memory that is this fraction of the "
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
"total gpu memory size. Future memory usage will be allocated "
"from the trunk. If the trunk doesn't have enough gpu memory, "
"from the trunk. If the trunk doesn't have enough gpu memory, "
...
...
paddle/fluid/platform/stream_callback_manager.h
浏览文件 @
05b7ee7e
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
#pragma once
#pragma once
#include <ThreadPool.h>
#include <cuda.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <functional>
#include <functional>
#include <memory>
#include <memory>
#include "ThreadPool.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/legacy/cuda/include/hl_warpctc_wrap.h
浏览文件 @
05b7ee7e
...
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifndef _WIN32
#ifndef HL_WARPCTC_WRAP_H_
#ifndef HL_WARPCTC_WRAP_H_
#define HL_WARPCTC_WRAP_H_
#define HL_WARPCTC_WRAP_H_
#include "ctc.h"
#include "ctc.h"
#include "hl_base.h"
#include "hl_base.h"
...
@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
...
@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
size_t
*
bytes
);
size_t
*
bytes
);
#endif // HL_WARPCTC_WRAP_H_
#endif // HL_WARPCTC_WRAP_H_
#endif
paddle/legacy/cuda/src/hl_cuda_device.cc
浏览文件 @
05b7ee7e
...
@@ -132,11 +132,15 @@ inline pid_t gettid() {
...
@@ -132,11 +132,15 @@ inline pid_t gettid() {
uint64_t
tid
;
uint64_t
tid
;
pthread_threadid_np
(
NULL
,
&
tid
);
pthread_threadid_np
(
NULL
,
&
tid
);
#else
#else
#ifndef _WIN32
#ifndef __NR_gettid
#ifndef __NR_gettid
#define __NR_gettid 224
#define __NR_gettid 224
#endif
#endif
pid_t
tid
=
syscall
(
__NR_gettid
);
pid_t
tid
=
syscall
(
__NR_gettid
);
#endif
#endif
#else // _WIN32
pid_t
tid
=
_getpid
();
#endif // _WIN32
CHECK_NE
((
int
)
tid
,
-
1
);
CHECK_NE
((
int
)
tid
,
-
1
);
return
tid
;
return
tid
;
}
}
...
...
paddle/legacy/utils/ThreadLocal.h
浏览文件 @
05b7ee7e
...
@@ -14,10 +14,12 @@ limitations under the License. */
...
@@ -14,10 +14,12 @@ limitations under the License. */
#pragma once
#pragma once
#ifndef _WIN32
#include <pthread.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#endif
#include <sys/types.h>
#include <map>
#include <map>
#include <mutex>
#include <mutex>
#include <random>
#include <random>
...
...
paddle/legacy/utils/Util.h
浏览文件 @
05b7ee7e
...
@@ -14,7 +14,9 @@ limitations under the License. */
...
@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#pragma once
#ifndef _WIN32
#include <sys/syscall.h> // for syscall()
#include <sys/syscall.h> // for syscall()
#endif
#include <sys/types.h>
#include <sys/types.h>
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
...
@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
...
@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
}
}
#endif
#endif
#ifdef _WIN32
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#include <windows.h>
// Windows stand-in for GCC's __builtin_clz family: counts the leading zero
// bits of `value`, measured against the bit width of T.
//
// _BitScanReverse reports the zero-based index of the most significant set
// bit, so the leading-zero count is (bit_width - 1 - index). It only accepts a
// 32-bit `unsigned long`, so arguments wider than 32 bits are scanned as two
// halves (upper half first) instead of being truncated.
//
// Returns 0 when `value` is zero (GCC leaves that case undefined); callers
// must not rely on the result for a zero input.
template <typename T>
inline int __builtin_clz(const T& value) {
  const int bit_width = static_cast<int>(sizeof(T) * 8);
  const int wide_width = static_cast<int>(sizeof(unsigned long long) * 8);

  // Widen, then drop any sign-extension bits beyond T's own width.
  unsigned long long wide = static_cast<unsigned long long>(value);
  wide &= ~0ull >> (wide_width - bit_width);

  DWORD msb_index = 0;
  if (sizeof(T) > 4) {
    // Upper 32 bits first: a hit here means the MSB sits at index 32 + msb_index.
    if (_BitScanReverse(&msb_index, static_cast<unsigned long>(wide >> 32))) {
      return bit_width - 1 - (static_cast<int>(msb_index) + 32);
    }
  }
  if (_BitScanReverse(&msb_index,
                      static_cast<unsigned long>(wide & 0xFFFFFFFFull))) {
    return bit_width - 1 - static_cast<int>(msb_index);
  }
  // No bit set at all.
  return 0;
}
// Windows stand-in for GCC's __builtin_clzl: hands the `unsigned long`
// argument to the generic __builtin_clz shim and returns its result.
inline int __builtin_clzl(const unsigned long& value) {
  const int zero_count = __builtin_clz(value);
  return zero_count;
}
// Windows stand-in for GCC's __builtin_clzll: hands the `unsigned long long`
// argument to the generic __builtin_clz shim and returns its result.
inline int __builtin_clzll(const unsigned long long& value) {
  const int zero_count = __builtin_clz(value);
  return zero_count;
}
#define pid_t int
#endif
/**
/**
* Loop over the elements in a container
* Loop over the elements in a container
* TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
* TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
...
...
paddle/testing/CMakeLists.txt
浏览文件 @
05b7ee7e
...
@@ -3,8 +3,10 @@
...
@@ -3,8 +3,10 @@
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
add_library
(
paddle_test_main STATIC TestMain.cpp
)
add_library
(
paddle_test_main STATIC TestMain.cpp
)
add_dependencies
(
paddle_test_main paddle_proto
${
external_project_dependencies
}
)
add_dependencies
(
paddle_test_main paddle_proto
${
external_project_dependencies
}
)
add_library
(
paddle_test_util STATIC TestUtil.cpp
)
if
(
NOT WIN32
)
add_dependencies
(
paddle_test_util paddle_proto
${
external_project_dependencies
}
)
add_library
(
paddle_test_util STATIC TestUtil.cpp
)
add_dependencies
(
paddle_test_util paddle_proto
${
external_project_dependencies
}
)
endif
(
NOT WIN32
)
if
(
NOT MOBILE_INFERENCE
)
if
(
NOT MOBILE_INFERENCE
)
cc_library
(
paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags
)
cc_library
(
paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags
)
endif
()
endif
()
...
...
python/paddle/fluid/metrics.py
浏览文件 @
05b7ee7e
...
@@ -46,8 +46,8 @@ def _is_numpy_(var):
...
@@ -46,8 +46,8 @@ def _is_numpy_(var):
def
_is_number_
(
var
):
def
_is_number_
(
var
):
return
isinstance
(
var
,
int
)
or
isinstance
(
var
,
float
)
or
(
isinstance
(
return
isinstance
(
var
,
int
)
or
isinstance
(
var
,
np
.
int64
)
or
isinstance
(
var
,
np
.
ndarray
)
and
var
.
shape
==
(
1
,
))
var
,
float
)
or
(
isinstance
(
var
,
np
.
ndarray
)
and
var
.
shape
==
(
1
,
))
def
_is_number_or_matrix_
(
var
):
def
_is_number_or_matrix_
(
var
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录