Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle, in sync with the fork source)
Commit 7c8c9dc9: fix unit test cases
Author: peizhilin
Authored: Nov 22, 2018
Parent: bef475c9
Showing 43 changed files with 138 additions and 89 deletions (+138 −89)
cmake/generic.cmake +9 −2
paddle/fluid/framework/details/all_reduce_op_handle.cc +2 −2
paddle/fluid/framework/details/all_reduce_op_handle.h +3 −3
paddle/fluid/framework/details/broadcast_op_handle.cc +1 −1
paddle/fluid/framework/details/broadcast_op_handle.h +3 −3
paddle/fluid/framework/details/broadcast_op_handle_test.h +6 −6
paddle/fluid/framework/details/build_strategy.cc +2 −2
paddle/fluid/framework/details/build_strategy.h +2 −2
paddle/fluid/framework/details/data_balance_op_handle.cc +1 −1
paddle/fluid/framework/details/data_balance_op_handle.h +2 −2
paddle/fluid/framework/details/fused_broadcast_op_handle.h +2 −2
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc +2 −2
paddle/fluid/framework/details/multi_devices_graph_pass.cc +8 −8
paddle/fluid/framework/details/multi_devices_graph_pass.h +1 −1
paddle/fluid/framework/details/reduce_op_handle.cc +1 −1
paddle/fluid/framework/details/reduce_op_handle.h +2 −2
paddle/fluid/framework/details/reduce_op_handle_test.cc +6 −6
paddle/fluid/framework/ir/is_test_pass_tester.cc +4 −1
paddle/fluid/inference/analysis/analyzer_tester.cc +2 −1
paddle/fluid/inference/api/helper.h +1 −4
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc +0 −1
paddle/fluid/inference/tests/book/test_inference_nlp.cc +0 −1
paddle/fluid/inference/tests/test_helper.h +1 −0
paddle/fluid/operators/beam_search_op_test.cc +8 −8
paddle/fluid/operators/distributed/grpc_client.cc +1 −1
paddle/fluid/operators/distributed/grpc_serde.cc +1 −1
paddle/fluid/operators/distributed/grpc_serde.h +2 −1
paddle/fluid/operators/distributed/sendrecvop_utils.cc +1 −1
paddle/fluid/operators/distributed/sendrecvop_utils.h +1 −1
paddle/fluid/operators/math/cpu_vec_test.cc +1 −1
paddle/fluid/operators/math/im2col_test.cc +1 −1
paddle/fluid/operators/math/jit_kernel_test.cc +1 −1
paddle/fluid/platform/cudnn_helper.h +1 −1
paddle/fluid/platform/dynload/cudnn.h +7 −7
paddle/fluid/platform/gpu_info.cc +4 −1
paddle/fluid/platform/stream_callback_manager.h +1 −1
paddle/legacy/cuda/include/hl_warpctc_wrap.h +2 −1
paddle/legacy/cuda/src/hl_cuda_device.cc +4 −0
paddle/legacy/utils/ThreadLocal.h +3 −1
paddle/legacy/utils/Util.h +27 −0
paddle/testing/CMakeLists.txt +4 −2
python/paddle/fluid/metrics.py +2 −2
python/paddle/fluid/tests/unittests/CMakeLists.txt +5 −3
cmake/generic.cmake
@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
     set(oneValueArgs "")
     set(multiValueArgs SRCS DEPS ARGS)
     cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if(WIN32)
+      list(APPEND win32_deps shlwapi)
+      if("${cc_test_DEPS};" MATCHES "python;")
+        list(REMOVE_ITEM cc_test_DEPS python)
+        list(APPEND win32_deps ${PYTHON_LIBRARIES})
+      endif()
+    endif(WIN32)
     add_executable(${TARGET_NAME} ${cc_test_SRCS})
     target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     if(WIN32)
-      target_link_libraries(${TARGET_NAME} shlwapi)
+      target_link_libraries(${TARGET_NAME} ${win32_deps})
     endif(WIN32)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     add_test(NAME ${TARGET_NAME}
@@ -679,7 +686,7 @@ function(py_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS ENVS)
     cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+             COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
              FLAGS_cpu_deterministic=true
              PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
              ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
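The py_test change above replaces the bare `env` program with `${CMAKE_COMMAND} -E env`: `env` is a POSIX utility that does not exist on Windows, while CMake's own `-E env` mode sets environment variables portably on every platform CMake runs on. A minimal sketch of the resulting invocation (the script name and paths here are placeholders, not from this commit):

    cmake -E env FLAGS_cpu_deterministic=true PYTHONPATH=/build/python python -u test_example.py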
paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -23,7 +23,7 @@ namespace paddle {
 namespace framework {
 namespace details {

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
                                      const std::vector<Scope *> &local_scopes,
                                      const std::vector<platform::Place> &places,
@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
   }

   if (platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
     int dtype = -1;
     size_t numel = 0;
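This `#ifdef PADDLE_WITH_CUDA` to `#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)` substitution is the recurring change of the commit: these multi-device handles rely on NCCL, which is not available on Windows, so a Windows CUDA build must compile the non-NCCL branch even though PADDLE_WITH_CUDA is defined. A minimal sketch of the guard pattern (only the macro names and <nccl.h> are real; `CommHandle` is a hypothetical alias for illustration):

    #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    // CUDA build on a non-Windows platform: NCCL types are available.
    #include <nccl.h>
    using CommHandle = ncclComm_t;
    #else
    // CPU-only build, or a Windows CUDA build where NCCL does not exist:
    // fall back to a stub so dependent code still compiles.
    using CommHandle = void*;
    #endif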
paddle/fluid/framework/details/all_reduce_op_handle.h
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -29,7 +29,7 @@ namespace framework {
 namespace details {

 struct AllReduceOpHandle : public OpHandleBase {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                     const std::vector<platform::Place> &places,
                     const platform::NCCLContextMap *ctxs);
@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
  private:
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
 #endif
 };
paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
         });
       }
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       VarHandle *out_handle = nullptr;
       int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
       std::vector<std::function<void()>> broadcast_calls;
paddle/fluid/framework/details/broadcast_op_handle.h
@@ -24,7 +24,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -34,7 +34,7 @@ namespace details {
 struct BroadcastOpHandle : public OpHandleBase {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   BroadcastOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                     const std::vector<platform::Place> &places,
                     const platform::NCCLContextMap *nccl_ctxs)
@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
 #endif
paddle/fluid/framework/details/broadcast_op_handle_test.h
@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
   std::vector<std::unique_ptr<ir::Node>> nodes_;
   std::vector<p::Place> place_list_;
   bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
     for (size_t j = 0; j < ctxs_.size(); ++j) {
       ctxs_[j]->Wait();
     }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     if (nccl_ctxs_) {
       nccl_ctxs_->WaitAll();
     }
@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
   void InitCtxOnGpu(bool use_gpu) {
     use_gpu_ = use_gpu;
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       int count = p::GetCUDADeviceCount();
       if (count <= 1) {
         LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
         place_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       nccl_ctxs_.reset(nullptr);
 #endif
     }
@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
     nodes_.emplace_back(
         ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                          place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                          place_list_, nccl_ctxs_.get());
 #else
paddle/fluid/framework/details/build_strategy.cc
@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &param_names,
     const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
 #else
     const bool use_cuda) const {
@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
       pass->Erase("local_scopes");
       pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
                                                     &local_scopes);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
       pass->Erase("nccl_ctxs");
       pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
paddle/fluid/framework/details/build_strategy.h
@@ -23,7 +23,7 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -98,7 +98,7 @@ struct BuildStrategy {
       const std::string &loss_var_name,
       const std::unordered_set<std::string> &param_names,
       const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
 #else
       const bool use_cuda) const;
paddle/fluid/framework/details/data_balance_op_handle.cc
@@ -20,7 +20,7 @@ namespace paddle {
 namespace framework {
 namespace details {

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 DataBalanceOpHandle::DataBalanceOpHandle(
     ir::Node *node, const std::vector<Scope *> &local_scopes,
     const std::vector<platform::Place> &places,
paddle/fluid/framework/details/data_balance_op_handle.h
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -29,7 +29,7 @@ namespace details {
 struct DataBalanceOpHandle : public OpHandleBase {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                       const std::vector<platform::Place> &places,
                       const platform::NCCLContextMap *ctxs);
paddle/fluid/framework/details/fused_broadcast_op_handle.h
@@ -25,7 +25,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -35,7 +35,7 @@ namespace details {
 struct FusedBroadcastOpHandle : public BroadcastOpHandle {
  public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   FusedBroadcastOpHandle(ir::Node *node,
                          const std::vector<Scope *> local_scopes,
                          const std::vector<platform::Place> &places,
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
     nodes_.emplace_back(
         ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new FusedBroadcastOpHandle(
           nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
       PADDLE_THROW("CUDA is not supported.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_ = new FusedBroadcastOpHandle(
           nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
   places_ = Get<const std::vector<platform::Place>>(kPlaces);
   local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes);
   strategy_ = Get<const BuildStrategy>(kStrategy);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs");
 #endif
@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
     }
   }
   bool use_gpu = false;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   use_gpu = nccl_ctxs_ != nullptr;
 #endif
@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
 void MultiDevSSAGraphBuilder::SetCommunicationContext(
     OpHandleBase *op_handle, const platform::Place &p) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   if (nccl_ctxs_ == nullptr) {
     op_handle->SetDeviceContext(p,
                                 platform::DeviceContextPool::Instance().Get(p));
@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
 void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
                                                 const std::string &p_name,
                                                 size_t src_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   auto *op_handle = new BroadcastOpHandle(
       result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_);
@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
 void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp(
     ir::Graph *result,
     const std::vector<std::unordered_set<std::string>> &bcast_varnames) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   auto *op_handle = new FusedBroadcastOpHandle(
       result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_);
@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
 void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
                                                 const std::string &og) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
       result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
 void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
     ir::Graph *result, const std::vector<std::string> &datas) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle(
       result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
 VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
                                                    const std::string &og,
                                                    int dst_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle(
       result->CreateEmptyNode("reduce", ir::Node::Type::kOperation),
       local_scopes_, places_, nccl_ctxs_));
paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                            size_t device_id) const;
   void Init() const;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   mutable platform::NCCLContextMap *nccl_ctxs_;
 #endif
paddle/fluid/framework/details/reduce_op_handle.cc
@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
       }
     });
   } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     auto pre_in = pre_in_var->Get<framework::LoDTensor>();
     VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
     VariableVisitor::GetMutableTensor(out_var).mutable_data(
paddle/fluid/framework/details/reduce_op_handle.h
@@ -23,7 +23,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   const platform::NCCLContextMap *nccl_ctxs_;
   ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                  const std::vector<platform::Place> &places,
paddle/fluid/framework/details/reduce_op_handle_test.cc
@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
   std::vector<p::Place> gpu_list_;
   std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
     for (size_t j = 0; j < ctxs_.size(); ++j) {
       ctxs_[j]->Wait();
     }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     if (nccl_ctxs_) {
       nccl_ctxs_->WaitAll();
     }
@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
   void InitCtxOnGpu(bool use_gpu) {
     use_gpu_ = use_gpu;
     if (use_gpu) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       int count = p::GetCUDADeviceCount();
       if (count <= 1) {
         LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
         gpu_list_.push_back(p);
         ctxs_.emplace_back(new p::CPUDeviceContext(p));
       }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       nccl_ctxs_.reset(nullptr);
 #endif
     }
@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
     nodes.emplace_back(new ir::Node("node"));
     if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                           gpu_list_, nccl_ctxs_.get()));
 #else
       PADDLE_THROW("CUDA is not support.");
 #endif
     } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                           gpu_list_, nccl_ctxs_.get()));
 #else
paddle/fluid/framework/ir/is_test_pass_tester.cc
@@ -15,7 +15,10 @@
 #include "paddle/fluid/framework/ir/is_test_pass.h"
 #include <gtest/gtest.h>
+#ifdef _WIN32
+#undef FALSE
+#undef TRUE
+#endif

 namespace paddle {
 namespace framework {
 namespace ir {
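The `#undef`s are presumably needed because Windows headers (minwindef.h, pulled in transitively on MSVC builds) define TRUE and FALSE as object-like macros, which breaks any identifier with those names in the test or in generated code. A self-contained sketch of the collision and the fix:

    #ifdef _WIN32
    #include <windows.h>  // brings in: #define FALSE 0, #define TRUE 1
    #undef FALSE          // restore the names for use as ordinary identifiers
    #undef TRUE
    #endif

    // Without the #undefs this would expand to 'enum class Tristate { 0, 1, ... }'
    // on Windows and fail to compile.
    enum class Tristate { FALSE, TRUE, UNKNOWN };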
paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -19,6 +19,7 @@
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
+#include "paddle/fluid/platform/port.h"

 namespace paddle {
 namespace inference {
@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
                                 0.000932706};
   const size_t num_elements = outputs.front().data.length() / sizeof(float);
   // The outputs' buffers are in CPU memory.
-  for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
+  for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) {
     LOG(INFO) << "data: "
               << static_cast<float*>(outputs.front().data.data())[i];
     PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
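The cast matters because of data-model differences. On LP64 Linux, `5UL` already has the same type as size_t, but on LLP64 Windows `unsigned long` is 32 bits while `size_t` is 64, so `std::min(5UL, num_elements)` hands `std::min<T>(T, T)` two different argument types and template deduction fails. A sketch of the portable form (the function name is hypothetical):

    #include <algorithm>
    #include <cstddef>

    size_t clamp_to_five(size_t n) {
      // return std::min(5UL, n);  // fails on MSVC x64: conflicting deduced types
      return std::min(static_cast<size_t>(5), n);  // one type on every platform
    }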
paddle/fluid/inference/api/helper.h
@@ -15,10 +15,6 @@
 #pragma once

 #include <glog/logging.h>
-#if !defined(_WIN32)
-#include <sys/time.h>
-#else
-#endif
 #include <algorithm>
 #include <chrono>  // NOLINT
@@ -28,6 +24,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"

 namespace paddle {
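Several files in this commit drop a direct `#include <sys/time.h>` (a POSIX-only header) in favor of `paddle/fluid/platform/port.h`, which centralizes the platform shims. A sketch of the kind of helper such a header can provide; this is illustrative, not the actual contents of port.h:

    #include <chrono>

    // Portable microsecond timer: <chrono> works on Windows and POSIX alike,
    // unlike gettimeofday() from <sys/time.h>.
    inline double GetCurrentUS() {
      auto now = std::chrono::steady_clock::now().time_since_epoch();
      return std::chrono::duration<double, std::micro>(now).count();
    }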
paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <gflags/gflags.h>
-#include <sys/time.h>
 #include <time.h>
 #include <algorithm>
 #include <fstream>
paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <sys/time.h>
 #include <time.h>
 #include <fstream>
 #include <thread>  // NOLINT
paddle/fluid/inference/tests/test_helper.h
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(use_mkldnn);
paddle/fluid/operators/beam_search_op_test.cc
@@ -30,23 +30,23 @@ using std::endl;
 void CreateInput(LoDTensor* ids, LoDTensor* scores) {
   LoD lod;
-  vector<size_t> level0({0, 2, 4});
-  vector<size_t> level1({0, 1, 2, 3, 4});
+  vector<size_t> level0{0, 2, 4};
+  vector<size_t> level1{0, 1, 2, 3, 4};
   lod.push_back(level0);
   lod.push_back(level1);
   ids->set_lod(lod);
   scores->set_lod(lod);

-  auto dims = framework::make_ddim(vector<int64_t>({4, 3}));
+  auto dims = framework::make_ddim(vector<int64_t>{4, 3});
   ids->Resize(dims);
   scores->Resize(dims);
   CPUPlace place;

   auto* ids_data = ids->mutable_data<int64_t>(place);
   auto* scores_data = scores->mutable_data<float>(place);
-  vector<int64_t> _ids({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1});
-  vector<float> _scores(
-      {0.5, 0.3, 0.2, 0.6, 0.3, 0.1, 0.9, 0.5, 0.1, 0.7, 0.5, 0.1});
+  vector<int64_t> _ids{4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1};
+  vector<float> _scores{0.5f, 0.3f, 0.2f, 0.6f, 0.3f, 0.1f,
+                        0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f};

   for (int i = 0; i < 12; i++) {
     ids_data[i] = _ids[i];
@@ -79,8 +79,8 @@ TEST(DISABLED_beam_search_op, run) {
   ASSERT_EQ(sids.lod(), sscores.lod());

-  vector<int> tids({4, 2, 3, 8});
-  vector<float> tscores({0.5, 0.6, 0.9, 0.7});
+  vector<int> tids{4, 2, 3, 8};
+  vector<float> tscores{0.5f, 0.6f, 0.9f, 0.7f};

   for (int i = 0; i < 4; i++) {
     ASSERT_EQ(tids[i], sids.data<int64_t>()[i]);
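Two small fixes are folded together here: `vector<T> v({...})` becomes direct list-initialization `vector<T> v{...}`, and the float vectors gain `f` suffixes. The suffixes are the substantive part: inside a braced initializer list, a double-to-float conversion is narrowing unless the constant converts exactly, and C++11 makes narrowing in a braced list ill-formed (MSVC rejects it; older GCC only warned). A minimal sketch:

    #include <vector>

    // 0.3 has no exact float representation, so the commented-out line
    // performs a narrowing double -> float conversion inside a braced list.
    std::vector<float> ok{0.5f, 0.3f};   // exact float literals
    // std::vector<float> bad{0.5, 0.3}; // ill-formed: 0.3 narrows inexactly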
paddle/fluid/operators/distributed/grpc_client.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <sys/time.h>
 #include <limits>

 #include "glog/logging.h"  // For VLOG
@@ -20,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/grpc_client.h"
 #include "paddle/fluid/operators/distributed/grpc_serde.h"
 #include "paddle/fluid/operators/distributed/request_handler.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
paddle/fluid/operators/distributed/grpc_serde.cc
@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef PADDLE_WITH_CUDA
 #include <nccl.h>
 #endif
-#include <sys/time.h>
 #include <thread>  // NOLINT

 #include "google/protobuf/io/coded_stream.h"
@@ -26,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/grpc_variable_response.h"
 #include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
paddle/fluid/operators/distributed/grpc_serde.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once

-#include <sys/time.h>
+#include <iostream>
 #include <string>
 #include <vector>
@@ -25,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
+#include "paddle/fluid/platform/port.h"

 #include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
 #include "paddle/fluid/operators/distributed/send_recv.pb.h"
paddle/fluid/operators/distributed/sendrecvop_utils.cc
@@ -15,12 +15,12 @@ limitations under the License. */
 #ifdef PADDLE_WITH_CUDA
 #include <nccl.h>
 #endif
-#include <sys/time.h>
 #include <thread>  // NOLINT

 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
 #include "paddle/fluid/operators/distributed/variable_response.h"
+#include "paddle/fluid/platform/port.h"

 namespace paddle {
 namespace operators {
paddle/fluid/operators/distributed/sendrecvop_utils.h
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once

-#include <sys/time.h>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -24,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/platform/port.h"

 #include "paddle/fluid/operators/distributed/send_recv.pb.h"
paddle/fluid/operators/math/cpu_vec_test.cc
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <sys/time.h>
 #include <cmath>
 #include <cstring>
 #include <random>
@@ -22,6 +21,7 @@ limitations under the License. */
 #include "gtest/gtest.h"
 #include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/platform/port.h"

 inline double GetCurrentUS() {
   struct timeval time;
paddle/fluid/operators/math/im2col_test.cc
@@ -14,9 +14,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/im2col.h"
 #include <gtest/gtest.h>
-#include <sys/time.h>
 #include <vector>
 #include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
+#include "paddle/fluid/platform/port.h"

 template <typename DeviceContext, typename Place>
 void testIm2col() {
paddle/fluid/operators/math/jit_kernel_test.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/math/jit_kernel.h"
-#include <sys/time.h>
 #include <cmath>    // for exp
 #include <cstring>  // for memcpy
 #include <random>
@@ -22,6 +21,7 @@ limitations under the License. */
 #include "gflags/gflags.h"
 #include "glog/logging.h"
 #include "gtest/gtest.h"
+#include "paddle/fluid/platform/port.h"
 #ifdef PADDLE_WITH_MKLML
 #include "paddle/fluid/platform/dynload/mklml.h"
paddle/fluid/platform/cudnn_helper.h
@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
 #define CUDNN_ENFORCE(condition)                                     \
   do {                                                               \
-    cudnnStatus_t status = condition;                                \
+    auto status = condition;                                         \
     if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) {                  \
       PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
     }                                                                \
paddle/fluid/platform/dynload/cudnn.h
@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
 #else

-#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)     \
-  struct DynLoad__##__name {                        \
-    template <typename... Args>                     \
-    inline cudnnStatus_t operator()(Args... args) { \
-      return ::__name(args...);                     \
-    }                                               \
-  };                                                \
+#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
+  struct DynLoad__##__name {                    \
+    template <typename... Args>                 \
+    inline auto operator()(Args... args) {      \
+      return ::__name(args...);                 \
+    }                                           \
+  };                                            \
   extern DynLoad__##__name __name

 #endif
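Switching the wrapper's return type from the hard-coded `cudnnStatus_t` to `auto` (C++14 return-type deduction) lets one macro wrap every cuDNN symbol, including those that do not return a status code (cudnnGetVersion returns size_t, cudnnGetErrorString returns const char*). A self-contained sketch of the same forwarding idea, using a plain function in place of a dynamically loaded symbol:

    #include <cstddef>

    inline std::size_t get_version() { return 7005; }  // stand-in for ::cudnnGetVersion

    struct DynLoad_get_version {
      template <typename... Args>
      inline auto operator()(Args... args) {  // deduces size_t here, a status enum elsewhere
        return get_version(args...);
      }
    };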
paddle/fluid/platform/gpu_info.cc
@@ -19,7 +19,10 @@ limitations under the License. */
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"

-DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
+// fraction_of_gpu_memory_to_use cannot be too high on windows,
+// since the win32 graphic sub-system can occupy some GPU memory
+// which may lead to insufficient memory left for paddle
+DEFINE_double(fraction_of_gpu_memory_to_use, 0.5,
               "Allocate a trunk of gpu memory that is this fraction of the "
               "total gpu memory size. Future memory usage will be allocated "
               "from the trunk. If the trunk doesn't have enough gpu memory, "
paddle/fluid/platform/stream_callback_manager.h
@@ -18,7 +18,7 @@
 #include <cuda_runtime.h>
 #include <functional>
 #include <memory>
-#include "ThreadPool.h"
+#include <ThreadPool.h>
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
paddle/legacy/cuda/include/hl_warpctc_wrap.h
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#ifndef _WIN32
 #ifndef HL_WARPCTC_WRAP_H_
 #define HL_WARPCTC_WRAP_H_
 #include "ctc.h"
 #include "hl_base.h"
@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
                                           size_t* bytes);

 #endif  // HL_WARPCTC_WRAP_H_
+#endif
paddle/legacy/cuda/src/hl_cuda_device.cc
@@ -132,11 +132,15 @@ inline pid_t gettid() {
   uint64_t tid;
   pthread_threadid_np(NULL, &tid);
 #else
+#ifndef _WIN32
 #ifndef __NR_gettid
 #define __NR_gettid 224
 #endif
   pid_t tid = syscall(__NR_gettid);
 #endif
+#else  // _WIN32
+  pid_t tid = _getpid();
+#endif  // _WIN32
   CHECK_NE((int)tid, -1);
   return tid;
 }
paddle/legacy/utils/ThreadLocal.h
@@ -14,10 +14,12 @@ limitations under the License. */
 #pragma once

+#ifndef _WIN32
 #include <pthread.h>
 #include <sys/syscall.h>
-#include <sys/types.h>
 #include <unistd.h>
+#endif
+#include <sys/types.h>
 #include <map>
 #include <mutex>
 #include <random>
paddle/legacy/utils/Util.h
@@ -14,7 +14,9 @@ limitations under the License. */
 #pragma once

+#ifndef _WIN32
 #include <sys/syscall.h>  // for syscall()
+#endif
 #include <sys/types.h>
 #include <algorithm>
 #include <cmath>
@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
 }
 #endif

+#ifdef _WIN32
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#include <windows.h>
+
+template <typename T>
+inline int __builtin_clz(const T& value) {
+  DWORD leadning_zero = 0;
+  if (_BitScanReverse(&leadning_zero, value)) {
+    return static_cast<int>(sizeof(T) * 8 - leadning_zero);
+  } else {
+    return static_cast<int>(0);
+  }
+}
+
+inline int __builtin_clzl(const unsigned long& value) {
+  return __builtin_clz(value);
+}
+
+inline int __builtin_clzll(const unsigned long long& value) {
+  return __builtin_clz(value);
+}
+
+#define pid_t int
+#endif
+
 /**
  * Loop over the elements in a container
  * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
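MSVC has no `__builtin_clz`, so the commit shims it on top of `_BitScanReverse`, which reports the zero-based index of the highest set bit. One arithmetic observation on the shim (not part of the commit): GCC's `__builtin_clz(1)` on a 32-bit int is 31, whereas `sizeof(T) * 8 - index` yields 32 − 0 = 32, so the shim appears to return one more than the GCC builtin for every input; a drop-in match would subtract the index from `sizeof(T) * 8 - 1`. A sketch of the GCC-compatible form, assuming MSVC's <intrin.h>:

    #include <intrin.h>

    inline int clz32(unsigned long v) {
      unsigned long index = 0;  // zero-based position of the highest set bit
      if (_BitScanReverse(&index, v)) {
        return 31 - static_cast<int>(index);  // __builtin_clz(1) == 31, as on GCC
      }
      return 32;  // GCC leaves clz(0) undefined; return a total value instead
    }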
paddle/testing/CMakeLists.txt
@@ -3,8 +3,10 @@
 if(WITH_TESTING)
   add_library(paddle_test_main STATIC TestMain.cpp)
   add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies})
-  add_library(paddle_test_util STATIC TestUtil.cpp)
-  add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
+  if(NOT WIN32)
+    add_library(paddle_test_util STATIC TestUtil.cpp)
+    add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
+  endif(NOT WIN32)
   if(NOT MOBILE_INFERENCE)
     cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags)
   endif()
python/paddle/fluid/metrics.py
@@ -46,8 +46,8 @@ def _is_numpy_(var):
 def _is_number_(var):
-    return isinstance(var, int) or isinstance(var, float) or (isinstance(
-        var, np.ndarray) and var.shape == (1, ))
+    return isinstance(var, int) or isinstance(var, np.int64) or isinstance(
+        var, float) or (isinstance(var, np.ndarray) and var.shape == (1, ))


 def _is_number_or_matrix_(var):
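The added `np.int64` check is presumably needed because NumPy scalar integers do not subclass Python's `int`: a count that comes back from an operator as `np.int64` is numerically a plain number but previously failed `_is_number_`.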
python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -23,9 +23,11 @@ if(NOT WITH_DISTRIBUTE)
     LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification)
 endif(NOT WITH_DISTRIBUTE)

-if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
-    LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
-endif()
+if(WITH_GPU)
+    if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
+        LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
+    endif()
+endif(WITH_GPU)

 list(REMOVE_ITEM TEST_OPS test_seq_concat_op)  # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op)  # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
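Wrapping the cuDNN version test in `if(WITH_GPU)` avoids evaluating `${CUDNN_MAJOR_VERSION}` in CPU-only configures, where the variable is presumably never set and the bare `if(... VERSION_LESS 7)` condition would be malformed; that reading is an inference from the change, not stated in the commit message.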