Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
1dad8cea
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1dad8cea
编写于
11月 20, 2020
作者:
G
gongweibao
提交者:
GitHub
11月 20, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix gpu memory allocation bug. (#28703)
上级
91bab752
变更
20
显示空白变更内容
内联
并排
Showing
20 changed files
with
80 additions
and
61 deletions
+80
-61
paddle/fluid/framework/data_device_transform_test.cu
paddle/fluid/framework/data_device_transform_test.cu
+1
-1
paddle/fluid/framework/lod_tensor_test.cu
paddle/fluid/framework/lod_tensor_test.cu
+2
-2
paddle/fluid/framework/operator_test.cc
paddle/fluid/framework/operator_test.cc
+10
-10
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+44
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+1
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-1
paddle/fluid/inference/io.cc
paddle/fluid/inference/io.cc
+1
-2
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+1
-0
paddle/fluid/operators/benchmark/op_tester.cc
paddle/fluid/operators/benchmark/op_tester.cc
+1
-1
paddle/fluid/operators/fused/fusion_group_op_test.cc
paddle/fluid/operators/fused/fusion_group_op_test.cc
+1
-1
paddle/fluid/platform/device_code_test.cc
paddle/fluid/platform/device_code_test.cc
+2
-2
paddle/fluid/platform/init.cc
paddle/fluid/platform/init.cc
+4
-29
paddle/fluid/platform/init.h
paddle/fluid/platform/init.h
+2
-2
paddle/fluid/platform/init_test.cc
paddle/fluid/platform/init_test.cc
+3
-3
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
paddle/fluid/train/demo/demo_trainer.cc
paddle/fluid/train/demo/demo_trainer.cc
+1
-1
paddle/fluid/train/imdb_demo/demo_trainer.cc
paddle/fluid/train/imdb_demo/demo_trainer.cc
+1
-1
paddle/fluid/train/test_train_recognize_digits.cc
paddle/fluid/train/test_train_recognize_digits.cc
+1
-1
paddle/testing/paddle_gtest_main.cc
paddle/testing/paddle_gtest_main.cc
+1
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
未找到文件。
paddle/fluid/framework/data_device_transform_test.cu
浏览文件 @
1dad8cea
...
@@ -103,7 +103,7 @@ static void BuildVar(const std::string& param_name,
...
@@ -103,7 +103,7 @@ static void BuildVar(const std::string& param_name,
}
}
TEST
(
Operator
,
CPUtoGPU
)
{
TEST
(
Operator
,
CPUtoGPU
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
Scope
scope
;
paddle
::
framework
::
Scope
scope
;
paddle
::
platform
::
CPUPlace
cpu_place
;
paddle
::
platform
::
CPUPlace
cpu_place
;
...
...
paddle/fluid/framework/lod_tensor_test.cu
浏览文件 @
1dad8cea
...
@@ -26,7 +26,7 @@ __global__ void test(size_t* a, int size) {
...
@@ -26,7 +26,7 @@ __global__ void test(size_t* a, int size) {
}
}
TEST
(
LoD
,
data
)
{
TEST
(
LoD
,
data
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
LoD
lod
{{
0
,
1
,
2
}};
paddle
::
framework
::
LoD
lod
{{
0
,
1
,
2
}};
lod
.
push_back
({
0
,
2
,
4
,
5
});
lod
.
push_back
({
0
,
2
,
4
,
5
});
...
@@ -42,7 +42,7 @@ TEST(LoD, data) {
...
@@ -42,7 +42,7 @@ TEST(LoD, data) {
}
}
TEST
(
LoDTensor
,
LoDInGPU
)
{
TEST
(
LoDTensor
,
LoDInGPU
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
LoDTensor
lod_tensor
;
paddle
::
framework
::
LoDTensor
lod_tensor
;
paddle
::
platform
::
CUDAPlace
place
(
0
);
paddle
::
platform
::
CUDAPlace
place
(
0
);
...
...
paddle/fluid/framework/operator_test.cc
浏览文件 @
1dad8cea
...
@@ -76,7 +76,7 @@ REGISTER_OP_WITHOUT_GRADIENT(test_operator,
...
@@ -76,7 +76,7 @@ REGISTER_OP_WITHOUT_GRADIENT(test_operator,
paddle
::
framework
::
OpWithoutKernelCheckerMaker
);
paddle
::
framework
::
OpWithoutKernelCheckerMaker
);
TEST
(
OperatorBase
,
all
)
{
TEST
(
OperatorBase
,
all
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"test_operator"
);
op_desc
.
set_type
(
"test_operator"
);
BuildVar
(
"input"
,
{
"IN1"
},
op_desc
.
add_inputs
());
BuildVar
(
"input"
,
{
"IN1"
},
op_desc
.
add_inputs
());
...
@@ -228,7 +228,7 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(
...
@@ -228,7 +228,7 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(
// test with single input
// test with single input
TEST
(
OpKernel
,
all
)
{
TEST
(
OpKernel
,
all
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"op_with_kernel"
);
op_desc
.
set_type
(
"op_with_kernel"
);
BuildVar
(
"x"
,
{
"IN1"
},
op_desc
.
add_inputs
());
BuildVar
(
"x"
,
{
"IN1"
},
op_desc
.
add_inputs
());
...
@@ -268,7 +268,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
...
@@ -268,7 +268,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
// test with multi inputs
// test with multi inputs
TEST
(
OpKernel
,
multi_inputs
)
{
TEST
(
OpKernel
,
multi_inputs
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"op_multi_inputs_with_kernel"
);
op_desc
.
set_type
(
"op_multi_inputs_with_kernel"
);
...
@@ -419,7 +419,7 @@ REGISTER_OP_CPU_KERNEL(indicate_other_data_type_test,
...
@@ -419,7 +419,7 @@ REGISTER_OP_CPU_KERNEL(indicate_other_data_type_test,
paddle
::
platform
::
CPUDeviceContext
,
int
>
);
paddle
::
platform
::
CPUDeviceContext
,
int
>
);
TEST
(
IndicateVarDataTypeTest
,
lodtensor
)
{
TEST
(
IndicateVarDataTypeTest
,
lodtensor
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"indicate_lod_tensor_data_type_test"
);
op_desc
.
set_type
(
"indicate_lod_tensor_data_type_test"
);
BuildVar
(
"LoDTensor"
,
{
"lodtensor_1"
},
op_desc
.
add_inputs
());
BuildVar
(
"LoDTensor"
,
{
"lodtensor_1"
},
op_desc
.
add_inputs
());
...
@@ -447,7 +447,7 @@ TEST(IndicateVarDataTypeTest, lodtensor) {
...
@@ -447,7 +447,7 @@ TEST(IndicateVarDataTypeTest, lodtensor) {
}
}
TEST
(
IndicateVarDataTypeTest
,
selectedrows
)
{
TEST
(
IndicateVarDataTypeTest
,
selectedrows
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"indicate_selected_rows_data_type_test"
);
op_desc
.
set_type
(
"indicate_selected_rows_data_type_test"
);
BuildVar
(
"SelectedRows"
,
{
"selected_rows_1"
},
op_desc
.
add_inputs
());
BuildVar
(
"SelectedRows"
,
{
"selected_rows_1"
},
op_desc
.
add_inputs
());
...
@@ -474,7 +474,7 @@ TEST(IndicateVarDataTypeTest, selectedrows) {
...
@@ -474,7 +474,7 @@ TEST(IndicateVarDataTypeTest, selectedrows) {
}
}
TEST
(
IndicateVarDataTypeTest
,
other
)
{
TEST
(
IndicateVarDataTypeTest
,
other
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"indicate_other_data_type_test"
);
op_desc
.
set_type
(
"indicate_other_data_type_test"
);
BuildVar
(
"Other"
,
{
"lod_rank_table_1"
},
op_desc
.
add_inputs
());
BuildVar
(
"Other"
,
{
"lod_rank_table_1"
},
op_desc
.
add_inputs
());
...
@@ -504,7 +504,7 @@ TEST(IndicateVarDataTypeTest, other) {
...
@@ -504,7 +504,7 @@ TEST(IndicateVarDataTypeTest, other) {
}
}
TEST
(
ExecutionContextAttrAndInOut
,
new_api
)
{
TEST
(
ExecutionContextAttrAndInOut
,
new_api
)
{
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"test_operator"
);
op_desc
.
set_type
(
"test_operator"
);
BuildVar
(
"input"
,
{
"IN1"
},
op_desc
.
add_inputs
());
BuildVar
(
"input"
,
{
"IN1"
},
op_desc
.
add_inputs
());
...
@@ -596,7 +596,7 @@ REGISTER_OP_CPU_KERNEL(set_lod_level_test,
...
@@ -596,7 +596,7 @@ REGISTER_OP_CPU_KERNEL(set_lod_level_test,
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
void
SetGetLoDLevelTestMain
(
std
::
string
op_type
)
{
void
SetGetLoDLevelTestMain
(
std
::
string
op_type
)
{
paddle
::
framework
::
InitDevices
(
false
,
{});
paddle
::
framework
::
InitDevices
({});
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
op_type
);
op_desc
.
set_type
(
op_type
);
BuildVar
(
"X"
,
{
"x.0"
},
op_desc
.
add_inputs
());
BuildVar
(
"X"
,
{
"x.0"
},
op_desc
.
add_inputs
());
...
@@ -701,7 +701,7 @@ REGISTER_OP_CPU_KERNEL(op_without_unused_var,
...
@@ -701,7 +701,7 @@ REGISTER_OP_CPU_KERNEL(op_without_unused_var,
TEST
(
OpWithUnusedVar
,
all
)
{
TEST
(
OpWithUnusedVar
,
all
)
{
// enable the unused_var_check
// enable the unused_var_check
FLAGS_enable_unused_var_check
=
true
;
FLAGS_enable_unused_var_check
=
true
;
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"op_with_unused_var"
);
op_desc
.
set_type
(
"op_with_unused_var"
);
BuildVar
(
"X"
,
{
"X"
},
op_desc
.
add_inputs
());
BuildVar
(
"X"
,
{
"X"
},
op_desc
.
add_inputs
());
...
@@ -726,7 +726,7 @@ TEST(OpWithoutUnusedVar, all) {
...
@@ -726,7 +726,7 @@ TEST(OpWithoutUnusedVar, all) {
// enable the unused_var_check
// enable the unused_var_check
FLAGS_enable_unused_var_check
=
true
;
FLAGS_enable_unused_var_check
=
true
;
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
paddle
::
framework
::
proto
::
OpDesc
op_desc
;
op_desc
.
set_type
(
"op_without_unused_var"
);
op_desc
.
set_type
(
"op_without_unused_var"
);
BuildVar
(
"X"
,
{
"X"
},
op_desc
.
add_inputs
());
BuildVar
(
"X"
,
{
"X"
},
op_desc
.
add_inputs
());
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
1dad8cea
...
@@ -36,6 +36,10 @@ limitations under the License. */
...
@@ -36,6 +36,10 @@ limitations under the License. */
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
DECLARE_double
(
eager_delete_tensor_gb
);
DECLARE_double
(
eager_delete_tensor_gb
);
#ifdef WITH_GPERFTOOLS
#ifdef WITH_GPERFTOOLS
...
@@ -55,6 +59,10 @@ static std::once_flag gProfileOnce;
...
@@ -55,6 +59,10 @@ static std::once_flag gProfileOnce;
static
bool
gProfileStarted
=
false
;
static
bool
gProfileStarted
=
false
;
#endif
#endif
#ifdef PADDLE_WITH_CUDA
std
::
once_flag
p2p_init_flag
;
#endif
class
ParallelExecutorPrivate
{
class
ParallelExecutorPrivate
{
public:
public:
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
ParallelExecutorPrivate
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
@@ -458,6 +466,41 @@ bool ParallelExecutor::NeedCreateLocalExeScope() {
...
@@ -458,6 +466,41 @@ bool ParallelExecutor::NeedCreateLocalExeScope() {
return
executor
&&
executor
->
NeedCreateLocalExeScope
();
return
executor
&&
executor
->
NeedCreateLocalExeScope
();
}
}
// Enables CUDA peer-to-peer (P2P) access between every ordered pair of
// distinct GPU devices referenced by `places`.
//
// The function is a no-op when the build does not define PADDLE_WITH_CUDA,
// when `places` has at most one entry, or when any entry is not a GPU place.
// The pairwise setup itself is guarded by p2p_init_flag, so it is performed
// only once; subsequent eligible calls return without touching the devices.
//
// Failures are reported through the log and never raised to the caller: a
// pair that cannot be peered is simply left without P2P access.
//
// @param places  Places the executor will run on.
void InitP2P(const std::vector<platform::Place> &places) {
#ifdef PADDLE_WITH_CUDA
  // Eligibility is checked BEFORE std::call_once. A call that has nothing to
  // set up (single place, or a non-GPU place in the list) must not consume
  // the one-time flag; otherwise a later multi-GPU ParallelExecutor in the
  // same process would never get P2P enabled.
  if (places.size() <= 1) return;

  std::vector<int> devices;
  devices.reserve(places.size());
  for (const auto &place : places) {
    if (!is_gpu_place(place)) return;
    platform::CUDAPlace device = BOOST_GET_CONST(platform::CUDAPlace, place);
    devices.push_back(device.GetDeviceId());
  }

  std::call_once(p2p_init_flag, [&devices]() {
    for (int src : devices) {
      for (int dst : devices) {
        if (src == dst) continue;

        int can_access = -1;
        cudaError_t ret = cudaDeviceCanAccessPeer(&can_access, src, dst);
        if (ret != cudaSuccess || can_access != 1) {
          // Reset the runtime's last-error state so this handled failure is
          // not picked up by an unrelated cudaGetLastError() check later.
          if (ret != cudaSuccess) cudaGetLastError();
          LOG(WARNING) << "Cannot enable P2P access from " << src << " to "
                       << dst;
          continue;
        }

        // Peer access is enabled from the current device, so switch to `src`
        // for the duration of the call.
        platform::CUDADeviceGuard guard(src);
        ret = cudaDeviceEnablePeerAccess(dst, 0);
        if (ret == cudaErrorPeerAccessAlreadyEnabled) {
          // Already enabled (e.g. the same pair listed twice): harmless, but
          // the error state still has to be cleared.
          cudaGetLastError();
        } else if (ret != cudaSuccess) {
          cudaGetLastError();
          LOG(WARNING) << "Failed to enable P2P access from " << src << " to "
                       << dst << ": " << cudaGetErrorString(ret);
        }
      }
    }
    VLOG(1) << "init p2p";
  });
#endif
}
ParallelExecutor
::
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
ParallelExecutor
::
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
std
::
string
>
&
bcast_vars
,
const
std
::
vector
<
std
::
string
>
&
bcast_vars
,
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
...
@@ -470,6 +513,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
...
@@ -470,6 +513,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
PADDLE_ENFORCE
(
places
.
size
()
>
0
&&
!
is_xpu_place
(
places
[
0
]),
PADDLE_ENFORCE
(
places
.
size
()
>
0
&&
!
is_xpu_place
(
places
[
0
]),
platform
::
errors
::
Unavailable
(
platform
::
errors
::
Unavailable
(
"XPU is not supported in ParallelExecutor"
));
"XPU is not supported in ParallelExecutor"
));
InitP2P
(
places
);
ir
::
InitReaderQueueDeviceCount
(
graph
,
*
(
member_
->
global_scope_
),
ir
::
InitReaderQueueDeviceCount
(
graph
,
*
(
member_
->
global_scope_
),
member_
->
places_
.
size
());
member_
->
places_
.
size
());
member_
->
use_cuda_
=
exec_strategy
.
use_cuda_
;
member_
->
use_cuda_
=
exec_strategy
.
use_cuda_
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
1dad8cea
...
@@ -174,7 +174,7 @@ bool AnalysisPredictor::PrepareScope(
...
@@ -174,7 +174,7 @@ bool AnalysisPredictor::PrepareScope(
scope_
=
parent_scope
;
scope_
=
parent_scope
;
status_is_cloned_
=
true
;
status_is_cloned_
=
true
;
}
else
{
}
else
{
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
framework
::
InitDevices
();
scope_
.
reset
(
new
paddle
::
framework
::
Scope
(),
[](
framework
::
Scope
*
scope
)
{
scope_
.
reset
(
new
paddle
::
framework
::
Scope
(),
[](
framework
::
Scope
*
scope
)
{
delete
scope
;
delete
scope
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
1dad8cea
...
@@ -91,7 +91,7 @@ bool NativePaddlePredictor::Init(
...
@@ -91,7 +91,7 @@ bool NativePaddlePredictor::Init(
platform
::
errors
::
PreconditionNotMet
(
platform
::
errors
::
PreconditionNotMet
(
"The sub_scope should not be nullptr."
));
"The sub_scope should not be nullptr."
));
}
else
{
}
else
{
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
framework
::
InitDevices
();
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
}
}
...
...
paddle/fluid/inference/io.cc
浏览文件 @
1dad8cea
...
@@ -25,7 +25,6 @@ limitations under the License. */
...
@@ -25,7 +25,6 @@ limitations under the License. */
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/pybind.h"
DEFINE_string
(
devices
,
""
,
"The devices to be used which is joined by comma."
);
DEFINE_string
(
devices
,
""
,
"The devices to be used which is joined by comma."
);
DEFINE_bool
(
init_p2p
,
false
,
"Whether to init p2p."
);
DEFINE_int32
(
math_num_threads
,
1
,
DEFINE_int32
(
math_num_threads
,
1
,
"Number of threads used to run math functions."
);
"Number of threads used to run math functions."
);
...
@@ -42,7 +41,7 @@ void Init(const std::vector<std::string> argv) {
...
@@ -42,7 +41,7 @@ void Init(const std::vector<std::string> argv) {
while
(
std
::
getline
(
tokenStream
,
token
,
','
))
{
while
(
std
::
getline
(
tokenStream
,
token
,
','
))
{
devices
.
push_back
(
std
::
stoi
(
token
));
devices
.
push_back
(
std
::
stoi
(
token
));
}
}
framework
::
InitDevices
(
FLAGS_init_p2p
,
devices
);
framework
::
InitDevices
(
devices
);
}
}
void
ReadBinaryFile
(
const
std
::
string
&
filename
,
std
::
string
*
contents
)
{
void
ReadBinaryFile
(
const
std
::
string
&
filename
,
std
::
string
*
contents
)
{
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
1dad8cea
...
@@ -37,6 +37,7 @@
...
@@ -37,6 +37,7 @@
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/allocation/thread_local_allocator.h"
#include "paddle/fluid/memory/allocation/thread_local_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#endif
#endif
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
...
...
paddle/fluid/operators/benchmark/op_tester.cc
浏览文件 @
1dad8cea
...
@@ -57,7 +57,7 @@ void OpTester::Init(const OpTesterConfig &config) {
...
@@ -57,7 +57,7 @@ void OpTester::Init(const OpTesterConfig &config) {
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
}
}
framework
::
InitDevices
(
false
);
framework
::
InitDevices
();
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
op_
=
framework
::
OpRegistry
::
CreateOp
(
op_desc_
);
op_
=
framework
::
OpRegistry
::
CreateOp
(
op_desc_
);
...
...
paddle/fluid/operators/fused/fusion_group_op_test.cc
浏览文件 @
1dad8cea
...
@@ -140,7 +140,7 @@ void TestMain(const std::vector<std::string>& input_names,
...
@@ -140,7 +140,7 @@ void TestMain(const std::vector<std::string>& input_names,
std
::
string
func_name
,
std
::
string
cuda_kernel_str
,
std
::
string
func_name
,
std
::
string
cuda_kernel_str
,
CPUKernelFunc
cpu_kernel_func
)
{
CPUKernelFunc
cpu_kernel_func
)
{
// Compile the device code
// Compile the device code
paddle
::
framework
::
InitDevices
(
false
,
{
0
});
paddle
::
framework
::
InitDevices
({
0
});
platform
::
CUDAPlace
place
=
platform
::
CUDAPlace
(
0
);
platform
::
CUDAPlace
place
=
platform
::
CUDAPlace
(
0
);
PrepareDeviceCode
(
place
,
func_name
,
cuda_kernel_str
);
PrepareDeviceCode
(
place
,
func_name
,
cuda_kernel_str
);
...
...
paddle/fluid/platform/device_code_test.cc
浏览文件 @
1dad8cea
...
@@ -35,7 +35,7 @@ TEST(DeviceCode, cuda) {
...
@@ -35,7 +35,7 @@ TEST(DeviceCode, cuda) {
return
;
return
;
}
}
paddle
::
framework
::
InitDevices
(
false
,
{
0
});
paddle
::
framework
::
InitDevices
({
0
});
paddle
::
platform
::
CUDAPlace
place
=
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDAPlace
place
=
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceCode
code
(
place
,
"saxpy_kernel"
,
saxpy_code
);
paddle
::
platform
::
CUDADeviceCode
code
(
place
,
"saxpy_kernel"
,
saxpy_code
);
...
@@ -90,7 +90,7 @@ TEST(DeviceCodePool, cuda) {
...
@@ -90,7 +90,7 @@ TEST(DeviceCodePool, cuda) {
return
;
return
;
}
}
paddle
::
framework
::
InitDevices
(
false
,
{
0
});
paddle
::
framework
::
InitDevices
({
0
});
paddle
::
platform
::
CUDAPlace
place
=
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDAPlace
place
=
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
DeviceCodePool
&
pool
=
paddle
::
platform
::
DeviceCodePool
&
pool
=
paddle
::
platform
::
DeviceCodePool
::
Init
({
place
});
paddle
::
platform
::
DeviceCodePool
::
Init
({
place
});
...
...
paddle/fluid/platform/init.cc
浏览文件 @
1dad8cea
...
@@ -63,7 +63,6 @@ namespace framework {
...
@@ -63,7 +63,6 @@ namespace framework {
std
::
once_flag
gflags_init_flag
;
std
::
once_flag
gflags_init_flag
;
std
::
once_flag
glog_init_flag
;
std
::
once_flag
glog_init_flag
;
std
::
once_flag
p2p_init_flag
;
bool
InitGflags
(
std
::
vector
<
std
::
string
>
args
)
{
bool
InitGflags
(
std
::
vector
<
std
::
string
>
args
)
{
bool
successed
=
false
;
bool
successed
=
false
;
...
@@ -95,28 +94,7 @@ bool InitGflags(std::vector<std::string> args) {
...
@@ -95,28 +94,7 @@ bool InitGflags(std::vector<std::string> args) {
return
successed
;
return
successed
;
}
}
// Enables CUDA peer-to-peer access between every ordered pair of distinct
// device ids in `devices`. Compiles to an empty function unless
// PADDLE_WITH_CUDA is defined. The whole setup is wrapped in std::call_once
// on p2p_init_flag.
//
// A failing cudaDeviceCanAccessPeer query is escalated through
// PADDLE_ENFORCE_CUDA_SUCCESS; a pair that merely reports "no access" is
// logged at VLOG(2) and skipped. The result of cudaDeviceEnablePeerAccess is
// not inspected.
void InitP2P(std::vector<int> devices) {
#ifdef PADDLE_WITH_CUDA
  std::call_once(p2p_init_flag, [&]() {
    int count = devices.size();
    for (int i = 0; i < count; ++i) {
      for (int j = 0; j < count; ++j) {
        // A device never needs peer access to itself.
        if (devices[i] == devices[j]) continue;

        int can_acess = -1;
        PADDLE_ENFORCE_CUDA_SUCCESS(
            cudaDeviceCanAccessPeer(&can_acess, devices[i], devices[j]));

        if (can_acess == 1) {
          // Make devices[i] current while enabling access to devices[j].
          platform::CUDADeviceGuard guard(devices[i]);
          cudaDeviceEnablePeerAccess(devices[j], 0);
          continue;
        }

        VLOG(2) << "Cannot enable P2P access from " << devices[i] << " to "
                << devices[j];
      }
    }
  });
#endif
}
void
InitCupti
()
{
void
InitCupti
()
{
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_CUPTI
...
@@ -144,7 +122,7 @@ void InitCupti() {
...
@@ -144,7 +122,7 @@ void InitCupti() {
#endif
#endif
}
}
void
InitDevices
(
bool
init_p2p
)
{
void
InitDevices
()
{
// CUPTI attribute should be set before any CUDA context is created (see CUPTI
// CUPTI attribute should be set before any CUDA context is created (see CUPTI
// documentation about CUpti_ActivityAttribute).
// documentation about CUpti_ActivityAttribute).
InitCupti
();
InitCupti
();
...
@@ -166,10 +144,10 @@ void InitDevices(bool init_p2p) {
...
@@ -166,10 +144,10 @@ void InitDevices(bool init_p2p) {
LOG
(
WARNING
)
<<
"Compiled with WITH_XPU, but no XPU found in runtime."
;
LOG
(
WARNING
)
<<
"Compiled with WITH_XPU, but no XPU found in runtime."
;
}
}
#endif
#endif
InitDevices
(
init_p2p
,
devices
);
InitDevices
(
devices
);
}
}
void
InitDevices
(
bool
init_p2p
,
const
std
::
vector
<
int
>
devices
)
{
void
InitDevices
(
const
std
::
vector
<
int
>
devices
)
{
std
::
vector
<
platform
::
Place
>
places
;
std
::
vector
<
platform
::
Place
>
places
;
for
(
size_t
i
=
0
;
i
<
devices
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
devices
.
size
();
++
i
)
{
...
@@ -187,9 +165,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
...
@@ -187,9 +165,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
places
.
emplace_back
(
platform
::
XPUPlace
(
devices
[
i
]));
places
.
emplace_back
(
platform
::
XPUPlace
(
devices
[
i
]));
#endif
#endif
}
}
if
(
init_p2p
)
{
InitP2P
(
devices
);
}
places
.
emplace_back
(
platform
::
CPUPlace
());
places
.
emplace_back
(
platform
::
CPUPlace
());
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
places
.
emplace_back
(
platform
::
CUDAPinnedPlace
());
places
.
emplace_back
(
platform
::
CUDAPinnedPlace
());
...
...
paddle/fluid/platform/init.h
浏览文件 @
1dad8cea
...
@@ -35,9 +35,9 @@ bool InitGflags(std::vector<std::string> argv);
...
@@ -35,9 +35,9 @@ bool InitGflags(std::vector<std::string> argv);
void
InitGLOG
(
const
std
::
string
&
prog_name
);
void
InitGLOG
(
const
std
::
string
&
prog_name
);
void
InitDevices
(
bool
init_p2p
);
void
InitDevices
();
void
InitDevices
(
bool
init_p2p
,
const
std
::
vector
<
int
>
devices
);
void
InitDevices
(
const
std
::
vector
<
int
>
devices
);
#ifndef _WIN32
#ifndef _WIN32
class
SignalMessageDumper
{
class
SignalMessageDumper
{
...
...
paddle/fluid/platform/init_test.cc
浏览文件 @
1dad8cea
...
@@ -22,7 +22,7 @@ TEST(InitDevices, CPU) {
...
@@ -22,7 +22,7 @@ TEST(InitDevices, CPU) {
using
paddle
::
platform
::
DeviceContextPool
;
using
paddle
::
platform
::
DeviceContextPool
;
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_XPU)
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_XPU)
InitDevices
(
true
);
InitDevices
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
ASSERT_EQ
(
pool
.
size
(),
1U
);
ASSERT_EQ
(
pool
.
size
(),
1U
);
#endif
#endif
...
@@ -34,7 +34,7 @@ TEST(InitDevices, CUDA) {
...
@@ -34,7 +34,7 @@ TEST(InitDevices, CUDA) {
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
int
count
=
paddle
::
platform
::
GetCUDADeviceCount
();
int
count
=
paddle
::
platform
::
GetCUDADeviceCount
();
InitDevices
(
true
);
InitDevices
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
ASSERT_EQ
(
pool
.
size
(),
2U
+
static_cast
<
unsigned
>
(
count
));
ASSERT_EQ
(
pool
.
size
(),
2U
+
static_cast
<
unsigned
>
(
count
));
#endif
#endif
...
@@ -46,7 +46,7 @@ TEST(InitDevices, XPU) {
...
@@ -46,7 +46,7 @@ TEST(InitDevices, XPU) {
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
int
count
=
paddle
::
platform
::
GetXPUDeviceCount
();
int
count
=
paddle
::
platform
::
GetXPUDeviceCount
();
InitDevices
(
true
);
InitDevices
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
DeviceContextPool
&
pool
=
DeviceContextPool
::
Instance
();
ASSERT_EQ
(
pool
.
size
(),
1U
+
static_cast
<
unsigned
>
(
count
));
ASSERT_EQ
(
pool
.
size
(),
1U
+
static_cast
<
unsigned
>
(
count
));
#endif
#endif
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
1dad8cea
...
@@ -1715,7 +1715,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -1715,7 +1715,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"init_glog"
,
framework
::
InitGLOG
);
m
.
def
(
"init_glog"
,
framework
::
InitGLOG
);
m
.
def
(
"load_op_library"
,
framework
::
LoadOpLib
);
m
.
def
(
"load_op_library"
,
framework
::
LoadOpLib
);
m
.
def
(
"init_devices"
,
m
.
def
(
"init_devices"
,
[](
bool
init_p2p
)
{
framework
::
InitDevices
(
init_p2p
);
});
[](
)
{
framework
::
InitDevices
(
);
});
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
m
.
def
(
"is_compiled_with_xpu"
,
IsCompiledWithXPU
);
m
.
def
(
"is_compiled_with_xpu"
,
IsCompiledWithXPU
);
...
...
paddle/fluid/train/demo/demo_trainer.cc
浏览文件 @
1dad8cea
...
@@ -55,7 +55,7 @@ std::unique_ptr<paddle::framework::ProgramDesc> Load(
...
@@ -55,7 +55,7 @@ std::unique_ptr<paddle::framework::ProgramDesc> Load(
}
// namespace paddle
}
// namespace paddle
int
main
()
{
int
main
()
{
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
framework
::
InitDevices
();
const
auto
cpu_place
=
paddle
::
platform
::
CPUPlace
();
const
auto
cpu_place
=
paddle
::
platform
::
CPUPlace
();
...
...
paddle/fluid/train/imdb_demo/demo_trainer.cc
浏览文件 @
1dad8cea
...
@@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
...
@@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"At least one file to train, but received number of file is %d."
,
"At least one file to train, but received number of file is %d."
,
file_vec
.
size
()));
file_vec
.
size
()));
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
framework
::
InitDevices
();
const
auto
cpu_place
=
paddle
::
platform
::
CPUPlace
();
const
auto
cpu_place
=
paddle
::
platform
::
CPUPlace
();
paddle
::
framework
::
Executor
executor
(
cpu_place
);
paddle
::
framework
::
Executor
executor
(
cpu_place
);
paddle
::
framework
::
Scope
scope
;
paddle
::
framework
::
Scope
scope
;
...
...
paddle/fluid/train/test_train_recognize_digits.cc
浏览文件 @
1dad8cea
...
@@ -33,7 +33,7 @@ DEFINE_string(dirname, "", "Directory of the train model.");
...
@@ -33,7 +33,7 @@ DEFINE_string(dirname, "", "Directory of the train model.");
namespace
paddle
{
namespace
paddle
{
void
Train
(
std
::
string
model_dir
)
{
void
Train
(
std
::
string
model_dir
)
{
framework
::
InitDevices
(
false
);
framework
::
InitDevices
();
const
auto
cpu_place
=
platform
::
CPUPlace
();
const
auto
cpu_place
=
platform
::
CPUPlace
();
framework
::
Executor
executor
(
cpu_place
);
framework
::
Executor
executor
(
cpu_place
);
framework
::
Scope
scope
;
framework
::
Scope
scope
;
...
...
paddle/testing/paddle_gtest_main.cc
浏览文件 @
1dad8cea
...
@@ -121,7 +121,7 @@ int main(int argc, char** argv) {
...
@@ -121,7 +121,7 @@ int main(int argc, char** argv) {
int
internal_argc
=
internal_argv
.
size
();
int
internal_argc
=
internal_argv
.
size
();
char
**
arr
=
internal_argv
.
data
();
char
**
arr
=
internal_argv
.
data
();
paddle
::
platform
::
ParseCommandLineFlags
(
internal_argc
,
arr
,
true
);
paddle
::
platform
::
ParseCommandLineFlags
(
internal_argc
,
arr
,
true
);
paddle
::
framework
::
InitDevices
(
true
);
paddle
::
framework
::
InitDevices
();
int
ret
=
RUN_ALL_TESTS
();
int
ret
=
RUN_ALL_TESTS
();
...
...
python/paddle/fluid/__init__.py
浏览文件 @
1dad8cea
...
@@ -254,7 +254,7 @@ def __bootstrap__():
...
@@ -254,7 +254,7 @@ def __bootstrap__():
core
.
init_gflags
([
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
core
.
init_gflags
([
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
core
.
init_glog
(
sys
.
argv
[
0
])
core
.
init_glog
(
sys
.
argv
[
0
])
# don't init_p2p when in unittest to save time.
# don't init_p2p when in unittest to save time.
core
.
init_devices
(
not
in_test
)
core
.
init_devices
()
# TODO(panyx0718): Avoid doing complex initialization logic in __init__.py.
# TODO(panyx0718): Avoid doing complex initialization logic in __init__.py.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录