Unverified commit 1dad8cea, authored by gongweibao, committed by GitHub

Fix gpu memory allocation bug. (#28703)

Parent 91bab752
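This commit removes the init_p2p flag from framework::InitDevices and moves GPU peer-to-peer (P2P) initialization into the ParallelExecutor constructor, so plain device initialization no longer touches peer access. Below is a minimal call-site sketch of the API change; the header path is an assumption and is not part of this diff.

// Call-site sketch only; header path assumed, not shown in this diff.
#include "paddle/fluid/platform/init.h"

int main() {
  // Before this commit these calls took an init_p2p flag, e.g.
  //   paddle::framework::InitDevices(/*init_p2p=*/true);
  //   paddle::framework::InitDevices(/*init_p2p=*/false, {0});
  // After this commit the flag is gone; P2P setup happens later, inside
  // ParallelExecutor, and only when multiple GPU places are used.
  paddle::framework::InitDevices();        // initialize all visible devices
  // paddle::framework::InitDevices({0});  // or: initialize only device 0
  return 0;
}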
@@ -103,7 +103,7 @@ static void BuildVar(const std::string& param_name,
 }
 TEST(Operator, CPUtoGPU) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::Scope scope;
   paddle::platform::CPUPlace cpu_place;
...
@@ -26,7 +26,7 @@ __global__ void test(size_t* a, int size) {
 }
 TEST(LoD, data) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::LoD lod{{0, 1, 2}};
   lod.push_back({0, 2, 4, 5});
@@ -42,7 +42,7 @@ TEST(LoD, data) {
 }
 TEST(LoDTensor, LoDInGPU) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::LoDTensor lod_tensor;
   paddle::platform::CUDAPlace place(0);
...
@@ -76,7 +76,7 @@ REGISTER_OP_WITHOUT_GRADIENT(test_operator,
                              paddle::framework::OpWithoutKernelCheckerMaker);
 TEST(OperatorBase, all) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("test_operator");
   BuildVar("input", {"IN1"}, op_desc.add_inputs());
@@ -228,7 +228,7 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(
 // test with single input
 TEST(OpKernel, all) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("op_with_kernel");
   BuildVar("x", {"IN1"}, op_desc.add_inputs());
@@ -268,7 +268,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
 // test with multi inputs
 TEST(OpKernel, multi_inputs) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("op_multi_inputs_with_kernel");
@@ -419,7 +419,7 @@ REGISTER_OP_CPU_KERNEL(indicate_other_data_type_test,
                        paddle::platform::CPUDeviceContext, int>);
 TEST(IndicateVarDataTypeTest, lodtensor) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("indicate_lod_tensor_data_type_test");
   BuildVar("LoDTensor", {"lodtensor_1"}, op_desc.add_inputs());
@@ -447,7 +447,7 @@ TEST(IndicateVarDataTypeTest, lodtensor) {
 }
 TEST(IndicateVarDataTypeTest, selectedrows) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("indicate_selected_rows_data_type_test");
   BuildVar("SelectedRows", {"selected_rows_1"}, op_desc.add_inputs());
@@ -474,7 +474,7 @@ TEST(IndicateVarDataTypeTest, selectedrows) {
 }
 TEST(IndicateVarDataTypeTest, other) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("indicate_other_data_type_test");
   BuildVar("Other", {"lod_rank_table_1"}, op_desc.add_inputs());
@@ -504,7 +504,7 @@ TEST(IndicateVarDataTypeTest, other) {
 }
 TEST(ExecutionContextAttrAndInOut, new_api) {
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("test_operator");
   BuildVar("input", {"IN1"}, op_desc.add_inputs());
@@ -596,7 +596,7 @@ REGISTER_OP_CPU_KERNEL(set_lod_level_test,
                        paddle::platform::CPUDeviceContext, float>);
 void SetGetLoDLevelTestMain(std::string op_type) {
-  paddle::framework::InitDevices(false, {});
+  paddle::framework::InitDevices({});
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type(op_type);
   BuildVar("X", {"x.0"}, op_desc.add_inputs());
@@ -701,7 +701,7 @@ REGISTER_OP_CPU_KERNEL(op_without_unused_var,
 TEST(OpWithUnusedVar, all) {
   // enable the unused_var_check
   FLAGS_enable_unused_var_check = true;
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("op_with_unused_var");
   BuildVar("X", {"X"}, op_desc.add_inputs());
@@ -726,7 +726,7 @@ TEST(OpWithoutUnusedVar, all) {
   // enable the unused_var_check
   FLAGS_enable_unused_var_check = true;
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("op_without_unused_var");
   BuildVar("X", {"X"}, op_desc.add_inputs());
...
@@ -36,6 +36,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/event.h"
 #include "paddle/fluid/platform/profiler.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cuda_device_guard.h"
+#endif
 DECLARE_double(eager_delete_tensor_gb);
 #ifdef WITH_GPERFTOOLS
@@ -55,6 +59,10 @@ static std::once_flag gProfileOnce;
 static bool gProfileStarted = false;
 #endif
+#ifdef PADDLE_WITH_CUDA
+std::once_flag p2p_init_flag;
+#endif
 class ParallelExecutorPrivate {
  public:
  ParallelExecutorPrivate(const std::vector<platform::Place> &places,
@@ -458,6 +466,41 @@ bool ParallelExecutor::NeedCreateLocalExeScope() {
   return executor && executor->NeedCreateLocalExeScope();
 }
+void InitP2P(const std::vector<platform::Place> &places) {
+#ifdef PADDLE_WITH_CUDA
+  std::call_once(p2p_init_flag, [&]() {
+    int count = places.size();
+    if (count <= 1) return;
+    std::vector<int> devices;
+    for (int i = 0; i < count; i++) {
+      if (!is_gpu_place(places[i])) return;
+      platform::CUDAPlace device =
+          BOOST_GET_CONST(platform::CUDAPlace, places[i]);
+      devices.push_back(device.GetDeviceId());
+    }
+    for (int i = 0; i < count; ++i) {
+      for (int j = 0; j < count; ++j) {
+        if (devices[i] == devices[j]) continue;
+        int can_acess = -1;
+        cudaError_t ret =
+            cudaDeviceCanAccessPeer(&can_acess, devices[i], devices[j]);
+        if (ret != cudaSuccess || can_acess != 1) {
+          LOG(WARNING) << "Cannot enable P2P access from " << devices[i]
+                       << " to " << devices[j];
+        } else {
+          platform::CUDADeviceGuard guard(devices[i]);
+          cudaDeviceEnablePeerAccess(devices[j], 0);
+        }
+      }
+    }
+    VLOG(1) << "init p2p";
+  });
+#endif
+}
 ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                                    const std::vector<std::string> &bcast_vars,
                                    const std::string &loss_var_name,
@@ -470,6 +513,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
   PADDLE_ENFORCE(places.size() > 0 && !is_xpu_place(places[0]),
                  platform::errors::Unavailable(
                      "XPU is not supported in ParallelExecutor"));
+  InitP2P(places);
   ir::InitReaderQueueDeviceCount(graph, *(member_->global_scope_),
                                  member_->places_.size());
   member_->use_cuda_ = exec_strategy.use_cuda_;
...
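The new InitP2P above probes every ordered pair of GPU places and enables peer access where the driver reports it is possible; failures are now only logged as warnings rather than enforced as fatal errors, and the routine runs once (std::call_once) when the first multi-GPU ParallelExecutor is constructed. Below is a minimal standalone sketch of the same probe-and-enable pattern using only the CUDA runtime API and no Paddle types; it is illustrative, not the committed code.

// Standalone sketch of pairwise P2P enabling, assuming a plain CUDA toolkit.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  int count = 0;
  if (cudaGetDeviceCount(&count) != cudaSuccess || count <= 1) return 0;
  for (int i = 0; i < count; ++i) {
    for (int j = 0; j < count; ++j) {
      if (i == j) continue;
      int can_access = -1;
      cudaError_t ret = cudaDeviceCanAccessPeer(&can_access, i, j);
      if (ret != cudaSuccess || can_access != 1) {
        std::printf("Cannot enable P2P access from %d to %d\n", i, j);
      } else {
        cudaSetDevice(i);                  // stands in for CUDADeviceGuard
        cudaDeviceEnablePeerAccess(j, 0);  // flags argument must be 0
      }
    }
  }
  return 0;
}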
@@ -174,7 +174,7 @@ bool AnalysisPredictor::PrepareScope(
     scope_ = parent_scope;
     status_is_cloned_ = true;
   } else {
-    paddle::framework::InitDevices(false);
+    paddle::framework::InitDevices();
     scope_.reset(new paddle::framework::Scope(), [](framework::Scope *scope) {
       delete scope;
 #ifdef PADDLE_WITH_CUDA
...
@@ -91,7 +91,7 @@ bool NativePaddlePredictor::Init(
                     platform::errors::PreconditionNotMet(
                         "The sub_scope should not be nullptr."));
   } else {
-    paddle::framework::InitDevices(false);
+    paddle::framework::InitDevices();
     scope_.reset(new paddle::framework::Scope());
   }
...
@@ -25,7 +25,6 @@ limitations under the License. */
 #include "paddle/fluid/pybind/pybind.h"
 DEFINE_string(devices, "", "The devices to be used which is joined by comma.");
-DEFINE_bool(init_p2p, false, "Whether to init p2p.");
 DEFINE_int32(math_num_threads, 1,
              "Number of threads used to run math functions.");
@@ -42,7 +41,7 @@ void Init(const std::vector<std::string> argv) {
   while (std::getline(tokenStream, token, ',')) {
     devices.push_back(std::stoi(token));
   }
-  framework::InitDevices(FLAGS_init_p2p, devices);
+  framework::InitDevices(devices);
 }
 void ReadBinaryFile(const std::string& filename, std::string* contents) {
...
@@ -37,6 +37,7 @@
 #include "paddle/fluid/memory/allocation/pinned_allocator.h"
 #include "paddle/fluid/memory/allocation/thread_local_allocator.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
+#include "paddle/fluid/platform/dynload/cupti.h"
 #include "paddle/fluid/platform/gpu_info.h"
 #endif
 #ifdef PADDLE_WITH_XPU
...
@@ -57,7 +57,7 @@ void OpTester::Init(const OpTesterConfig &config) {
     place_ = paddle::platform::CPUPlace();
   }
-  framework::InitDevices(false);
+  framework::InitDevices();
   scope_.reset(new paddle::framework::Scope());
   op_ = framework::OpRegistry::CreateOp(op_desc_);
...
@@ -140,7 +140,7 @@ void TestMain(const std::vector<std::string>& input_names,
               std::string func_name, std::string cuda_kernel_str,
               CPUKernelFunc cpu_kernel_func) {
   // Compile the device code
-  paddle::framework::InitDevices(false, {0});
+  paddle::framework::InitDevices({0});
   platform::CUDAPlace place = platform::CUDAPlace(0);
   PrepareDeviceCode(place, func_name, cuda_kernel_str);
...
@@ -35,7 +35,7 @@ TEST(DeviceCode, cuda) {
     return;
   }
-  paddle::framework::InitDevices(false, {0});
+  paddle::framework::InitDevices({0});
   paddle::platform::CUDAPlace place = paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceCode code(place, "saxpy_kernel", saxpy_code);
@@ -90,7 +90,7 @@ TEST(DeviceCodePool, cuda) {
     return;
   }
-  paddle::framework::InitDevices(false, {0});
+  paddle::framework::InitDevices({0});
   paddle::platform::CUDAPlace place = paddle::platform::CUDAPlace(0);
   paddle::platform::DeviceCodePool& pool =
       paddle::platform::DeviceCodePool::Init({place});
...
@@ -63,7 +63,6 @@ namespace framework {
 std::once_flag gflags_init_flag;
 std::once_flag glog_init_flag;
-std::once_flag p2p_init_flag;
 bool InitGflags(std::vector<std::string> args) {
   bool successed = false;
@@ -95,28 +94,7 @@ bool InitGflags(std::vector<std::string> args) {
   return successed;
 }
-void InitP2P(std::vector<int> devices) {
-#ifdef PADDLE_WITH_CUDA
-  std::call_once(p2p_init_flag, [&]() {
-    int count = devices.size();
-    for (int i = 0; i < count; ++i) {
-      for (int j = 0; j < count; ++j) {
-        if (devices[i] == devices[j]) continue;
-        int can_acess = -1;
-        PADDLE_ENFORCE_CUDA_SUCCESS(
-            cudaDeviceCanAccessPeer(&can_acess, devices[i], devices[j]));
-        if (can_acess != 1) {
-          VLOG(2) << "Cannot enable P2P access from " << devices[i] << " to "
-                  << devices[j];
-        } else {
-          platform::CUDADeviceGuard guard(devices[i]);
-          cudaDeviceEnablePeerAccess(devices[j], 0);
-        }
-      }
-    }
-  });
-#endif
-}
 void InitCupti() {
 #ifdef PADDLE_WITH_CUPTI
@@ -144,7 +122,7 @@ void InitCupti() {
 #endif
 }
-void InitDevices(bool init_p2p) {
+void InitDevices() {
   // CUPTI attribute should be set before any CUDA context is created (see CUPTI
   // documentation about CUpti_ActivityAttribute).
   InitCupti();
@@ -166,10 +144,10 @@ void InitDevices(bool init_p2p) {
     LOG(WARNING) << "Compiled with WITH_XPU, but no XPU found in runtime.";
   }
 #endif
-  InitDevices(init_p2p, devices);
+  InitDevices(devices);
 }
-void InitDevices(bool init_p2p, const std::vector<int> devices) {
+void InitDevices(const std::vector<int> devices) {
   std::vector<platform::Place> places;
   for (size_t i = 0; i < devices.size(); ++i) {
@@ -187,9 +165,6 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
     places.emplace_back(platform::XPUPlace(devices[i]));
 #endif
   }
-  if (init_p2p) {
-    InitP2P(devices);
-  }
   places.emplace_back(platform::CPUPlace());
 #ifdef PADDLE_WITH_CUDA
   places.emplace_back(platform::CUDAPinnedPlace());
...
@@ -35,9 +35,9 @@ bool InitGflags(std::vector<std::string> argv);
 void InitGLOG(const std::string& prog_name);
-void InitDevices(bool init_p2p);
-void InitDevices(bool init_p2p, const std::vector<int> devices);
+void InitDevices();
+void InitDevices(const std::vector<int> devices);
 #ifndef _WIN32
 class SignalMessageDumper {
...
@@ -22,7 +22,7 @@ TEST(InitDevices, CPU) {
   using paddle::platform::DeviceContextPool;
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_XPU)
-  InitDevices(true);
+  InitDevices();
   DeviceContextPool& pool = DeviceContextPool::Instance();
   ASSERT_EQ(pool.size(), 1U);
 #endif
@@ -34,7 +34,7 @@ TEST(InitDevices, CUDA) {
 #ifdef PADDLE_WITH_CUDA
   int count = paddle::platform::GetCUDADeviceCount();
-  InitDevices(true);
+  InitDevices();
   DeviceContextPool& pool = DeviceContextPool::Instance();
   ASSERT_EQ(pool.size(), 2U + static_cast<unsigned>(count));
 #endif
@@ -46,7 +46,7 @@ TEST(InitDevices, XPU) {
 #ifdef PADDLE_WITH_XPU
   int count = paddle::platform::GetXPUDeviceCount();
-  InitDevices(true);
+  InitDevices();
   DeviceContextPool& pool = DeviceContextPool::Instance();
   ASSERT_EQ(pool.size(), 1U + static_cast<unsigned>(count));
 #endif
...
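The expected pool sizes in these assertions follow directly from InitDevices as shown above: it always registers a CPUPlace, additionally registers a CUDAPinnedPlace when built with CUDA (hence 2U plus one context per GPU), and adds one place per detected XPU (hence 1U plus the XPU count).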
@@ -1715,7 +1715,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_glog", framework::InitGLOG);
   m.def("load_op_library", framework::LoadOpLib);
   m.def("init_devices",
-        [](bool init_p2p) { framework::InitDevices(init_p2p); });
+        []() { framework::InitDevices(); });
   m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
   m.def("is_compiled_with_xpu", IsCompiledWithXPU);
...
@@ -55,7 +55,7 @@ std::unique_ptr<paddle::framework::ProgramDesc> Load(
 }  // namespace paddle
 int main() {
-  paddle::framework::InitDevices(false);
+  paddle::framework::InitDevices();
   const auto cpu_place = paddle::platform::CPUPlace();
...
@@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
                     platform::errors::InvalidArgument(
                         "At least one file to train, but received number of file is %d.",
                         file_vec.size()));
-  paddle::framework::InitDevices(false);
+  paddle::framework::InitDevices();
   const auto cpu_place = paddle::platform::CPUPlace();
   paddle::framework::Executor executor(cpu_place);
   paddle::framework::Scope scope;
...
@@ -33,7 +33,7 @@ DEFINE_string(dirname, "", "Directory of the train model.");
 namespace paddle {
 void Train(std::string model_dir) {
-  framework::InitDevices(false);
+  framework::InitDevices();
   const auto cpu_place = platform::CPUPlace();
   framework::Executor executor(cpu_place);
   framework::Scope scope;
...
@@ -121,7 +121,7 @@ int main(int argc, char** argv) {
   int internal_argc = internal_argv.size();
   char** arr = internal_argv.data();
   paddle::platform::ParseCommandLineFlags(internal_argc, arr, true);
-  paddle::framework::InitDevices(true);
+  paddle::framework::InitDevices();
   int ret = RUN_ALL_TESTS();
...
@@ -254,7 +254,7 @@ def __bootstrap__():
     core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
     core.init_glog(sys.argv[0])
     # don't init_p2p when in unittest to save time.
-    core.init_devices(not in_test)
+    core.init_devices()
     # TODO(panyx0718): Avoid doing complex initialization logic in __init__.py.
...