Commit b138d29c authored by Xin Pan

Avoid init_p2p all the times

Parent 09b4a1a3
@@ -105,7 +105,7 @@ static void BuildVar(const std::string& param_name,
 TEST(Operator, CPUtoGPU) {
   using namespace paddle::framework;
   using namespace paddle::platform;
-  InitDevices();
+  InitDevices(true);
   paddle::framework::Scope scope;
   paddle::platform::CPUPlace cpu_place;
...
@@ -64,7 +64,7 @@ void InitP2P(int count) {
 #endif
 }
-void InitDevices() {
+void InitDevices(bool init_p2p) {
   /*Init all avaiable devices by default */
   std::vector<platform::Place> places;
@@ -85,7 +85,9 @@ void InitDevices() {
   for (int i = 0; i < count; ++i) {
     places.emplace_back(platform::CUDAPlace(i));
   }
-  InitP2P(count);
+  if (init_p2p) {
+    InitP2P(count);
+  }
   platform::DeviceContextPool::Init(places);
 }
...
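For context on why the new flag matters in tests: enabling peer-to-peer (P2P) access touches every ordered pair of GPUs, so its cost grows quadratically with the device count. The snippet below is only a minimal sketch of that kind of pairwise setup using the standard CUDA runtime API (cudaDeviceCanAccessPeer / cudaDeviceEnablePeerAccess); it is not Paddle's exact InitP2P implementation.

// Minimal sketch, not Paddle's InitP2P: enable peer access for every
// ordered pair of devices that supports it.
#include <cuda_runtime.h>

void EnableAllPeerAccess(int count) {
  for (int i = 0; i < count; ++i) {
    for (int j = 0; j < count; ++j) {
      if (i == j) continue;
      int can_access = 0;
      cudaDeviceCanAccessPeer(&can_access, i, j);  // can device i reach device j?
      if (can_access) {
        cudaSetDevice(i);                  // peer access is enabled from the current device
        cudaDeviceEnablePeerAccess(j, 0);  // flags argument must be 0
      }
    }
  }
}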
@@ -24,7 +24,7 @@ void InitGflags(std::vector<std::string> &argv);
 void InitGLOG(const std::string &prog_name);
-void InitDevices();
+void InitDevices(bool init_p2p);
 } // namespace framework
 } // namespace paddle
@@ -21,7 +21,7 @@ TEST(InitDevices, CPU) {
   using paddle::platform::DeviceContextPool;
 #ifndef PADDLE_WITH_CUDA
-  InitDevices();
+  InitDevices(true);
   DeviceContextPool& pool = DeviceContextPool::Instance();
   ASSERT_EQ(pool.size(), 1U);
 #endif
@@ -33,7 +33,7 @@ TEST(InitDevices, CUDA) {
 #ifdef PADDLE_WITH_CUDA
   int count = paddle::platform::GetCUDADeviceCount();
-  InitDevices();
+  InitDevices(true);
   DeviceContextPool& pool = DeviceContextPool::Instance();
   ASSERT_EQ(pool.size(), 1U + static_cast<unsigned>(count));
 #endif
...
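The tests above keep passing true, so their behaviour is unchanged; the new parameter also lets a suite that never exercises GPU peer copies opt out. A hypothetical GTest main doing so might look like the sketch below (the header path and the inline /* init_p2p */ annotation are assumptions for illustration, not part of this commit).

#include <gtest/gtest.h>
#include "paddle/fluid/framework/init.h"  // assumed location of InitDevices

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  // Skip the pairwise P2P setup; device contexts are still created.
  paddle::framework::InitDevices(false /* init_p2p */);
  return RUN_ALL_TESTS();
}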
@@ -30,7 +30,7 @@ __global__ void test(size_t* a, int size) {
 }
 TEST(LoD, data) {
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   paddle::framework::LoD lod{{0, 1, 2}};
   lod.push_back({0, 2, 4, 5});
@@ -46,7 +46,7 @@ TEST(LoD, data) {
 }
 TEST(LoDTensor, LoDInGPU) {
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   paddle::framework::LoDTensor lod_tensor;
   paddle::platform::CUDAPlace place(0);
...
@@ -72,7 +72,7 @@ REGISTER_OP_WITHOUT_GRADIENT(test_operator,
                              paddle::framework::OpWithoutKernelCheckerMaker);
 TEST(OperatorBase, all) {
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("test_operator");
   BuildVar("input", {"IN1"}, op_desc.add_inputs());
@@ -198,7 +198,7 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel,
 // test with single input
 TEST(OpKernel, all) {
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   paddle::framework::proto::OpDesc op_desc;
   op_desc.set_type("op_with_kernel");
   BuildVar("x", {"IN1"}, op_desc.add_inputs());
@@ -228,7 +228,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
 TEST(OpKernel, multi_inputs) {
   using namespace paddle::framework;
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   proto::OpDesc op_desc;
   op_desc.set_type("op_multi_inputs_with_kernel");
@@ -269,7 +269,7 @@ class OperatorClone : public paddle::framework::OperatorBase {
 };
 TEST(Operator, Clone) {
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   OperatorClone a("ABC", paddle::framework::VariableNameMap{},
                   paddle::framework::VariableNameMap{},
                   paddle::framework::AttributeMap{});
...
@@ -423,7 +423,8 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_gflags", framework::InitGflags);
   m.def("init_glog", framework::InitGLOG);
-  m.def("init_devices", &framework::InitDevices);
+  m.def("init_devices",
+        [](bool init_p2p) { framework::InitDevices(init_p2p); });
   m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
 #ifdef PADDLE_WITH_CUDA
...
@@ -41,6 +41,6 @@ int main(int argc, char** argv) {
   paddle::memory::Used(paddle::platform::CUDAPlace(0));
 #endif
-  paddle::framework::InitDevices();
+  paddle::framework::InitDevices(true);
   return RUN_ALL_TESTS();
 }
@@ -84,6 +84,8 @@ def __bootstrap__():
     import core
     import os
+    in_test = 'unittest' in sys.modules
     try:
         num_threads = int(os.getenv('OMP_NUM_THREADS', '1'))
     except ValueError:
@@ -108,8 +110,11 @@ def __bootstrap__():
     core.init_gflags([sys.argv[0]] +
                      ["--tryfromenv=" + ",".join(read_env_flags)])
     core.init_glog(sys.argv[0])
-    core.init_devices()
+    # don't init_p2p when in unittest to save time.
+    core.init_devices(not in_test)
+# TODO(panyx0718): Avoid doing complex initialization logic in __init__.py.
+# Consider paddle.init(args) or paddle.main(args)
 layers.monkey_patch_variable()
 __bootstrap__()