提交 18ac6947 编写于 作者: X Xin Pan

Enable P2P memory copy

On a K40 machine with 4 devices, the time drops from ~4.0 to ~3.8+; the
improvement should be more pronounced on better hardware.
上级 1ca1e1c3
......@@ -26,6 +26,7 @@ namespace paddle {
namespace framework {
// Passed to std::call_once in InitGflags() so its setup body runs only once.
std::once_flag gflags_init_flag;
// Passed to std::call_once in InitP2P() so peer-access setup runs only once.
std::once_flag p2p_init_flag;
void InitGflags(std::vector<std::string> &argv) {
std::call_once(gflags_init_flag, [&]() {
......@@ -42,6 +43,25 @@ void InitGflags(std::vector<std::string> &argv) {
});
}
void InitP2P(int count) {
std::call_once(p2p_init_flag, [&]() {
for (int i = 0; i < count; ++i) {
for (int j = 0; j < count; ++j) {
if (i == j) continue;
int can_acess = -1;
PADDLE_ENFORCE(cudaDeviceCanAccessPeer(&can_acess, i, j),
"Failed to test P2P access.");
if (can_acess != 1) {
LOG(WARNING) << "Cannot enable P2P access from " << i << " to " << j;
} else {
cudaSetDevice(i);
cudaDeviceEnablePeerAccess(j, 0);
}
}
}
});
}
void InitDevices() {
/* Init all available devices by default */
......@@ -63,7 +83,7 @@ void InitDevices() {
for (int i = 0; i < count; ++i) {
places.emplace_back(platform::CUDAPlace(i));
}
InitP2P(count);
platform::DeviceContextPool::Init(places);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册