提交 b9d855cb 编写于 作者: J jonyguo

fix: TDT hangs when the device is occupied

上级 9e124493
......@@ -194,17 +194,19 @@ bool MsContext::OpenTsd() {
}
MS_LOG(INFO) << "Device id = " << device_id << ", rank size = " << rank_size << ".";
TDT_StatusT status = tdt::TsdClient::GetInstance()->Open(device_id, rank_size);
if (status != TDT_OK) {
MS_LOG(EXCEPTION) << "Device " << device_id << " is occupied, open tsd failed, status = " << status << ".";
return false;
}
int32_t initStatus = tdt::TdtHostInit(device_id);
if (initStatus != TDT_OK_CODE) {
MS_LOG(EXCEPTION) << "Init tsd failed, status = " << initStatus << ".";
return false;
}
tdt_print_ = std::thread(TensorPrint());
TDT_StatusT status = tdt::TsdClient::GetInstance()->Open(device_id, rank_size);
if (status != TDT_OK) {
MS_LOG(EXCEPTION) << "Device " << device_id << " is occupied, open tsd failed, status = " << status << ".";
return false;
}
tsd_ref_++;
MS_LOG(INFO) << "Open and init tsd successful, tsd reference = " << tsd_ref_ << ".";
return true;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册