提交 fec235fc 编写于 作者: L lizhenyu

Change the default memory copy method to async

上级 e5c7ecfd
...@@ -102,7 +102,7 @@ bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *s ...@@ -102,7 +102,7 @@ bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *s
bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size, bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
DeviceStream stream) { DeviceStream stream) {
auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream); auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream);
if (ret != cudaSuccess) { if (ret != cudaSuccess) {
MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret); MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
return false; return false;
......
...@@ -37,7 +37,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(const std::vector<int> &, size_t size, T ...@@ -37,7 +37,13 @@ bool GPUDeviceAddress::SyncDeviceToHost(const std::vector<int> &, size_t size, T
// Copies the buffer at host_ptr into this address's device memory, passing ptr_, host_ptr and size_
// to the device manager. The unnamed vector, size_t and TypeId parameters are not used here.
// NOTE(review): rows that show a statement twice are side-by-side diff rows; on a row with two
// different statements, the first appears to be the pre-commit text and the second the post-commit text.
bool GPUDeviceAddress::SyncHostToDevice(const std::vector<int> &, size_t, TypeId, const void *host_ptr) const { bool GPUDeviceAddress::SyncHostToDevice(const std::vector<int> &, size_t, TypeId, const void *host_ptr) const {
MS_EXCEPTION_IF_NULL(host_ptr); MS_EXCEPTION_IF_NULL(host_ptr);
// Pre-commit text: return the result of CopyHostMemToDevice directly.
// Post-commit text: fetch the device manager's default stream instead.
return GPUDeviceManager::GetInstance().CopyHostMemToDevice(ptr_, host_ptr, size_); auto &stream = GPUDeviceManager::GetInstance().default_stream();
MS_EXCEPTION_IF_NULL(stream);
// Issue the copy through the Async variant on that stream; log and return false if it reports failure.
if (!GPUDeviceManager::GetInstance().CopyHostMemToDeviceAsync(ptr_, host_ptr, size_, stream)) {
MS_LOG(ERROR) << "CopyHostMemToDeviceAsync failed";
return false;
}
// The overall result is whatever SyncStream returns for the same stream.
return GPUDeviceManager::GetInstance().SyncStream(stream);
} }
GPUDeviceAddress::~GPUDeviceAddress() { GPUDeviceAddress::~GPUDeviceAddress() {
......
...@@ -89,6 +89,16 @@ bool GPUDeviceManager::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMe ...@@ -89,6 +89,16 @@ bool GPUDeviceManager::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMe
// Forwards dst, src and size unchanged to CudaDriver::CopyHostMemToDevice and returns its result.
// NOTE(review): each row repeats its text because it is an unchanged row of a side-by-side diff.
bool GPUDeviceManager::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) const { bool GPUDeviceManager::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) const {
return CudaDriver::CopyHostMemToDevice(dst, src, size); return CudaDriver::CopyHostMemToDevice(dst, src, size);
} }
// Device-to-host copy on a caller-supplied stream.
// Hands dst, src, size and stream unchanged to CudaDriver::CopyDeviceMemToHostAsync
// and returns that call's bool result as-is.
bool GPUDeviceManager::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
                                                DeviceStream stream) const {
  const bool copy_ok = CudaDriver::CopyDeviceMemToHostAsync(dst, src, size, stream);
  return copy_ok;
}
// Host-to-device copy on a caller-supplied stream.
// Hands dst, src, size and stream unchanged to CudaDriver::CopyHostMemToDeviceAsync
// and returns that call's bool result as-is.
bool GPUDeviceManager::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size,
                                                DeviceStream stream) const {
  const bool copy_ok = CudaDriver::CopyHostMemToDeviceAsync(dst, src, size, stream);
  return copy_ok;
}
} // namespace gpu } // namespace gpu
} // namespace device } // namespace device
} // namespace mindspore } // namespace mindspore
...@@ -47,6 +47,9 @@ class GPUDeviceManager { ...@@ -47,6 +47,9 @@ class GPUDeviceManager {
bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size) const; bool CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size) const;
bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) const; bool CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) const;
bool CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size, DeviceStream stream) const;
bool CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size, DeviceStream stream) const;
static GPUDeviceManager &GetInstance() { static GPUDeviceManager &GetInstance() {
static GPUDeviceManager instance; static GPUDeviceManager instance;
return instance; return instance;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册