From 303fb789a550dc1b962af008198158e583918f7d Mon Sep 17 00:00:00 2001 From: qijun Date: Fri, 28 Jul 2017 09:47:45 +0000 Subject: [PATCH] refine tensor copy from --- paddle/framework/detail/tensor-inl.h | 34 ++++++++-------------------- paddle/framework/tensor.h | 9 +------- paddle/memory/memcpy.cc | 6 ++--- paddle/memory/memcpy.h | 2 +- 4 files changed, 15 insertions(+), 36 deletions(-) diff --git a/paddle/framework/detail/tensor-inl.h b/paddle/framework/detail/tensor-inl.h index 78797f58d2e..e7ff09dd5c9 100644 --- a/paddle/framework/detail/tensor-inl.h +++ b/paddle/framework/detail/tensor-inl.h @@ -83,7 +83,7 @@ inline void Tensor::ShareDataWith(const Tensor& src) { template inline void Tensor::CopyFrom(const Tensor& src, - const platform::CPUPlace& dst_place) { + const platform::Place& dst_place) { src.check_memory_size(); Resize(src.dims()); @@ -94,41 +94,27 @@ inline void Tensor::CopyFrom(const Tensor& src, auto size = product(src.dims_) * sizeof(T); - if (platform::is_cpu_place(src_place)) { + if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size); } #ifndef PADDLE_ONLY_CPU - else if (platform::is_gpu_place(src_place)) { + else if (platform::is_gpu_place(src_place) && + platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); - } -#endif -} - -#ifndef PADDLE_ONLY_CPU -template -inline void Tensor::CopyFrom(const Tensor& src, - const platform::GPUPlace& dst_place) { - src.check_memory_size(); - Resize(src.dims()); - - auto src_place = src.holder_->place(); - auto src_ptr = static_cast(src.data()); - - auto dst_ptr = static_cast(mutable_data(dst_place)); - - auto size = product(src.dims_) * sizeof(T); - - if (platform::is_cpu_place(src_place)) { + } else if (platform::is_cpu_place(src_place) && + platform::is_gpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); - } else if (platform::is_gpu_place(src_place)) { + } else if (platform::is_gpu_place(src_place) && + platform::is_gpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size, 0); } -} + #endif +} template inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 039ab08374e..76070f636b0 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -94,14 +94,7 @@ class Tensor { * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. */ template - inline void CopyFrom(const Tensor& src, - const platform::CPUDeviceContext& ctx); - -#ifndef PADDLE_ONLY_CPU - template - inline void CopyFrom(const Tensor& src, - const platform::CUDADeviceContext& ctx); -#endif + inline void CopyFrom(const Tensor& src, const platform::Place& dst_place); /** * @brief Return the slice of the tensor. diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index 2cc32dd8dde..aaab1142ca1 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -34,7 +34,7 @@ void Copy(platform::CPUPlace dst_place, void* dst, platform::GPUPlace src_place, const void* src, size_t num, - cudaStream_t stream = 0) { + cudaStream_t stream) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream); } @@ -44,7 +44,7 @@ void Copy(platform::GPUPlace dst_place, void* dst, platform::CPUPlace src_place, const void* src, size_t num, - cudaStream_t stream = 0) { + cudaStream_t stream) { platform::SetDeviceId(dst_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream); } @@ -54,7 +54,7 @@ void Copy(platform::GPUPlace dst_place, void* dst, platform::GPUPlace src_place, const void* src, size_t num, - cudaStream_t stream = 0) { + cudaStream_t stream) { if (dst_place == src_place) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream); diff --git a/paddle/memory/memcpy.h b/paddle/memory/memcpy.h index eb2647c617d..2b9c0eada6e 100644 --- a/paddle/memory/memcpy.h +++ b/paddle/memory/memcpy.h @@ -51,7 +51,7 @@ void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num); */ template void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num, - cudaStream_t stream = 0); + cudaStream_t stream); #endif // PADDLE_ONLY_CPU -- GitLab