Move memory::Copy out from memory.h into memcpy.h

858dea88 · Yi Wang · 6cae35b5 · 858dea88 · 858dea88 · 858dea88
5 changed file
--- a/paddle/memory/CMakeLists.txt
+++ b/paddle/memory/CMakeLists.txt
 add_subdirectory(detail)
 cc_library(memory SRCS memory.cc)
+cc_library(memcpy SRCS memcpy.cc)
 cc_library(paddle_memory
    DEPS

--- a/paddle/memory/memcpy.cc
+++ b/paddle/memory/memcpy.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/memory/memcpy.h"
+#include <cstring>  // for memcpy
+#include "paddle/platform/device_context.h"
+namespace paddle {
+namespace memory {
+template <>
+void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
+                                                  platform::CPUPlace,
+                                                  const void* src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+#ifndef PADDLE_ONLY_CPU
+template <>
+void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::GPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::GPUPlaceGuard g(src_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
+}
+template <>
+void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::CPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::GPUPlaceGuard g(dst_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
+}
+template <>
+void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::GPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  if (dst_place == src_place) {
+    platform::GPUPlaceGuard g(src_place.device);
+    platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
+  } else {
+    platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
+                            stream);
+  }
+}
+#endif  // PADDLE_ONLY_CPU
--- a/paddle/memory/memcpy.h
+++ b/paddle/memory/memcpy.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "paddle/platform/gpu_info.h"
+#include "paddle/platform/place.h"
+namespace paddle {
+namespace memory {
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
+#ifndef PADDLE_ONLY_CPU
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
+          cudaStream_t stream);
+#endif  // PADDLE_ONLY_CPU
+}  // namespace memory
+}  // namespace paddle
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -46,13 +46,6 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
  return GetCPUBuddyAllocator()->Used();
 }
-template <>
-void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
-                                                  platform::CPUPlace,
-                                                  const void* src, size_t num) {
-  std::memcpy(dst, src, num);
-}
 #ifndef PADDLE_ONLY_CPU
 detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
@@ -85,41 +78,6 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
  return GetGPUBuddyAllocator(place.device)->Used();
 }
-template <>
-void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
-                                                  void* dst,
-                                                  platform::GPUPlace src_place,
-                                                  const void* src, size_t num,
-                                                  cudaStream_t stream) {
-  platform::SetDeviceId(src_place.device);
-  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
-}
-template <>
-void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
-                                                  void* dst,
-                                                  platform::CPUPlace src_place,
-                                                  const void* src, size_t num,
-                                                  cudaStream_t stream) {
-  platform::SetDeviceId(dst_place.device);
-  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
-}
-template <>
-void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
-                                                  void* dst,
-                                                  platform::GPUPlace src_place,
-                                                  const void* src, size_t num,
-                                                  cudaStream_t stream) {
-  if (dst_place == src_place) {
-    platform::SetDeviceId(src_place.device);
-    platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
-  } else {
-    platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
-                            stream);
-  }
-}
 #endif  // PADDLE_ONLY_CPU
 }  // namespace memory

--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -29,15 +29,6 @@ void Free(Place, void*);
 template <typename Place>
 size_t Used(Place);
-template <typename DstPlace, typename SrcPlace>
-void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
-#ifndef PADDLE_ONLY_CPU
-template <typename DstPlace, typename SrcPlace>
-void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
-          cudaStream_t stream);
-#endif  // PADDLE_ONLY_CPU
 template <typename T, /* must be POD types */
          typename Place /* platform::GPUPlace or platform::CPUPlace */,
          typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>