diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt
index 3943c3cfad31d13a00645aba6fc153d3d13da987..86625124967d7dfb392f5a8e74e591cb2955385f 100644
--- a/paddle/memory/CMakeLists.txt
+++ b/paddle/memory/CMakeLists.txt
@@ -1 +1,7 @@
 add_subdirectory(detail)
+
+if(${WITH_GPU})
+  nv_library(memory SRCS memory.cc)
+else(${WITH_GPU})
+  cc_library(memory SRCS memroy.cc)
+endif(${WITH_GPU})
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt
index 81ca8a0bbf0ef2162a6efc63037a18b10bbcd563..3b5bbd7a12fab043c7e6cc66943715a40371c8a8 100644
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
@@ -1,2 +1 @@
-cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
-nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc)
+cc_test(system_allocator_test SRCS system_allocator_test.cc)
diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h
deleted file mode 100644
index 17753ccef718f8b258e962384c3273317fc72bec..0000000000000000000000000000000000000000
--- a/paddle/memory/detail/cpu_allocator.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <stddef.h>  // for size_t
-#include <cstdlib>   // for malloc and free
-
-#ifndef _WIN32
-#include <sys/mman.h>  // for mlock and munlock
-#endif
-
-namespace paddle {
-namespace memory {
-namespace detail {
-
-// CPUAllocator<staging=true> calls mlock, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocates too much would reduce the
-// amount of memory available to the system for paging.  So, by
-// default, we should use CPUAllocator<staging=false>.
-template <bool staging>
-class CPUAllocator {
- public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-
-template <>
-class CPUAllocator<false> {
- public:
-  void* Alloc(size_t size) { return std::malloc(size); }
-  void Free(void* p, size_t size) { std::free(p); }
-};
-
-template <>
-class CPUAllocator<true> {
- public:
-  void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p == nullptr) {
-      return p;
-    }
-#ifndef _WIN32
-    mlock(p, size);
-#endif
-    return p;
-  }
-
-  void Free(void* p, size_t size) {
-#ifndef _WIN32
-    munlock(p, size);
-#endif
-    std::free(p);
-  }
-};
-
-}  // namespace detail
-}  // namespace memory
-}  // namespace paddle
diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc
deleted file mode 100644
index 4e45266cd8ad8a2bfd8f2135d259691559bbcbf4..0000000000000000000000000000000000000000
--- a/paddle/memory/detail/cpu_allocator_test.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/memory/detail/cpu_allocator.h"
-#include "gtest/gtest.h"
-
-TEST(CPUAllocator, NonStaging) {
-  paddle::memory::detail::CPUAllocator<false> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
-
-TEST(CPUAllocator, Staging) {
-  paddle::memory::detail::CPUAllocator<true> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/system_allocator.h
similarity index 58%
rename from paddle/memory/detail/gpu_allocator.h
rename to paddle/memory/detail/system_allocator.h
index 9452c41fb897554c731ffd00a4fb70b7d4abf0b6..0a6455318899564dbd19b6b8dc8e7b61d59e7832 100644
--- a/paddle/memory/detail/gpu_allocator.h
+++ b/paddle/memory/detail/system_allocator.h
@@ -14,20 +14,58 @@ limitations under the License. */
 
 #pragma once
 
-#include <stddef.h>  // for size_t
+#include <stddef.h>    // for size_t
+#include <sys/mman.h>  // for mlock and munlock
+#include <cstdlib>     // for malloc and free
 
-#include <thrust/system_error.h>
+#ifndef PADDLE_ONLY_CPU
 #include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+#endif  // PADDLE_ONLY_CPU
 
 namespace paddle {
 namespace memory {
 namespace detail {
 
+class SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size) = 0;
+  virtual void* Free(void* p) = 0;
+};
+
+// CPUAllocator<lock_memory=true> calls mlock, which returns pinned
+// and locked memory as staging areas for data exchange between host
+// and device.  Allocates too much would reduce the amount of memory
+// available to the system for paging.  So, by default, we should use
+// CPUAllocator<staging=false>.
+template <bool lock_memory>
+class CPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size) {
+    void* p = std::malloc(size);
+    if (p != nullptr && lock_memory) {
+      mlock(p, size);
+    }
+    return p;
+  }
+
+  virtual void Free(void* p, size_t size) {
+    if (p != nullptr && lock_memory) {
+      munlock(p, size);
+    }
+    std::free(p);
+  }
+};
+
+#ifndef PADDLE_ONLY_CPU  // The following code are for CUDA.
+
+namespace {
 inline void throw_on_error(cudaError_t e, const char* message) {
   if (e) {
     throw thrust::system_error(e, thrust::cuda_category(), message);
   }
 }
+}  // namespace
 
 // GPUAllocator<staging=true> calls cudaHostMalloc, which returns
 // pinned and locked memory as staging areas for data exchange
@@ -36,17 +74,11 @@ inline void throw_on_error(cudaError_t e, const char* message) {
 // default, we should use GPUAllocator<staging=false>.
 template <bool staging>
 class GPUAllocator {
-public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-
-template <>
-class GPUAllocator<false> {
-public:
+ public:
   void* Alloc(size_t size) {
     void* p = 0;
-    cudaError_t result = cudaMalloc(&p, size);
+    cudaError_t result =
+        staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
     if (result == cudaSuccess) {
       return p;
     }
@@ -60,32 +92,15 @@ public:
     // that is returned if you ever call cudaFree after the
     // driver has already shutdown. This happens only if the
     // process is terminating, in which case we don't care if
-    // cudaFree succeeds. 
-    auto err = cudaFree(p);
+    // cudaFree succeeds.
+    auto err = staging ? cudaFreeHost(p) : cudaFree(p);
     if (err != cudaErrorCudartUnloading) {
-        throw_on_error(err, "cudaFree failed");
+      throw_on_error(err, "cudaFree failed");
     }
   }
 };
 
-template <>
-class GPUAllocator<true> {
-public:
-  void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = cudaMallocHost(&p, size);
-    if (result == cudaSuccess) {
-        return p;
-    }
-    // clear last error
-    cudaGetLastError();
-    return nullptr;
-  }
-
-  void Free(void* p, size_t size) {
-    throw_on_error(cudaFreeHost(p), "cudaFreeHost failed");
-  }
-};
+#endif  // PADDLE_ONLY_CPU
 
 }  // namespace detail
 }  // namespace memory
diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc
similarity index 69%
rename from paddle/memory/detail/gpu_allocator_test.cc
rename to paddle/memory/detail/system_allocator_test.cc
index 18c1c9ab43084ae72185565cb7c04cf6d5c9937c..4e7b8018b6a072bcac0f4dc636df440cbc350303 100644
--- a/paddle/memory/detail/gpu_allocator_test.cc
+++ b/paddle/memory/detail/system_allocator_test.cc
@@ -12,9 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/memory/detail/gpu_allocator.h"
+#include "paddle/memory/detail/system_allocator.h"
 #include "gtest/gtest.h"
 
+TEST(CPUAllocator, NoLockMem) {
+  paddle::memory::detail::CPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+
+TEST(CPUAllocator, LockMem) {
+  paddle::memory::detail::CPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+
+#ifndef PADDLE_ONLY_CPU
+
 TEST(GPUAllocator, NonStaging) {
   paddle::memory::detail::GPUAllocator<false> a;
   void* p = a.Alloc(4096);
@@ -28,3 +44,5 @@ TEST(GPUAllocator, Staging) {
   EXPECT_NE(p, nullptr);
   a.Free(p, 4096);
 }
+
+#endif  // PADDLE_ONLY_CPU
diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc
index b617923731a4d92e9765e2b73c55984a70a59264..ca3c01ebdb03598f760d19475b42930a137b3bba 100644
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -14,48 +14,41 @@ limitations under the License. */
 
 #include "paddle/memory/memory.h"
 
+#include "paddle/memory/detail/cpu_allocator.h"
+#include "paddle/memory/detail/gpu_allocator.h"
+
 namespace paddle {
 namespace memory {
 
-template <>
-void* Alloc<CPUPlace>(CPUPlace, size_t size) {
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size);
-}
-
-void* AllocStaging(CPUPlace, size_t size) {
-  return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size);
-}
-
-template <>
-void* Alloc<GPUPlace>(GPUPlace pl, size_t size) {
-  return GetGPUBuddyAllocator(pl.device)->Alloc(size);
-}
-
-template <>
-void Free<CPUPlace>(CPUPlace, void* p) {
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
-}
-
-void FreeStaging(CPUPlace, void* p) {
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
-}
-
-#ifdef PADDLE_WITH_GPU
-template <>
-void* Alloc<GPUPlace>(GPUPlace pl, void* p) {
-  return GetGPUBuddyAllocator(pl.device)->Free(p);
-}
-
-template <>
-size_t Used<CPUPlace>(CPUPlace) {
+void Alloc(paddle::platform::Place pl, size_t size) {
+#ifndef PADDLE_ONLY_CPU
+  if (paddle::platform::is_gpu_place(pl)) {
+    return GetGPUBuddyAllocator(pl.device)->Alloc(size);
+  }
+#endif  // PADDLE_ONLY_CPU
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
+  return GetCPUBuddyAllocator()->Alloc(size);
+}
+
+void Free(paddle::platform::Place pl, void* p) {
+#ifndef PADDLE_ONLY_CPU
+  if (paddle::platform::is_gpu_place(pl)) {
+    GetGPUBuddyAllocator(pl.device)->Free(p);
+  }
+#endif  // PADDLE_ONLY_CPU
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
+  GetCPUBuddyAllocator()->Free(p);
+}
+
+size_t Used(paddle::platform::Place pl) {
+#ifndef PADDLE_ONLY_CPU
+  if (paddle::platform::is_gpu_place(pl)) {
+    return GetGPUBuddyAllocator(pl.device)->Used();
+  }
+#endif  // PADDLE_ONLY_CPU
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
   return GetCPUBuddyAllocator()->Used();
 }
 
-template <>
-size_t Alloc<GPUPlace>(GPUPlace pl) {
-  return GetGPUBuddyAllocator(pl.device)->Used();
-}
-#endif  // PADDLE_WITH_GPU
-
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h
index 8c15a133bb4e9762d4264ee0d02ad96a3ed33e30..0bc609205eca2c53d85b1a2533f8f36d5b19595e 100644
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -19,19 +19,9 @@ limitations under the License. */
 namespace paddle {
 namespace memory {
 
-template <typename paddle::framework::Place>
-void* Alloc(Place, size_t);
-template <typename paddle::framework::Place>
-void Free(Place, void*);
-template <typename paddle::framework::Place>
-size_t Used(Place);
-
-// Staging memory means "pinned" host memory that can be mapped into
-// the CUDA memory space and accessed by the device rapidly.  Don't
-// allocate too much staging memory; otherwise system performance will
-// degrade because the OS cannot find enough swap memory space.
-void* AllocStaging(CPUPlace, size_t);
-void* FreeStaging(CPUPlace, size_t);
+void* Alloc(paddle::framework::Place, size_t);
+void Free(paddle::framework::Place, void*);
+size_t Used(paddle::framework::Place);
 
 }  // namespace memory
 }  // namespace paddle