diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index f7a6b5ba84ca1762bd903790aa3c0346b22ed035..6eb678e301ad0f282dad433e1b19ebe0a5c26a3f 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -45,11 +45,10 @@ class Tensor {
   friend struct EigenVector;
 
  public:
-  Tensor() : offset_(0), is_pinned_(false) {}
+  Tensor() : offset_(0) {}
 
   /*! Constructor with place should only be used in pybind. */
-  explicit Tensor(const platform::Place& place)
-      : offset_(0), is_pinned_(false) {
+  explicit Tensor(const platform::Place& place) : offset_(0) {
     holder_->set_place(place);
   }
 
@@ -70,12 +69,11 @@ class Tensor {
    * @note    If not exist, then allocation.
    */
   template <typename T>
-  inline T* mutable_data(platform::Place place, bool is_pinned = false);
+  inline T* mutable_data(platform::Place place);
 
-  inline void* mutable_data(platform::Place place, std::type_index type,
-                            bool is_pinned = false);
+  inline void* mutable_data(platform::Place place, std::type_index type);
 
-  inline void* mutable_data(platform::Place place, bool is_pinned = false);
+  inline void* mutable_data(platform::Place place);
 
   /**
    * @brief     Return a pointer to mutable memory block.
@@ -86,8 +84,7 @@ class Tensor {
    * @note      If not exist, then allocation.
    */
   template <typename T>
-  inline T* mutable_data(DDim dims, platform::Place place,
-                         bool is_pinned = false);
+  inline T* mutable_data(DDim dims, platform::Place place);
 
   /*! Return the dimensions of the memory block. */
   inline const DDim& dims() const;
@@ -152,14 +149,12 @@ class Tensor {
 
   template <typename Place>
   struct PlaceholderImpl : public Placeholder {
-    PlaceholderImpl(Place place, size_t size, std::type_index type,
-                    bool is_pinned = false)
-        : ptr_(static_cast<uint8_t*>(memory::Alloc(place, size, is_pinned)),
-               memory::PODDeleter<uint8_t, Place>(place, is_pinned)),
+    PlaceholderImpl(Place place, size_t size, std::type_index type)
+        : ptr_(static_cast<uint8_t*>(memory::Alloc(place, size)),
+               memory::PODDeleter<uint8_t, Place>(place)),
           place_(place),
           size_(size),
-          type_(type),
-          is_pinned_(is_pinned) {
+          type_(type) {
       PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
                               (is_cpu_place(place_) ? "CPU" : "GPU"));
     }
@@ -182,9 +177,6 @@ class Tensor {
 
     /* the current type of memory */
     std::type_index type_;
-
-    /*! use pinned memory or not. */
-    bool is_pinned_;
   };
 
   /*! holds the memory block if allocated. */
@@ -219,7 +211,6 @@ class Tensor {
    *          PlaceHolder::ptr_ and where the tensor data really begins.
    */
   size_t offset_;
-  bool is_pinned_;
 };
 
 inline void Tensor::switch_place(platform::Place new_place) {
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
index 113814971e115fa88bd0ded34017fa26a9dd5803..7a4839044008338dda43f75b5ee6def500b78270 100644
--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -101,21 +101,19 @@ inline T* Tensor::data() {
 }
 
 template <typename T>
-inline T* Tensor::mutable_data(DDim dims, platform::Place place,
-                               bool is_pinned) {
+inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
   static_assert(std::is_pod<T>::value, "T must be POD");
   Resize(dims);
-  return mutable_data<T>(place, is_pinned);
+  return mutable_data<T>(place);
 }
 
 template <typename T>
-inline T* Tensor::mutable_data(platform::Place place, bool is_pinned) {
+inline T* Tensor::mutable_data(platform::Place place) {
   static_assert(std::is_pod<T>::value, "T must be POD");
-  return reinterpret_cast<T*>(mutable_data(place, typeid(T), is_pinned));
+  return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
 }
 
-inline void* Tensor::mutable_data(platform::Place place, std::type_index type,
-                                  bool is_pinned) {
+inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
   if (holder_ != nullptr) {
     holder_->set_type(type);
   }
@@ -129,27 +127,26 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type,
       holder_->size() < size + offset_) {
     if (platform::is_cpu_place(place)) {
       holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
-          boost::get<platform::CPUPlace>(place), size, type, is_pinned));
+          boost::get<platform::CPUPlace>(place), size, type));
     } else if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
       PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
     }
 #else
       holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
-          boost::get<platform::CUDAPlace>(place), size, type, is_pinned));
+          boost::get<platform::CUDAPlace>(place), size, type));
     }
 #endif
     offset_ = 0;
-    is_pinned_ = is_pinned;
   }
   return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                  offset_);
 }
 
-inline void* Tensor::mutable_data(platform::Place place, bool is_pinned) {
+inline void* Tensor::mutable_data(platform::Place place) {
   PADDLE_ENFORCE(this->holder_ != nullptr,
                  "Cannot invoke mutable data if current hold nothing");
-  return mutable_data(place, holder_->type(), is_pinned);
+  return mutable_data(place, holder_->type());
 }
 
 inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
@@ -191,8 +188,6 @@ inline const DDim& Tensor::dims() const { return dims_; }
 
 inline int64_t Tensor::numel() const { return product(dims_); }
 
-inline bool Tensor::isPinned() const { return is_pinned_; }
-
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
   Tensor res;
   res.ShareDataWith(src);
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index 71d28dcbade1bcb48d2e906c61e03236860cb7d0..d20c5c86826f52ce671f92dd2f033fccece66b93 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -123,20 +123,20 @@ bool GPUAllocator::UseGpu() const { return true; }
 // memory. It’s locked to a physical address.
 void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
   if (size <= 0) return nullptr;
-  void* p;
-  // NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
+
+  // NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size
   // of host pinned allocation. Allocates too much would reduce
   // the amount of memory available to the underlying system for paging.
-
-  size_t usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_;
+  size_t usable = CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_;
 
   if (size > usable) return nullptr;
 
   // PINNED memory is visible to all CUDA contexts.
   cudaError_t result = cudaMallocHost(&p, size);
+
   if (result == cudaSuccess) {
-    index = 1;
-    fallback_alloc_size_ += size;
+    index = 1;  // PINNED memory
+    cuda_pinnd_alloc_size_ += size;
     return p;
   }
 
@@ -147,8 +147,8 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
   cudaError_t err;
   PADDLE_ASSERT(index == 1);
 
-  PADDLE_ASSERT(fallback_alloc_size_ >= size);
-  fallback_alloc_size_ -= size;
+  PADDLE_ASSERT(cuda_pinnd_alloc_size_ >= size);
+  cuda_pinnd_alloc_size_ -= size;
   err = cudaFreeHost(p);
 
   // Purposefully allow cudaErrorCudartUnloading, because
diff --git a/paddle/fluid/memory/detail/system_allocator.h b/paddle/fluid/memory/detail/system_allocator.h
index 3e024125fabb8bbff094ed4455f164dfd4cae163..c2f474f4b66cb486dc4dc0a9365940c7359eb18d 100644
--- a/paddle/fluid/memory/detail/system_allocator.h
+++ b/paddle/fluid/memory/detail/system_allocator.h
@@ -59,9 +59,7 @@ class CUDAPinnedAllocator : public SystemAllocator {
   virtual bool UseGpu() const;
 
  private:
-  size_t gpu_alloc_size_ =
-      0;  // TODO(zcd): how to define the upper limit of CUDAPinnedMemory?
-  size_t fallback_alloc_size_ = 0;
+  size_t cuda_pinnd_alloc_size_ = 0;
 };
 #endif
 
diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/memory.cc
index f2d5f250bfb56fb522416e83ab4c5315a9f533f0..6da9f00656cb74a876881e13469e5edbaa363724 100644
--- a/paddle/fluid/memory/memory.cc
+++ b/paddle/fluid/memory/memory.cc
@@ -38,8 +38,7 @@ BuddyAllocator* GetCPUBuddyAllocator() {
 }
 
 template <>
-void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size,
-                                bool is_pinned) {
+void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
   VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
   void* p = GetCPUBuddyAllocator()->Alloc(size);
   VLOG(10) << "  pointer=" << p;
@@ -47,8 +46,7 @@ void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size,
 }
 
 template <>
-void Free<platform::CPUPlace>(platform::CPUPlace place, void* p,
-                              bool is_pinned) {
+void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
   VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
   GetCPUBuddyAllocator()->Free(p);
 }
@@ -85,27 +83,13 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
 }
 
 BuddyAllocator* GetCUDAPinnedBuddyAllocator(int gpu_id) {
-  static BuddyAllocator** as = NULL;
+  static BuddyAllocator* as = NULL;
   if (as == NULL) {
-    int gpu_num = platform::GetCUDADeviceCount();
-    as = new BuddyAllocator*[gpu_num];
-    for (int gpu = 0; gpu < gpu_num; gpu++) {
-      as[gpu] = nullptr;
-    }
-  }
-  platform::SetDeviceId(gpu_id);
-  if (!as[gpu_id]) {
-    as[gpu_id] = new BuddyAllocator(new detail::CUDAPinnedAllocator,
-                                    platform::GpuMinChunkSize(),
-                                    platform::GpuMaxChunkSize());
-    VLOG(10) << "\n\nNOTE: each GPU device use "
-             << FLAGS_fraction_of_gpu_memory_to_use * 100
-             << "% of GPU memory.\n"
-             << "You can set GFlags environment variable '"
-             << "FLAGS_fraction_of_gpu_memory_to_use"
-             << "' to change the fraction of GPU usage.\n\n";
+    as = new BuddyAllocator(new detail::CUDAPinnedAllocator,
+                            platform::CpuMinChunkSize(),
+                            platform::CpuMaxChunkSize());
   }
-  return as[gpu_id];
+  return as;
 }
 
 template <>
@@ -114,16 +98,9 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
 }
 
 template <>
-void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size,
-                                 bool is_pinned) {
-  void* ptr;
-  if (is_pinned) {
-    auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device);
-    ptr = buddy_allocator->Alloc(size);
-  } else {
-    auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
-    ptr = buddy_allocator->Alloc(size);
-  }
+void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
+  auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
+  void* ptr = buddy_allocator->Alloc(size);
 
   if (ptr == nullptr) {
     int cur_dev = platform::GetCurrentDeviceId();
@@ -142,13 +119,39 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size,
 }
 
 template <>
-void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p,
-                               bool is_pinned) {
-  if (is_pinned) {
-    GetCUDAPinnedBuddyAllocator(place.device)->Free(p);
-  } else {
-    GetGPUBuddyAllocator(place.device)->Free(p);
+void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
+  GetGPUBuddyAllocator(place.device)->Free(p);
+}
+
+size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
+  return GetGPUBuddyAllocator(place.device)->Used();
+}
+
+template <>
+void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
+                                       size_t size) {
+  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device);
+  void* ptr = buddy_allocator->Alloc(size);
+
+  if (ptr == nullptr) {
+    int cur_dev = platform::GetCurrentDeviceId();
+    platform::SetDeviceId(place.device);
+    size_t avail, total;
+    platform::GpuMemoryUsage(avail, total);
+    LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
+                 << place.device << ", available " << avail << " bytes";
+    LOG(WARNING) << "total " << total;
+    LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
+    LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
+    LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
+    platform::SetDeviceId(cur_dev);
   }
+  return ptr;
+}
+
+template <>
+void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) {
+  GetCUDAPinnedBuddyAllocator(place.device)->Free(p);
 }
 
 #endif
@@ -165,6 +168,10 @@ size_t Usage::operator()(const platform::CUDAPlace& gpu) const {
 #endif
 }
 
+size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const {
+  return Used(cuda_pinned);
+}
+
 size_t memory_usage(const platform::Place& p) {
   return boost::apply_visitor(Usage(), p);
 }
diff --git a/paddle/fluid/memory/memory.h b/paddle/fluid/memory/memory.h
index 062bfc880e78dc5d90c567ffe5c4e521704c9ca6..fba7372e7184ef4a566d101210c55746b6313d5c 100644
--- a/paddle/fluid/memory/memory.h
+++ b/paddle/fluid/memory/memory.h
@@ -57,6 +57,7 @@ size_t Used(Place place);
 struct Usage : public boost::static_visitor<size_t> {
   size_t operator()(const platform::CPUPlace& cpu) const;
   size_t operator()(const platform::CUDAPlace& gpu) const;
+  size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const;
 };
 
 size_t memory_usage(const platform::Place& p);
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 202394c7be7e103a609dd0999fc883c794ef0edd..e25cfe60b1e85303c59c847dcd5aebb508652679 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -118,6 +118,18 @@ struct DefaultDeviceContextType<platform::CUDAPlace> {
   using TYPE = CUDADeviceContext;
 };
 
+// Currently, CUDAPinnedDeviceContext is only used to data copying.
+// class CUDAPinnedDeviceContext : public DeviceContext {
+// public:
+//  CUDAPinnedDeviceContext();
+//  explicit CUDAPinnedDeviceContext(CUDAPinnedPlace place);
+//
+//  Place GetPlace() const override;
+//
+// private:
+//  CUDAPinnedPlace place_;
+//};
+
 #endif
 
 #ifdef PADDLE_WITH_MKLDNN
diff --git a/paddle/fluid/platform/place.cc b/paddle/fluid/platform/place.cc
index de8f958eb012cb1ac563cbbbac8951e439bf8f33..11ca87c21166437abe07faa3ddddee1a77cd4e7c 100644
--- a/paddle/fluid/platform/place.cc
+++ b/paddle/fluid/platform/place.cc
@@ -40,12 +40,19 @@ const Place &get_place() { return the_default_place; }
 
 const CUDAPlace default_gpu() { return CUDAPlace(0); }
 const CPUPlace default_cpu() { return CPUPlace(); }
+const CUDAPinnedPlace default_cuda_pinned() { return CUDAPinnedPlace(); }
 
 bool is_gpu_place(const Place &p) {
   return boost::apply_visitor(IsCUDAPlace(), p);
 }
 
-bool is_cpu_place(const Place &p) { return !is_gpu_place(p); }
+bool is_cpu_place(const Place &p) {
+  return boost::apply_visitor(IsCPUPlace(), p);
+}
+
+bool is_cuda_pinned_place(const Place &p) {
+  return boost::apply_visitor(IsCUDAPinnedPlace(), p);
+}
 
 bool places_are_same_class(const Place &p1, const Place &p2) {
   return p1.which() == p2.which();
@@ -53,7 +60,7 @@ bool places_are_same_class(const Place &p1, const Place &p2) {
 
 bool is_same_place(const Place &p1, const Place &p2) {
   if (places_are_same_class(p1, p2)) {
-    if (is_cpu_place(p1)) {
+    if (is_cpu_place(p1) || is_cuda_pinned_place(p1)) {
       return true;
     } else {
       return boost::get<CUDAPlace>(p1) == boost::get<CUDAPlace>(p2);
diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h
index 4cc8b377b8b671eb5a446ecbae21ba9628fbd2c8..8f3acd8df6b6ee5ed0cf574ea0bc0c92be59170a 100644
--- a/paddle/fluid/platform/place.h
+++ b/paddle/fluid/platform/place.h
@@ -45,12 +45,33 @@ struct CUDAPlace {
   int device;
 };
 
+struct CUDAPinnedPlace {
+  CUDAPinnedPlace() {}
+
+  // needed for variant equality comparison
+  inline bool operator==(const CUDAPinnedPlace &) const { return true; }
+  inline bool operator!=(const CUDAPinnedPlace &) const { return false; }
+};
+
 struct IsCUDAPlace : public boost::static_visitor<bool> {
   bool operator()(const CPUPlace &) const { return false; }
   bool operator()(const CUDAPlace &gpu) const { return true; }
+  bool operator()(const CUDAPinnedPlace &) const { return false; }
 };
 
-typedef boost::variant<CUDAPlace, CPUPlace> Place;
+struct IsCPUPlace : public boost::static_visitor<bool> {
+  bool operator()(const CPUPlace &cpu) const { return true; }
+  bool operator()(const CUDAPlace &) const { return false; }
+  bool operator()(const CUDAPinnedPlace &) const { return false; }
+};
+
+struct IsCUDAPinnedPlace : public boost::static_visitor<bool> {
+  bool operator()(const CPUPlace &) const { return false; }
+  bool operator()(const CUDAPlace &) const { return false; }
+  bool operator()(const CUDAPinnedPlace &cuda_pinned) const { return true; }
+};
+
+typedef boost::variant<CUDAPlace, CPUPlace, CUDAPinnedPlace> Place;
 
 using PlaceList = std::vector<Place>;
 
@@ -59,9 +80,11 @@ const Place &get_place();
 
 const CUDAPlace default_gpu();
 const CPUPlace default_cpu();
+const CUDAPinnedPlace default_cuda_pinned();
 
 bool is_gpu_place(const Place &);
 bool is_cpu_place(const Place &);
+bool is_cuda_pinned_place(const Place &);
 bool places_are_same_class(const Place &, const Place &);
 bool is_same_place(const Place &, const Place &);
 
@@ -97,6 +120,11 @@ struct PlaceVisitorWrapper
     return typename Visitor::result_type();
 #endif
   }
+
+  typename Visitor::result_type operator()(
+      const CUDAPinnedPlace &cuda_pinned) const {
+    return visitor_(cuda_pinned);
+  }
 };
 
 template <typename Visitor>