diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
index 8b8fb5d93881271bd684b01319fc6f5de0c3f190..518f5e0131bb6478ed4334d28548a8751bec46f1 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
@@ -43,8 +43,8 @@ TEST(allocator, allocator) {
   FLAGS_allocator_strategy = "auto_growth_best_fit";
 
   auto &instance = AllocatorFacade::Instance();
-  platform::Place place;
   size_t size = 1024;
+  platform::Place place;
 
   {
     place = platform::CPUPlace();
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc
index 0fb2e6e1496fcfed74121f0fae5ab87744f75aa2..8dd4de49b644c85b032b4a84138816ad8fa239db 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.cc
+++ b/paddle/fluid/memory/allocation/cpu_allocator.cc
@@ -20,25 +20,27 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-CPUAllocation::CPUAllocation(void *ptr, size_t size)
-    : Allocation(ptr, size, platform::CPUPlace()) {}
-
 bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 
 void CPUAllocator::FreeImpl(Allocation *allocation) {
-  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation *>(allocation));
-  free(allocation->ptr());
+  void *p = allocation->ptr();
+#ifdef _WIN32
+  _aligned_free(p);
+#else
+  free(p);
+#endif
   delete allocation;
 }
 
 Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
-  void *ptr;
-  auto status = posix_memalign(&ptr, kAlignment, size);
-  if (UNLIKELY(status) != 0) {
-    throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
-                                   size, status));
-  }
-  return new CPUAllocation(ptr, size);
+  void *p;
+#ifdef _WIN32
+  PADDLE_ENFORCE_NOT_NULL(p = _aligned_malloc(size, 4096), "Alloc %ld error!", size);
+#else
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096, size), 0, "Alloc %ld error!",
+                    size);
+#endif
+  return new Allocation(p, size, platform::CPUPlace());
 }
 }  // namespace allocation
 }  // namespace memory
diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h
index 9e0c2551860954f4681be6e1a223aa61ae3a4df0..b4d215a434461fc120a6557bfb72f074186684ae 100644
--- a/paddle/fluid/memory/allocation/cpu_allocator.h
+++ b/paddle/fluid/memory/allocation/cpu_allocator.h
@@ -31,12 +31,6 @@ namespace allocation {
 //
 // NOTE(yy): It is no need to use `BestFitAllocator` in CPU. We can import
 // an open-sourced allocator into Paddle.
-class CPUAllocator;
-class CPUAllocation : public Allocation {
- public:
-  CPUAllocation(void* ptr, size_t size);
-};
-
 class CPUAllocator : public Allocator {
  public:
   constexpr static size_t kAlignment = 64u;
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc
index 2e7c4ee78f4dcc51391233cca8ef896784550da1..895a24a6a2a6b8e399ec2ace48136d1ef16c62f6 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@@ -25,14 +25,12 @@ namespace allocation {
 bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
 void CUDAAllocator::FreeImpl(Allocation* allocation) {
   platform::CUDADeviceGuard guard(place_.device);
-  auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
-  PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
-  PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
+  PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(allocation->place()),
                     place_);
   PADDLE_ENFORCE(cudaFree(allocation->ptr()));
-  VLOG(2) << "cudaFree is called";
   delete allocation;
 }
+
 Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
   platform::CUDADeviceGuard guard(place_.device);
   void* ptr;
@@ -42,8 +40,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
         "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
         status, cudaGetErrorString(status)));
   }
-  return new CUDAAllocation(ptr, size, platform::Place(place_));
+  return new Allocation(ptr, size, platform::Place(place_));
 }
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h
index 962f9a7c028a77ccb451fa06d295110c46a8cdc4..580a2d1df1d5997a27180740393741ec8973bf18 100644
--- a/paddle/fluid/memory/allocation/cuda_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_allocator.h
@@ -20,13 +20,6 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
-// CUDA System allocator and allocation.
-// Just a flag type.
-class CUDAAllocation : public Allocation {
- public:
-  using Allocation::Allocation;
-};
-
 class CUDAAllocator : public Allocator {
  public:
   explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc
index dfc52edf9c8b3539c565d00e291c121b62c2e22a..5a3d817211750d3e19e65344d1eab5a96800c674 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.cc
+++ b/paddle/fluid/memory/allocation/pinned_allocator.cc
@@ -21,19 +21,14 @@ namespace memory {
 namespace allocation {
 bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
 void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
-  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation *>(allocation));
   PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
   delete allocation;
 }
 Allocation *CPUPinnedAllocator::AllocateImpl(size_t size,
                                              Allocator::Attr attr) {
-  // PADDLE_ENFORCE_EQ(
-  //    attr, kCrossDevice,
-  //    "CPUPinnedAllocator should be used for Cross-Device Communication");
-
   void *ptr;
   PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
-  return new CPUPinnedAllocation(ptr, size);
+  return new Allocation(ptr, size, platform::CUDAPinnedPlace());
 }
 }  // namespace allocation
 }  // namespace memory
diff --git a/paddle/fluid/memory/allocation/pinned_allocator.h b/paddle/fluid/memory/allocation/pinned_allocator.h
index 3acb1f0c5ae35261667296280356efdc4f4b7649..deeb55a8fb0396a312286f5c2692114e9e4afc8d 100644
--- a/paddle/fluid/memory/allocation/pinned_allocator.h
+++ b/paddle/fluid/memory/allocation/pinned_allocator.h
@@ -20,12 +20,6 @@ namespace memory {
 namespace allocation {
 
 // Allocator uses `cudaHostAlloc`
-class CPUPinnedAllocation : public Allocation {
- public:
-  CPUPinnedAllocation(void *ptr, size_t size)
-      : Allocation(ptr, size, platform::CUDAPinnedPlace()) {}
-};
-
 class CPUPinnedAllocator : public Allocator {
  public:
   bool IsAllocThreadSafe() const override;