diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocation.h b/paddle/fluid/memory/allocation/cuda_device_context_allocation.h
index cf0d8792d0ab4cb7bd1e23344950d924aae71280..02011f88c1d9d80b24c7bd1c28747a85e4738711 100644
--- a/paddle/fluid/memory/allocation/cuda_device_context_allocation.h
+++ b/paddle/fluid/memory/allocation/cuda_device_context_allocation.h
@@ -20,6 +20,12 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+/**
+ * CUDADeviceContextAllocation is a wrapper around an underlying allocation.
+ * It adds a CUDA stream callback for the underlying allocation so that the
+ * CUDADeviceContextAllocation can be used on a CUDA stream, which deletes
+ * the allocation in the callback.
+ */
 class CUDADeviceContextAllocation : public Allocation {
  public:
   explicit CUDADeviceContextAllocation(AllocationPtr allocation);
diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
index e27cb72af6e0961fb1aafcf9e7587b81f38c541a..34bd1176db9cd1bfa57fd5dee401705539f974ad 100644
--- a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
+++ b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
@@ -24,6 +24,12 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+/**
+ * CUDADeviceContextAllocator allocates a CUDADeviceContextAllocation
+ * after waiting for a self-created event on the default stream. This lets
+ * a non-default stream allocate GPU memory that will be released by a
+ * stream callback.
+ */
 class CUDADeviceContextAllocator : public Allocator {
  public:
   explicit CUDADeviceContextAllocator(platform::CUDAPlace place,
diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h b/paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
index 03b7c34f71e8ad5141fec6c8d50c2f4dbd781654..b423f226d94492c4ce0d8c8752b0cca2b1745bb3 100644
--- a/paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
+++ b/paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
@@ -24,6 +24,13 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+/**
+ * CUDADeviceContextAllocatorPool is a singleton that stores a mapping from
+ * CUDAPlace(s) to std::shared_ptr<CUDADeviceContextAllocator>. When a
+ * CUDADeviceContext's compute stream isn't the default stream, it can use
+ * this class to allocate GPU memory, which will be released by a callback
+ * after stream execution.
+ */
 class CUDADeviceContextAllocatorPool {
  public:
   static CUDADeviceContextAllocatorPool &Instance();