diff --git a/paddle/framework/mixed_vector.h b/paddle/framework/mixed_vector.h
index fe9d8a44a5fb149202b3a9cf0f3a0b05135f4b20..d388da4f2c79de9e60c972824254aeb014f8aecf 100644
--- a/paddle/framework/mixed_vector.h
+++ b/paddle/framework/mixed_vector.h
@@ -25,13 +25,17 @@
 namespace paddle {
 namespace framework {
 
+// Vector implements the std::vector interface, and can get Data or
+// MutableData from any place. The data will be synced implicitly inside.
 template <typename T>
 class Vector {
  public:
   using value_type = T;
 
+  // Default ctor. Create empty Vector
   Vector() { InitEmpty(); }
 
+  // Fill vector with value. The vector size is `count`.
   explicit Vector(size_t count, const T& value = T()) {
     if (count == 0) {
       InitEmpty();
@@ -44,6 +48,7 @@ class Vector {
     }
   }
 
+  // Ctor with init_list
   Vector(std::initializer_list<T> init) {
     if (init.size() == 0) {
       InitEmpty();
@@ -52,6 +57,7 @@ class Vector {
     }
   }
 
+  // implicit cast from std::vector.
   template <typename U>
   Vector(const std::vector<U>& dat) {  // NOLINT
     if (dat.size() == 0) {
@@ -61,8 +67,10 @@ class Vector {
     }
   }
 
+  // Copy ctor
   Vector(const Vector<T>& other) { this->operator=(other); }
 
+  // Copy operator
   Vector<T>& operator=(const Vector<T>& other) {
     if (other.size() != 0) {
       this->InitByIter(other.size(), other.begin(), other.end());
@@ -72,27 +80,31 @@ class Vector {
     return *this;
   }
 
+  // Move ctor
   Vector(Vector<T>&& other) {
     this->size_ = other.size_;
     this->flag_ = other.flag_;
-    if (other.cuda_vec_.capacity()) {
+    if (other.cuda_vec_.memory_size()) {
       this->cuda_vec_.ShareDataWith(other.cuda_vec_);
     }
-    if (other.cpu_vec_.capacity()) {
+    if (other.cpu_vec_.memory_size()) {
       this->cpu_vec_.ShareDataWith(other.cpu_vec_);
     }
   }
 
+  // CPU data access method. Mutable.
   T& operator[](size_t i) {
     MutableCPU();
     return const_cast<T*>(cpu_vec_.data<T>())[i];
   }
 
+  // CPU data access method. Immutable.
   const T& operator[](size_t i) const {
     ImmutableCPU();
     return cpu_vec_.data<T>()[i];
   }
 
+  // std::vector iterator methods. Based on CPU data access method
   size_t size() const { return size_; }
 
   T* begin() { return &this->operator[](0); }
@@ -116,17 +128,22 @@ class Vector {
     return *it;
   }
 
+  T* data() { return begin(); }
+
+  const T* data() const { return begin(); }
+
   const T& front() const { return *begin(); }
+  // end of std::vector iterator methods
 
+  // assign this from iterator.
+  // NOTE: the iterator must support `end-begin`
   template <typename Iter>
   void assign(Iter begin, Iter end) {
     InitByIter(end - begin, begin, end);
   }
 
-  T* data() { return begin(); }
-
-  const T* data() const { return begin(); }
-
+  // push_back. If the previous capacity is not enough, the memory will
+  // double.
   void push_back(T elem) {
     if (size_ + 1 > capacity()) {
       reserve((size_ + 1) << 1);
@@ -135,6 +152,19 @@ class Vector {
     ++size_;
   }
 
+  // extend a vector by iterator.
+  // NOTE: the iterator must support end-begin
+  template <typename It>
+  void Extend(It begin, It end) {
+    size_t pre_size = size_;
+    resize(pre_size + (end - begin));
+    T* ptr = this->begin() + pre_size;
+    for (; begin < end; ++begin, ++ptr) {
+      *ptr = *begin;
+    }
+  }
+
+  // resize the vector
   void resize(size_t size) {
     if (size + 1 < capacity()) {
       size_ = size;
@@ -145,7 +175,7 @@
       T* ptr = cpu_tensor.mutable_data<T>(
           framework::make_ddim({static_cast<int64_t>(size)}), cpu);
       const T* old_ptr =
-          cpu_vec_.capacity() == 0 ? nullptr : cpu_vec_.data<T>();
+          cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
       if (old_ptr != nullptr) {
         std::copy(old_ptr, old_ptr + size_, ptr);
       }
@@ -154,6 +184,7 @@
     }
   }
 
+  // get cuda ptr. immutable
   const T* CUDAData(platform::Place place) const {
     PADDLE_ENFORCE(platform::is_gpu_place(place),
                    "CUDA Data must on CUDA place");
@@ -161,37 +192,31 @@ class Vector {
     return cuda_vec_.data<T>();
   }
 
+  // get cuda ptr. mutable
   T* CUDAMutableData(platform::Place place) {
     const T* ptr = CUDAData(place);
     flag_ = kDirty | kDataInCUDA;
     return const_cast<T*>(ptr);
   }
 
-  template <typename It>
-  void Extend(It begin, It end) {
-    size_t pre_size = size_;
-    resize(pre_size + (end - begin));
-    T* ptr = this->begin() + pre_size;
-    for (; begin < end; ++begin, ++ptr) {
-      *ptr = *begin;
-    }
-  }
-
+  // clear
   void clear() {
     size_ = 0;
     flag_ = kDirty | kDataInCPU;
   }
 
   size_t capacity() const {
-    return cpu_vec_.capacity() / SizeOfType(typeid(T));
+    return cpu_vec_.memory_size() / SizeOfType(typeid(T));
   }
 
+  // reserve data
   void reserve(size_t size) {
     size_t pre_size = size_;
     resize(size);
     resize(pre_size);
   }
 
+  // the unify method to access CPU or CUDA data. immutable.
   const T* Data(platform::Place place) const {
     if (platform::is_gpu_place(place)) {
       return CUDAData(place);
@@ -200,6 +225,7 @@ class Vector {
     }
   }
 
+  // the unify method to access CPU or CUDA data. mutable.
   T* MutableData(platform::Place place) {
     if (platform::is_gpu_place(place)) {
       return CUDAMutableData(place);
@@ -208,6 +234,7 @@ class Vector {
     }
   }
 
+  // implicit cast operator. Vector can be cast to std::vector implicitly.
   operator std::vector<T>() const {
     std::vector<T> result;
     result.resize(size());
@@ -243,7 +270,12 @@ class Vector {
     size_ = size;
   }
 
-  enum DataFlag { kDataInCPU = 0x01, kDataInCUDA = 0x02, kDirty = 0x10 };
+  enum DataFlag {
+    kDataInCPU = 0x01,
+    kDataInCUDA = 0x02,
+    // kDirty means the data has been changed in one device.
+    kDirty = 0x10
+  };
 
   void MutableCPU() {
     if (IsInCUDA() && IsDirty()) {
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index a8767a75430b98c6b0aada69ace72be6dd127562..be09b7c94507b99b5b4cbfe6f2039c74ec76b4a2 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -120,6 +120,7 @@ class Tensor {
     return holder_->type();
   }
 
+  // memory size returns the holding memory size in byte.
   size_t memory_size() const;
 
   inline void check_memory_size() const;
@@ -128,10 +129,6 @@ class Tensor {
 
   inline void set_layout(const DataLayout layout) { layout_ = layout; }
 
-  size_t capacity() const {
-    return holder_ == nullptr ? 0UL : holder_->size() - offset_;
-  }
-
  private:
   friend class LoDTensor;
 
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index 6dcaa024245f78df5bfba073c2cec5686fee657e..f75cc31b3994840d9a54a6d45b18ad0e305c966a 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -62,14 +62,14 @@ inline void Tensor::check_memory_size() const {
   PADDLE_ENFORCE_NOT_NULL(
       holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
   PADDLE_ENFORCE_GE(
-      holder_->size(), memory_size() + offset_,
+      numel() * SizeOfType(type()), memory_size(),
       "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
      "first to re-allocate memory.\n"
       "or maybe the required data-type mismatches the data already stored.");
 }
 
 inline size_t Tensor::memory_size() const {
-  return holder_ == nullptr ? 0UL : numel() * SizeOfType(type());
+  return holder_ == nullptr ? 0UL : holder_->size() - offset_;
 }
 
 template <typename T>
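For reference, below is a minimal usage sketch of the mixed Vector API that the new comments describe. It is illustrative only and not part of the patch: the header paths (paddle/framework/mixed_vector.h, paddle/platform/place.h), the place types platform::CPUPlace/platform::CUDAPlace, and the free function MixedVectorSketch are assumptions based on this revision of the framework, and the device calls require a CUDA build.

// Usage sketch (not part of the patch). Exercises the Vector<T> API whose
// comments were added above; header paths and place types are assumptions.
#include <vector>

#include "paddle/framework/mixed_vector.h"
#include "paddle/platform/place.h"

void MixedVectorSketch() {
  using paddle::framework::Vector;

  Vector<int> v({1, 2, 3});  // ctor with init_list
  v.push_back(4);            // CPU-side write; marks the CPU copy dirty

  // The unified accessors dispatch on the place and sync the buffers
  // implicitly, as described in the Data/MutableData comments.
  const int* cpu_ptr = v.Data(paddle::platform::CPUPlace());
  int* gpu_ptr = v.MutableData(paddle::platform::CUDAPlace(0));  // copies to GPU

  // Reading through operator[] syncs the data back to CPU first.
  int first = v[0];

  // The implicit cast operator copies the synced CPU data into a std::vector.
  std::vector<int> out = v;

  (void)cpu_ptr;
  (void)gpu_ptr;
  (void)first;
  (void)out;
}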