add use_pinned

eaa90d38 · chengduoZH · 236b7dd2 · eaa90d38 · eaa90d38
显示空白变更内容
内联并排

Showing 2 changed files with 36 additions and 19 deletions

paddle/fluid/framework/tensor.h paddle/fluid/framework/tensor.h +22 -10

paddle/fluid/framework/tensor_impl.h paddle/fluid/framework/tensor_impl.h +14 -9

未找到文件。
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -45,10 +45,11 @@ class Tensor {
  friend struct EigenVector;
 public:
-  Tensor() : offset_(0) {}
+  Tensor() : offset_(0), use_pinned_(false) {}
  /*! Constructor with place should only be used in pybind. */
-  explicit Tensor(const platform::Place& place) : offset_(0) {
+  explicit Tensor(const platform::Place& place)
+      : offset_(0), use_pinned_(false) {
    holder_->set_place(place);
  }
@@ -69,11 +70,12 @@ class Tensor {
   * @note    If not exist, then allocation.
   */
  template <typename T>
-  inline T* mutable_data(platform::Place place);
+  inline T* mutable_data(platform::Place place, bool use_pinned = false);
-  inline void* mutable_data(platform::Place place, std::type_index type);
+  inline void* mutable_data(platform::Place place, std::type_index type,
+                            bool use_pinned = false);
-  inline void* mutable_data(platform::Place place);
+  inline void* mutable_data(platform::Place place, bool use_pinned = false);
  /**
   * @brief     Return a pointer to mutable memory block.
@@ -84,7 +86,8 @@ class Tensor {
   * @note      If not exist, then allocation.
   */
  template <typename T>
-  inline T* mutable_data(DDim dims, platform::Place place);
+  inline T* mutable_data(DDim dims, platform::Place place,
+                         bool use_pinned = false);
  /*! Return the dimensions of the memory block. */
  inline const DDim& dims() const;
@@ -92,6 +95,9 @@ class Tensor {
  /*! Return the numel of the memory block. */
  inline int64_t numel() const;
+  /*! Return whether the memory block was allocated with use_pinned set. */
+  inline bool isPinned() const;
  /*! Resize the dimensions of the memory block. */
  inline Tensor& Resize(const DDim& dims);
@@ -146,12 +152,14 @@ class Tensor {
  template <typename Place>
  struct PlaceholderImpl : public Placeholder {
-    PlaceholderImpl(Place place, size_t size, std::type_index type)
+    PlaceholderImpl(Place place, size_t size, std::type_index type,
-        : ptr_(static_cast<uint8_t*>(memory::Alloc(place, size)),
+                    bool use_pinned = false)
-               memory::PODDeleter<uint8_t, Place>(place)),
+        : ptr_(static_cast<uint8_t*>(memory::Alloc(place, size, use_pinned)),
+               memory::PODDeleter<uint8_t, Place>(place, use_pinned)),
          place_(place),
          size_(size),
-          type_(type) {
+          type_(type),
+          use_pinned_(use_pinned) {
      PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
                              (is_cpu_place(place_) ? "CPU" : "GPU"));
    }
@@ -174,6 +182,9 @@ class Tensor {
    /* the current type of memory */
    std::type_index type_;
+    /*! use pinned memory or not. */
+    bool use_pinned_;
  };
  /*! holds the memory block if allocated. */
@@ -208,6 +219,7 @@ class Tensor {
   *          PlaceHolder::ptr_ and where the tensor data really begins.
   */
  size_t offset_;
+  bool use_pinned_;
 };
 inline void Tensor::switch_place(platform::Place new_place) {

--- a/paddle/fluid/framework/tensor_impl.h
+++ b/paddle/fluid/framework/tensor_impl.h
@@ -101,19 +101,21 @@ inline T* Tensor::data() {
 }
 template <typename T>
-inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
+inline T* Tensor::mutable_data(DDim dims, platform::Place place,
+                               bool use_pinned) {
  static_assert(std::is_pod<T>::value, "T must be POD");
  Resize(dims);
-  return mutable_data<T>(place);
+  return mutable_data<T>(place, use_pinned);
 }
 template <typename T>
-inline T* Tensor::mutable_data(platform::Place place) {
+inline T* Tensor::mutable_data(platform::Place place, bool use_pinned) {
  static_assert(std::is_pod<T>::value, "T must be POD");
-  return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
+  return reinterpret_cast<T*>(mutable_data(place, typeid(T), use_pinned));
 }
-inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
+inline void* Tensor::mutable_data(platform::Place place, std::type_index type,
+                                  bool use_pinned) {
  if (holder_ != nullptr) {
    holder_->set_type(type);
  }
@@ -127,26 +129,27 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
      holder_->size() < size + offset_) {
    if (platform::is_cpu_place(place)) {
      holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
-          boost::get<platform::CPUPlace>(place), size, type));
+          boost::get<platform::CPUPlace>(place), size, type, use_pinned));
    } else if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
      PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
    }
 #else
      holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
-          boost::get<platform::CUDAPlace>(place), size, type));
+          boost::get<platform::CUDAPlace>(place), size, type, use_pinned));
    }
 #endif
    offset_ = 0;
+    use_pinned_ = use_pinned;
  }
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                 offset_);
 }
-inline void* Tensor::mutable_data(platform::Place place) {
+inline void* Tensor::mutable_data(platform::Place place, bool use_pinned) {
  PADDLE_ENFORCE(this->holder_ != nullptr,
                 "Cannot invoke mutable data if current hold nothing");
-  return mutable_data(place, holder_->type());
+  return mutable_data(place, holder_->type(), use_pinned);
 }
 inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
@@ -188,6 +191,8 @@ inline const DDim& Tensor::dims() const { return dims_; }
 inline int64_t Tensor::numel() const { return product(dims_); }
+inline bool Tensor::isPinned() const { return use_pinned_; }
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
  Tensor res;
  res.ShareDataWith(src);