提交 eaa90d38 编写于 作者: C chengduoZH

add use_pinned

上级 236b7dd2
......@@ -45,10 +45,11 @@ class Tensor {
friend struct EigenVector;
public:
Tensor() : offset_(0) {}
Tensor() : offset_(0), use_pinned_(false) {}
/*! Constructor with place should only be used in pybind. */
explicit Tensor(const platform::Place& place) : offset_(0) {
explicit Tensor(const platform::Place& place)
: offset_(0), use_pinned_(false) {
holder_->set_place(place);
}
......@@ -69,11 +70,12 @@ class Tensor {
* @note If the memory block does not exist yet, it is allocated.
*/
template <typename T>
inline T* mutable_data(platform::Place place);
inline T* mutable_data(platform::Place place, bool use_pinned = false);
inline void* mutable_data(platform::Place place, std::type_index type);
inline void* mutable_data(platform::Place place, std::type_index type,
bool use_pinned = false);
inline void* mutable_data(platform::Place place);
inline void* mutable_data(platform::Place place, bool use_pinned = false);
/**
* @brief Return a pointer to mutable memory block.
......@@ -84,7 +86,8 @@ class Tensor {
* @note If the memory block does not exist yet, it is allocated.
*/
template <typename T>
inline T* mutable_data(DDim dims, platform::Place place);
inline T* mutable_data(DDim dims, platform::Place place,
bool use_pinned = false);
/*! Return the dimensions of the memory block. */
inline const DDim& dims() const;
......@@ -92,6 +95,9 @@ class Tensor {
/*! Return the numel of the memory block. */
inline int64_t numel() const;
/*! Return the use_pinned flag recorded for this tensor's memory block. */
inline bool isPinned() const;
/*! Resize the dimensions of the memory block. */
inline Tensor& Resize(const DDim& dims);
......@@ -146,12 +152,14 @@ class Tensor {
template <typename Place>
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(Place place, size_t size, std::type_index type)
: ptr_(static_cast<uint8_t*>(memory::Alloc(place, size)),
memory::PODDeleter<uint8_t, Place>(place)),
PlaceholderImpl(Place place, size_t size, std::type_index type,
bool use_pinned = false)
: ptr_(static_cast<uint8_t*>(memory::Alloc(place, size, use_pinned)),
memory::PODDeleter<uint8_t, Place>(place, use_pinned)),
place_(place),
size_(size),
type_(type) {
type_(type),
use_pinned_(use_pinned) {
PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
(is_cpu_place(place_) ? "CPU" : "GPU"));
}
......@@ -174,6 +182,9 @@ class Tensor {
/* the current type of memory */
std::type_index type_;
/*! use pinned memory or not. */
bool use_pinned_;
};
/*! holds the memory block if allocated. */
......@@ -208,6 +219,7 @@ class Tensor {
* PlaceHolder::ptr_ and where the tensor data really begins.
*/
size_t offset_;
bool use_pinned_;
};
inline void Tensor::switch_place(platform::Place new_place) {
......
......@@ -101,19 +101,21 @@ inline T* Tensor::data() {
}
template <typename T>
inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
inline T* Tensor::mutable_data(DDim dims, platform::Place place,
bool use_pinned) {
static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims);
return mutable_data<T>(place);
return mutable_data<T>(place, use_pinned);
}
template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
inline T* Tensor::mutable_data(platform::Place place, bool use_pinned) {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
return reinterpret_cast<T*>(mutable_data(place, typeid(T), use_pinned));
}
inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
inline void* Tensor::mutable_data(platform::Place place, std::type_index type,
bool use_pinned) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
......@@ -127,26 +129,27 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size, type));
boost::get<platform::CPUPlace>(place), size, type, use_pinned));
} else if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
}
#else
holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(place), size, type));
boost::get<platform::CUDAPlace>(place), size, type, use_pinned));
}
#endif
offset_ = 0;
use_pinned_ = use_pinned;
}
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
inline void* Tensor::mutable_data(platform::Place place) {
inline void* Tensor::mutable_data(platform::Place place, bool use_pinned) {
PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing");
return mutable_data(place, holder_->type());
return mutable_data(place, holder_->type(), use_pinned);
}
inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
......@@ -188,6 +191,8 @@ inline const DDim& Tensor::dims() const { return dims_; }
/*! Number of elements: the result of product() applied to dims_. */
inline int64_t Tensor::numel() const {
  return product(dims_);
}
/*! Report the use_pinned_ flag stored on this tensor. */
inline bool Tensor::isPinned() const {
  return use_pinned_;
}
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
Tensor res;
res.ShareDataWith(src);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册