提交 63320f72 编写于 作者: D dzhwinter

"add some interfaces"

上级 6f28084b
......@@ -48,12 +48,26 @@ namespace framework {
*/
struct LoD : public std::vector<Vector<size_t>> {
  using std::vector<Vector<size_t>>::vector;

  // Reports the place of the first level. A LoD with no levels has not been
  // initialized yet and reports CPUPlace.
  platform::Place place() const {
    if (this->empty()) {
      return platform::CPUPlace();
    }
    return this->front().place();
  }

  // Invokes CopyFromCUDA() on every level, in order.
  void CopyFromCUDA() {
    for (auto& level : *this) {
      level.CopyFromCUDA();
    }
  }

  // Invokes mutable_data(place) on every level, in order; the returned
  // pointers are discarded.
  void CopyToPeer(platform::Place place) {
    for (auto& level : *this) {
      level.mutable_data(place);
    }
  }
};
std::ostream& operator<<(std::ostream& os, const LoD& lod);
......@@ -115,7 +129,13 @@ class LoDTensor : public Tensor {
explicit LoDTensor(const LoD& lod) : lod_(lod) {}
void set_lod(const LoD& lod) { lod_ = lod; }
// Stores a copy of `lod` in lod_. When holder_ is set and its place compares
// equal to lod.place(), additionally calls lod_.CopyToPeer(holder_->place()).
void set_lod(const LoD& lod) {
lod_ = lod;
// NOTE(review): the copy is triggered only when the two places are already
// the same; a LoD whose place differs from the holder's is left as-is.
// Confirm this is intended and not an inverted condition.
if (holder_ != nullptr &&
platform::is_same_place(holder_->place(), lod.place())) {
lod_.CopyToPeer(holder_->place());
}
}
const LoD& lod() const { return lod_; }
......
......@@ -40,14 +40,15 @@ class Vector : public std::vector<T> {
Vector() {}
Vector(const std::vector<T> &v) : std::vector<T>(v) {} // NOLINT
// Destructor. In CUDA builds, frees the device buffer at place_ when
// cuda_ptr_ is non-null.
// NOTE(review): this view shows cuda_ptr_ declared both as `void *` and as
// `std::shared_ptr<void>`. If the shared_ptr form (with a custom deleter) is
// the live declaration, this manual Free is presumably redundant and could
// release the buffer twice - confirm against the actual header.
virtual ~Vector() {
#ifdef PADDLE_WITH_CUDA
if (cuda_ptr_ != nullptr) {
memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
}
#endif
}
inline platform::Place place() const { return place_; }
/*! Return a pointer to constant memory block. */
inline const T *data(platform::Place place) const;
/*! Return a pointer to mutable memory block. */
inline T *mutable_data(platform::Place place);
// TODO(dzhwinter): below interfaces should be removed
/* Get device vector */
T *cuda_data() {
CopyToCUDA();
......@@ -68,25 +69,71 @@ class Vector : public std::vector<T> {
void CopyToPeer(platform::Place);
private:
void *cuda_ptr_ = nullptr;
std::shared_ptr<void> cuda_ptr_;
size_t cuda_size_ = 0; // device vector numel
platform::CUDAPlace place_;
};
template <typename T>
void Vector<T>::CopyToCUDA() {
inline const T *Vector<T>::data(platform::Place place) const {
  // Read-only access to the block that lives at `place`.
  //  - CPU place: pointer to the host std::vector storage.
  //  - GPU place: nullptr if no device buffer is held; otherwise the device
  //    pointer, provided `place` equals place_.
  //  - Any mismatch or unknown place is reported through PADDLE_THROW.
  if (platform::is_cpu_place(place)) {
    return std::vector<T>::data();
  }
  if (!platform::is_gpu_place(place)) {
    PADDLE_THROW("Unsupport Place.");
  }
  // GPU request from here on.
  if (cuda_ptr_ == nullptr) {
    return nullptr;
  }
  if (!platform::is_same_place(place, place_)) {
    PADDLE_THROW(
        "Unmatched place. Please use `mutable_data` copy lod to the target "
        "Place first.");
  }
  return static_cast<const T *>(cuda_ptr_.get());
}
// Returns a writable pointer to the block at `place`.
//  - CPU place: the host std::vector storage.
//  - GPU place: records `place` in place_ when it differs, then (CUDA builds)
//    makes sure a device buffer exists, copies the host contents into it,
//    waits on the device context, and returns the device pointer.
//  - Any other place goes through PADDLE_THROW.
// NOTE(review): this text comes from a diff view; some lines below appear to
// be pre-change lines shown next to their replacements, so the braces do not
// balance as written. Confirm against the real file.
template <typename T>
inline T *Vector<T>::mutable_data(platform::Place place) {
if (platform::is_cpu_place(place)) {
return std::vector<T>::data();
} else if (platform::is_gpu_place(place)) {
// NOTE(review): place_ is updated here, but the reallocation check below
// looks only at cuda_size_ and cuda_ptr_. Presumably an existing buffer
// allocated for the previous place would then be reused under the new
// place_ - confirm.
if (!platform::is_same_place(place, place_)) {
place_ = boost::get<platform::CUDAPlace>(place);
}
#ifdef PADDLE_WITH_CUDA
// NOTE(review): the next three lines look like the removed (raw-pointer)
// variant of the reallocation check.
if (cuda_size_ < this->size()) {
if (cuda_ptr_ != nullptr) {
memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
// (Re)allocate when the device buffer is missing or smaller than the host
// vector; the buffer is handed to cuda_ptr_ together with a PlainDeleter
// bound to place_.
if (cuda_size_ < this->size() || cuda_ptr_ == nullptr) {
cuda_ptr_.reset(
memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T)),
memory::PlainDeleter<void, platform::CUDAPlace>(place_));
}
// NOTE(review): the following assignment looks like the removed
// raw-pointer allocation.
cuda_ptr_ =
memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T));
cuda_size_ = this->size();
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto *ctx = pool.GetByPlace(place_);
// Host -> device copy of the whole vector on the context's stream, followed
// by a wait on that context before the pointer is returned.
memory::Copy(place_, cuda_ptr_.get(), platform::CPUPlace(),
static_cast<const void *>(this->data()),
this->size() * sizeof(T), ctx->stream());
ctx->Wait();
return static_cast<T *>(cuda_ptr_.get());
#endif
// NOTE(review): without PADDLE_WITH_CUDA the GPU branch has no return
// statement - confirm whether that configuration can reach this branch.
} else {
PADDLE_THROW("Unsupport Place.");
}
}
template <typename T>
void Vector<T>::CopyToCUDA() {
#ifdef PADDLE_WITH_CUDA
if (cuda_size_ < this->size() || cuda_ptr_ == nullptr) {
cuda_ptr_.reset(
memory::Alloc<platform::CUDAPlace>(this->size() * sizeof(T)),
memory::PlainDeleter<void, platform::CUDAPlace>(place_));
}
cuda_size_ = this->size();
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto *ctx = pool.GetByPlace(place_);
memory::Copy(place_, cuda_ptr_, platform::CPUPlace(),
memory::Copy(place_, cuda_ptr_.get(), platform::CPUPlace(),
static_cast<const void *>(this->data()),
this->size() * sizeof(T), ctx->stream());
ctx->Wait();
......@@ -104,32 +151,11 @@ void Vector<T>::CopyFromCUDA() {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto *ctx = pool.GetByPlace(place_);
memory::Copy(platform::CPUPlace(), static_cast<void *>(this->data()), place_,
static_cast<const void *>(cuda_ptr_), this->size() * sizeof(T),
ctx->stream());
ctx->Wait();
#endif
}
// Moves the device buffer to `peer_place` (CUDA builds only): allocates a
// buffer of size() * sizeof(T) bytes at the peer place, copies the current
// device contents into it on the stream of the context for place_, waits,
// frees the old buffer, then updates place_ and cuda_ptr_ to the new location.
// Without PADDLE_WITH_CUDA the body is empty.
// NOTE(review): `peer_place` is unwrapped with boost::get<CUDAPlace>, so it
// is presumably expected to hold a CUDAPlace - confirm against callers.
template <typename T>
void Vector<T>::CopyToPeer(platform::Place peer_place) {
#ifdef PADDLE_WITH_CUDA
auto *ctx = platform::DeviceContextPool::Instance().GetByPlace(place_);
void *peer_cuda_ptr = memory::Alloc<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(peer_place), this->size() * sizeof(T));
memory::Copy(boost::get<platform::CUDAPlace>(peer_place), peer_cuda_ptr,
place_, cuda_ptr_, this->size() * sizeof(T), ctx->stream());
// NOTE(review): the next two lines do not form a statement here; they look
// like diff-view residue belonging to a neighbouring function - confirm
// against the real file.
static_cast<const void *>(cuda_ptr_.get()),
this->size() * sizeof(T), ctx->stream());
ctx->Wait();
memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
place_ = boost::get<platform::CUDAPlace>(peer_place);
cuda_ptr_ = peer_cuda_ptr;
#endif
}
template class Vector<int>;
template class Vector<unsigned>;
template class Vector<size_t>;
template class Vector<int64_t>;
} // namespace framework
} // namespace paddle
......@@ -81,5 +81,23 @@ class PODDeleter {
Place place_;
};
/**
 * \brief Deleter functor that hands a pointer back to Free() for a
 *        fixed place.
 *
 * \tparam T     Pointee type of the pointer being released.
 * \tparam Place Type of the place passed through to Free().
 *
 * \note Intended as a custom deleter for smart pointers, so that the
 *       memory block is released automatically when the owner goes
 *       away. The place is captured at construction time.
 */
template <typename T, typename Place>
class PlainDeleter {
 public:
  explicit PlainDeleter(Place place) : place_(place) {}

  /*! Releases `ptr` via Free(place_, ptr), passing it as void*. */
  void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }

 private:
  Place place_;  // place forwarded to Free()
};
} // namespace memory
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册