Commit 63320f72 authored by dzhwinter

"add some interfaces"

Parent 6f28084b
@@ -48,12 +48,26 @@ namespace framework {
  */
 struct LoD : public std::vector<Vector<size_t>> {
   using std::vector<Vector<size_t>>::vector;
+  platform::Place place() const {
+    if (this->size() == 0) {
+      // Not initialized yet.
+      return platform::CPUPlace();
+    } else {
+      return this->front().place();
+    }
+  }
+
   void CopyFromCUDA() {
     for (auto it = this->begin(); it != this->end(); ++it) {
       it->CopyFromCUDA();
     }
   }
+
+  void CopyToPeer(platform::Place place) {
+    for (auto it = this->begin(); it != this->end(); ++it) {
+      it->mutable_data(place);
+    }
+  }
 };
 
 std::ostream& operator<<(std::ostream& os, const LoD& lod);
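For reference, a minimal sketch of how the LoD interface added above might be used (hypothetical, not part of this commit; assumes a CUDA build):

```cpp
#include "paddle/framework/lod_tensor.h"  // header path assumed

void MoveLoDToDevice() {
  // One level of offsets, stored on the host by default. place() falls back
  // to CPUPlace() only while the LoD has no levels; otherwise it reports the
  // place of the first level.
  paddle::framework::LoD lod{{0, 2, 5}};

  // CopyToPeer walks every level and calls Vector<size_t>::mutable_data,
  // which materializes a copy of that level on the target place.
  lod.CopyToPeer(paddle::platform::CUDAPlace(0));
}
```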
@@ -115,7 +129,13 @@ class LoDTensor : public Tensor {
   explicit LoDTensor(const LoD& lod) : lod_(lod) {}
 
-  void set_lod(const LoD& lod) { lod_ = lod; }
+  void set_lod(const LoD& lod) {
+    lod_ = lod;
+    if (holder_ != nullptr &&
+        platform::is_same_place(holder_->place(), lod.place())) {
+      lod_.CopyToPeer(holder_->place());
+    }
+  }
 
   const LoD& lod() const { return lod_; }
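A similarly hedged sketch of what the new set_lod does for a tensor whose data already lives on a GPU (hypothetical usage; make_ddim and mutable_data are the existing Tensor helpers):

```cpp
#include "paddle/framework/lod_tensor.h"  // header path assumed

void SetLodOnGpuTensor() {
  paddle::framework::LoDTensor tensor;
  tensor.Resize(paddle::framework::make_ddim({5, 1}));
  tensor.mutable_data<float>(paddle::platform::CUDAPlace(0));

  paddle::framework::LoD lod{{0, 2, 5}};
  // When holder_ exists and reports the same place as the LoD, set_lod also
  // refreshes the LoD's buffers on that place via LoD::CopyToPeer.
  tensor.set_lod(lod);
}
```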
...
@@ -40,14 +40,15 @@ class Vector : public std::vector<T> {
   Vector() {}
   Vector(const std::vector<T> &v) : std::vector<T>(v) {}  // NOLINT
 
-  virtual ~Vector() {
-#ifdef PADDLE_WITH_CUDA
-    if (cuda_ptr_ != nullptr) {
-      memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
-    }
-#endif
-  }
+  inline platform::Place place() const { return place_; }
+
+  // Keep std::vector's zero-argument data() visible alongside the
+  // place-aware overload declared below.
+  using std::vector<T>::data;
+
+  /*! Return a pointer to constant memory block. */
+  inline const T *data(platform::Place place) const;
+
+  /*! Return a pointer to mutable memory block. */
+  inline T *mutable_data(platform::Place place);
 
+  // TODO(dzhwinter): below interfaces should be removed
   /* Get device vector */
   T *cuda_data() {
     CopyToCUDA();
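The two new accessors split the old cuda_data/CopyToCUDA behavior by place; a hypothetical call site (assuming the Vector<T> above is in scope):

```cpp
void SyncRowsToDevice() {
  paddle::framework::Vector<size_t> rows(std::vector<size_t>{0, 4, 7});

  // mutable_data on a GPU place (re)allocates the cached device buffer when
  // needed and synchronously copies the host contents into it.
  size_t *d_rows = rows.mutable_data(paddle::platform::CUDAPlace(0));

  // data() on a CPU place is simply the underlying std::vector storage; on
  // the matching GPU place it returns the cached device pointer instead.
  const size_t *h_rows = rows.data(paddle::platform::CPUPlace());

  (void)d_rows;
  (void)h_rows;
}
```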
@@ -68,25 +69,71 @@ class Vector : public std::vector<T> {
   void CopyToPeer(platform::Place);
 
  private:
-  void *cuda_ptr_ = nullptr;
+  std::shared_ptr<void> cuda_ptr_;
   size_t cuda_size_ = 0;  // device vector numel
   platform::CUDAPlace place_;
 };
 
 template <typename T>
-void Vector<T>::CopyToCUDA() {
+inline const T *Vector<T>::data(platform::Place place) const {
+  if (platform::is_cpu_place(place)) {
+    return std::vector<T>::data();
+  } else if (platform::is_gpu_place(place)) {
+    if (cuda_ptr_ == nullptr) {
+      return nullptr;
+    }
+    if (platform::is_same_place(place, place_)) {
+      return static_cast<const T *>(cuda_ptr_.get());
+    } else {
+      PADDLE_THROW(
+          "Unmatched place. Please use `mutable_data` to copy the data to "
+          "the target place first.");
+    }
+  } else {
+    PADDLE_THROW("Unsupported place.");
+  }
+}
+
+template <typename T>
+inline T *Vector<T>::mutable_data(platform::Place place) {
+  if (platform::is_cpu_place(place)) {
+    return std::vector<T>::data();
+  } else if (platform::is_gpu_place(place)) {
+    if (!platform::is_same_place(place, place_)) {
+      place_ = boost::get<platform::CUDAPlace>(place);
+    }
 #ifdef PADDLE_WITH_CUDA
-  if (cuda_size_ < this->size()) {
-    if (cuda_ptr_ != nullptr) {
-      memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
-    }
-    cuda_ptr_ =
-        memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T));
+    if (cuda_size_ < this->size() || cuda_ptr_ == nullptr) {
+      cuda_ptr_.reset(
+          memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T)),
+          memory::PlainDeleter<void, platform::CUDAPlace>(place_));
+    }
+    cuda_size_ = this->size();
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto *ctx = pool.GetByPlace(place_);
+    memory::Copy(place_, cuda_ptr_.get(), platform::CPUPlace(),
+                 static_cast<const void *>(this->data()),
+                 this->size() * sizeof(T), ctx->stream());
+    ctx->Wait();
+    return static_cast<T *>(cuda_ptr_.get());
+#endif
+  } else {
+    PADDLE_THROW("Unsupported place.");
+  }
+}
+
+template <typename T>
+void Vector<T>::CopyToCUDA() {
+#ifdef PADDLE_WITH_CUDA
+  if (cuda_size_ < this->size() || cuda_ptr_ == nullptr) {
+    cuda_ptr_.reset(
+        memory::Alloc<platform::CUDAPlace>(place_, this->size() * sizeof(T)),
+        memory::PlainDeleter<void, platform::CUDAPlace>(place_));
   }
   cuda_size_ = this->size();
   platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
   auto *ctx = pool.GetByPlace(place_);
-  memory::Copy(place_, cuda_ptr_, platform::CPUPlace(),
+  memory::Copy(place_, cuda_ptr_.get(), platform::CPUPlace(),
                static_cast<const void *>(this->data()),
                this->size() * sizeof(T), ctx->stream());
   ctx->Wait();
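Both mutable_data and CopyToCUDA use the same grow-only cache policy: the device buffer is reallocated only when it is missing or smaller than the host vector, and every call then re-copies the host contents. The pattern, restated as a self-contained illustration with std::malloc standing in for memory::Alloc and std::free for PlainDeleter:

```cpp
#include <cstdlib>
#include <cstring>
#include <memory>
#include <vector>

class CachedBuffer {
 public:
  // Reallocate only when the cache is missing or too small, then copy the
  // host contents in; mirrors the cuda_ptr_/cuda_size_ logic above.
  void Sync(const std::vector<int> &host) {
    if (numel_ < host.size() || buf_ == nullptr) {
      buf_.reset(std::malloc(host.size() * sizeof(int)), std::free);
    }
    numel_ = host.size();
    std::memcpy(buf_.get(), host.data(), host.size() * sizeof(int));
  }

 private:
  std::shared_ptr<void> buf_;  // type-erased owner with a custom deleter,
                               // like cuda_ptr_ in the diff
  size_t numel_ = 0;
};
```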
@@ -104,32 +151,11 @@ void Vector<T>::CopyFromCUDA() {
   platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
   auto *ctx = pool.GetByPlace(place_);
   memory::Copy(platform::CPUPlace(), static_cast<void *>(this->data()), place_,
-               static_cast<const void *>(cuda_ptr_), this->size() * sizeof(T),
-               ctx->stream());
+               static_cast<const void *>(cuda_ptr_.get()),
+               this->size() * sizeof(T), ctx->stream());
   ctx->Wait();
 #endif
 }
 
-template <typename T>
-void Vector<T>::CopyToPeer(platform::Place peer_place) {
-#ifdef PADDLE_WITH_CUDA
-  auto *ctx = platform::DeviceContextPool::Instance().GetByPlace(place_);
-  void *peer_cuda_ptr = memory::Alloc<platform::CUDAPlace>(
-      boost::get<platform::CUDAPlace>(peer_place), this->size() * sizeof(T));
-  memory::Copy(boost::get<platform::CUDAPlace>(peer_place), peer_cuda_ptr,
-               place_, cuda_ptr_, this->size() * sizeof(T), ctx->stream());
-  ctx->Wait();
-  memory::Free<platform::CUDAPlace>(place_, cuda_ptr_);
-  place_ = boost::get<platform::CUDAPlace>(peer_place);
-  cuda_ptr_ = peer_cuda_ptr;
-#endif
-}
-
-template class Vector<int>;
-template class Vector<unsigned>;
-template class Vector<size_t>;
-template class Vector<int64_t>;
-
 }  // namespace framework
 }  // namespace paddle
@@ -81,5 +81,23 @@ class PODDeleter {
   Place place_;
 };
 
+/**
+ * \brief Free a memory block in one place that does not meet POD
+ *        requirements.
+ *
+ * \note In some cases, a custom deleter is used to
+ *       deallocate the memory automatically for
+ *       std::unique_ptr<T> in tensor.h.
+ */
+template <typename T, typename Place>
+class PlainDeleter {
+ public:
+  explicit PlainDeleter(Place place) : place_(place) {}
+
+  void operator()(T* ptr) { Free(place_, reinterpret_cast<void*>(ptr)); }
+
+ private:
+  Place place_;
+};
+
 }  // namespace memory
 }  // namespace paddle
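A hedged sketch of how PlainDeleter pairs with a smart pointer, matching the cuda_ptr_.reset(...) calls in the Vector<T> changes above (hypothetical usage; header path assumed):

```cpp
#include <memory>

#include "paddle/memory/memory.h"  // header path assumed

void HoldDeviceBlock() {
  paddle::platform::CUDAPlace gpu(0);
  // Pair Alloc with PlainDeleter so the raw block is returned through
  // memory::Free when the last owner releases it.
  std::shared_ptr<void> block(
      paddle::memory::Alloc<paddle::platform::CUDAPlace>(gpu, 256),
      paddle::memory::PlainDeleter<void, paddle::platform::CUDAPlace>(gpu));
}  // block goes out of scope here -> Free(gpu, ptr)
```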