提交 e1913bc5 编写于 作者: Y Yu Yang

Fix MixedVector

上级 6d2c6f96
...@@ -28,31 +28,27 @@ class COWPtr { ...@@ -28,31 +28,27 @@ class COWPtr {
private: private:
RefPtr m_sp; RefPtr m_sp;
void detach() {
T* tmp = m_sp.get();
if (!(tmp == nullptr || m_sp.unique())) {
m_sp = RefPtr(new T(*tmp));
}
}
public: public:
COWPtr() : m_sp(nullptr) {} COWPtr() : m_sp(nullptr) {}
explicit COWPtr(T* t) : m_sp(t) {} explicit COWPtr(T* t) : m_sp(t) {}
explicit COWPtr(const RefPtr& refptr) : m_sp(refptr) {}
const T& Data() const { return operator*(); } const T& Data() const { return *m_sp; }
T* MutableData() { return operator->(); } T* MutableData() {
DetachIfNotUnique();
return m_sp.get();
}
const T& operator*() const { return *m_sp; } void DetachIfNotUnique() {
T& operator*() { T* tmp = m_sp.get();
detach(); if (!(tmp == nullptr || m_sp.unique())) {
return *m_sp; Detach();
}
} }
const T* operator->() const { return m_sp.operator->(); }
T* operator->() { void Detach() {
detach(); T* tmp = m_sp.get();
return m_sp.operator->(); m_sp = RefPtr(new T(*tmp));
} }
}; };
} // namespace details } // namespace details
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <algorithm> #include <algorithm>
#include <initializer_list> #include <initializer_list>
#include <memory> #include <memory>
#include <mutex> // NOLINT
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h" #include "paddle/fluid/framework/details/cow_ptr.h"
...@@ -51,6 +52,7 @@ struct CUDABuffer { ...@@ -51,6 +52,7 @@ struct CUDABuffer {
ClearMemory(); ClearMemory();
place_ = boost::get<platform::CUDAPlace>(place); place_ = boost::get<platform::CUDAPlace>(place);
data_ = memory::Alloc(place_, size); data_ = memory::Alloc(place_, size);
PADDLE_ENFORCE_NOT_NULL(data_);
size_ = size; size_ = size;
} }
...@@ -62,7 +64,7 @@ struct CUDABuffer { ...@@ -62,7 +64,7 @@ struct CUDABuffer {
private: private:
void ClearMemory() const { void ClearMemory() const {
if (data_) { if (data_ != nullptr) {
memory::Free(place_, data_); memory::Free(place_, data_);
} }
} }
...@@ -89,6 +91,7 @@ class Vector { ...@@ -89,6 +91,7 @@ class Vector {
template <typename U> template <typename U>
explicit VectorData(const std::vector<U> &dat) explicit VectorData(const std::vector<U> &dat)
: cpu_(dat), flag_(kDataInCPU) {} : cpu_(dat), flag_(kDataInCPU) {}
~VectorData() {}
VectorData(const VectorData &o) { VectorData(const VectorData &o) {
o.ImmutableCPU(); o.ImmutableCPU();
...@@ -215,7 +218,7 @@ class Vector { ...@@ -215,7 +218,7 @@ class Vector {
size_t capacity() const { return cpu_.capacity(); } size_t capacity() const { return cpu_.capacity(); }
// reserve data // reserve data
void reserve(size_t size) { cpu_.reserve(size); } void reserve(size_t size) const { cpu_.reserve(size); }
// implicit cast operator. Vector can be cast to std::vector implicitly. // implicit cast operator. Vector can be cast to std::vector implicitly.
operator std::vector<T>() const { operator std::vector<T>() const {
...@@ -229,6 +232,17 @@ class Vector { ...@@ -229,6 +232,17 @@ class Vector {
return cpu_ == other.cpu_; return cpu_ == other.cpu_;
} }
// Returns a reference to the mutex member mtx_. The function is const and
// still hands out a non-const reference, so callers holding only a const
// view of this object can lock it.
std::mutex &Mutex() const {
  return mtx_;
}
// Reports the CUDA place recorded in gpu_, if gpu_ holds any data.
// Returns a null pointer when gpu_.data_ is null; otherwise returns a newly
// allocated copy of gpu_.place_ that the caller owns.
std::unique_ptr<platform::CUDAPlace> CUDAPlace() const {
  if (gpu_.data_ != nullptr) {
    return std::unique_ptr<platform::CUDAPlace>(
        new platform::CUDAPlace(gpu_.place_));
  }
  return nullptr;
}
private: private:
enum DataFlag { enum DataFlag {
kDataInCPU = 0x01, kDataInCPU = 0x01,
...@@ -239,10 +253,15 @@ class Vector { ...@@ -239,10 +253,15 @@ class Vector {
void CopyToCPU() const { void CopyToCPU() const {
// COPY GPU Data To CPU // COPY GPU Data To CPU
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(
platform::Place(gpu_.place_)));
auto stream = dev_ctx->stream();
void *src = gpu_.data_; void *src = gpu_.data_;
void *dst = cpu_.data(); void *dst = cpu_.data();
memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_, memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
nullptr); stream);
dev_ctx->Wait();
} }
void MutableCPU() { void MutableCPU() {
...@@ -260,7 +279,7 @@ class Vector { ...@@ -260,7 +279,7 @@ class Vector {
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (IsInCUDA() && } else if (IsInCUDA() &&
!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) { !(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
CopyCUDADataToAnotherPlace(place); PADDLE_THROW("This situation should not happen");
// Still dirty // Still dirty
} else { } else {
// Dirty && DataInCUDA && Device is same // Dirty && DataInCUDA && Device is same
...@@ -272,28 +291,21 @@ class Vector { ...@@ -272,28 +291,21 @@ class Vector {
CopyCPUDataToCUDA(place); CopyCPUDataToCUDA(place);
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) { } else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
CopyCUDADataToAnotherPlace(place); PADDLE_THROW("This situation should not happen.");
} else { } else {
// Not Dirty && DataInCUDA && Device is same // Not Dirty && DataInCUDA && Device is same
// Do nothing. // Do nothing.
} }
} }
} }
// Re-homes the contents of gpu_ into a buffer created for `place`.
// A scratch CUDABuffer of gpu_.size_ bytes is constructed for `place`, the
// bytes currently referenced by gpu_.data_ are copied into it through
// memory::Copy (with a null final argument), and the scratch buffer is then
// swapped with gpu_.
void CopyCUDADataToAnotherPlace(const platform::Place &place) const {
  details::CUDABuffer staging(place, gpu_.size_);
  void *target = staging.data_;
  const void *origin = gpu_.data_;
  memory::Copy(staging.place_, target, gpu_.place_, origin, gpu_.size_,
               nullptr);
  gpu_.Swap(staging);
}
void CopyCPUDataToCUDA(const platform::Place &place) const { void CopyCPUDataToCUDA(const platform::Place &place) const {
void *src = cpu_.data(); void *src = cpu_.data();
gpu_.Resize(place, cpu_.size() * sizeof(T)); gpu_.Resize(place, cpu_.size() * sizeof(T));
void *dst = gpu_.data_; void *dst = gpu_.data_;
auto stream = static_cast<platform::CUDADeviceContext *>( auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place)) platform::DeviceContextPool::Instance().Get(place));
->stream(); auto stream = dev_ctx->stream();
memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_, memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
stream); stream);
} }
...@@ -319,6 +331,8 @@ class Vector { ...@@ -319,6 +331,8 @@ class Vector {
mutable std::vector<T> cpu_; mutable std::vector<T> cpu_;
mutable details::CUDABuffer gpu_; mutable details::CUDABuffer gpu_;
mutable int flag_; mutable int flag_;
mutable std::mutex mtx_;
}; };
public: public:
...@@ -350,81 +364,103 @@ class Vector { ...@@ -350,81 +364,103 @@ class Vector {
Vector(Vector<T> &&other) { m_ = std::move(other.m_); } Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
// CPU data access method. Mutable. // CPU data access method. Mutable.
T &operator[](size_t i) { return (*m_)[i]; } T &operator[](size_t i) { return (*m_.MutableData())[i]; }
// CPU data access method. Immutable. // CPU data access method. Immutable.
const T &operator[](size_t i) const { return (*m_)[i]; } const T &operator[](size_t i) const { return m_.Data()[i]; }
// std::vector iterator methods. Based on CPU data access method // std::vector iterator methods. Based on CPU data access method
size_t size() const { return m_->size(); } size_t size() const { return m_.Data().size(); }
iterator begin() { return m_->begin(); } iterator begin() { return m_.MutableData()->begin(); }
iterator end() { return m_->end(); } iterator end() { return m_.MutableData()->end(); }
T &front() { return m_->front(); } T &front() { return m_.MutableData()->front(); }
T &back() { return m_->back(); } T &back() { return m_.MutableData()->back(); }
const_iterator begin() const { return m_->begin(); } const_iterator begin() const { return m_.Data().begin(); }
const_iterator end() const { return m_->end(); } const_iterator end() const { return m_.Data().end(); }
const_iterator cbegin() const { return begin(); } const_iterator cbegin() const { return begin(); }
const_iterator cend() const { return end(); } const_iterator cend() const { return end(); }
const T &back() const { return m_->back(); } const T &back() const { return m_.Data().back(); }
T *data() { return m_->data(); } T *data() { return m_.MutableData()->data(); }
const T *data() const { return m_->data(); } const T *data() const { return m_.Data().data(); }
const T &front() const { return m_->front(); } const T &front() const { return m_.Data().front(); }
// end of std::vector iterator methods // end of std::vector iterator methods
// assign this from iterator. // assign this from iterator.
// NOTE: the iterator must support `end-begin` // NOTE: the iterator must support `end-begin`
template <typename Iter> template <typename Iter>
void assign(Iter begin, Iter end) { void assign(Iter begin, Iter end) {
m_->assign(begin, end); m_.MutableData()->assign(begin, end);
} }
// push_back. If the previous capacity is not enough, the memory will // push_back. If the previous capacity is not enough, the memory will
// double. // double.
void push_back(T elem) { m_->push_back(elem); } void push_back(T elem) { m_.MutableData()->push_back(elem); }
// extend a vector by iterator. // extend a vector by iterator.
// NOTE: the iterator must support end-begin // NOTE: the iterator must support end-begin
template <typename It> template <typename It>
void Extend(It begin, It end) { void Extend(It begin, It end) {
m_->Extend(begin, end); m_.MutableData()->Extend(begin, end);
} }
// resize the vector // resize the vector
void resize(size_t size) { void resize(size_t size) {
if (m_.Data().size() != size) { if (m_.Data().size() != size) {
m_->resize(size); m_.MutableData()->resize(size);
} }
} }
// get cuda ptr. immutable // get cuda ptr. immutable
const T *CUDAData(platform::Place place) const { const T *CUDAData(platform::Place place) const {
{
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.Data().CUDAData(place); return m_.Data().CUDAData(place);
} }
}
// If m_ contains CUDAData in a different place. Detach manually.
m_.Detach();
return CUDAData(place);
}
// get cuda ptr. mutable // get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) { T *CUDAMutableData(platform::Place place) {
return m_->CUDAMutableData(place); {
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.MutableData()->CUDAMutableData(place);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_.Detach();
return CUDAMutableData(place);
} }
// clear // clear
void clear() { m_->clear(); } void clear() { m_.MutableData()->clear(); }
size_t capacity() const { return m_->capacity(); } size_t capacity() const { return m_.Data().capacity(); }
// reserve data // reserve data
void reserve(size_t size) { m_->reserve(size); } void reserve(size_t size) { m_.Data().reserve(size); }
// the unified method to access CPU or CUDA data. immutable. // the unified method to access CPU or CUDA data. immutable.
const T *Data(platform::Place place) const { const T *Data(platform::Place place) const {
...@@ -445,7 +481,7 @@ class Vector { ...@@ -445,7 +481,7 @@ class Vector {
} }
// implicit cast operator. Vector can be cast to std::vector implicitly. // implicit cast operator. Vector can be cast to std::vector implicitly.
operator std::vector<T>() const { return *m_; } operator std::vector<T>() const { return m_.Data(); }
bool operator==(const Vector<T> &other) const { bool operator==(const Vector<T> &other) const {
if (size() != other.size()) return false; if (size() != other.size()) return false;
...@@ -463,7 +499,7 @@ class Vector { ...@@ -463,7 +499,7 @@ class Vector {
private: private:
// Vector is a COW object. // Vector is a COW object.
details::COWPtr<VectorData> m_; mutable details::COWPtr<VectorData> m_;
}; };
#else // PADDLE_WITH_CUDA #else // PADDLE_WITH_CUDA
......
...@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> { ...@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
// TODO(yuyang18): Strange code here. // TODO(yuyang18): Strange code here.
memory::Copy(platform::CPUPlace(), memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()),
new_rows.CUDAMutableData(context.GetPlace()), gpu_place, gpu_place, ids_data, ids_num * sizeof(int64_t), stream);
ids_data, ids_num * sizeof(int64_t), stream);
d_table->set_rows(new_rows); d_table->set_rows(new_rows);
auto *d_table_value = d_table->mutable_value(); auto *d_table_value = d_table->mutable_value();
......
...@@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> { ...@@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ(in_height, out_dims[0]); PADDLE_ENFORCE_EQ(in_height, out_dims[0]);
auto& in_value = grad->value(); auto& in_value = grad->value();
framework::Vector<int64_t> in_rows(grad->rows()); auto& in_rows = grad->rows();
int64_t in_row_numel = in_value.numel() / in_rows.size(); int64_t in_row_numel = in_value.numel() / in_rows.size();
PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册