From 6d2c6f96f190114cf2d8729f1fbac9fb6f6e50cc Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Thu, 20 Sep 2018 08:47:16 +0000
Subject: [PATCH] Revert "Revert "Merge pull request #13431 from
 chengduoZH/refine_lod""

This reverts commit a6c8d6b9a2e452fbc5b4aa1bea2e718e14ec5d2b.
---
 paddle/fluid/framework/details/cow_ptr.h      |  84 +--
 .../fluid/framework/details/cow_ptr_test.cc   |   8 +
 paddle/fluid/framework/mixed_vector.h         | 567 ++++++++++--------
 paddle/fluid/operators/detection_map_op.h     |  28 +-
 paddle/fluid/operators/extract_rows_op.cc     |   2 +-
 .../operators/math/selected_rows_functor.cu   |  10 +-
 paddle/fluid/operators/sum_op.h               |   1 -
 .../paddle/fluid/tests/unittests/op_test.py   |   2 +-
 .../tests/unittests/test_detection_map_op.py  |   5 +-
 9 files changed, 381 insertions(+), 326 deletions(-)

diff --git a/paddle/fluid/framework/details/cow_ptr.h b/paddle/fluid/framework/details/cow_ptr.h
index 21f75957be5..4fb015b0ffe 100644
--- a/paddle/fluid/framework/details/cow_ptr.h
+++ b/paddle/fluid/framework/details/cow_ptr.h
@@ -20,79 +20,41 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-// Change it to thread safe flags if needed.
-class ThreadUnsafeOwnershipFlags {
+template <typename T>
+class COWPtr {
  public:
-  explicit ThreadUnsafeOwnershipFlags(bool flag) : flag_(flag) {}
-
-  ThreadUnsafeOwnershipFlags(const ThreadUnsafeOwnershipFlags& other) = delete;
-  ThreadUnsafeOwnershipFlags& operator=(
-      const ThreadUnsafeOwnershipFlags& other) = delete;
-  ThreadUnsafeOwnershipFlags(ThreadUnsafeOwnershipFlags&& other) = default;
+  typedef std::shared_ptr<T> RefPtr;
 
-  void SetOwnership(bool flag) { flag_ = flag; }
+ private:
+  RefPtr m_sp;
 
-  // Invoke the callback if it is not owned.
-  template <typename Callback>
-  void AcquireOwnershipOnce(Callback acquire) {
-    if (!flag_) {
-      acquire();
-      flag_ = true;
+  void detach() {
+    T* tmp = m_sp.get();
+    if (!(tmp == nullptr || m_sp.unique())) {
+      m_sp = RefPtr(new T(*tmp));
     }
   }
 
- private:
-  bool flag_;
-};
-
-// Copy-On-Write pointer.
-// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
-//
-// The template parameter OwnershipFlags should have:
-//  * a constructor takes a bool. True if own.
-//  * SetOwnership(bool flag).
-//  * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
-//    owned.
-//
-// https://en.wikipedia.org/wiki/Copy-on-write
-template <typename T, typename OwnershipFlags = ThreadUnsafeOwnershipFlags>
-class COWPtr {
  public:
-  // Ctor from raw pointer.
-  explicit COWPtr(T* ptr) : payload_(ptr), ownership_{true} {}
+  COWPtr() : m_sp(nullptr) {}
+  explicit COWPtr(T* t) : m_sp(t) {}
+  explicit COWPtr(const RefPtr& refptr) : m_sp(refptr) {}
 
-  // Move methods. Steal ownership from origin
-  COWPtr(COWPtr&& other)
-      : payload_(other.payload_), ownership_{std::move(other.ownership_)} {}
-  COWPtr& operator=(COWPtr&& origin) = default;
+  const T& Data() const { return operator*(); }
 
-  // Copy methods. Not own payload
-  COWPtr(const COWPtr& other) : payload_(other.payload_), ownership_{false} {}
-  COWPtr& operator=(const COWPtr& other) {
-    payload_ = other.payload_;
-    ownership_.SetOwnership(false);
-    return *this;
-  }
-
-  // Access read only data.
-  const T& Data() const { return *payload_; }
+  T* MutableData() { return operator->(); }
 
-  // Access mutable data. If the data is not owned, the data will be copied
-  // before.
-  T* MutableData() {
-    ownership_.AcquireOwnershipOnce(
-        [this] { payload_.reset(new T(*payload_)); });
-    return payload_.get();
+  const T& operator*() const { return *m_sp; }
+  T& operator*() {
+    detach();
+    return *m_sp;
+  }
+  const T* operator->() const { return m_sp.operator->(); }
+  T* operator->() {
+    detach();
+    return m_sp.operator->();
   }
-
- private:
-  // Actual data pointer.
-  std::shared_ptr<T> payload_;
-
-  // Ownership flag.
-  OwnershipFlags ownership_;
 };
-
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/details/cow_ptr_test.cc b/paddle/fluid/framework/details/cow_ptr_test.cc
index d2142af277c..5b055d7cb4d 100644
--- a/paddle/fluid/framework/details/cow_ptr_test.cc
+++ b/paddle/fluid/framework/details/cow_ptr_test.cc
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
   ASSERT_EQ(ptr2.Data(), 10);
 }
 
+TEST(COWPtr, change_old) {
+  COWPtr<int> ptr(new int{0});
+  COWPtr<int> ptr2 = ptr;
+  *ptr.MutableData() = 10;
+  ASSERT_EQ(ptr2.Data(), 0);
+  ASSERT_EQ(ptr.Data(), 10);
+}
+
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h
index 7836ecb1272..ba2c41eb896 100644
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/fluid/framework/mixed_vector.h
@@ -17,10 +17,12 @@
 #include <algorithm>
 #include <initializer_list>
 #include <memory>
+#include <utility>
 #include <vector>
-
+#include "paddle/fluid/framework/details/cow_ptr.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/memory/memcpy.h"
 
 #include "glog/logging.h"
 
@@ -28,206 +30,401 @@ namespace paddle {
 namespace framework {
 
 #if defined(PADDLE_WITH_CUDA)
+namespace details {
+struct CUDABuffer {
+  void *data_{nullptr};
+  size_t size_{0};
+  platform::CUDAPlace place_;
+
+  CUDABuffer() {}
+  CUDABuffer(platform::Place place, size_t size)
+      : size_(size), place_(boost::get<platform::CUDAPlace>(place)) {
+    data_ = memory::Alloc(place_, size);
+  }
+
+  ~CUDABuffer() { ClearMemory(); }
+
+  CUDABuffer(const CUDABuffer &o) = delete;
+  CUDABuffer &operator=(const CUDABuffer &o) = delete;
+
+  void Resize(platform::Place place, size_t size) {
+    ClearMemory();
+    place_ = boost::get<platform::CUDAPlace>(place);
+    data_ = memory::Alloc(place_, size);
+    size_ = size;
+  }
+
+  void Swap(CUDABuffer &o) {
+    std::swap(data_, o.data_);
+    std::swap(place_, o.place_);
+    std::swap(size_, o.size_);
+  }
+
+ private:
+  void ClearMemory() const {
+    if (data_) {
+      memory::Free(place_, data_);
+    }
+  }
+};
+}  // namespace details
+
 // Vector<T> implements the std::vector interface, and can get Data or
 // MutableData from any place. The data will be synced implicitly inside.
 template <typename T>
 class Vector {
  public:
   using value_type = T;
+  using iterator = typename std::vector<T>::iterator;
+  using const_iterator = typename std::vector<T>::const_iterator;
 
-  // Default ctor. Create empty Vector
-  Vector() { InitEmpty(); }
+ private:
+  // The actual class to implement vector logic
+  class VectorData {
+   public:
+    VectorData() : flag_(kDataInCPU) {}
+    VectorData(size_t count, const T &value)
+        : cpu_(count, value), flag_(kDataInCPU) {}
+    VectorData(std::initializer_list<T> init) : cpu_(init), flag_(kDataInCPU) {}
+    template <typename U>
+    explicit VectorData(const std::vector<U> &dat)
+        : cpu_(dat), flag_(kDataInCPU) {}
+
+    VectorData(const VectorData &o) {
+      o.ImmutableCPU();
+      cpu_ = o.cpu_;
+      flag_ = kDataInCPU;
+    }
 
-  // Fill vector with value. The vector size is `count`.
-  explicit Vector(size_t count, const T &value = T()) {
-    InitEmpty();
-    if (count != 0) {
-      resize(count);
-      T *ptr = begin();
-      for (size_t i = 0; i < count; ++i) {
-        ptr[i] = value;
+    VectorData &operator=(const VectorData &o) {
+      o.ImmutableCPU();
+      cpu_ = o.cpu_;
+      flag_ = kDataInCPU;
+      details::CUDABuffer null;
+      gpu_.Swap(null);
+      return *this;
+    }
+
+    T &operator[](size_t i) {
+      MutableCPU();
+      return cpu_[i];
+    }
+
+    const T &operator[](size_t i) const {
+      ImmutableCPU();
+      return cpu_[i];
+    }
+
+    size_t size() const { return cpu_.size(); }
+
+    iterator begin() {
+      MutableCPU();
+      return cpu_.begin();
+    }
+
+    iterator end() {
+      MutableCPU();
+      return cpu_.end();
+    }
+
+    T &front() {
+      MutableCPU();
+      return cpu_.front();
+    }
+
+    T &back() {
+      MutableCPU();
+      return cpu_.back();
+    }
+
+    const_iterator begin() const {
+      ImmutableCPU();
+      return cpu_.begin();
+    }
+
+    const_iterator end() const {
+      ImmutableCPU();
+      return cpu_.end();
+    }
+
+    const T &back() const {
+      ImmutableCPU();
+      return cpu_.back();
+    }
+
+    T *data() { return &(*this)[0]; }
+
+    const T *data() const { return &(*this)[0]; }
+
+    const T &front() const {
+      ImmutableCPU();
+      return cpu_.front();
+    }
+
+    // assign this from iterator.
+    // NOTE: the iterator must support `end-begin`
+    template <typename Iter>
+    void assign(Iter begin, Iter end) {
+      MutableCPU();
+      cpu_.assign(begin, end);
+    }
+
+    // push_back. If the previous capacity is not enough, the memory will
+    // double.
+    void push_back(T elem) {
+      MutableCPU();
+      cpu_.push_back(elem);
+    }
+
+    // extend a vector by iterator.
+    // NOTE: the iterator must support end-begin
+    template <typename It>
+    void Extend(It begin, It end) {
+      MutableCPU();
+      auto out_it = std::back_inserter<std::vector<T>>(this->cpu_);
+      std::copy(begin, end, out_it);
+    }
+
+    // resize the vector
+    void resize(size_t size) {
+      MutableCPU();
+      cpu_.resize(size);
+    }
+
+    // get cuda ptr. immutable
+    const T *CUDAData(platform::Place place) const {
+      PADDLE_ENFORCE(platform::is_gpu_place(place),
+                     "CUDA Data must on CUDA place");
+      ImmutableCUDA(place);
+      return reinterpret_cast<T *>(gpu_.data_);
+    }
+
+    // get cuda ptr. mutable
+    T *CUDAMutableData(platform::Place place) {
+      const T *ptr = CUDAData(place);
+      flag_ = kDirty | kDataInCUDA;
+      return const_cast<T *>(ptr);
+    }
+
+    // clear
+    void clear() {
+      cpu_.clear();
+      flag_ = kDirty | kDataInCPU;
+    }
+
+    size_t capacity() const { return cpu_.capacity(); }
+
+    // reserve data
+    void reserve(size_t size) { cpu_.reserve(size); }
+
+    // implicit cast operator. Vector can be cast to std::vector implicitly.
+    operator std::vector<T>() const {
+      ImmutableCPU();
+      return cpu_;
+    }
+
+    bool operator==(const VectorData &other) const {
+      ImmutableCPU();
+      other.ImmutableCPU();
+      return cpu_ == other.cpu_;
+    }
+
+   private:
+    enum DataFlag {
+      kDataInCPU = 0x01,
+      kDataInCUDA = 0x02,
+      // kDirty means the data has been changed in one device.
+      kDirty = 0x10
+    };
+
+    void CopyToCPU() const {
+      // COPY GPU Data To CPU
+      void *src = gpu_.data_;
+      void *dst = cpu_.data();
+      memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
+                   nullptr);
+    }
+
+    void MutableCPU() {
+      if (IsInCUDA() && IsDirty()) {
+        CopyToCPU();
       }
+      flag_ = kDirty | kDataInCPU;
     }
-  }
 
-  // Ctor with init_list
-  Vector(std::initializer_list<T> init) {
-    if (init.size() == 0) {
-      InitEmpty();
-    } else {
-      InitByIter(init.size(), init.begin(), init.end());
+    void ImmutableCUDA(platform::Place place) const {
+      if (IsDirty()) {
+        if (IsInCPU()) {
+          CopyCPUDataToCUDA(place);
+          UnsetFlag(kDirty);
+          SetFlag(kDataInCUDA);
+        } else if (IsInCUDA() &&
+                   !(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
+          CopyCUDADataToAnotherPlace(place);
+          // Still dirty
+        } else {
+          // Dirty && DataInCUDA && Device is same
+          // Do nothing
+        }
+      } else {
+        if (!IsInCUDA()) {
+          // Even data is not dirty. However, data is not in CUDA. Copy data.
+          CopyCPUDataToCUDA(place);
+          SetFlag(kDataInCUDA);
+        } else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
+          CopyCUDADataToAnotherPlace(place);
+        } else {
+          // Not Dirty && DataInCUDA && Device is same
+          // Do nothing.
+        }
+      }
     }
-  }
+    void CopyCUDADataToAnotherPlace(const platform::Place &place) const {
+      details::CUDABuffer tmp(place, gpu_.size_);
+      const void *src = gpu_.data_;
+      void *dst = tmp.data_;
+
+      memory::Copy(tmp.place_, dst, gpu_.place_, src, gpu_.size_, nullptr);
+      gpu_.Swap(tmp);
+    }
+    void CopyCPUDataToCUDA(const platform::Place &place) const {
+      void *src = cpu_.data();
+      gpu_.Resize(place, cpu_.size() * sizeof(T));
+      void *dst = gpu_.data_;
+      auto stream = static_cast<platform::CUDADeviceContext *>(
+                        platform::DeviceContextPool::Instance().Get(place))
+                        ->stream();
+      memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
+                   stream);
+    }
+
+    void ImmutableCPU() const {
+      if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
+                                      // CPU has no data.
+        CopyToCPU();
+        UnsetFlag(kDirty);
+      }
+      SetFlag(kDataInCPU);
+    }
+
+    void UnsetFlag(int flag) const { flag_ &= ~flag; }
+    void SetFlag(int flag) const { flag_ |= flag; }
+
+    bool IsDirty() const { return flag_ & kDirty; }
+
+    bool IsInCUDA() const { return flag_ & kDataInCUDA; }
+
+    bool IsInCPU() const { return flag_ & kDataInCPU; }
+
+    mutable std::vector<T> cpu_;
+    mutable details::CUDABuffer gpu_;
+    mutable int flag_;
+  };
+
+ public:
+  // Default ctor. Create empty Vector
+  Vector() : m_(new VectorData()) {}
+
+  // Fill vector with value. The vector size is `count`.
+  explicit Vector(size_t count, const T &value = T())
+      : m_(new VectorData(count, value)) {}
+
+  // Ctor with init_list
+  Vector(std::initializer_list<T> init) : m_(new VectorData(init)) {}
 
   // implicit cast from std::vector.
   template <typename U>
-  Vector(const std::vector<U> &dat) {  // NOLINT
-    if (dat.size() == 0) {
-      InitEmpty();
-    } else {
-      InitByIter(dat.size(), dat.begin(), dat.end());
-    }
+  Vector(const std::vector<U> &dat) : m_(new VectorData(dat)) {  // NOLINT
   }
 
   // Copy ctor
-  Vector(const Vector<T> &other) { this->operator=(other); }
+  Vector(const Vector<T> &other) { m_ = other.m_; }
 
   // Copy operator
   Vector<T> &operator=(const Vector<T> &other) {
-    if (other.size() != 0) {
-      this->InitByIter(other.size(), other.begin(), other.end());
-    } else {
-      InitEmpty();
-    }
+    m_ = other.m_;
     return *this;
   }
 
   // Move ctor
-  Vector(Vector<T> &&other) {
-    this->size_ = other.size_;
-    this->flag_ = other.flag_;
-    if (other.cuda_vec_.memory_size()) {
-      this->cuda_vec_.ShareDataWith(other.cuda_vec_);
-    }
-    if (other.cpu_vec_.memory_size()) {
-      this->cpu_vec_.ShareDataWith(other.cpu_vec_);
-    }
-  }
+  Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
 
   // CPU data access method. Mutable.
-  T &operator[](size_t i) {
-    MutableCPU();
-    return const_cast<T *>(cpu_vec_.data<T>())[i];
-  }
+  T &operator[](size_t i) { return (*m_)[i]; }
 
   // CPU data access method. Immutable.
-  const T &operator[](size_t i) const {
-    ImmutableCPU();
-    return cpu_vec_.data<T>()[i];
-  }
+  const T &operator[](size_t i) const { return (*m_)[i]; }
 
   // std::vector iterator methods. Based on CPU data access method
-  size_t size() const { return size_; }
+  size_t size() const { return m_->size(); }
 
-  T *begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); }
+  iterator begin() { return m_->begin(); }
 
-  T *end() {
-    return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
-  }
+  iterator end() { return m_->end(); }
 
-  T &front() { return *begin(); }
+  T &front() { return m_->front(); }
 
-  T &back() {
-    auto it = end();
-    --it;
-    return *it;
-  }
+  T &back() { return m_->back(); }
 
-  const T *begin() const {
-    return capacity() == 0 ? &EmptyDummy() : &this->operator[](0);
-  }
+  const_iterator begin() const { return m_->begin(); }
 
-  const T *end() const {
-    return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
-  }
+  const_iterator end() const { return m_->end(); }
 
-  const T *cbegin() const { return begin(); }
+  const_iterator cbegin() const { return begin(); }
 
-  const T *cend() const { return end(); }
+  const_iterator cend() const { return end(); }
 
-  const T &back() const {
-    auto it = end();
-    --it;
-    return *it;
-  }
+  const T &back() const { return m_->back(); }
 
-  T *data() { return begin(); }
+  T *data() { return m_->data(); }
 
-  const T *data() const { return begin(); }
+  const T *data() const { return m_->data(); }
 
-  const T &front() const { return *begin(); }
+  const T &front() const { return m_->front(); }
   // end of std::vector iterator methods
 
   // assign this from iterator.
   // NOTE: the iterator must support `end-begin`
   template <typename Iter>
   void assign(Iter begin, Iter end) {
-    InitByIter(end - begin, begin, end);
+    m_->assign(begin, end);
   }
 
   // push_back. If the previous capacity is not enough, the memory will
   // double.
-  void push_back(T elem) {
-    if (size_ + 1 > capacity()) {
-      reserve((size_ + 1) << 1);
-    }
-    *end() = elem;
-    ++size_;
-  }
+  void push_back(T elem) { m_->push_back(elem); }
 
   // extend a vector by iterator.
   // NOTE: the iterator must support end-begin
   template <typename It>
   void Extend(It begin, It end) {
-    size_t pre_size = size_;
-    resize(pre_size + (end - begin));
-    T *ptr = this->begin() + pre_size;
-    for (; begin < end; ++begin, ++ptr) {
-      *ptr = *begin;
-    }
+    m_->Extend(begin, end);
   }
 
   // resize the vector
   void resize(size_t size) {
-    if (size + 1 <= capacity()) {
-      size_ = size;
-    } else {
-      MutableCPU();
-      Tensor cpu_tensor;
-      platform::Place cpu = platform::CPUPlace();
-      T *ptr = cpu_tensor.mutable_data<T>(
-          framework::make_ddim({static_cast<int64_t>(size)}), cpu);
-      const T *old_ptr =
-          cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
-      if (old_ptr != nullptr) {
-        std::copy(old_ptr, old_ptr + size_, ptr);
-      }
-      size_ = size;
-      cpu_vec_.ShareDataWith(cpu_tensor);
+    if (m_.Data().size() != size) {
+      m_->resize(size);
     }
   }
 
   // get cuda ptr. immutable
   const T *CUDAData(platform::Place place) const {
-    PADDLE_ENFORCE(platform::is_gpu_place(place),
-                   "CUDA Data must on CUDA place");
-    ImmutableCUDA(place);
-    return cuda_vec_.data<T>();
+    return m_.Data().CUDAData(place);
   }
 
   // get cuda ptr. mutable
   T *CUDAMutableData(platform::Place place) {
-    const T *ptr = CUDAData(place);
-    flag_ = kDirty | kDataInCUDA;
-    return const_cast<T *>(ptr);
+    return m_->CUDAMutableData(place);
   }
 
   // clear
-  void clear() {
-    size_ = 0;
-    flag_ = kDirty | kDataInCPU;
-  }
+  void clear() { m_->clear(); }
 
-  size_t capacity() const {
-    return cpu_vec_.memory_size() / SizeOfType(typeid(T));
-  }
+  size_t capacity() const { return m_->capacity(); }
 
   // reserve data
-  void reserve(size_t size) {
-    size_t pre_size = size_;
-    resize(size);
-    resize(pre_size);
-  }
+  void reserve(size_t size) { m_->reserve(size); }
 
   // the unify method to access CPU or CUDA data. immutable.
   const T *Data(platform::Place place) const {
@@ -248,12 +445,7 @@ class Vector {
   }
 
   // implicit cast operator. Vector can be cast to std::vector implicitly.
-  operator std::vector<T>() const {
-    std::vector<T> result;
-    result.resize(size());
-    std::copy(begin(), end(), result.begin());
-    return result;
-  }
+  operator std::vector<T>() const { return *m_; }
 
   bool operator==(const Vector<T> &other) const {
     if (size() != other.size()) return false;
@@ -267,118 +459,11 @@ class Vector {
     return true;
   }
 
- private:
-  void InitEmpty() {
-    size_ = 0;
-    flag_ = kDataInCPU;
-  }
-
-  template <typename Iter>
-  void InitByIter(size_t size, Iter begin, Iter end) {
-    platform::Place cpu = platform::CPUPlace();
-    T *ptr = this->cpu_vec_.template mutable_data<T>(
-        framework::make_ddim({static_cast<int64_t>(size)}), cpu);
-    for (size_t i = 0; i < size; ++i) {
-      *ptr++ = *begin++;
-    }
-    flag_ = kDataInCPU | kDirty;
-    size_ = size;
-  }
-
-  enum DataFlag {
-    kDataInCPU = 0x01,
-    kDataInCUDA = 0x02,
-    // kDirty means the data has been changed in one device.
-    kDirty = 0x10
-  };
-
-  void CopyToCPU() const {
-    // COPY GPU Data To CPU
-    TensorCopy(cuda_vec_, platform::CPUPlace(), &cpu_vec_);
-    WaitPlace(cuda_vec_.place());
-  }
-
-  void MutableCPU() {
-    if (IsInCUDA() && IsDirty()) {
-      CopyToCPU();
-    }
-    flag_ = kDirty | kDataInCPU;
-  }
-
-  void ImmutableCUDA(platform::Place place) const {
-    if (IsDirty()) {
-      if (IsInCPU()) {
-        TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place),
-                   &cuda_vec_);
-        WaitPlace(place);
-        UnsetFlag(kDirty);
-        SetFlag(kDataInCUDA);
-      } else if (IsInCUDA() && !(place == cuda_vec_.place())) {
-        framework::Tensor tmp;
-        TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp);
-        WaitPlace(cuda_vec_.place());
-        cuda_vec_.ShareDataWith(tmp);
-        // Still dirty
-      } else {
-        // Dirty && DataInCUDA && Device is same
-        // Do nothing
-      }
-    } else {
-      if (!IsInCUDA()) {
-        // Even data is not dirty. However, data is not in CUDA. Copy data.
-        TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place),
-                   &cuda_vec_);
-        WaitPlace(place);
-        SetFlag(kDataInCUDA);
-      } else if (!(place == cuda_vec_.place())) {
-        framework::Tensor tmp;
-        WaitPlace(cuda_vec_.place());
-        TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp);
-        WaitPlace(cuda_vec_.place());
-        WaitPlace(place);
-        cuda_vec_.ShareDataWith(tmp);
-      } else {
-        // Not Dirty && DataInCUDA && Device is same
-        // Do nothing.
-      }
-    }
-  }
-
-  void ImmutableCPU() const {
-    if (IsDirty() &&
-        !IsInCPU()) {  // If data has been changed in CUDA, or CPU has no data.
-      CopyToCPU();
-      UnsetFlag(kDirty);
-    }
-    SetFlag(kDataInCPU);
-  }
-
-  void UnsetFlag(int flag) const { flag_ &= ~flag; }
-  void SetFlag(int flag) const { flag_ |= flag; }
+  const void *Handle() const { return &m_.Data(); }
 
-  bool IsDirty() const { return flag_ & kDirty; }
-
-  bool IsInCUDA() const { return flag_ & kDataInCUDA; }
-
-  bool IsInCPU() const { return flag_ & kDataInCPU; }
-
-  static void WaitPlace(const platform::Place place) {
-    if (platform::is_gpu_place(place)) {
-      platform::DeviceContextPool::Instance()
-          .Get(boost::get<platform::CUDAPlace>(place))
-          ->Wait();
-    }
-  }
-
-  static T &EmptyDummy() {
-    static T dummy = T();
-    return dummy;
-  }
-
-  mutable int flag_;
-  mutable Tensor cpu_vec_;
-  mutable Tensor cuda_vec_;
-  size_t size_;
+ private:
+  // Vector is a COW object.
+  details::COWPtr<VectorData> m_;
 };
 
 #else  // PADDLE_WITH_CUDA
diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h
index dd1ab85fd8d..dd5d138a1e9 100644
--- a/paddle/fluid/operators/detection_map_op.h
+++ b/paddle/fluid/operators/detection_map_op.h
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
     int class_num = ctx.Attr<int>("class_num");
 
-    auto label_lod = in_label->lod();
-    auto detect_lod = in_detect->lod();
+    auto& label_lod = in_label->lod();
+    auto& detect_lod = in_detect->lod();
     PADDLE_ENFORCE_EQ(label_lod.size(), 1UL,
                       "Only support one level sequence now.");
     PADDLE_ENFORCE_EQ(label_lod[0].size(), detect_lod[0].size(),
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto labels = framework::EigenTensor<T, 2>::From(input_label);
     auto detect = framework::EigenTensor<T, 2>::From(input_detect);
 
-    auto label_lod = input_label.lod();
-    auto detect_lod = input_detect.lod();
+    auto& label_lod = input_label.lod();
+    auto& detect_lod = input_detect.lod();
 
     int batch_size = label_lod[0].size() - 1;
-    auto label_index = label_lod[0];
+    auto& label_index = label_lod[0];
 
     for (int n = 0; n < batch_size; ++n) {
       std::map<int, std::vector<Box>> boxes;
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
 
     output_true_pos->set_lod(true_pos_lod);
     output_false_pos->set_lod(false_pos_lod);
-    return;
   }
 
   void GetInputPos(const framework::Tensor& input_pos_count,
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto SetData = [](const framework::LoDTensor& pos_tensor,
                       std::map<int, std::vector<std::pair<T, int>>>& pos) {
       const T* pos_data = pos_tensor.data<T>();
-      auto pos_data_lod = pos_tensor.lod()[0];
+      auto& pos_data_lod = pos_tensor.lod()[0];
       for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) {
         for (size_t j = pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) {
           T score = pos_data[j * 2];
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
       std::map<int, std::vector<std::pair<T, int>>>* false_pos) const {
     int batch_size = gt_boxes.size();
     for (int n = 0; n < batch_size; ++n) {
-      auto image_gt_boxes = gt_boxes[n];
-      for (auto it = image_gt_boxes.begin(); it != image_gt_boxes.end();
-           ++it) {
+      auto& image_gt_boxes = gt_boxes[n];
+      for (auto& image_gt_box : image_gt_boxes) {
         size_t count = 0;
-        auto labeled_bboxes = it->second;
+        auto& labeled_bboxes = image_gt_box.second;
         if (evaluate_difficult) {
           count = labeled_bboxes.size();
         } else {
-          for (size_t i = 0; i < labeled_bboxes.size(); ++i)
-            if (!(labeled_bboxes[i].is_difficult)) ++count;
+          for (auto& box : labeled_bboxes) {
+            if (!box.is_difficult) {
+              ++count;
+            }
+          }
         }
         if (count == 0) {
           continue;
         }
-        int label = it->first;
+        int label = image_gt_box.first;
         if (label_pos_count->find(label) == label_pos_count->end()) {
           (*label_pos_count)[label] = count;
         } else {
diff --git a/paddle/fluid/operators/extract_rows_op.cc b/paddle/fluid/operators/extract_rows_op.cc
index 9a297d03cfb..3acae3bcdf4 100644
--- a/paddle/fluid/operators/extract_rows_op.cc
+++ b/paddle/fluid/operators/extract_rows_op.cc
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
     auto &in = scope.FindVar(Input("X"))->Get<framework::SelectedRows>();
     auto out = scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
 
-    auto in_rows = in.rows();
+    auto &in_rows = in.rows();
    auto out_dim = framework::make_ddim(
        std::vector<int64_t>{static_cast<int64_t>(in_rows.size()), 1});
    auto dst_ptr = out->mutable_data<int64_t>(out_dim, in.place());
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu
index a92762c7fea..d559aaa7210 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cu
+++ b/paddle/fluid/operators/math/selected_rows_functor.cu
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
     auto out_place = context.GetPlace();
     PADDLE_ENFORCE(platform::is_gpu_place(out_place));
 
-    memory::Copy(
-        boost::get<platform::CUDAPlace>(out_place), out_data,
-        boost::get<platform::CUDAPlace>(in1_place), in1_data,
-        in1_value.numel() * sizeof(T),
-        reinterpret_cast<const platform::CUDADeviceContext&>(context).stream());
+    memory::Copy(boost::get<platform::CUDAPlace>(out_place), out_data,
+                 boost::get<platform::CUDAPlace>(in1_place), in1_data,
+                 in1_value.numel() * sizeof(T), context.stream());
 
     auto* in2_data = in2_value.data<T>();
     memory::Copy(boost::get<platform::CUDAPlace>(out_place),
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
     auto in1_height = input1.height();
     PADDLE_ENFORCE_EQ(in1_height, input2->height());
 
-    framework::Vector<int64_t> in1_rows(input1.rows());
+    auto& in1_rows = input1.rows();
     auto& in2_rows = *(input2->mutable_rows());
 
     auto& in1_value = input1.value();
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 6dffe527c10..2c4c2411259 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -123,7 +123,6 @@ class SumKernel : public framework::OpKernel<T> {
 
       out_value->Resize(framework::make_ddim(in_dim));
       out_value->mutable_data<T>(context.GetPlace());
-
       // if all the input sparse vars are empty, no need to
       // merge these vars.
       if (first_dim == 0UL) {
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index b5549c507ed..e97643cddef 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
                         actual_t, expect_t, atol=atol, equal_nan=equal_nan),
                     "Output (" + out_name + ") has diff at " + str(place) +
                     "\nExpect " + str(expect_t) + "\n" + "But Got" +
-                    str(actual_t))
+                    str(actual_t) + " in class " + self.__class__.__name__)
                 if isinstance(expect, tuple):
                     self.assertListEqual(actual.recursive_sequence_lengths(),
                                          expect[1], "Output (" + out_name +
diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py
index f6eb8f2c6d8..0c5343a97d5 100644
--- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py
+++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py
@@ -20,6 +20,7 @@ import six
 import sys
 import collections
 import math
+import paddle.fluid as fluid
 from op_test import OpTest
 
 
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
         self.detect = np.array(self.detect).astype('float32')
         self.mAP = np.array(self.mAP).astype('float32')
 
-        if (len(self.class_pos_count) > 0):
+        if len(self.class_pos_count) > 0:
            self.class_pos_count = np.array(self.class_pos_count).astype(
                'int32')
            self.true_pos = np.array(self.true_pos).astype('float32')
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
 class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
     def init_test_case(self):
         super(TestDetectionMAPOpMultiBatch, self).init_test_case()
-        self.class_pos_count = [0, 2, 1]
+        self.class_pos_count = [0, 2, 1, 0]
         self.true_pos_lod = [[0, 3, 2]]
         self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
         self.false_pos_lod = [[0, 3, 2]]
-- 
GitLab
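The core idea the patch restores is copy-on-write: COWPtr<T> lets any number of copies share one payload, and the first mutable access while shared ("detach") performs the single deep copy. That is what makes copying a Vector<T> — and therefore an LoD — cheap: the CPU/GPU buffers inside VectorData are duplicated only when one of the sharers actually writes. The sketch below is illustrative, not part of the patch: it condenses the COWPtr added above to just Data()/MutableData() (dropping the operator overloads), and its main() mirrors TEST(COWPtr, change_old). It assumes only a C++11/14 compiler (shared_ptr::unique() is deprecated from C++17).

#include <cassert>
#include <memory>

// Condensed copy-on-write pointer in the spirit of the patch's COWPtr<T>.
template <typename T>
class COWPtr {
 public:
  explicit COWPtr(T* t) : m_sp(t) {}

  // Read-only access never copies.
  const T& Data() const { return *m_sp; }

  // Mutable access detaches first, so other sharers keep the old value.
  T* MutableData() {
    detach();
    return m_sp.get();
  }

 private:
  void detach() {
    T* tmp = m_sp.get();
    // Deep-copy only when the payload exists and is currently shared.
    if (!(tmp == nullptr || m_sp.unique())) {
      m_sp = std::shared_ptr<T>(new T(*tmp));
    }
  }
  std::shared_ptr<T> m_sp;
};

int main() {
  COWPtr<int> ptr(new int{0});
  COWPtr<int> ptr2 = ptr;    // shallow copy: both share one int
  *ptr.MutableData() = 10;   // ptr detaches; ptr2 is untouched
  assert(ptr.Data() == 10);
  assert(ptr2.Data() == 0);  // what TEST(COWPtr, change_old) verifies
  return 0;
}

The detach condition (non-null payload with use_count above one) is the whole trick: writers pay for one copy at most, readers never pay at all, and a COWPtr that was never copied mutates in place.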