Unverified commit 728c0624, authored by xiongkun, committed by GitHub

change Vector to std::vector and provide MixVector class as a helper … (#39559)

* change Vector to std::vector and provide MixVector class as a helper wrapper class

* solve the multi-gpu hang problem

* remove the duplicate template instantiation

* Copy vector to cpu

* add CopyToCPU

* xxx

* final version: fix the problem of all reduce

* remove mixvector dependence

* fix

* merge

* fix code

* fix by CI
Parent d56a0a1b
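In short, the commit replaces the copy-on-write `framework::Vector<T>` with a plain `std::vector<T>` and a non-owning `MixVector<T>` wrapper that is constructed only where device access is needed. A minimal sketch of the calling pattern applied throughout the diff (`kernel` and `place` are placeholders, not code from this commit):

std::vector<size_t> lod0 = {0, 1, 6, 8, 10, 11};
paddle::framework::MixVector<size_t> mixv(&lod0);  // non-owning view over lod0
kernel<<<1, 1>>>(mixv.CUDAMutableData(place), lod0.size());  // lazy H2D copy
mixv.CopyToCPU();  // flush device-side writes back into lod0 before host reads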
@@ -31,15 +31,17 @@ TEST(LoD, data) {
   lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));

   auto& v = lod[0];
+  paddle::framework::MixVector<size_t> mix_vector_v(&v);
   paddle::platform::CUDAPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
-  hipLaunchKernelGGL(test, dim3(1), dim3(1), 0, 0, v.CUDAMutableData(gpu),
-                     v.size());
+  hipLaunchKernelGGL(test, dim3(1), dim3(1), 0, 0,
+                     mix_vector_v.CUDAMutableData(gpu), v.size());
   hipDeviceSynchronize();
 #else
-  test<<<1, 1>>>(v.CUDAMutableData(gpu), v.size());
+  test<<<1, 1>>>(mix_vector_v.CUDAMutableData(gpu), v.size());
   cudaDeviceSynchronize();
 #endif
+  mix_vector_v.CopyToCPU();

   for (size_t i = 0; i < v.size(); ++i) {
     EXPECT_EQ(v[i], i * 2);
   }
@@ -62,15 +64,17 @@ TEST(LoDTensor, LoDInGPU) {
   EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);

   auto lod = lod_tensor.lod();
+  paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));

 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test, dim3(1), dim3(8), 0, 0,
-                     lod[0].CUDAMutableData(place), lod[0].size());
+                     mix_vector.CUDAMutableData(place), lod[0].size());
   hipDeviceSynchronize();
 #else
-  test<<<1, 8>>>(lod[0].CUDAMutableData(place), lod[0].size());
+  test<<<1, 8>>>(mix_vector.CUDAMutableData(place), lod[0].size());
   cudaDeviceSynchronize();
 #endif
+  mix_vector.CopyToCPU();

   for (size_t i = 0; i < src_lod[0].size(); ++i) {
     EXPECT_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2);
......
@@ -64,19 +64,20 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
   auto stream = dev_ctx->stream();
   paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(), dst,
                        platform::CPUPlace(), src, *gpu_memory_size_, stream);
+  dev_ctx->Wait();
 #endif
 }

-#define INSTANTIATE_VECTOR_FOR_TYPE(__TYPE__)                                  \
-  template <>                                                                  \
-  void Vector<__TYPE__>::VectorData::CopyToCPU() const {                       \
-    CopyToCPUHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_);                \
-  }                                                                            \
-                                                                               \
-  template <>                                                                  \
-  void Vector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                        \
-      const platform::Place &place) const {                                    \
-    CopyCPUDataToCUDAHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_, place); \
-  }
+#define INSTANTIATE_VECTOR_FOR_TYPE(__TYPE__)                                 \
+  template <>                                                                 \
+  void MixVector<__TYPE__>::VectorData::CopyToCPU() const {                   \
+    CopyToCPUHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_);                \
+  }                                                                           \
+                                                                              \
+  template <>                                                                 \
+  void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                    \
+      const platform::Place &place) const {                                   \
+    CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
+  }

 INSTANTIATE_VECTOR_FOR_TYPE(size_t)
......
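The `dev_ctx->Wait()` added after the host-to-device copy is the key synchronization point: `paddle::memory::Copy` with a stream argument is asynchronous, and the source buffer is now a caller-owned `std::vector` whose lifetime the wrapper no longer controls. A reduced sketch of the hazard using the raw CUDA runtime (illustrative only, not code from this diff):

#include <cuda_runtime.h>
#include <vector>

void UploadThenDiscard(cudaStream_t stream, void* dst, size_t n) {
  std::vector<char> host(n, 0x7f);
  cudaMemcpyAsync(dst, host.data(), n, cudaMemcpyHostToDevice, stream);
  // Without this sync, `host` would be destroyed while the copy may still
  // be pending, letting the device read freed memory. Blocking first is
  // the role dev_ctx->Wait() plays above.
  cudaStreamSynchronize(stream);
}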
@@ -22,7 +22,6 @@ limitations under the License. */
 #include <vector>

 #include "glog/logging.h"
-#include "paddle/fluid/framework/details/cow_ptr.h"
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
@@ -30,6 +29,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

+template <class T>
+using Vector = std::vector<T>;
+
 inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
     const paddle::memory::allocation::AllocationPtr &gpu_) {
   return gpu_ == nullptr ? paddle::none
@@ -39,7 +41,7 @@ inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
 // Vector<T> implements the std::vector interface, and can get Data or
 // MutableData from any place. The data will be synced implicitly inside.
 template <typename T>
-class Vector {
+class MixVector {
  public:
   using value_type = T;
   using iterator = typename std::vector<T>::iterator;
@@ -49,82 +51,68 @@ class Vector {
   // The actual class to implement vector logic
   class VectorData {
    public:
-    VectorData() : flag_(kDataInCPU) {}
-    VectorData(size_t count, const T &value)
-        : cpu_(count, value), flag_(kDataInCPU) {}
-    VectorData(std::initializer_list<T> init) : cpu_(init), flag_(kDataInCPU) {}
     template <typename U>
-    explicit VectorData(const std::vector<U> &dat)
-        : cpu_(dat), flag_(kDataInCPU) {}
+    explicit VectorData(std::vector<U> *dat) : cpu_(dat), flag_(kDataInCPU) {}
     ~VectorData() {}

-    VectorData(const VectorData &o) {
-      o.ImmutableCPU();
-      cpu_ = o.cpu_;
-      flag_ = kDataInCPU;
-    }
+    VectorData(const VectorData &o) = delete;

-    VectorData &operator=(const VectorData &o) {
-      o.ImmutableCPU();
-      cpu_ = o.cpu_;
-      flag_ = kDataInCPU;
-      return *this;
-    }
+    VectorData &operator=(const VectorData &o) = delete;

     T &operator[](size_t i) {
       MutableCPU();
-      return cpu_[i];
+      return (*cpu_)[i];
     }

     const T &operator[](size_t i) const {
       ImmutableCPU();
-      return cpu_[i];
+      return (*cpu_)[i];
     }

-    size_t size() const { return cpu_.size(); }
+    size_t size() const { return (*cpu_).size(); }

     iterator begin() {
       MutableCPU();
-      return cpu_.begin();
+      return (*cpu_).begin();
     }

     iterator end() {
       MutableCPU();
-      return cpu_.end();
+      return (*cpu_).end();
     }

     T &front() {
       MutableCPU();
-      return cpu_.front();
+      return (*cpu_).front();
     }

     T &back() {
       MutableCPU();
-      return cpu_.back();
+      return (*cpu_).back();
     }

     const_iterator begin() const {
       ImmutableCPU();
-      return cpu_.begin();
+      return (*cpu_).begin();
     }

     const_iterator end() const {
       ImmutableCPU();
-      return cpu_.end();
+      return (*cpu_).end();
     }

     const T &back() const {
       ImmutableCPU();
-      return cpu_.back();
+      return (*cpu_).back();
     }

-    T *data() { return &(*this)[0]; }
-    const T *data() const { return &(*this)[0]; }
+    T *data() { return cpu_->data(); }
+    const T *data() const { return cpu_->data(); }

     const T &front() const {
       ImmutableCPU();
-      return cpu_.front();
+      return (*cpu_).front();
     }

     // assign this from iterator.
@@ -132,14 +120,14 @@ class Vector {
     template <typename Iter>
     void assign(Iter begin, Iter end) {
       MutableCPU();
-      cpu_.assign(begin, end);
+      (*cpu_).assign(begin, end);
     }

     // push_back. If the previous capacity is not enough, the memory will
     // double.
     void push_back(T elem) {
       MutableCPU();
-      cpu_.push_back(elem);
+      (*cpu_).push_back(elem);
     }

     // extend a vector by iterator.
@@ -147,14 +135,14 @@ class Vector {
     template <typename It>
     void Extend(It begin, It end) {
       MutableCPU();
-      auto out_it = std::back_inserter<std::vector<T>>(this->cpu_);
+      auto out_it = std::back_inserter<std::vector<T>>(*(this->cpu_));
       std::copy(begin, end, out_it);
     }

     // resize the vector
     void resize(size_t size) {
       MutableCPU();
-      cpu_.resize(size);
+      (*cpu_).resize(size);
     }

     // get cuda ptr. immutable
@@ -176,26 +164,16 @@ class Vector {
     // clear
     void clear() {
-      cpu_.clear();
+      (*cpu_).clear();
       flag_ = kDirty | kDataInCPU;
     }

-    size_t capacity() const { return cpu_.capacity(); }
+    std::vector<T> *get_vector() { return cpu_; }

-    // reserve data
-    void reserve(size_t size) const { cpu_.reserve(size); }
+    size_t capacity() const { return (*cpu_).capacity(); }

-    // implicit cast operator. Vector can be cast to std::vector implicitly.
-    operator std::vector<T>() const {
-      ImmutableCPU();
-      return cpu_;
-    }
-
-    bool operator==(const VectorData &other) const {
-      ImmutableCPU();
-      other.ImmutableCPU();
-      return cpu_ == other.cpu_;
-    }
+    // reserve data
+    void reserve(size_t size) const { (*cpu_).reserve(size); }

     std::mutex &Mutex() const { return mtx_; }
@@ -203,6 +181,13 @@ class Vector {
       return OptionalCUDAPlace(gpu_);
     }

+    void MutableCPU() {
+      if (IsInCUDA() && IsDirty()) {
+        CopyToCPU();
+      }
+      flag_ = kDirty | kDataInCPU;
+    }
+
    private:
     enum DataFlag {
       kDataInCPU = 0x01,
@@ -213,13 +198,6 @@ class Vector {
     void CopyToCPU() const;

-    void MutableCPU() {
-      if (IsInCUDA() && IsDirty()) {
-        CopyToCPU();
-      }
-      flag_ = kDirty | kDataInCPU;
-    }
-
     void ImmutableCUDA(platform::Place place) const {
       if (IsDirty()) {
         if (IsInCPU()) {
@@ -269,7 +247,7 @@ class Vector {
     bool IsInCPU() const { return flag_ & kDataInCPU; }

-    mutable std::vector<T> cpu_;
+    std::vector<T> *cpu_;
     mutable paddle::memory::allocation::AllocationPtr gpu_;
     mutable size_t gpu_memory_size_{0};
     mutable int flag_;
@@ -278,89 +256,77 @@ class Vector {
   };

  public:
-  // Default ctor. Create empty Vector
-  Vector() : m_(new VectorData()) {}
-
-  // Fill vector with value. The vector size is `count`.
-  explicit Vector(size_t count, const T &value = T())
-      : m_(new VectorData(count, value)) {}
-
-  // Ctor with init_list
-  Vector(std::initializer_list<T> init) : m_(new VectorData(init)) {}
-
   // implicit cast from std::vector.
   template <typename U>
-  Vector(const std::vector<U> &dat) : m_(new VectorData(dat)) {  // NOLINT
+  MixVector(const std::vector<U> *dat) {  // NOLINT
+    m_.reset(new VectorData(const_cast<std::vector<U> *>(dat)));
   }

   // Copy ctor
-  Vector(const Vector<T> &other) { m_ = other.m_; }
+  MixVector(const MixVector<T> &other) = delete;

   // Copy operator
-  Vector<T> &operator=(const Vector<T> &other) {
-    m_ = other.m_;
-    return *this;
-  }
+  MixVector<T> &operator=(const MixVector<T> &other) = delete;

   // Move ctor
-  Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
+  MixVector(MixVector<T> &&other) = delete;

   // CPU data access method. Mutable.
-  T &operator[](size_t i) { return (*m_.MutableData())[i]; }
+  T &operator[](size_t i) { return (*m_)[i]; }

   // CPU data access method. Immutable.
-  const T &operator[](size_t i) const { return m_.Data()[i]; }
+  const T &operator[](size_t i) const { return (*m_)[i]; }

   // std::vector iterator methods. Based on CPU data access method
-  size_t size() const { return m_.Data().size(); }
-  iterator begin() { return m_.MutableData()->begin(); }
-  iterator end() { return m_.MutableData()->end(); }
-  T &front() { return m_.MutableData()->front(); }
-  T &back() { return m_.MutableData()->back(); }
-  const_iterator begin() const { return m_.Data().begin(); }
-  const_iterator end() const { return m_.Data().end(); }
+  size_t size() const { return m_->size(); }
+  iterator begin() { return m_->begin(); }
+  iterator end() { return m_->end(); }
+  T &front() { return m_->front(); }
+  T &back() { return m_->back(); }
+  const_iterator begin() const { return m_->begin(); }
+  const_iterator end() const { return m_->end(); }
   const_iterator cbegin() const { return begin(); }
   const_iterator cend() const { return end(); }
-  const T &back() const { return m_.Data().back(); }
-  T *data() { return m_.MutableData()->data(); }
-  const T *data() const { return m_.Data().data(); }
-  const T &front() const { return m_.Data().front(); }
+  const T &back() const { return m_->back(); }
+  T *data() { return m_->data(); }
+  const T *data() const { return m_->data(); }
+  const T &front() const { return m_->front(); }
   // end of std::vector iterator methods

   // assign this from iterator.
   // NOTE: the iterator must support `end-begin`
   template <typename Iter>
   void assign(Iter begin, Iter end) {
-    m_.MutableData()->assign(begin, end);
+    m_->assign(begin, end);
   }

   // push_back. If the previous capacity is not enough, the memory will
   // double.
-  void push_back(T elem) { m_.MutableData()->push_back(elem); }
+  void push_back(T elem) { m_->push_back(elem); }

   // extend a vector by iterator.
   // NOTE: the iterator must support end-begin
   template <typename It>
   void Extend(It begin, It end) {
-    m_.MutableData()->Extend(begin, end);
+    m_->Extend(begin, end);
   }

   // resize the vector
   void resize(size_t size) {
-    if (m_.Data().size() != size) {
-      m_.MutableData()->resize(size);
+    if (m_->size() != size) {
+      m_->resize(size);
     }
   }
@@ -368,15 +334,15 @@ class Vector {
   const T *CUDAData(platform::Place place) const {
     {
       platform::CUDAPlace p(place.GetDeviceId());
-      auto &mtx = m_.Data().Mutex();
+      auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
-      auto cuda_place = m_.Data().CUDAPlace();
+      auto cuda_place = m_->CUDAPlace();
       if (cuda_place == paddle::none || cuda_place == p) {
-        return m_.Data().CUDAData(place);
+        return m_->CUDAData(place);
       }
     }
-    // If m_ contains CUDAData in a different place. Detach manually.
-    m_.Detach();
+    m_->MutableCPU();
+    m_.reset(new VectorData(m_->get_vector()));
     return CUDAData(place);
   }
@@ -384,25 +350,25 @@ class Vector {
   T *CUDAMutableData(platform::Place place) {
     {
       platform::CUDAPlace p(place.GetDeviceId());
-      auto &mtx = m_.Data().Mutex();
+      auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
-      auto cuda_place = m_.Data().CUDAPlace();
+      auto cuda_place = m_->CUDAPlace();
       if (cuda_place == paddle::none || cuda_place == p) {
-        return m_.MutableData()->CUDAMutableData(place);
+        return m_->CUDAMutableData(place);
       }
     }
-    // If m_ contains CUDAData in a different place. Detach manually.
-    m_.Detach();
+    m_->MutableCPU();
+    m_.reset(new VectorData(m_->get_vector()));
     return CUDAMutableData(place);
   }

   // clear
-  void clear() { m_.MutableData()->clear(); }
+  void clear() { m_->clear(); }

-  size_t capacity() const { return m_.Data().capacity(); }
+  size_t capacity() const { return m_->capacity(); }

   // reserve data
-  void reserve(size_t size) { m_.Data().reserve(size); }
+  void reserve(size_t size) { m_->reserve(size); }

   // the unify method to access CPU or CUDA data. immutable.
   const T *Data(platform::Place place) const {
@@ -422,26 +388,12 @@ class Vector {
     }
   }

-  // implicit cast operator. Vector can be cast to std::vector implicitly.
-  operator std::vector<T>() const { return m_.Data(); }
-
-  bool operator==(const Vector<T> &other) const {
-    if (size() != other.size()) return false;
-    auto it1 = cbegin();
-    auto it2 = other.cbegin();
-    for (; it1 < cend(); ++it1, ++it2) {
-      if (*it1 != *it2) {
-        return false;
-      }
-    }
-    return true;
-  }
+  void CopyToCPU() { m_->MutableCPU(); }

-  const void *Handle() const { return &m_.Data(); }
+  const void *Handle() const { return m_.get(); }

  private:
-  // Vector is an COW object.
-  mutable details::COWPtr<VectorData> m_;
+  mutable std::unique_ptr<VectorData> m_;
 };

 };  // namespace framework
......
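Taken together, the rewritten header turns `MixVector<T>` into a non-owning, non-copyable view over a caller's `std::vector<T>`: all CPU-side accessors forward through the `cpu_` pointer, the GPU buffer is still managed lazily via the dirty flags, and a request for a different CUDA place now syncs back to the host vector and rebuilds the `VectorData` rather than detaching a COW copy. A minimal usage sketch under those semantics (`place0` and `place1` are assumed CUDA places, not part of the diff):

std::vector<int64_t> rows = {0, 2, 5, 7};
paddle::framework::MixVector<int64_t> mixv(&rows);  // view; never owns rows

const int64_t* d0 = mixv.CUDAData(place0);       // lazily copies rows to GPU 0
int64_t* d0_mut = mixv.CUDAMutableData(place0);  // same buffer, marks it dirty

// Requesting another device first flushes dirty data back into `rows`,
// then re-copies to the new place through a fresh VectorData:
const int64_t* d1 = mixv.CUDAData(place1);

mixv.CopyToCPU();  // explicit flush; `rows` is safe to read on the host again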
@@ -28,7 +28,7 @@
 #include "paddle/fluid/platform/device_context.h"

 template <typename T>
-using vec = paddle::framework::Vector<T>;
+using vec = paddle::framework::MixVector<T>;
 using gpuStream_t = paddle::gpuStream_t;

 static __global__ void multiply_10(int* ptr) {
@@ -44,10 +44,11 @@ gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) {
 }

 TEST(mixed_vector, GPU_VECTOR) {
-  vec<int> tmp;
+  std::vector<int> x;
   for (int i = 0; i < 10; ++i) {
-    tmp.push_back(i);
+    x.push_back(i);
   }
+  vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
   paddle::platform::CUDAPlace gpu(0);
@@ -70,10 +71,11 @@ TEST(mixed_vector, MultiGPU) {
     return;
   }

-  vec<int> tmp;
+  std::vector<int> x;
   for (int i = 0; i < 10; ++i) {
-    tmp.push_back(i);
+    x.push_back(i);
   }
+  vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
   paddle::platform::CUDAPlace gpu0(0);
   paddle::platform::SetDeviceId(0);
......
@@ -30,6 +30,7 @@ limitations under the License. */
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/stream.h"

+#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/core/dense_tensor.h"

 namespace paddle {
......
@@ -1455,22 +1455,10 @@ std::ostream& print_tensor<paddle::platform::complex<double>>(
 }

 std::ostream& operator<<(std::ostream& os, const LoD& lod) {
-  os << "{";
-  for (auto& v : lod) {
-    os << "{";
-    bool is_first = true;
-    for (auto& i : v) {
-      if (is_first) {
-        os << i;
-        is_first = false;
-      } else {
-        os << ", " << i;
-      }
-    }
-    os << "}";
-  }
-  os << "}";
+  // NOTE(xiongkun):
+  // https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution
+  // if we don't redefine, the operator << of pten / framework LoD is not found.
+  paddle::string::operator<<(os, lod);
   return os;
 }
@@ -1479,6 +1467,11 @@ std::ostream& operator<<(std::ostream& os, const LoD& lod) {
 namespace phi {

+std::ostream& operator<<(std::ostream& os, const LoD& lod) {
+  paddle::string::operator<<(os, lod);
+  return os;
+}
+
 std::ostream& operator<<(std::ostream& os, const phi::DenseTensor& t) {
   if (t.lod().size() > 0) {
     os << " - lod: " << t.lod() << "\n";
......
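The NOTE above is an argument-dependent-lookup problem: once `Vector<T>` is an alias for `std::vector<T>`, `LoD` no longer names any paddle type, so its only associated namespace is `std`, and an `operator<<` defined elsewhere is invisible to unqualified `os << lod` calls. A reduced, self-contained sketch of the behavior (the namespaces are illustrative stand-ins, not Paddle's):

#include <iostream>
#include <vector>

namespace str {  // stands in for paddle::string
inline std::ostream& operator<<(std::ostream& os,
                                const std::vector<size_t>& v) {
  for (size_t x : v) os << x << ' ';
  return os;
}
}  // namespace str

namespace ph {  // stands in for phi / framework
inline std::ostream& operator<<(std::ostream& os,
                                const std::vector<size_t>& v) {
  return str::operator<<(os, v);  // forward, as the commit does
}
void Dump(const std::vector<size_t>& lod) {
  // Without the forwarding overload above this would not compile: ADL only
  // searches std (the vector's associated namespace), never namespace str.
  std::cout << lod << '\n';
}
}  // namespace ph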
@@ -90,6 +90,7 @@ static void AllReduce(const phi::SelectedRows &src, phi::SelectedRows *dst,
           platform::DeviceContextPool::Instance().Get(place));
   bool use_calc_stream = (dev_ctx->stream() == stream);
+  VLOG(4) << "Is use calculate stream: " << use_calc_stream;

   // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
   // but we can use other ways to implement is in the future
@@ -97,7 +98,9 @@ static void AllReduce(const phi::SelectedRows &src, phi::SelectedRows *dst,
   framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
   rows_num_vector[strategy.local_rank_] = static_cast<int64_t>(src_rows.size());
   // CUDAMutableData use CalStream
-  auto *gpu_rows_num_ptr = rows_num_vector.CUDAMutableData(place);
+  paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
+  auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
+  VLOG(4) << "start dev_ctx->wait";
   if (!use_calc_stream) {
     dev_ctx->Wait();
   }
@@ -109,6 +112,7 @@ static void AllReduce(const phi::SelectedRows &src, phi::SelectedRows *dst,
     platform::GpuStreamSync(stream);
   }

+  mixv_rows_num_vector.CopyToCPU();
   const auto *cpu_rows_num_ptr = rows_num_vector.data();
   auto rows_num =
       std::accumulate(cpu_rows_num_ptr, cpu_rows_num_ptr + strategy.nranks_,
@@ -121,8 +125,10 @@ static void AllReduce(const phi::SelectedRows &src, phi::SelectedRows *dst,
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  auto *dst_rows_ptr = dst_rows->CUDAMutableData(place);
-  const auto *src_rows_ptr = src_rows.CUDAData(place);
+  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
+  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);

   auto *dst_tensor = dst->mutable_value();
   auto dims = src_tensor.dims();
@@ -150,24 +156,28 @@ static void AllReduce(const phi::SelectedRows &src, phi::SelectedRows *dst,
     PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllGather(
         src_tensor_ptr, dst_tensor_ptr, value_sendcount, nccl_dtype,
         comm->comm(), stream));
-    return;
-  }
-  for (int i = 0; i < strategy.nranks_; ++i) {
-    if (cpu_rows_num_ptr[i] > 0) {
-      // 2. Broadcast the rows of SelectedRows
-      PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBroadcast(
-          src_rows_ptr, dst_rows_ptr + row_offset, cpu_rows_num_ptr[i],
-          ncclInt64, i, comm->comm(), stream));
-      // 3. Broadcast the tensor data of SelectedRows
-      auto *dst_tensor_ptr_i = reinterpret_cast<uint8_t *>(dst_tensor_ptr) +
-                               row_offset * feature_size * sizeof_dtype;
-      PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBroadcast(
-          src_tensor_ptr, dst_tensor_ptr_i, cpu_rows_num_ptr[i] * feature_size,
-          nccl_dtype, i, comm->comm(), stream));
-      row_offset += cpu_rows_num_ptr[i];
+  } else {
+    for (int i = 0; i < strategy.nranks_; ++i) {
+      if (cpu_rows_num_ptr[i] > 0) {
+        // 2. Broadcast the rows of SelectedRows
+        PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBroadcast(
+            src_rows_ptr, dst_rows_ptr + row_offset, cpu_rows_num_ptr[i],
+            ncclInt64, i, comm->comm(), stream));
+        // 3. Broadcast the tensor data of SelectedRows
+        auto *dst_tensor_ptr_i = reinterpret_cast<uint8_t *>(dst_tensor_ptr) +
+                                 row_offset * feature_size * sizeof_dtype;
+        PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBroadcast(
+            src_tensor_ptr, dst_tensor_ptr_i,
+            cpu_rows_num_ptr[i] * feature_size, nccl_dtype, i, comm->comm(),
+            stream));
+        row_offset += cpu_rows_num_ptr[i];
+      }
     }
   }
+  if (!use_calc_stream) {
+    platform::GpuStreamSync(stream);
+  }
+  mixv_dst_rows.CopyToCPU();
   VLOG(3) << "Original SelectedRows rows: "
           << string::join_strings(src_rows, ',');
   VLOG(3) << "Result SelectedRows rows: "
......
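Note the control-flow change above: the early `return` after `ncclAllGather` is folded into an `if/else` so every path reaches the trailing stream sync and `CopyToCPU()`. Reduced to its essentials, the pattern is (a sketch; `use_calc_stream`, `stream`, and `place` as in the diff):

paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
auto* dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);

// ... ncclAllGather / ncclBroadcast write dst_rows_ptr on `stream` ...

if (!use_calc_stream) {
  platform::GpuStreamSync(stream);  // collectives must finish before host reads
}
mixv_dst_rows.CopyToCPU();  // copy the device rows back into *dst_rows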
@@ -143,7 +143,7 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
   auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
   // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
   // but we can use other ways to implement is in the future
-  const auto &src_rows = src.rows();
+  auto &src_rows = src.rows();
   auto gloo_wrapper = framework::GlooWrapper::GetInstance();
   size_t local_row_num = src_rows.size();
   std::vector<size_t> rows_num_vector =
@@ -157,8 +157,10 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
           << ", height: " << src.height();
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  auto *dst_rows_ptr = dst_rows->MutableData(place);
-  const int64_t *src_rows_ptr = src_rows.Data(place);
+  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
+  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  const int64_t *src_rows_ptr = mixv_src_rows.Data(place);

   auto *dst_tensor = dst->mutable_value();
   auto dims = src_tensor.dims();
......
@@ -38,8 +38,6 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
     dst->emplace_back(v);
   }
 }
-template void SetLoD<paddle::lite::LoD, framework::LoD>(
-    paddle::lite::LoD* dst, const framework::LoD& src);
 template void SetLoD<framework::LoD, paddle::lite::LoD>(
     framework::LoD* dst, const paddle::lite::LoD& src);
......
@@ -110,10 +110,12 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
     // merge elements and delete blank
     T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());

+    paddle::framework::MixVector<size_t> mixv_input_lod(&input_lod[level]);
     MergeAndDelCudaKernel<T><<<1, 1, 0, stream>>>(
         num_tokens, tokens, num_seq,
-        input_lod[level].CUDAMutableData(ctx.GetPlace()), blank,
-        merge_repeated, dev_out_lod0_ptr, output_data);
+        mixv_input_lod.CUDAMutableData(ctx.GetPlace()), blank, merge_repeated,
+        dev_out_lod0_ptr, output_data);
+    mixv_input_lod.CopyToCPU();

     // set output lod
     std::vector<size_t> host_out_lod0(dev_out_lod0.begin(),
......
@@ -149,11 +149,12 @@ class CVMGradCUDAKernel : public framework::OpKernel<T> {
         batch_size, lod[lod.size() - 1],
         platform::errors::PreconditionNotMet(
             "Output(X@GRAD)'s dim[0] must be equal to last element of lod"));
+    paddle::framework::MixVector<size_t> mixv_lod(&lod);
     CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) /
                                PADDLE_CUDA_NUM_THREADS,
                            PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
         use_cvm, item_size, cvm_data, dout_data, dx_data, true,
-        lod.CUDAData(context.GetPlace()), lod.size(), dx_numel);
+        mixv_lod.CUDAData(context.GetPlace()), lod.size(), dx_numel);
   }
 }
 };
......
@@ -57,9 +57,11 @@ class GPUBoxClipKernel : public framework::OpKernel<T> {
     auto stream = dev_ctx.stream();
     const size_t batch_size = lod.back().size() - 1;
     T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
+    paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
     GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
-        input->data<T>(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()),
+        input->data<T>(), mix_vector.CUDAMutableData(dev_ctx.GetPlace()),
         bbox_width, im_info->data<T>(), output_data);
+    mix_vector.CopyToCPU();
   }
 };
......
@@ -108,7 +108,8 @@ class TargetAssignKernel : public framework::OpKernel<T> {
     auto x_lod = x->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace());
+    paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
+    size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace());
 #else
     size_t* x_lod_data = x_lod.data();
 #endif
@@ -116,6 +117,9 @@ class TargetAssignKernel : public framework::OpKernel<T> {
     TargetAssignFunctor<T, WT> functor(x_data, match_idx_data, x_lod_data,
                                        mismatch_value, n, m, p, k, out_data,
                                        out_wt_data);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    mixv_x_lod.CopyToCPU();
+#endif

     auto& device_ctx = ctx.template device_context<DeviceContext>();
     platform::ForRange<DeviceContext> for_range(device_ctx, n * m);
@@ -130,13 +134,17 @@ class TargetAssignKernel : public framework::OpKernel<T> {
       const int* neg_idx_data = neg_indices->data<int>();
       auto neg_lod = neg_indices->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-      size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
+      paddle::framework::MixVector<size_t> mixv_neg_lod(&neg_lod);
+      size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace());
 #else
       size_t* neg_lod_data = neg_lod.data();
 #endif
       NegTargetAssignFunctor<DeviceContext, T, WT> neg_trg_functor;
       neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k,
                       mismatch_value, out_data, out_wt_data);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      mixv_neg_lod.CopyToCPU();
+#endif
     }
   }
 };
......
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <iostream>
 #include <memory>
 #include "dnnl.hpp"
+#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/fused/multi_gru_op.h"
 #include "paddle/fluid/platform/errors.h"
......
@@ -164,8 +164,10 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
       auto gpu_place = context.GetPlace();

       // TODO(yuyang18): Strange code here.
-      memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()),
+      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+      memory::Copy(gpu_place, mixv_new_rows.CUDAMutableData(context.GetPlace()),
                    gpu_place, ids_data, ids_num * sizeof(int64_t), stream);
+      mixv_new_rows.CopyToCPU();
       d_table->set_rows(new_rows);

       auto *d_table_value = d_table->mutable_value();
......
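The ordering of `CopyToCPU()` before `set_rows()` above is the point of the change: the buffer written by `memory::Copy` belongs to the temporary `MixVector` view, not to `new_rows` itself, while `set_rows(new_rows)` consumes the host-side `std::vector`. A reduced sketch of that ordering (names as in the diff):

std::vector<int64_t> new_rows(ids_num);
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);

// The device copy fills the view's GPU buffer and marks it dirty:
memory::Copy(gpu_place, mixv_new_rows.CUDAMutableData(gpu_place), gpu_place,
             ids_data, ids_num * sizeof(int64_t), stream);

mixv_new_rows.CopyToCPU();    // sync the GPU buffer back into new_rows
d_table->set_rows(new_rows);  // safe: the host vector now holds the ids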
@@ -152,14 +152,16 @@ struct LookupTableV2GradCUDAFunctor {
       new_rows.resize(ids_num);
       auto gpu_place = context_.GetPlace();

+      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
       if (!std::is_same<IdT, int64_t>::value) {
         InputTypeConvert<<<grids, threads, 0, stream>>>(
-            ids_data, ids_num, new_rows.MutableData(gpu_place));
+            ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
       } else {
-        memory::Copy(gpu_place, new_rows.CUDAMutableData(gpu_place), gpu_place,
-                     ids_data, ids_num * sizeof(int64_t), stream);
+        memory::Copy(gpu_place, mixv_new_rows.CUDAMutableData(gpu_place),
+                     gpu_place, ids_data, ids_num * sizeof(int64_t), stream);
       }
+      mixv_new_rows.CopyToCPU();
       d_table->set_rows(new_rows);

       auto *d_table_value = d_table->mutable_value();
......
@@ -357,8 +357,9 @@ class BeamSearchFunctor<platform::CUDADeviceContext, T> {
     framework::LoD selected_lod(2);
     selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end());
     selected_lod[1].resize(scores->dims()[0] + 1);
-    size_t* selected_offsets =
-        selected_lod[1].CUDAMutableData(context.GetPlace());
+    paddle::framework::MixVector<size_t> mix_vector(&selected_lod[1]);
+    paddle::framework::MixVector<size_t> mixv_abs(&abs_lod[level]);
+    size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace());

     if (num_seqs == 1) {
       const int seq_length = static_cast<int>(abs_lod[level][1]);
@@ -377,7 +378,7 @@ class BeamSearchFunctor<platform::CUDADeviceContext, T> {
             is_accumulated, num_used_threads));
       }
     } else if (num_seqs <= 4) {
-      const size_t* seq_offsets = abs_lod[level].CUDAData(context.GetPlace());
+      const size_t* seq_offsets = mixv_abs.CUDAData(context.GetPlace());
       // Use only 1 block
       const int kMaxThreadsPerSeq = 32;
       const int kMaxSeqs = 4;
@@ -400,6 +401,7 @@ class BeamSearchFunctor<platform::CUDADeviceContext, T> {
     }

     context.Wait();
+    mix_vector.CopyToCPU();
     if (!framework::CheckLoD(selected_lod)) {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "lod %s is not right in"
......
@@ -170,7 +170,8 @@ struct SelectedRowsAddTo<platform::CPUDeviceContext, T> {
     auto* in2_value = input2->mutable_value();

     // concat rows
-    in2_rows.Extend(in1_rows.begin(), in1_rows.end());
+    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());

     auto in1_place = input1.place();
     PADDLE_ENFORCE_EQ(platform::is_cpu_place(in1_place), true,
......
@@ -161,9 +161,10 @@ struct SelectedRowsAddTensor<platform::CUDADeviceContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
+    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddTensorKernel<
         T, block_size><<<grid, threads, 0, context.stream()>>>(
-        in1_data, in1_rows.CUDAData(context.GetPlace()), out_data,
+        in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data,
         in1_row_numel);

     auto out_eigen = framework::EigenVector<T>::Flatten(*output);
@@ -198,8 +199,9 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
     auto* in2_value = input2->mutable_value();

     // concat rows
+    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     if (in1_rows.size()) {
-      in2_rows.Extend(in1_rows.begin(), in1_rows.end());
+      mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
     }

     auto in1_place = input1.place();
@@ -274,9 +276,10 @@ struct SelectedRowsAddToTensor<platform::CUDADeviceContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
+    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddToTensorKernel<
         T, block_size><<<grid, threads, 0, context.stream()>>>(
-        in1_data, in1_rows.CUDAData(context.GetPlace()), in2_data,
+        in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data,
         in1_row_numel);
   }
 };
@@ -356,10 +359,13 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
     dim3 threads(block_size, 1);
     dim3 grid1(input_rows.size(), 1);

+    paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
+    paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
     MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
-        input_data, input_rows.CUDAData(context.GetPlace()), out_data,
-        out.mutable_rows()->CUDAMutableData(context.GetPlace()),
-        out.rows().size(), input_width);
+        input_data, mix_vector_input.CUDAData(context.GetPlace()), out_data,
+        mix_vector_out.CUDAMutableData(context.GetPlace()), out.rows().size(),
+        input_width);
+    mix_vector_out.CopyToCPU();
   }

   void operator()(const platform::CUDADeviceContext& context,
@@ -423,10 +429,13 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
       auto& input_rows = input->rows();
       dim3 grid1(input_rows.size(), 1);

+      paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
+      paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
       MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
-          input_data, input_rows.CUDAData(context.GetPlace()), out_data,
-          out.mutable_rows()->CUDAMutableData(context.GetPlace()),
-          out.rows().size(), input_width);
+          input_data, mix_vector_input.CUDAData(context.GetPlace()), out_data,
+          mix_vector_out.CUDAMutableData(context.GetPlace()), out.rows().size(),
+          input_width);
+      mix_vector_out.CopyToCPU();
     }
   }
 };
......
@@ -72,8 +72,9 @@ class CopyMatrixRowsFunctor<platform::CUDADeviceContext, T> {
     dim3 threads(128, 8);
     dim3 grid(8, 1);
     auto stream = context.stream();
+    paddle::framework::MixVector<size_t> mix_index_lod(&index_lod);
     CopyMatrixRowsKernel<T, 128, 8, 8><<<grid, threads, 0, stream>>>(
-        src_data, dst_data, index_lod.CUDAData(context.GetPlace()), height,
+        src_data, dst_data, mix_index_lod.CUDAData(context.GetPlace()), height,
         width, is_src_index);
   }
 };
......
@@ -59,7 +59,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
                   int lod_level = 0, bool norm_by_times = false,
                   const PadLayout layout = kBatchLengthWidth) {
     auto seq_lod = seq_tensor.lod();
-    const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level];
+    auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level];
     const auto& seq_tensor_dims = seq_tensor.dims();
     const auto& pad_tensor_dims = pad_tensor->dims();
     int max_seq_len = MaximumSequenceLength(seq_offsets);
@@ -104,10 +104,11 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     T* pad_data = pad_tensor->data<T>();
     const T* pad_value_data = pad_value.data<T>();

+    paddle::framework::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
     SequencePaddingKernel<T, kSeqToPad><<<grid, threads, 0, context.stream()>>>(
         pad_data, seq_data, pad_value_data, pad_value.numel() == 1,
-        seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len,
-        step_width, norm_by_times, layout);
+        mix_vector_seq_offsets.CUDAData(context.GetPlace()), seq_num,
+        pad_seq_len, step_width, norm_by_times, layout);
   }
 };
@@ -157,9 +158,10 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
     const T* pad_data = pad_tensor.data<T>();
     T* seq_data = seq_tensor->data<T>();

+    paddle::framework::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
     SequencePaddingKernel<T, kPadToSeq><<<grid, threads, 0, context.stream()>>>(
         seq_data, pad_data, nullptr, false,
-        seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len,
+        mixv_seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len,
         step_width, norm_by_times, layout);
   }
 };
......
@@ -168,41 +168,42 @@ class SequencePoolFunctor<platform::CUDADeviceContext, T> {
     const size_t item_dim = output->numel() / output->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
+    paddle::framework::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_kernel<
           T, MaxPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           MaxPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), index->data<int>());
     } else if (pooltype == "AVERAGE") {
       sequence_pool_kernel<
           T, AvgPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           AvgPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "SUM") {
       sequence_pool_kernel<
           T, SumPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           SumPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "SQRT") {
       sequence_pool_kernel<
           T, SqrtPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           SqrtPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "LAST") {
       sequence_pool_kernel<
           T, LastPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           LastPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "FIRST") {
       sequence_pool_kernel<
           T, FirstPoolFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           FirstPoolFunctor<T>(), input.data<T>(), pad_value,
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           output->mutable_data<T>(context.GetPlace()), nullptr);
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
@@ -335,41 +336,42 @@ class SequencePoolGradFunctor<platform::CUDADeviceContext, T> {
     const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
+    paddle::framework::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_grad_kernel<
           T, MaxPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           MaxPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), index->data<int>());
     } else if (pooltype == "AVERAGE") {
       sequence_pool_grad_kernel<
           T, AvgPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           AvgPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "SUM") {
       sequence_pool_grad_kernel<
           T, SumPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           SumPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "SQRT") {
       sequence_pool_grad_kernel<
           T, SqrtPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           SqrtPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "LAST") {
       sequence_pool_grad_kernel<
           T, LastPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           LastPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), nullptr);
     } else if (pooltype == "FIRST") {
       sequence_pool_grad_kernel<
           T, FirstPoolGradFunctor<T>><<<grid, threads, 0, context.stream()>>>(
           FirstPoolGradFunctor<T>(), out_grad.data<T>(),
-          lod.CUDAData(context.GetPlace()), lod.size(), item_dim,
+          mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
           in_grad->mutable_data<T>(context.GetPlace()), nullptr);
     } else {
......
@@ -41,21 +41,23 @@ class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
     auto lod = seq->lod();
     const size_t num_seq = lod[level].size() - 1;
     const size_t seq_width = seq->numel() / seq->dims()[0];
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+    auto abs_offset_lod = framework::ToAbsOffset(lod);
     T* seq_data = seq->mutable_data<T>(context.GetPlace());
+    paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
 #ifdef PADDLE_WITH_HIP
     hipLaunchKernelGGL(
         HIP_KERNEL_NAME(SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS>),
         dim3(num_seq), dim3(PADDLE_CUDA_NUM_THREADS), 0, context.stream(),
-        seq_data, abs_offset_lod[level].CUDAMutableData(context.GetPlace()),
-        scales, seq_width);
+        seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales,
+        seq_width);
 #else
     SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS><<<
         num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
-        seq_data, abs_offset_lod[level].CUDAMutableData(context.GetPlace()),
-        scales, seq_width);
+        seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales,
+        seq_width);
 #endif
+    mix_vector.CopyToCPU();
   }
 };
......
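ScaleLoDTensorFunctor shows the mutable variant of the same recipe: the kernel writes through CUDAMutableData(), so the wrapper has to copy the device buffer back before the host vector is read again. The contract, in sketch form (scale_sketch is a hypothetical stand-in for SequenceScaleKernel):

    paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[level]);
    size_t* dev_lod = mix_vector.CUDAMutableData(context.GetPlace());  // host -> device
    scale_sketch<<<num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
        seq_data, dev_lod, scales, seq_width);
    mix_vector.CopyToCPU();  // device -> host write-back; without it the
                             // std::vector silently keeps its stale contents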
...@@ -96,12 +96,14 @@ struct SparseAdagradFunctor<platform::CUDADeviceContext, T> { ...@@ -96,12 +96,14 @@ struct SparseAdagradFunctor<platform::CUDADeviceContext, T> {
const int block_size = 256; const int block_size = 256;
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid2(1, merge_rows.size()); dim3 grid2(1, merge_rows.size());
paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
SparseAdagradFunctorKernel< SparseAdagradFunctorKernel<
T, 256><<<grid2, threads, 0, T, 256><<<grid2, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
grad_merge_data, merge_rows.CUDAMutableData(context.GetPlace()), lr, grad_merge_data, mixv_merge_rows.CUDAMutableData(context.GetPlace()),
param_data, moment_data, grad_width, epsilon); lr, param_data, moment_data, grad_width, epsilon);
mixv_merge_rows.CopyToCPU();
} }
}; };
......
...@@ -345,7 +345,10 @@ class AdamOpCUDAKernel : public framework::OpKernel<T> { ...@@ -345,7 +345,10 @@ class AdamOpCUDAKernel : public framework::OpKernel<T> {
auto& grad_merge = *grad_merge_ptr; auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace()); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (beta1_pow->place() == platform::CPUPlace() && if (beta1_pow->place() == platform::CPUPlace() &&
......
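The optimizer kernels (Adam here; AdamW, FTRL, LAMB, momentum, RMSProp and SGD below repeat it) need the merged SelectedRows row list on whichever device the op runs. MixVector::Data(place) does that dispatch: on a CPU place it is just the vector's own buffer, on a CUDA place it behaves like CUDAData(). The shared call-site shape, roughly:

    auto* grad_merge_rows = &grad_merge.rows();  // now a plain std::vector<int64_t>
    paddle::framework::MixVector<int64_t> mixv_rows(grad_merge_rows);
    // CPU pointer or device pointer, chosen by the place:
    const int64_t* rows = mixv_rows.Data(ctx.GetPlace());
    // 'rows' can then feed the CPU functor and the CUDA kernel unchanged.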
...@@ -592,7 +592,10 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -592,7 +592,10 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto& grad_merge = *grad_merge_ptr; auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace()); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
SparseAdamFunctor<T, CPUAdam> functor( SparseAdamFunctor<T, CPUAdam> functor(
......
...@@ -368,7 +368,10 @@ class AdamWOpCUDAKernel : public framework::OpKernel<T> { ...@@ -368,7 +368,10 @@ class AdamWOpCUDAKernel : public framework::OpKernel<T> {
auto& grad_merge = *grad_merge_ptr; auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace()); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (beta1_pow->place() == platform::CPUPlace() && if (beta1_pow->place() == platform::CPUPlace() &&
......
...@@ -189,7 +189,9 @@ class FTRLOpKernel : public framework::OpKernel<T> { ...@@ -189,7 +189,9 @@ class FTRLOpKernel : public framework::OpKernel<T> {
merge_func(ctx.template device_context<DeviceContext>(), *grad, merge_func(ctx.template device_context<DeviceContext>(), *grad,
merged_grad); merged_grad);
const int64_t* rows = merged_grad->rows().Data(ctx.GetPlace()); auto* merged_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_merged_rows(merged_rows);
const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace());
auto row_numel = static_cast<int64_t>(merged_grad->value().dims()[1]); auto row_numel = static_cast<int64_t>(merged_grad->value().dims()[1]);
auto row_height = static_cast<int64_t>(merged_grad->rows().size()); auto row_height = static_cast<int64_t>(merged_grad->rows().size());
......
...@@ -594,7 +594,10 @@ class LambOpKernel : public framework::OpKernel<T> { ...@@ -594,7 +594,10 @@ class LambOpKernel : public framework::OpKernel<T> {
auto& grad_merge = *grad_merge_ptr; auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
const int64_t* rows = grad_merge.rows().Data(ctx.GetPlace()); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (platform::is_gpu_place(ctx.GetPlace()) && if (platform::is_gpu_place(ctx.GetPlace()) &&
beta1_pow.place() == platform::CPUPlace() && beta1_pow.place() == platform::CPUPlace() &&
......
...@@ -561,7 +561,10 @@ class MomentumOpKernel : public framework::OpKernel<T> { ...@@ -561,7 +561,10 @@ class MomentumOpKernel : public framework::OpKernel<T> {
merge_func(ctx.template device_context<DeviceContext>(), *grad, merge_func(ctx.template device_context<DeviceContext>(), *grad,
merged_grad); merged_grad);
const int64_t* rows = merged_grad->rows().Data(ctx.GetPlace()); auto* grad_merge_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
int64_t row_numel = int64_t row_numel =
merged_grad->value().numel() / merged_grad->rows().size(); merged_grad->value().numel() / merged_grad->rows().size();
platform::ForRange<DeviceContext> for_range( platform::ForRange<DeviceContext> for_range(
......
...@@ -227,7 +227,10 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -227,7 +227,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
merge_func(dev_ctx, grad, merged_grad); merge_func(dev_ctx, grad, merged_grad);
platform::ForRange<DeviceContext> for_range(dev_ctx, limit); platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
const int64_t *rows = merged_grad->rows().Data(ctx.GetPlace()); auto &grad_merge_rows = merged_grad->rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(
&grad_merge_rows);
const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto &merged_tensor = merged_grad->value(); auto &merged_tensor = merged_grad->value();
int64_t row_count = merged_grad->rows().size(); int64_t row_count = merged_grad->rows().size();
......
...@@ -148,11 +148,11 @@ class SGDOpKernel<platform::CUDADeviceContext, T> ...@@ -148,11 +148,11 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
int thread_x = kThreadsPerBlock; int thread_x = kThreadsPerBlock;
int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount(); int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount();
int max_blocks = std::max(max_threads / kThreadsPerBlock, 1); int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, SparseSGDFunctorKernel<<<max_blocks, thread_x, 0,
ctx.cuda_device_context().stream()>>>( ctx.cuda_device_context().stream()>>>(
in_data, in_rows.CUDAData(ctx.GetPlace()), learning_rate->data<T>(), in_data, mixv_in_rows.CUDAData(ctx.GetPlace()),
out_data, in_row_numel, in_rows.size()); learning_rate->data<T>(), out_data, in_row_numel, in_rows.size());
} else { } else {
PADDLE_ENFORCE_EQ(false, true, PADDLE_ENFORCE_EQ(false, true,
......
...@@ -336,7 +336,8 @@ class RowConvKernel<platform::CUDADeviceContext, T> ...@@ -336,7 +336,8 @@ class RowConvKernel<platform::CUDADeviceContext, T>
int num_sequence = batch_indices.size() - 1; int num_sequence = batch_indices.size() - 1;
int future_context = Filter->dims()[0]; int future_context = Filter->dims()[0];
size_t *idx = batch_indices.CUDAMutableData(context.GetPlace()); paddle::framework::MixVector<size_t> mix_vector(&batch_indices);
size_t *idx = mix_vector.CUDAMutableData(context.GetPlace());
auto stream = context.cuda_device_context().stream(); auto stream = context.cuda_device_context().stream();
if (future_context <= 32) { if (future_context <= 32) {
...@@ -352,6 +353,7 @@ class RowConvKernel<platform::CUDADeviceContext, T> ...@@ -352,6 +353,7 @@ class RowConvKernel<platform::CUDADeviceContext, T>
RowConvForward<T><<<grid_dim, block_dim, 0, stream>>>( RowConvForward<T><<<grid_dim, block_dim, 0, stream>>>(
in, weight, num_sequence, input_dim, future_context, idx, out); in, weight, num_sequence, input_dim, future_context, idx, out);
} }
mix_vector.CopyToCPU();
} }
}; };
...@@ -392,7 +394,8 @@ class RowConvGradKernel<platform::CUDADeviceContext, T> ...@@ -392,7 +394,8 @@ class RowConvGradKernel<platform::CUDADeviceContext, T>
// int input_dim = X->dims()[1]; // int input_dim = X->dims()[1];
int num_sequence = batch_indices.size() - 1; int num_sequence = batch_indices.size() - 1;
int future_context = Filter->dims()[0]; int future_context = Filter->dims()[0];
size_t *idx = batch_indices.CUDAMutableData(context.GetPlace()); paddle::framework::MixVector<size_t> mixv_batch_indices(&batch_indices);
size_t *idx = mixv_batch_indices.CUDAMutableData(context.GetPlace());
auto &device_ctx = context.cuda_device_context(); auto &device_ctx = context.cuda_device_context();
phi::funcs::SetConstant<platform::CUDADeviceContext, T> zero; phi::funcs::SetConstant<platform::CUDADeviceContext, T> zero;
...@@ -444,6 +447,7 @@ class RowConvGradKernel<platform::CUDADeviceContext, T> ...@@ -444,6 +447,7 @@ class RowConvGradKernel<platform::CUDADeviceContext, T>
dout, weights, num_sequence, input_dim, future_context, idx, din); dout, weights, num_sequence, input_dim, future_context, idx, din);
} }
} }
mixv_batch_indices.CopyToCPU();
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -71,7 +71,8 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> { ...@@ -71,7 +71,8 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
out->Resize({in_dims[0], win_size}); out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace()); auto out_data = out->mutable_data<T>(context.GetPlace());
// Copy LoD to GPU // Copy LoD to GPU
const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace()); paddle::framework::MixVector<size_t> mixv_lod0(&lod0);
const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace());
// Calc output tensor // Calc output tensor
CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
PADDLE_CUDA_NUM_THREADS, 0, stream>>>( PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
......
...@@ -88,7 +88,8 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> { ...@@ -88,7 +88,8 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
// Copy LoD to GPU // Copy LoD to GPU
auto last_lod = lod[lod.size() - 1]; auto last_lod = lod[lod.size() - 1];
auto lod_len = last_lod.size(); auto lod_len = last_lod.size();
const size_t* dev_in_lod_ptr = last_lod.CUDAData(ctx.GetPlace()); paddle::framework::MixVector<size_t> mixv_last_lod(&last_lod);
const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace());
// Calc output LoD // Calc output LoD
thrust::device_vector<size_t> dev_out_lod(lod_len); thrust::device_vector<size_t> dev_out_lod(lod_len);
size_t* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data()); size_t* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
......
...@@ -81,8 +81,9 @@ struct SequenceExpandAsFunctor<platform::CUDADeviceContext, T> { ...@@ -81,8 +81,9 @@ struct SequenceExpandAsFunctor<platform::CUDADeviceContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(block_x); dim3 grid_size(block_x);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>( sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width, x.data<T>(), mixv_ref_lod.CUDAData(context.GetPlace()), height, width,
out->mutable_data<T>(context.GetPlace())); out->mutable_data<T>(context.GetPlace()));
} }
}; };
...@@ -107,10 +108,11 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> { ...@@ -107,10 +108,11 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(block_x); dim3 grid_size(block_x);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_expand_as_grad_kernel<<<grid_size, block_size, 0, sequence_expand_as_grad_kernel<<<grid_size, block_size, 0,
context.stream()>>>( context.stream()>>>(
dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width, dout.data<T>(), mixv_ref_lod.CUDAData(context.GetPlace()), height,
dx->mutable_data<T>(context.GetPlace())); width, dx->mutable_data<T>(context.GetPlace()));
} }
}; };
......
...@@ -157,7 +157,9 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> { ...@@ -157,7 +157,9 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
out_offset[2 * x_lod_size + i] = ref_lod[i]; out_offset[2 * x_lod_size + i] = ref_lod[i];
} }
const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace()); paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
const size_t* out_offset_data =
mixv_out_offset.CUDAData(context.GetPlace());
const size_t* x_lod_data = out_offset_data + x_lod_size; const size_t* x_lod_data = out_offset_data + x_lod_size;
const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size; const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;
...@@ -193,11 +195,14 @@ struct SequenceExpandGradFunctor<platform::CUDADeviceContext, T> { ...@@ -193,11 +195,14 @@ struct SequenceExpandGradFunctor<platform::CUDADeviceContext, T> {
int block_x = static_cast<int>(ref_lod.size()); int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z); dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1); dim3 grid_size(block_x, 1);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>( sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), dout.data<T>(), mixv_ref_lod.CUDAData(context.GetPlace()),
x_lod.CUDAData(context.GetPlace()), mixv_x_lod.CUDAData(context.GetPlace()),
out_offset.CUDAData(context.GetPlace()), ref_lod.size(), x_item_length, mixv_out_offset.CUDAData(context.GetPlace()), ref_lod.size(),
dx->mutable_data<T>(context.GetPlace())); x_item_length, dx->mutable_data<T>(context.GetPlace()));
} }
}; };
......
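Worth noting in the SequenceExpand forward functor above: out_offset is deliberately sized at 3 * x_lod_size so that out_offset, x_lod and ref_lod travel to the device in a single MixVector transfer, and the three device pointers are just offsets into that one buffer:

    // packed layout inside out_offset (n = x_lod_size):
    //   [0, n) out offsets   [n, 2n) x lod   [2n, 3n) ref lod
    paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
    const size_t* out_offset_data = mixv_out_offset.CUDAData(context.GetPlace());
    const size_t* x_lod_data = out_offset_data + x_lod_size;        // one copy,
    const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;  // three views

The grad functor takes the simpler route of wrapping the three vectors separately, at the cost of up to three host-to-device copies.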
...@@ -132,7 +132,9 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> { ...@@ -132,7 +132,9 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
lod = x.lod()[0].CUDAData(ctx.GetPlace()); auto xlod = x.lod()[0];
paddle::framework::MixVector<size_t> mixv_xlod(&xlod);
lod = mixv_xlod.CUDAData(ctx.GetPlace());
} else { } else {
#endif #endif
lod = x.lod()[0].data(); lod = x.lod()[0].data();
......
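SequenceReverse is the one call site that copies first: x.lod() hands out a const LoD, while MixVector needs a mutable std::vector to adopt, so the level-0 LoD is copied into a local before wrapping. The device pointer handed back mirrors that copy, so the copy (and the wrapper) must outlive every use of the pointer:

    auto xlod = x.lod()[0];                                  // by-value copy
    paddle::framework::MixVector<size_t> mixv_xlod(&xlod);   // wraps the copy
    const size_t* lod = mixv_xlod.CUDAData(ctx.GetPlace());  // device mirror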
...@@ -133,9 +133,10 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> { ...@@ -133,9 +133,10 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(max_blocks); dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_kernel< sequence_softmax_kernel<
T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>( T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, x.data<T>(), mixv_ref_lod.CUDAData(context.GetPlace()), height,
out->mutable_data<T>(context.GetPlace())); out->mutable_data<T>(context.GetPlace()));
} }
}; };
...@@ -156,10 +157,12 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> { ...@@ -156,10 +157,12 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(max_blocks); dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_grad_kernel< sequence_softmax_grad_kernel<
T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>( T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(), out.data<T>(), ref_lod.CUDAData(context.GetPlace()), dout.data<T>(), out.data<T>(),
height, dx->mutable_data<T>(context.GetPlace())); mixv_ref_lod.CUDAData(context.GetPlace()), height,
dx->mutable_data<T>(context.GetPlace()));
} }
}; };
......
...@@ -292,7 +292,7 @@ namespace paddle { ...@@ -292,7 +292,7 @@ namespace paddle {
paddle::experimental::complex128, \ paddle::experimental::complex128, \
__VA_ARGS__) \ __VA_ARGS__) \
default: \ default: \
PADDLE_THROW(paddle::platform::errors::InvalidArgument( \ PADDLE_THROW(phi::errors::InvalidArgument( \
"Invalid enum data type `%d`.", static_cast<int>(__dtype__))); \ "Invalid enum data type `%d`.", static_cast<int>(__dtype__))); \
} \ } \
}() }()
......
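From this hunk on, the diff switches to the PR's second cleanup: error objects are built from phi::errors::* instead of paddle::platform::errors::* so that phi code stops reaching back into fluid. The two spellings resolve to effectively the same error types (platform::errors is kept as the fluid-side name); only the namespace, and the clang-format line breaks that follow from the shorter name, change:

    // before
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "Invalid enum data type `%d`.", static_cast<int>(__dtype__)));
    // after
    PADDLE_THROW(phi::errors::InvalidArgument(
        "Invalid enum data type `%d`.", static_cast<int>(__dtype__)));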
...@@ -19,7 +19,7 @@ namespace experimental { ...@@ -19,7 +19,7 @@ namespace experimental {
ExternalStorage::ExternalStorage(void* ptr, ExternalStorage::ExternalStorage(void* ptr,
size_t size, size_t size,
const paddle::platform::Place& place) const phi::Place& place)
: phi::Storage(std::make_shared<phi::Allocation>(ptr, size, place)), : phi::Storage(std::make_shared<phi::Allocation>(ptr, size, place)),
size_(size) {} size_(size) {}
...@@ -29,11 +29,11 @@ ExternalStorage::ExternalStorage(const phi::intrusive_ptr<phi::Storage>& root, ...@@ -29,11 +29,11 @@ ExternalStorage::ExternalStorage(const phi::intrusive_ptr<phi::Storage>& root,
: Storage(std::make_shared<phi::Allocation>( : Storage(std::make_shared<phi::Allocation>(
static_cast<uint8_t*>(root->data()) + delta, size, root->place())), static_cast<uint8_t*>(root->data()) + delta, size, root->place())),
size_(size) { size_(size) {
PADDLE_ENFORCE_LE(static_cast<size_t>(delta + size), PADDLE_ENFORCE_LE(
root->size(), static_cast<size_t>(delta + size),
paddle::platform::errors::InvalidArgument( root->size(),
"The size of the external storage does " phi::errors::InvalidArgument("The size of the external storage does "
"not meet the metadata requirements.")); "not meet the metadata requirements."));
} }
} // namespace experimental } // namespace experimental
......
...@@ -30,7 +30,7 @@ class ExternalStorage : public phi::Storage { ...@@ -30,7 +30,7 @@ class ExternalStorage : public phi::Storage {
static const char* name() { return "ExternalStorage"; } static const char* name() { return "ExternalStorage"; }
void Realloc(size_t n) override { void Realloc(size_t n) override {
PADDLE_THROW(paddle::platform::errors::Unavailable( PADDLE_THROW(phi::errors::Unavailable(
"The external shared storage cannot be reallocated.")); "The external shared storage cannot be reallocated."));
} }
...@@ -55,7 +55,7 @@ class ExternalStorage : public phi::Storage { ...@@ -55,7 +55,7 @@ class ExternalStorage : public phi::Storage {
const phi::Place& place() const override { const phi::Place& place() const override {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
data_, data_,
paddle::platform::errors::Unavailable( phi::errors::Unavailable(
"Unable to visit place as data_ has not been initialized yet.")); "Unable to visit place as data_ has not been initialized yet."));
return data_->place(); return data_->place();
} }
......
...@@ -54,7 +54,7 @@ bool HasCUDNN() { ...@@ -54,7 +54,7 @@ bool HasCUDNN() {
void EnforceCUDNNLoaded(const char* fn_name) { void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
cudnn_dso_handle, cudnn_dso_handle,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Cannot load cudnn shared library. Cannot invoke method %s.", "Cannot load cudnn shared library. Cannot invoke method %s.",
fn_name)); fn_name));
} }
......
...@@ -33,7 +33,7 @@ bool HasCUFFT() { ...@@ -33,7 +33,7 @@ bool HasCUFFT() {
void EnforceCUFFTLoaded(const char* fn_name) { void EnforceCUFFTLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
cufft_dso_handle, cufft_dso_handle,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Cannot load cufft shared library. Cannot invoke method %s.", "Cannot load cufft shared library. Cannot invoke method %s.",
fn_name)); fn_name));
} }
......
...@@ -24,7 +24,7 @@ limitations under the License. */ ...@@ -24,7 +24,7 @@ limitations under the License. */
#include <windows.h> #include <windows.h>
#endif #endif
// TODO(wilber): The pten computing library requires a component to manage flags // TODO(wilber): The phi computing library requires a component to manage flags
// (maybe not use gflags). // (maybe not use gflags).
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
...@@ -299,8 +299,8 @@ static inline void* GetDsoHandleFromSearchPath( ...@@ -299,8 +299,8 @@ static inline void* GetDsoHandleFromSearchPath(
#endif // !_WIN32 #endif // !_WIN32
if (throw_on_error) { if (throw_on_error) {
// NOTE: Special error report case, no need to change its format // NOTE: Special error report case, no need to change its format
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( PADDLE_THROW(
error_msg, dso_name, errorno)); phi::errors::PreconditionNotMet(error_msg, dso_name, errorno));
} else { } else {
LOG(WARNING) << paddle::string::Sprintf(error_msg, dso_name, errorno); LOG(WARNING) << paddle::string::Sprintf(error_msg, dso_name, errorno);
} }
...@@ -547,14 +547,11 @@ void* GetOpDsoHandle(const std::string& dso_name) { ...@@ -547,14 +547,11 @@ void* GetOpDsoHandle(const std::string& dso_name) {
void* GetNvtxDsoHandle() { void* GetNvtxDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("Nvtx do not support Apple."));
paddle::platform::errors::Unimplemented("Nvtx do not support Apple."));
#elif defined(_WIN32) #elif defined(_WIN32)
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("Nvtx do not support Windows."));
paddle::platform::errors::Unimplemented("Nvtx do not support Windows."));
#elif !defined(PADDLE_WITH_CUDA) #elif !defined(PADDLE_WITH_CUDA)
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented("Nvtx do not support without CUDA."));
"Nvtx do not support without CUDA."));
#else #else
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvToolsExt.so"); return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvToolsExt.so");
#endif #endif
......
...@@ -58,7 +58,7 @@ bool HasCUDNN() { ...@@ -58,7 +58,7 @@ bool HasCUDNN() {
void EnforceCUDNNLoaded(const char* fn_name) { void EnforceCUDNNLoaded(const char* fn_name) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
miopen_dso_handle, miopen_dso_handle,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Cannot load miopen shared library. Cannot invoke method %s.", "Cannot load miopen shared library. Cannot invoke method %s.",
fn_name)); fn_name));
} }
......
...@@ -54,21 +54,21 @@ extern void* tensorrt_plugin_dso_handle; ...@@ -54,21 +54,21 @@ extern void* tensorrt_plugin_dso_handle;
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_TENSORRT_NON_POINTER_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_TENSORRT_NON_POINTER_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
std::call_once(tensorrt_dso_flag, []() { \ std::call_once(tensorrt_dso_flag, []() { \
tensorrt_dso_handle = phi::dynload::GetTensorRtHandle(); \ tensorrt_dso_handle = phi::dynload::GetTensorRtHandle(); \
}); \ }); \
static void* p_##__name = dlsym(tensorrt_dso_handle, #__name); \ static void* p_##__name = dlsym(tensorrt_dso_handle, #__name); \
PADDLE_ENFORCE_NOT_NULL(p_##__name, \ PADDLE_ENFORCE_NOT_NULL( \
paddle::platform::errors::Unavailable( \ p_##__name, \
"Load tensorrt api %s failed", #__name)); \ phi::errors::Unavailable("Load tensorrt api %s failed", #__name)); \
using tensorrt_func = decltype(&::__name); \ using tensorrt_func = decltype(&::__name); \
return reinterpret_cast<tensorrt_func>(p_##__name)(args...); \ return reinterpret_cast<tensorrt_func>(p_##__name)(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#define DECLARE_DYNAMIC_LOAD_TENSORRT_PLUGIN_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_TENSORRT_PLUGIN_WRAP(__name) \
...@@ -80,7 +80,7 @@ extern void* tensorrt_plugin_dso_handle; ...@@ -80,7 +80,7 @@ extern void* tensorrt_plugin_dso_handle;
}); \ }); \
static void* p_##__name = dlsym(tensorrt_plugin_dso_handle, #__name); \ static void* p_##__name = dlsym(tensorrt_plugin_dso_handle, #__name); \
PADDLE_ENFORCE_NOT_NULL(p_##__name, \ PADDLE_ENFORCE_NOT_NULL(p_##__name, \
paddle::platform::errors::Unavailable( \ phi::errors::Unavailable( \
"Load tensorrt plugin %s failed", #__name)); \ "Load tensorrt plugin %s failed", #__name)); \
using tensorrt_plugin_func = decltype(&::__name); \ using tensorrt_plugin_func = decltype(&::__name); \
return reinterpret_cast<tensorrt_plugin_func>(p_##__name)(args...); \ return reinterpret_cast<tensorrt_plugin_func>(p_##__name)(args...); \
......
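Only the error namespace changes in these TensorRT wrappers, but the reformatting makes the macro hard to read, so here is what DECLARE_DYNAMIC_LOAD_TENSORRT_NON_POINTER_WRAP(foo) expands to, hand-simplified for a hypothetical symbol foo and ignoring the DECLARE_TYPE return-type plumbing:

    struct DynLoad__foo {
      template <typename... Args>
      auto operator()(Args... args) {
        // load the shared library at most once, across all wrapped symbols
        std::call_once(tensorrt_dso_flag, []() {
          tensorrt_dso_handle = phi::dynload::GetTensorRtHandle();
        });
        // resolve the symbol at most once for this wrapper
        static void* p_foo = dlsym(tensorrt_dso_handle, "foo");
        PADDLE_ENFORCE_NOT_NULL(
            p_foo, phi::errors::Unavailable("Load tensorrt api %s failed", "foo"));
        return reinterpret_cast<decltype(&::foo)>(p_foo)(args...);
      }
    };
    extern DynLoad__foo foo;  // call sites then just write foo(...)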
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_info.h"
// TODO(pten): remove fluid headers. // TODO(phi): remove fluid headers.
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
static std::once_flag g_device_props_size_init_flag; static std::once_flag g_device_props_size_init_flag;
...@@ -74,13 +74,13 @@ int GetGPUDeviceCount() { ...@@ -74,13 +74,13 @@ int GetGPUDeviceCount() {
} }
int GetGPUComputeCapability(int id) { int GetGPUComputeCapability(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int major, minor; int major, minor;
auto major_error_code = auto major_error_code =
cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, id); cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, id);
...@@ -93,26 +93,26 @@ int GetGPUComputeCapability(int id) { ...@@ -93,26 +93,26 @@ int GetGPUComputeCapability(int id) {
} }
int GetGPURuntimeVersion(int id) { int GetGPURuntimeVersion(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int runtime_version = 0; int runtime_version = 0;
PADDLE_ENFORCE_GPU_SUCCESS(cudaRuntimeGetVersion(&runtime_version)); PADDLE_ENFORCE_GPU_SUCCESS(cudaRuntimeGetVersion(&runtime_version));
return runtime_version; return runtime_version;
} }
int GetGPUDriverVersion(int id) { int GetGPUDriverVersion(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int driver_version = 0; int driver_version = 0;
PADDLE_ENFORCE_GPU_SUCCESS(cudaDriverGetVersion(&driver_version)); PADDLE_ENFORCE_GPU_SUCCESS(cudaDriverGetVersion(&driver_version));
return driver_version; return driver_version;
...@@ -125,13 +125,13 @@ bool TensorCoreAvailable() { ...@@ -125,13 +125,13 @@ bool TensorCoreAvailable() {
} }
int GetGPUMultiProcessors(int id) { int GetGPUMultiProcessors(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id)); cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id));
...@@ -139,13 +139,13 @@ int GetGPUMultiProcessors(int id) { ...@@ -139,13 +139,13 @@ int GetGPUMultiProcessors(int id) {
} }
int GetGPUMaxThreadsPerMultiProcessor(int id) { int GetGPUMaxThreadsPerMultiProcessor(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute( PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute(
&count, cudaDevAttrMaxThreadsPerMultiProcessor, id)); &count, cudaDevAttrMaxThreadsPerMultiProcessor, id));
...@@ -154,13 +154,13 @@ int GetGPUMaxThreadsPerMultiProcessor(int id) { ...@@ -154,13 +154,13 @@ int GetGPUMaxThreadsPerMultiProcessor(int id) {
} }
int GetGPUMaxThreadsPerBlock(int id) { int GetGPUMaxThreadsPerBlock(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
cudaDeviceGetAttribute(&count, cudaDevAttrMaxThreadsPerBlock, id)); cudaDeviceGetAttribute(&count, cudaDevAttrMaxThreadsPerBlock, id));
...@@ -174,13 +174,13 @@ int GetCurrentDeviceId() { ...@@ -174,13 +174,13 @@ int GetCurrentDeviceId() {
} }
std::array<int, 3> GetGpuMaxGridDimSize(int id) { std::array<int, 3> GetGpuMaxGridDimSize(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
std::array<int, 3> ret; std::array<int, 3> ret;
int size; int size;
auto error_code_x = cudaDeviceGetAttribute(&size, cudaDevAttrMaxGridDimX, id); auto error_code_x = cudaDeviceGetAttribute(&size, cudaDevAttrMaxGridDimX, id);
...@@ -213,7 +213,7 @@ const gpuDeviceProp &GetDeviceProperties(int id) { ...@@ -213,7 +213,7 @@ const gpuDeviceProp &GetDeviceProperties(int id) {
} }
if (id < 0 || id >= static_cast<int>(g_device_props.size())) { if (id < 0 || id >= static_cast<int>(g_device_props.size())) {
PADDLE_THROW(paddle::platform::errors::OutOfRange( PADDLE_THROW(phi::errors::OutOfRange(
"The device id %d is out of range [0, %d), where %d is the number of " "The device id %d is out of range [0, %d), where %d is the number of "
"devices on this machine. Because the device id should be greater than " "devices on this machine. Because the device id should be greater than "
"or equal to zero and smaller than the number of gpus. Please input " "or equal to zero and smaller than the number of gpus. Please input "
...@@ -233,13 +233,13 @@ const gpuDeviceProp &GetDeviceProperties(int id) { ...@@ -233,13 +233,13 @@ const gpuDeviceProp &GetDeviceProperties(int id) {
void SetDeviceId(int id) { void SetDeviceId(int id) {
// TODO(qijun): find a better way to cache the cuda device count // TODO(qijun): find a better way to cache the cuda device count
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id)); PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id));
} }
...@@ -294,13 +294,13 @@ gpuError_t GpuGetLastError() { return cudaGetLastError(); } ...@@ -294,13 +294,13 @@ gpuError_t GpuGetLastError() { return cudaGetLastError(); }
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements
// for more detail about managed memory requirements // for more detail about managed memory requirements
bool IsGPUManagedMemorySupported(int dev_id) { bool IsGPUManagedMemorySupported(int dev_id) {
PADDLE_ENFORCE_LT(dev_id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), dev_id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
dev_id, dev_id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
#if defined(__linux__) || defined(_WIN32) #if defined(__linux__) || defined(_WIN32)
int ManagedMemoryAttr; int ManagedMemoryAttr;
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute( PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute(
...@@ -312,13 +312,13 @@ bool IsGPUManagedMemorySupported(int dev_id) { ...@@ -312,13 +312,13 @@ bool IsGPUManagedMemorySupported(int dev_id) {
} }
bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id) { bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id) {
PADDLE_ENFORCE_LT(dev_id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), dev_id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
dev_id, dev_id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
#ifdef __linux__ #ifdef __linux__
return IsGPUManagedMemorySupported(dev_id) && return IsGPUManagedMemorySupported(dev_id) &&
GetGPUComputeCapability(dev_id) >= 60; GetGPUComputeCapability(dev_id) >= 60;
......
...@@ -100,12 +100,12 @@ struct GpuLaunchConfig { ...@@ -100,12 +100,12 @@ struct GpuLaunchConfig {
inline GpuLaunchConfig GetGpuLaunchConfig1D(const phi::GPUContext& context, inline GpuLaunchConfig GetGpuLaunchConfig1D(const phi::GPUContext& context,
int64_t numel, int64_t numel,
int vec_size = 1) { int vec_size = 1) {
PADDLE_ENFORCE_GT(numel, PADDLE_ENFORCE_GT(
0, numel,
paddle::platform::errors::InvalidArgument( 0,
"element quantity should be greater than 0," phi::errors::InvalidArgument("element quantity should be greater than 0,"
" but received value is: %d.", " but received value is: %d.",
numel)); numel));
// Get compute_capability // Get compute_capability
const int capability = context.GetComputeCapability(); const int capability = context.GetComputeCapability();
/* If thread number per block is 64/128/256/512, cuda performs better.*/ /* If thread number per block is 64/128/256/512, cuda performs better.*/
...@@ -142,18 +142,18 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D(const phi::GPUContext& context, ...@@ -142,18 +142,18 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D(const phi::GPUContext& context,
inline GpuLaunchConfig GetGpuLaunchConfig2D(const phi::GPUContext& context, inline GpuLaunchConfig GetGpuLaunchConfig2D(const phi::GPUContext& context,
int x_dim, int x_dim,
int y_dim) { int y_dim) {
PADDLE_ENFORCE_GT(x_dim, PADDLE_ENFORCE_GT(
0, x_dim,
paddle::platform::errors::InvalidArgument( 0,
"x dim number should greater than 0," phi::errors::InvalidArgument("x dim number should greater than 0,"
" but received value is: %d", " but received value is: %d",
x_dim)); x_dim));
PADDLE_ENFORCE_GT(y_dim, PADDLE_ENFORCE_GT(
0, y_dim,
paddle::platform::errors::InvalidArgument( 0,
"y dim number should greater than 0," phi::errors::InvalidArgument("y dim number should greater than 0,"
" but received value is: %d", " but received value is: %d",
y_dim)); y_dim));
const int kThreadsPerBlock = 256; const int kThreadsPerBlock = 256;
int block_cols = (std::min)(x_dim, kThreadsPerBlock); int block_cols = (std::min)(x_dim, kThreadsPerBlock);
......
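These two helpers are what kernels call to size their launches; the enforce guards being reformatted here reject empty workloads before the sizing arithmetic runs. A typical 1-D use looks roughly like this (the block_per_grid / thread_per_block member names come from the surrounding gpu_launch_config.h, not from this hunk):

    auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, numel);
    my_elementwise_kernel<<<config.block_per_grid, config.thread_per_block, 0,
                            dev_ctx.stream()>>>(in, out, numel);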
...@@ -78,13 +78,13 @@ int GetGPUDeviceCount() { ...@@ -78,13 +78,13 @@ int GetGPUDeviceCount() {
} }
int GetGPUComputeCapability(int id) { int GetGPUComputeCapability(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int major, minor; int major, minor;
auto major_error_code = hipDeviceGetAttribute( auto major_error_code = hipDeviceGetAttribute(
&major, hipDeviceAttributeComputeCapabilityMajor, id); &major, hipDeviceAttributeComputeCapabilityMajor, id);
...@@ -97,26 +97,26 @@ int GetGPUComputeCapability(int id) { ...@@ -97,26 +97,26 @@ int GetGPUComputeCapability(int id) {
} }
int GetGPURuntimeVersion(int id) { int GetGPURuntimeVersion(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int runtime_version = 0; int runtime_version = 0;
PADDLE_ENFORCE_GPU_SUCCESS(hipRuntimeGetVersion(&runtime_version)); PADDLE_ENFORCE_GPU_SUCCESS(hipRuntimeGetVersion(&runtime_version));
return runtime_version; return runtime_version;
} }
int GetGPUDriverVersion(int id) { int GetGPUDriverVersion(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int driver_version = 0; int driver_version = 0;
PADDLE_ENFORCE_GPU_SUCCESS(hipDriverGetVersion(&driver_version)); PADDLE_ENFORCE_GPU_SUCCESS(hipDriverGetVersion(&driver_version));
return driver_version; return driver_version;
...@@ -125,13 +125,13 @@ int GetGPUDriverVersion(int id) { ...@@ -125,13 +125,13 @@ int GetGPUDriverVersion(int id) {
bool TensorCoreAvailable() { return false; } bool TensorCoreAvailable() { return false; }
int GetGPUMultiProcessors(int id) { int GetGPUMultiProcessors(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
hipDeviceGetAttribute(&count, hipDeviceAttributeMultiprocessorCount, id)); hipDeviceGetAttribute(&count, hipDeviceAttributeMultiprocessorCount, id));
...@@ -139,13 +139,13 @@ int GetGPUMultiProcessors(int id) { ...@@ -139,13 +139,13 @@ int GetGPUMultiProcessors(int id) {
} }
int GetGPUMaxThreadsPerMultiProcessor(int id) { int GetGPUMaxThreadsPerMultiProcessor(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceGetAttribute( PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceGetAttribute(
&count, hipDeviceAttributeMaxThreadsPerMultiProcessor, id)); &count, hipDeviceAttributeMaxThreadsPerMultiProcessor, id));
...@@ -154,13 +154,13 @@ int GetGPUMaxThreadsPerMultiProcessor(int id) { ...@@ -154,13 +154,13 @@ int GetGPUMaxThreadsPerMultiProcessor(int id) {
} }
int GetGPUMaxThreadsPerBlock(int id) { int GetGPUMaxThreadsPerBlock(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
int count; int count;
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
hipDeviceGetAttribute(&count, hipDeviceAttributeMaxThreadsPerBlock, id)); hipDeviceGetAttribute(&count, hipDeviceAttributeMaxThreadsPerBlock, id));
...@@ -174,13 +174,13 @@ int GetCurrentDeviceId() { ...@@ -174,13 +174,13 @@ int GetCurrentDeviceId() {
} }
std::array<int, 3> GetGpuMaxGridDimSize(int id) { std::array<int, 3> GetGpuMaxGridDimSize(int id) {
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
std::array<int, 3> ret; std::array<int, 3> ret;
int size; int size;
auto error_code_x = auto error_code_x =
...@@ -216,7 +216,7 @@ const gpuDeviceProp &GetDeviceProperties(int id) { ...@@ -216,7 +216,7 @@ const gpuDeviceProp &GetDeviceProperties(int id) {
} }
if (id < 0 || id >= static_cast<int>(g_device_props.size())) { if (id < 0 || id >= static_cast<int>(g_device_props.size())) {
PADDLE_THROW(paddle::platform::errors::OutOfRange( PADDLE_THROW(phi::errors::OutOfRange(
"The device id %d is out of range [0, %d), where %d is the number of " "The device id %d is out of range [0, %d), where %d is the number of "
"devices on this machine. Because the device id should be greater than " "devices on this machine. Because the device id should be greater than "
"or equal to zero and smaller than the number of gpus. Please input " "or equal to zero and smaller than the number of gpus. Please input "
...@@ -235,13 +235,13 @@ const gpuDeviceProp &GetDeviceProperties(int id) { ...@@ -235,13 +235,13 @@ const gpuDeviceProp &GetDeviceProperties(int id) {
void SetDeviceId(int id) { void SetDeviceId(int id) {
// TODO(qijun): find a better way to cache the cuda device count // TODO(qijun): find a better way to cache the cuda device count
PADDLE_ENFORCE_LT(id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
id, id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
PADDLE_RETRY_CUDA_SUCCESS(hipSetDevice(id)); PADDLE_RETRY_CUDA_SUCCESS(hipSetDevice(id));
} }
...@@ -293,13 +293,13 @@ void GpuDeviceSync() { PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize()); } ...@@ -293,13 +293,13 @@ void GpuDeviceSync() { PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize()); }
gpuError_t GpuGetLastError() { return hipGetLastError(); } gpuError_t GpuGetLastError() { return hipGetLastError(); }
bool IsGPUManagedMemorySupported(int dev_id) { bool IsGPUManagedMemorySupported(int dev_id) {
PADDLE_ENFORCE_LT(dev_id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), dev_id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
dev_id, dev_id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
#if defined(__linux__) || defined(_WIN32) #if defined(__linux__) || defined(_WIN32)
int ManagedMemoryAttr; int ManagedMemoryAttr;
PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceGetAttribute( PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceGetAttribute(
...@@ -311,13 +311,13 @@ bool IsGPUManagedMemorySupported(int dev_id) { ...@@ -311,13 +311,13 @@ bool IsGPUManagedMemorySupported(int dev_id) {
} }
bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id) { bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id) {
PADDLE_ENFORCE_LT(dev_id, PADDLE_ENFORCE_LT(
GetGPUDeviceCount(), dev_id,
paddle::platform::errors::InvalidArgument( GetGPUDeviceCount(),
"Device id must be less than GPU count, " phi::errors::InvalidArgument("Device id must be less than GPU count, "
"but received id is: %d. GPU count is: %d.", "but received id is: %d. GPU count is: %d.",
dev_id, dev_id,
GetGPUDeviceCount())); GetGPUDeviceCount()));
#ifdef __linux__ #ifdef __linux__
return IsGPUManagedMemorySupported(dev_id) && return IsGPUManagedMemorySupported(dev_id) &&
GetGPUComputeCapability(dev_id) >= 60; GetGPUComputeCapability(dev_id) >= 60;
......
...@@ -173,7 +173,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS); ...@@ -173,7 +173,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS);
::phi::backends::xpu::details::ExternalApiType< \ ::phi::backends::xpu::details::ExternalApiType< \
__XPU_STATUS_TYPE__>::kSuccess; \ __XPU_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \ if (UNLIKELY(__cond__ != __success_type__)) { \
auto __summary__ = paddle::platform::errors::External( \ auto __summary__ = phi::errors::External( \
::phi::backends::xpu::build_xpu_error_msg(__cond__)); \ ::phi::backends::xpu::build_xpu_error_msg(__cond__)); \
__THROW_ERROR_INTERNAL__(__summary__); \ __THROW_ERROR_INTERNAL__(__summary__); \
} \ } \
...@@ -183,7 +183,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS); ...@@ -183,7 +183,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS);
do { \ do { \
auto __cond__ = (COND); \ auto __cond__ = (COND); \
if (UNLIKELY(__cond__ != baidu::xpu::api::Error_t::SUCCESS)) { \ if (UNLIKELY(__cond__ != baidu::xpu::api::Error_t::SUCCESS)) { \
auto __summary__ = paddle::platform::errors::External( \ auto __summary__ = phi::errors::External( \
::phi::backends::xpu::build_xpu_xdnn_error_msg(__cond__, MSG)); \ ::phi::backends::xpu::build_xpu_xdnn_error_msg(__cond__, MSG)); \
__THROW_ERROR_INTERNAL__(__summary__); \ __THROW_ERROR_INTERNAL__(__summary__); \
} \ } \
...@@ -192,7 +192,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS); ...@@ -192,7 +192,7 @@ DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS);
#define PADDLE_ENFORCE_XDNN_NOT_NULL(ptr) \ #define PADDLE_ENFORCE_XDNN_NOT_NULL(ptr) \
do { \ do { \
if (UNLIKELY(ptr == nullptr)) { \ if (UNLIKELY(ptr == nullptr)) { \
auto __summary__ = paddle::platform::errors::External( \ auto __summary__ = phi::errors::External( \
::phi::backends::xpu::build_xpu_xdnn_error_msg( \ ::phi::backends::xpu::build_xpu_xdnn_error_msg( \
baidu::xpu::api::Error_t::NO_ENOUGH_WORKSPACE, \ baidu::xpu::api::Error_t::NO_ENOUGH_WORKSPACE, \
"XPU memory is not enough")); \ "XPU memory is not enough")); \
......
...@@ -100,7 +100,7 @@ void SetXPUDeviceId(int id) { ...@@ -100,7 +100,7 @@ void SetXPUDeviceId(int id) {
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
id, id,
GetXPUDeviceCount(), GetXPUDeviceCount(),
paddle::platform::errors::InvalidArgument("id must less than XPU count")); phi::errors::InvalidArgument("id must less than XPU count"));
PADDLE_ENFORCE_XPU_SUCCESS(xpu_set_device(id)); PADDLE_ENFORCE_XPU_SUCCESS(xpu_set_device(id));
} }
......
...@@ -13,8 +13,8 @@ cc_library(kernel_context SRCS kernel_context.cc DEPS pten_enforce pten_context) ...@@ -13,8 +13,8 @@ cc_library(kernel_context SRCS kernel_context.cc DEPS pten_enforce pten_context)
cc_library(ddim SRCS ddim.cc DEPS pten_enforce) cc_library(ddim SRCS ddim.cc DEPS pten_enforce)
cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS pten_enforce) cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS pten_enforce)
cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce mixed_vector) cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce)
cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce mixed_vector) cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce)
cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base) cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base)
cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base)
...@@ -23,7 +23,7 @@ cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_ ...@@ -23,7 +23,7 @@ cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_
cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor)
cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor) cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor)
cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor mixed_vector pten_enforce ddim) cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor pten_enforce ddim)
cc_library(pten_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils) cc_library(pten_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <initializer_list> #include <initializer_list>
#include <numeric>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <vector> #include <vector>
......
...@@ -73,7 +73,7 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, ...@@ -73,7 +73,7 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
size_t requested_size) { size_t requested_size) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
allocator, allocator,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Required allocator shall not be nullptr, but received nullptr.")); "Required allocator shall not be nullptr, but received nullptr."));
if (this->dtype() != dtype) { if (this->dtype() != dtype) {
VLOG(10) << "change data type in mutable_data, target dtype - " << dtype; VLOG(10) << "change data type in mutable_data, target dtype - " << dtype;
...@@ -81,13 +81,13 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, ...@@ -81,13 +81,13 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
} }
PADDLE_ENFORCE( PADDLE_ENFORCE(
valid(), valid(),
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The meta data must be valid when call the mutable data function.")); "The meta data must be valid when call the mutable data function."));
size_t bytes = numel() * SizeOf(this->dtype()); size_t bytes = numel() * SizeOf(this->dtype());
if (requested_size) { if (requested_size) {
PADDLE_ENFORCE_GE(requested_size, PADDLE_ENFORCE_GE(requested_size,
bytes, bytes,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The reserved size %d should be enough to meet the " "The reserved size %d should be enough to meet the "
"volume required by metadata %d.", "volume required by metadata %d.",
requested_size, requested_size,
...@@ -112,7 +112,7 @@ const T* DenseTensor::data() const { ...@@ -112,7 +112,7 @@ const T* DenseTensor::data() const {
check_memory_size(); check_memory_size();
PADDLE_ENFORCE( PADDLE_ENFORCE(
(dtype() == paddle::experimental::CppTypeToDataType<T>::Type()), (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The type of data we are trying to retrieve does not match the " "The type of data we are trying to retrieve does not match the "
"type of data currently contained in the container.")); "type of data currently contained in the container."));
return static_cast<const T*>(data()); return static_cast<const T*>(data());
...@@ -123,7 +123,7 @@ T* DenseTensor::data() { ...@@ -123,7 +123,7 @@ T* DenseTensor::data() {
check_memory_size(); check_memory_size();
PADDLE_ENFORCE( PADDLE_ENFORCE(
(dtype() == paddle::experimental::CppTypeToDataType<T>::Type()), (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The type of data we are trying to retrieve does not match the " "The type of data we are trying to retrieve does not match the "
"type of data currently contained in the container.")); "type of data currently contained in the container."));
return static_cast<T*>(data()); return static_cast<T*>(data());
...@@ -133,7 +133,7 @@ void* DenseTensor::data() { ...@@ -133,7 +133,7 @@ void* DenseTensor::data() {
check_memory_size(); check_memory_size();
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, holder_,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The storage must be valid when call the data function.")); "The storage must be valid when call the data function."));
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
meta_.offset); meta_.offset);
...@@ -143,7 +143,7 @@ const void* DenseTensor::data() const { ...@@ -143,7 +143,7 @@ const void* DenseTensor::data() const {
check_memory_size(); check_memory_size();
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
holder_, holder_,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The storage must be valid when call the data function.")); "The storage must be valid when call the data function."));
return reinterpret_cast<const void*>( return reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + meta_.offset); reinterpret_cast<uintptr_t>(holder_->ptr()) + meta_.offset);
...@@ -151,7 +151,7 @@ const void* DenseTensor::data() const { ...@@ -151,7 +151,7 @@ const void* DenseTensor::data() const {
void DenseTensor::set_meta(DenseTensorMeta&& meta) { void DenseTensor::set_meta(DenseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(), PADDLE_ENFORCE(!meta_.valid(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only when the original attribute of Tensor is " "Only when the original attribute of Tensor is "
"incomplete, can it be reset.")); "incomplete, can it be reset."));
meta_ = std::move(meta); meta_ = std::move(meta);
...@@ -160,7 +160,7 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) { ...@@ -160,7 +160,7 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
void DenseTensor::set_meta(const DenseTensorMeta& meta) { void DenseTensor::set_meta(const DenseTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE(
meta.valid(), meta.valid(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
meta_.dtype = meta.dtype; meta_.dtype = meta.dtype;
......
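Every hunk above applies one mechanical substitution: each paddle::platform::errors::* factory becomes the matching phi::errors::* factory, while the enclosing PADDLE_ENFORCE*/PADDLE_THROW macro and the message arguments stay untouched. A minimal sketch of the post-migration call shape, assuming paddle/phi/core/enforce.h is in scope (the checking function itself is hypothetical, not part of the patch):

#include <cstddef>
#include "paddle/phi/core/enforce.h"

// Hypothetical check mirroring the migrated call sites above.
void CheckRequestedSize(std::size_t requested_size, std::size_t bytes) {
  PADDLE_ENFORCE_GE(requested_size,
                    bytes,
                    phi::errors::InvalidArgument(
                        "The reserved size %d should be enough to meet the "
                        "volume required by metadata %d.",
                        requested_size,
                        bytes));
}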
...@@ -54,22 +54,22 @@ DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta); ...@@ -54,22 +54,22 @@ DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta);
inline bool IsInitialized() const { return holder_ != nullptr; } inline bool IsInitialized() const { return holder_ != nullptr; }
template <typename T> template <typename T>
T* mutable_data(const paddle::platform::Place& place, T* mutable_data(const phi::Place& place,
size_t requested_size = 0); size_t requested_size = 0);
template <typename T> template <typename T>
T* mutable_data(const DDim& dims, T* mutable_data(const DDim& dims,
const paddle::platform::Place& place, const phi::Place& place,
size_t requested_size = 0); size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place, void* mutable_data(const phi::Place& place,
paddle::experimental::DataType type, paddle::experimental::DataType type,
size_t requested_size = 0); size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place, void* mutable_data(const phi::Place& place,
size_t requested_size = 0); size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place, void* mutable_data(const phi::Place& place,
paddle::experimental::DataType type, paddle::experimental::DataType type,
const phi::Stream& stream); const phi::Stream& stream);
......
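With these signatures, callers pass a phi::Place (for example phi::CPUPlace) where they previously passed a paddle::platform::Place. A minimal usage sketch, assuming an already-constructed phi::DenseTensor whose dims and dtype are set (the helper name is hypothetical):

#include "paddle/phi/core/dense_tensor.h"

// Hypothetical helper: allocate float storage on the host for a tensor
// whose meta is already valid.
float* AllocOnCPU(phi::DenseTensor* t) {
  return t->mutable_data<float>(phi::CPUPlace());
}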
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/phi/core/macros.h" #include "paddle/phi/core/macros.h"
#include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/core/type_defs.h" #include "paddle/phi/core/type_defs.h"
#include "paddle/utils/any.h"
#include "paddle/utils/flat_hash_map.h" #include "paddle/utils/flat_hash_map.h"
#include "paddle/utils/small_vector.h" #include "paddle/utils/small_vector.h"
......
...@@ -69,7 +69,7 @@ void KernelContext::AssignInputRange(std::pair<int, int>&& range, size_t idx) { ...@@ -69,7 +69,7 @@ void KernelContext::AssignInputRange(std::pair<int, int>&& range, size_t idx) {
} else if (idx == input_range_.size()) { } else if (idx == input_range_.size()) {
input_range_.emplace_back(range); input_range_.emplace_back(range);
} else { } else {
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( PADDLE_THROW(phi::errors::PreconditionNotMet(
"Invalid idx when trying to set InputRange, " "Invalid idx when trying to set InputRange, "
"index is `%d`, it is greater than the size(%d) of InputRange.", "index is `%d`, it is greater than the size(%d) of InputRange.",
idx, idx,
...@@ -83,7 +83,7 @@ void KernelContext::AssignOutputRange(std::pair<int, int>&& range, size_t idx) { ...@@ -83,7 +83,7 @@ void KernelContext::AssignOutputRange(std::pair<int, int>&& range, size_t idx) {
} else if (idx == output_range_.size()) { } else if (idx == output_range_.size()) {
output_range_.emplace_back(range); output_range_.emplace_back(range);
} else { } else {
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( PADDLE_THROW(phi::errors::PreconditionNotMet(
"Invalid idx when trying to set InputRange, " "Invalid idx when trying to set InputRange, "
"index is `%d`, it is greater than the size(%d) of InputRange.", "index is `%d`, it is greater than the size(%d) of InputRange.",
idx, idx,
......
...@@ -13,18 +13,11 @@ ...@@ -13,18 +13,11 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <cstddef>
// See Note [ Why still include the fluid headers? ] #include <vector>
#ifndef PADDLE_WITH_CUSTOM_KERNEL
#include "paddle/fluid/framework/mixed_vector.h"
#endif
namespace phi { namespace phi {
#ifndef PADDLE_WITH_CUSTOM_KERNEL using LoD = std::vector<std::vector<std::size_t>>;
using LoD = std::vector<paddle::framework::Vector<size_t>>;
#else
using LoD = std::vector<std::vector<size_t>>;
#endif
void AppendLoD(LoD* lod, const LoD& lod_length); void AppendLoD(LoD* lod, const LoD& lod_length);
...@@ -40,4 +33,4 @@ void AppendLoD(LoD* lod, const LoD& lod_length); ...@@ -40,4 +33,4 @@ void AppendLoD(LoD* lod, const LoD& lod_length);
*/ */
LoD ConvertToLengthBasedLoD(const LoD& offset_lod); LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
} // namespace pten } // namespace phi
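LoD is now a plain std::vector<std::vector<std::size_t>> on every build path, so it can be manipulated with ordinary standard-library code. A self-contained sketch of the offset-to-length conversion that ConvertToLengthBasedLoD performs (a restatement of the usual offset/length relationship, not the library source):

#include <cstddef>
#include <vector>

using LoD = std::vector<std::vector<std::size_t>>;

// An offset-based level {0, 1, 6, 8} describes segment lengths {1, 5, 2}.
LoD ToLengthBased(const LoD& offset_lod) {
  LoD length_lod;
  for (const auto& level : offset_lod) {
    std::vector<std::size_t> lengths;
    for (std::size_t i = 1; i < level.size(); ++i) {
      lengths.push_back(level[i] - level[i - 1]);
    }
    length_lod.push_back(lengths);
  }
  return length_lod;
}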
...@@ -55,25 +55,17 @@ class SelectedRows : public TensorBase, ...@@ -55,25 +55,17 @@ class SelectedRows : public TensorBase,
void set_height(int64_t height) { impl_->set_height(height); } void set_height(int64_t height) { impl_->set_height(height); }
const paddle::framework::Vector<int64_t>& rows() const { const std::vector<int64_t>& rows() const { return impl_->rows(); }
return impl_->rows();
}
paddle::framework::Vector<int64_t>* mutable_rows() { std::vector<int64_t>* mutable_rows() { return impl_->mutable_rows(); }
return impl_->mutable_rows();
}
void set_rows(const paddle::framework::Vector<int64_t>& rows) {
impl_->set_rows(rows);
}
void set_rows(const std::vector<int64_t>& rows) { impl_->set_rows(rows); }
/* /*
* @brief Get the index of key in rows * @brief Get the index of key in rows
* *
* @return -1 if the key does not exist. * @return -1 if the key does not exist.
*/ */
int64_t Index(int64_t key) const { return impl_->Index(key); } int64_t Index(int64_t key) const { return impl_->Index(key); }
/* /*
* @brief whether has the specified key in the table. * @brief whether has the specified key in the table.
* *
......
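The SelectedRows facade now hands back a plain std::vector<int64_t> from rows(), so callers can use standard iteration instead of the mixed_vector API. A minimal sketch, assuming a populated phi::SelectedRows (the counting helper is hypothetical):

#include <cstdint>
#include "paddle/phi/core/selected_rows.h"

// Hypothetical helper: count how many entries reference a given row id.
// Rows may be duplicated, so the answer can exceed 1.
int64_t CountRow(const phi::SelectedRows& sr, int64_t id) {
  int64_t n = 0;
  for (int64_t r : sr.rows()) {
    if (r == id) ++n;
  }
  return n;
}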
...@@ -28,7 +28,7 @@ struct ReAllocateVisitor { ...@@ -28,7 +28,7 @@ struct ReAllocateVisitor {
template <typename T> template <typename T>
void operator()() const { void operator()() const {
phi::DenseTensor cpu_tensor; phi::DenseTensor cpu_tensor;
paddle::platform::CPUPlace cpu; phi::CPUPlace cpu;
T* ptr = cpu_tensor.mutable_data<T>(dims_, cpu); T* ptr = cpu_tensor.mutable_data<T>(dims_, cpu);
const T* old_ptr = const T* old_ptr =
tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>(); tensor_->memory_size() == 0 ? nullptr : tensor_->data<T>();
...@@ -57,7 +57,7 @@ struct TensorCopyVisitor { ...@@ -57,7 +57,7 @@ struct TensorCopyVisitor {
template <typename T> template <typename T>
void apply() const { void apply() const {
// TODO(Yancey1989): support other place // TODO(Yancey1989): support other place
paddle::platform::CPUPlace cpu; phi::CPUPlace cpu;
paddle::memory::Copy(cpu, paddle::memory::Copy(cpu,
dst_->mutable_data<T>(cpu) + dst_offset_, dst_->mutable_data<T>(cpu) + dst_offset_,
cpu, cpu,
...@@ -82,7 +82,7 @@ struct TensorFillVisitor { ...@@ -82,7 +82,7 @@ struct TensorFillVisitor {
template <typename T> template <typename T>
void apply() const { void apply() const {
// TODO(qiao): support other place // TODO(qiao): support other place
paddle::platform::CPUPlace cpu; phi::CPUPlace cpu;
auto* tensor_data = dst_->mutable_data<T>(cpu); auto* tensor_data = dst_->mutable_data<T>(cpu);
auto* start = tensor_data + dst_offset_; auto* start = tensor_data + dst_offset_;
auto* end = start + size_; auto* end = start + size_;
...@@ -121,16 +121,16 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key, ...@@ -121,16 +121,16 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key,
auto iter = id_to_index_.find(key); auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) { if (iter == id_to_index_.end()) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_ENFORCE_EQ(auto_grown, PADDLE_ENFORCE_EQ(
true, auto_grown,
paddle::platform::errors::NotFound( true,
"Input key(%lld) is not found.", key)); phi::errors::NotFound("Input key(%lld) is not found.", key));
rwlock_->WRLock(); rwlock_->WRLock();
auto map_size = id_to_index_.size(); auto map_size = id_to_index_.size();
auto vector_size = rows_.size(); auto vector_size = rows_.size();
if (map_size != vector_size) { if (map_size != vector_size) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(phi::errors::InvalidArgument(
"Row map size(%zu) should be equal to rows size(%zu).", "Row map size(%zu) should be equal to rows size(%zu).",
map_size, map_size,
vector_size)); vector_size));
...@@ -140,7 +140,7 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key, ...@@ -140,7 +140,7 @@ int64_t SelectedRowsImpl::AutoGrownIndex(int64_t key,
int row_num = rows_.size(); int row_num = rows_.size();
if (row_num == value_->dims()[0]) { if (row_num == value_->dims()[0]) {
rwlock_->UNLock(); rwlock_->UNLock();
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(phi::errors::InvalidArgument(
"Selected rows is full, then length exceed the length of first " "Selected rows is full, then length exceed the length of first "
"dimension (%d).", "dimension (%d).",
row_num)); row_num));
...@@ -187,7 +187,7 @@ void SelectedRowsImpl::Get(const phi::DenseTensor& ids, ...@@ -187,7 +187,7 @@ void SelectedRowsImpl::Get(const phi::DenseTensor& ids,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
value_width, value_width,
value->numel() / value->dims()[0], value->numel() / value->dims()[0],
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Output tensor should have the same shape with table " "Output tensor should have the same shape with table "
"except the first dimmension, excepted value width not counting " "except the first dimmension, excepted value width not counting "
"the first dimension is %d, actual value width is %d.", "the first dimension is %d, actual value width is %d.",
......
...@@ -27,8 +27,6 @@ limitations under the License. */ ...@@ -27,8 +27,6 @@ limitations under the License. */
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/utils/rw_lock.h" #include "paddle/phi/core/utils/rw_lock.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/mixed_vector.h"
namespace phi { namespace phi {
class SelectedRowsImpl { class SelectedRowsImpl {
/* /*
...@@ -68,13 +66,11 @@ class SelectedRowsImpl { ...@@ -68,13 +66,11 @@ class SelectedRowsImpl {
void set_height(int64_t height) { height_ = height; } void set_height(int64_t height) { height_ = height; }
const paddle::framework::Vector<int64_t>& rows() const { return rows_; } const std::vector<int64_t>& rows() const { return rows_; }
paddle::framework::Vector<int64_t>* mutable_rows() { return &rows_; } std::vector<int64_t>* mutable_rows() { return &rows_; }
void set_rows(const paddle::framework::Vector<int64_t>& rows) { void set_rows(const std::vector<int64_t>& rows) { rows_ = rows; }
rows_ = rows;
}
/* /*
* @brief Get the index of key in rows * @brief Get the index of key in rows
...@@ -84,7 +80,7 @@ class SelectedRowsImpl { ...@@ -84,7 +80,7 @@ class SelectedRowsImpl {
int64_t Index(int64_t key) const { int64_t Index(int64_t key) const {
auto it = std::find(rows_.begin(), rows_.end(), key); auto it = std::find(rows_.begin(), rows_.end(), key);
if (it == rows_.end()) { if (it == rows_.end()) {
PADDLE_THROW(paddle::platform::errors::NotFound( PADDLE_THROW(phi::errors::NotFound(
"Input id (%lld) is not in current rows table.", key)); "Input id (%lld) is not in current rows table.", key));
} }
return static_cast<int64_t>(std::distance(rows_.begin(), it)); return static_cast<int64_t>(std::distance(rows_.begin(), it));
...@@ -156,10 +152,7 @@ class SelectedRowsImpl { ...@@ -156,10 +152,7 @@ class SelectedRowsImpl {
/// \brief Returns the dims of the tensor. /// \brief Returns the dims of the tensor.
/// \return The dims of the tensor. /// \return The dims of the tensor.
const DDim& dims() const noexcept { const DDim& dims() const noexcept { return value_->dims(); }
return value_->dims();
// return phi::make_ddim(dims);
}
/// \brief Returns the data type of the tensor. /// \brief Returns the data type of the tensor.
/// \return The data type of the tensor. /// \return The data type of the tensor.
...@@ -185,7 +178,7 @@ class SelectedRowsImpl { ...@@ -185,7 +178,7 @@ class SelectedRowsImpl {
// Notice: rows can be duplicated. We can have {0, 4, 7, 0, 5, 7, 9} here. // Notice: rows can be duplicated. We can have {0, 4, 7, 0, 5, 7, 9} here.
// SelectedRowsImpl instances are simply concatenated when added together; // SelectedRowsImpl instances are simply concatenated when added together;
// only when a SelectedRowsImpl adds a Tensor are the duplicate rows handled. // only when a SelectedRowsImpl adds a Tensor are the duplicate rows handled.
paddle::framework::Vector<int64_t> rows_; std::vector<int64_t> rows_;
std::unordered_map<int64_t, int64_t> std::unordered_map<int64_t, int64_t>
id_to_index_; // should not be used when rows_ has duplicate member id_to_index_; // should not be used when rows_ has duplicate member
std::unique_ptr<DenseTensor> value_{nullptr}; std::unique_ptr<DenseTensor> value_{nullptr};
......
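Index above is a linear scan over rows_, which is now an ordinary std::vector<int64_t>, so the lookup is the textbook std::find / std::distance pairing. A standalone restatement (this sketch returns a -1 sentinel instead of throwing, which is a simplification; the real method raises NotFound):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Position of `key` in `rows`, or -1 when absent.
int64_t IndexOf(const std::vector<int64_t>& rows, int64_t key) {
  auto it = std::find(rows.begin(), rows.end(), key);
  if (it == rows.end()) return -1;
  return static_cast<int64_t>(std::distance(rows.begin(), it));
}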
...@@ -69,17 +69,17 @@ void SparseCooTensor::Resize(const DDim& dense_dims, ...@@ -69,17 +69,17 @@ void SparseCooTensor::Resize(const DDim& dense_dims,
const int64_t non_zero_num) { const int64_t non_zero_num) {
PADDLE_ENFORCE_GE(non_zero_num, PADDLE_ENFORCE_GE(non_zero_num,
this->nnz(), this->nnz(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"the non_zero_num must be greater than or equal to the " "the non_zero_num must be greater than or equal to the "
"origin non_zero_num.")); "origin non_zero_num."));
PADDLE_ENFORCE_GE(sparse_dim, PADDLE_ENFORCE_GE(sparse_dim,
1, 1,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"the sparse_dim must be greater than or equal 1.")); "the sparse_dim must be greater than or equal 1."));
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
sparse_dim, sparse_dim,
dense_dims.size(), dense_dims.size(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"the sparse_dim must be less than or equal dense_dims.")); "the sparse_dim must be less than or equal dense_dims."));
DDim indices_dims = phi::make_ddim({sparse_dim, non_zero_num}); DDim indices_dims = phi::make_ddim({sparse_dim, non_zero_num});
......
...@@ -20,7 +20,7 @@ inline void check_shape(const DDim& dims) { ...@@ -20,7 +20,7 @@ inline void check_shape(const DDim& dims) {
bool valid = dims.size() == 2 || dims.size() == 3; bool valid = dims.size() == 2 || dims.size() == 3;
PADDLE_ENFORCE(valid, PADDLE_ENFORCE(valid,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"the SparseCsrTensor only support 2-D Tensor.")); "the SparseCsrTensor only support 2-D Tensor."));
} }
#define Check(non_zero_crows, non_zero_cols, non_zero_elements, dims) \ #define Check(non_zero_crows, non_zero_cols, non_zero_elements, dims) \
...@@ -29,12 +29,12 @@ inline void check_shape(const DDim& dims) { ...@@ -29,12 +29,12 @@ inline void check_shape(const DDim& dims) {
PADDLE_ENFORCE_EQ( \ PADDLE_ENFORCE_EQ( \
non_zero_cols.place(), \ non_zero_cols.place(), \
non_zero_crows.place(), \ non_zero_crows.place(), \
paddle::platform::errors::InvalidArgument( \ phi::errors::InvalidArgument( \
"non_zero_crows and non_zero_cols must have the same place.")); \ "non_zero_crows and non_zero_cols must have the same place.")); \
PADDLE_ENFORCE_EQ( \ PADDLE_ENFORCE_EQ( \
non_zero_cols.place(), \ non_zero_cols.place(), \
non_zero_elements.place(), \ non_zero_elements.place(), \
paddle::platform::errors::InvalidArgument( \ phi::errors::InvalidArgument( \
"non_zero_cols and non_zero_elements must have the same place.")); \ "non_zero_cols and non_zero_elements must have the same place.")); \
} }
...@@ -77,7 +77,7 @@ void* SparseCsrTensor::AllocateFrom(Allocator* allocator, ...@@ -77,7 +77,7 @@ void* SparseCsrTensor::AllocateFrom(Allocator* allocator,
void SparseCsrTensor::Resize(const DDim& dense_dims, void SparseCsrTensor::Resize(const DDim& dense_dims,
const int64_t non_zero_num) { const int64_t non_zero_num) {
PADDLE_ENFORCE(this->initialized(), PADDLE_ENFORCE(this->initialized(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"the SparseCsrTensor must be initialized when call Resize " "the SparseCsrTensor must be initialized when call Resize "
"function.")); "function."));
check_shape(dense_dims); check_shape(dense_dims);
......
...@@ -20,6 +20,8 @@ limitations under the License. */ ...@@ -20,6 +20,8 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h" #include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/ddim.h"
#include "paddle/utils/any.h"
#include "paddle/utils/optional.h"
// Note: mixed_vector includes many headers now; will LoD be // Note: mixed_vector includes many headers now; will LoD be
// used on a CUDA device? Can we use small_vector here? // used on a CUDA device? Can we use small_vector here?
...@@ -31,11 +33,7 @@ limitations under the License. */ ...@@ -31,11 +33,7 @@ limitations under the License. */
namespace phi { namespace phi {
using DDim = phi::DDim; using DDim = phi::DDim;
#ifndef PADDLE_WITH_CUSTOM_KERNEL
using LoD = std::vector<paddle::framework::Vector<size_t>>;
#else
using LoD = std::vector<std::vector<size_t>>; using LoD = std::vector<std::vector<size_t>>;
#endif
/// \brief The meta data of dense tensor. Take the structure type /// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations. /// and use all default operations.
/// ///
......
...@@ -23,7 +23,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { ...@@ -23,7 +23,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
auto x_rank = static_cast<size_t>(x_dims.size()); auto x_rank = static_cast<size_t>(x_dims.size());
PADDLE_ENFORCE_EQ(true, PADDLE_ENFORCE_EQ(true,
1 == x_rank || 2 == x_rank, 1 == x_rank || 2 == x_rank,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"ShapeError: The dimensions of input tensor X (%s) " "ShapeError: The dimensions of input tensor X (%s) "
"should be 1 or 2", "should be 1 or 2",
x_dims.to_str())); x_dims.to_str()));
...@@ -32,7 +32,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { ...@@ -32,7 +32,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
true, true,
x_rank == static_cast<size_t>(y_dims.size()), x_rank == static_cast<size_t>(y_dims.size()),
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"ShapeError: The shape of input tensor Y: %s should match with " "ShapeError: The shape of input tensor Y: %s should match with "
"input tenosr X: %s", "input tenosr X: %s",
y_dims.to_str(), y_dims.to_str(),
...@@ -47,7 +47,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { ...@@ -47,7 +47,7 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
PADDLE_ENFORCE_EQ(true, PADDLE_ENFORCE_EQ(true,
shape_match, shape_match,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"ShapeError: The shape of input tensor X: %s should " "ShapeError: The shape of input tensor X: %s should "
"be exactly the same " "be exactly the same "
"with input tensor Y: %s", "with input tensor Y: %s",
...@@ -71,12 +71,12 @@ void MatmulInferMeta(const MetaTensor& x, ...@@ -71,12 +71,12 @@ void MatmulInferMeta(const MetaTensor& x,
auto ndims_y = dims_y.size(); auto ndims_y = dims_y.size();
PADDLE_ENFORCE_GT(ndims_x, PADDLE_ENFORCE_GT(ndims_x,
0UL, 0UL,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The Input(x) dims size must be greater than 0," "The Input(x) dims size must be greater than 0,"
" but reviced dims size is 0. ")); " but reviced dims size is 0. "));
PADDLE_ENFORCE_GT(ndims_y, PADDLE_ENFORCE_GT(ndims_y,
0UL, 0UL,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The Input(y) dims size must be greater than 0," "The Input(y) dims size must be greater than 0,"
" but reviced dims size is 0. ")); " but reviced dims size is 0. "));
...@@ -150,7 +150,7 @@ void ElementwiseRawInferMeta(const MetaTensor& x, ...@@ -150,7 +150,7 @@ void ElementwiseRawInferMeta(const MetaTensor& x,
if (x_dims.size() == y_dims.size()) { if (x_dims.size() == y_dims.size()) {
PADDLE_ENFORCE_EQ((axis == -1) || (axis == 0), PADDLE_ENFORCE_EQ((axis == -1) || (axis == 0),
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"axis should be -1 or 0 while the dimension of " "axis should be -1 or 0 while the dimension of "
"tensor X (%s) is equal to the dimension of " "tensor X (%s) is equal to the dimension of "
"tensor Y (%s), but received axis: %s", "tensor Y (%s), but received axis: %s",
...@@ -160,7 +160,7 @@ void ElementwiseRawInferMeta(const MetaTensor& x, ...@@ -160,7 +160,7 @@ void ElementwiseRawInferMeta(const MetaTensor& x,
} }
PADDLE_ENFORCE_EQ((axis >= (-1 * max_dim)) && (axis < max_dim), PADDLE_ENFORCE_EQ((axis >= (-1 * max_dim)) && (axis < max_dim),
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The axis range must be [%s, %s), but axis is %s. " "The axis range must be [%s, %s), but axis is %s. "
"Please set the axis again.", "Please set the axis again.",
-1 * max_dim, -1 * max_dim,
......
...@@ -24,7 +24,7 @@ void ConcatInferMeta(const std::vector<MetaTensor>& x, ...@@ -24,7 +24,7 @@ void ConcatInferMeta(const std::vector<MetaTensor>& x,
MetaConfig config) { MetaConfig config) {
PADDLE_ENFORCE_GE(x.size(), PADDLE_ENFORCE_GE(x.size(),
0UL, 0UL,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of input meta vector should be greater" "The size of input meta vector should be greater"
"than 0.")); "than 0."));
...@@ -34,7 +34,7 @@ void ConcatInferMeta(const std::vector<MetaTensor>& x, ...@@ -34,7 +34,7 @@ void ConcatInferMeta(const std::vector<MetaTensor>& x,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
axis >= -rank && axis < rank, axis >= -rank && axis < rank,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d", "The axis is expected to be in range of [%d, %d), but got %d",
-rank, -rank,
rank, rank,
......
...@@ -38,11 +38,11 @@ void FlattenInferMeta(const MetaTensor& x, ...@@ -38,11 +38,11 @@ void FlattenInferMeta(const MetaTensor& x,
if (stop_axis < 0) { if (stop_axis < 0) {
stop_axis = stop_axis + in_dims_size; stop_axis = stop_axis + in_dims_size;
} }
PADDLE_ENFORCE_GE(stop_axis, PADDLE_ENFORCE_GE(
start_axis, stop_axis,
paddle::platform::errors::InvalidArgument( start_axis,
"The stop_axis should be greater" phi::errors::InvalidArgument("The stop_axis should be greater"
"than or equal to start_axis.")); "than or equal to start_axis."));
int64_t outer = 1; int64_t outer = 1;
std::vector<int32_t> out_shape; std::vector<int32_t> out_shape;
...@@ -113,7 +113,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -113,7 +113,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
unk_dim_idx, unk_dim_idx,
-1, -1,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only one dimension value of 'shape' in ReshapeOp can " "Only one dimension value of 'shape' in ReshapeOp can "
"be -1. But received shape = [%s], shape[%d] is also -1.", "be -1. But received shape = [%s], shape[%d] is also -1.",
phi::make_ddim(shape), phi::make_ddim(shape),
...@@ -123,7 +123,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -123,7 +123,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
static_cast<int>(i), static_cast<int>(i),
in_dims.size(), in_dims.size(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The index of 0 in `shape` must be less than " "The index of 0 in `shape` must be less than "
"the input tensor X's dimensions. " "the input tensor X's dimensions. "
"But received shape = [%s], shape[%d] = 0, X's shape = [%s], " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
...@@ -136,7 +136,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -136,7 +136,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
shape[i], shape[i],
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Each dimension value of 'shape' in ReshapeOp must not " "Each dimension value of 'shape' in ReshapeOp must not "
"be negative except one unknown dimension. " "be negative except one unknown dimension. "
"But received shape = [%s], shape[%d] = %d.", "But received shape = [%s], shape[%d] = %d.",
...@@ -161,7 +161,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -161,7 +161,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
output_shape[unk_dim_idx] * capacity, output_shape[unk_dim_idx] * capacity,
-in_size, -in_size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The 'shape' attribute in ReshapeOp is invalid. " "The 'shape' attribute in ReshapeOp is invalid. "
"The input tensor X'size must be divisible by known " "The input tensor X'size must be divisible by known "
"capacity of 'shape'. " "capacity of 'shape'. "
...@@ -179,7 +179,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -179,7 +179,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
capacity, capacity,
in_size, in_size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The 'shape' in ReshapeOp is invalid. " "The 'shape' in ReshapeOp is invalid. "
"The input tensor X'size must be equal to the capacity of " "The input tensor X'size must be equal to the capacity of "
"'shape'. " "'shape'. "
...@@ -199,7 +199,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape, ...@@ -199,7 +199,7 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
capacity, capacity,
in_size, in_size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The 'shape' in ReshapeOp is invalid. " "The 'shape' in ReshapeOp is invalid. "
"The input tensor X's shape = [%s], X's capacity = %d." "The input tensor X's shape = [%s], X's capacity = %d."
"But the target shape of Out is [%s], the " "But the target shape of Out is [%s], the "
...@@ -364,7 +364,7 @@ void SplitInferMeta(const MetaTensor& x, ...@@ -364,7 +364,7 @@ void SplitInferMeta(const MetaTensor& x,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
axis_value >= -rank && axis_value < rank, axis_value >= -rank && axis_value < rank,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d", "The axis is expected to be in range of [%d, %d), but got %d",
-rank, -rank,
rank, rank,
...@@ -383,7 +383,7 @@ void SplitInferMeta(const MetaTensor& x, ...@@ -383,7 +383,7 @@ void SplitInferMeta(const MetaTensor& x,
PADDLE_ENFORCE_EQ(input_axis_dim % num, PADDLE_ENFORCE_EQ(input_axis_dim % num,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The input's size along the split dimension " "The input's size along the split dimension "
"must be evenly divisible by Attr(num_or_sections). " "must be evenly divisible by Attr(num_or_sections). "
"But received Attr(num_or_sections) " "But received Attr(num_or_sections) "
...@@ -416,7 +416,7 @@ void SplitInferMeta(const MetaTensor& x, ...@@ -416,7 +416,7 @@ void SplitInferMeta(const MetaTensor& x,
if (config.is_runtime) { if (config.is_runtime) {
PADDLE_ENFORCE_LE(num_of_unknow, PADDLE_ENFORCE_LE(num_of_unknow,
1, 1,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only one dimension value of Attr(num_or_sections) " "Only one dimension value of Attr(num_or_sections) "
"in SplitOp can be -1. " "in SplitOp can be -1. "
"But received Attr(num_or_sections) = [%s].", "But received Attr(num_or_sections) = [%s].",
...@@ -430,7 +430,7 @@ void SplitInferMeta(const MetaTensor& x, ...@@ -430,7 +430,7 @@ void SplitInferMeta(const MetaTensor& x,
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
sum_of_section, sum_of_section,
input_axis_dim, input_axis_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Sum of Attr(num_or_sections) other than unknown section " "Sum of Attr(num_or_sections) other than unknown section "
"must be less than the input's " "must be less than the input's "
"size " "size "
...@@ -447,7 +447,7 @@ void SplitInferMeta(const MetaTensor& x, ...@@ -447,7 +447,7 @@ void SplitInferMeta(const MetaTensor& x,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
sum_of_section, sum_of_section,
input_axis_dim, input_axis_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Sum of Attr(num_or_sections) must be equal to the input's " "Sum of Attr(num_or_sections) must be equal to the input's "
"size " "size "
"along the split dimension. But received Attr(num_or_sections)" "along the split dimension. But received Attr(num_or_sections)"
......
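ValidateShape above implements the usual reshape conventions: at most one -1 entry, inferred from the remaining capacity, and 0 meaning "copy this dimension from the input". A self-contained sketch of just that inference, with the enforcement checks assumed to have passed:

#include <cstddef>
#include <cstdint>
#include <vector>

// Resolve -1 / 0 entries in `shape` against input dims `in`.
// Assumes a valid shape: at most one -1 and an evenly dividing capacity.
std::vector<int64_t> InferReshape(const std::vector<int64_t>& shape,
                                  const std::vector<int64_t>& in) {
  int64_t in_size = 1;
  for (int64_t d : in) in_size *= d;

  std::vector<int64_t> out(shape);
  int64_t known = 1;
  int unk = -1;
  for (std::size_t i = 0; i < out.size(); ++i) {
    if (out[i] == 0) out[i] = in[i];  // 0: keep input dimension i
    if (out[i] == -1) {
      unk = static_cast<int>(i);
      continue;
    }
    known *= out[i];
  }
  if (unk >= 0) out[unk] = in_size / known;  // -1: remaining capacity
  return out;
}

// Example: in = {2, 3, 4}, shape = {0, -1}  ->  {2, 12}.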
...@@ -54,7 +54,7 @@ void ConcatKernel(const Context& dev_ctx, ...@@ -54,7 +54,7 @@ void ConcatKernel(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x[i].lod().size(), x[i].lod().size(),
lod_size_0, lod_size_0,
paddle::platform::errors::Unimplemented( phi::errors::Unimplemented(
"The lod level of all input LoDTensors should be same. " "The lod level of all input LoDTensors should be same. "
"Maybe different lod level of input LoDTensors can concat," "Maybe different lod level of input LoDTensors can concat,"
"it is not supported currently. The lod level of %dth input " "it is not supported currently. The lod level of %dth input "
......
...@@ -127,7 +127,7 @@ struct SameDimsDivideFunctor< ...@@ -127,7 +127,7 @@ struct SameDimsDivideFunctor<
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"If use SameDimsDivideFunctor, template args(T) must be floating " "If use SameDimsDivideFunctor, template args(T) must be floating "
"point. "); "point. ");
} }
...@@ -278,12 +278,10 @@ void CommonForwardBroadcastCPU(const DenseTensor& x, ...@@ -278,12 +278,10 @@ void CommonForwardBroadcastCPU(const DenseTensor& x,
std::vector<int> index_array(max_dim, 0); std::vector<int> index_array(max_dim, 0);
const T* x_data = x.data<T>(); const T* x_data = x.data<T>();
const T* y_data = y.data<T>(); const T* y_data = y.data<T>();
PADDLE_ENFORCE_NOT_NULL(x_data, PADDLE_ENFORCE_NOT_NULL(
paddle::platform::errors::InvalidArgument( x_data, phi::errors::InvalidArgument("The input X should not be empty."));
"The input X should not be empty.")); PADDLE_ENFORCE_NOT_NULL(
PADDLE_ENFORCE_NOT_NULL(y_data, y_data, phi::errors::InvalidArgument("The input Y should not be empty."));
paddle::platform::errors::InvalidArgument(
"The input Y should not be empty."));
OutType* out_data = ctx.Alloc<OutType>(z); OutType* out_data = ctx.Alloc<OutType>(z);
const int out_size = std::accumulate( const int out_size = std::accumulate(
...@@ -317,12 +315,12 @@ void CommonElementwiseBroadcastForward(const CPUContext& dev_ctx, ...@@ -317,12 +315,12 @@ void CommonElementwiseBroadcastForward(const CPUContext& dev_ctx,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
axis, axis,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.", "Axis should be great than or equal to 0, but received axis is %d.",
axis)); axis));
PADDLE_ENFORCE_LT(axis, PADDLE_ENFORCE_LT(axis,
max_dim, max_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.", "Axis should be less than %d, but received axis is %d.",
max_dim, max_dim,
axis)); axis));
...@@ -385,12 +383,12 @@ void ElementwiseCompute(const CPUContext& dev_ctx, ...@@ -385,12 +383,12 @@ void ElementwiseCompute(const CPUContext& dev_ctx,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
axis, axis,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.", "Axis should be great than or equal to 0, but received axis is %d.",
axis)); axis));
PADDLE_ENFORCE_LT(axis, PADDLE_ENFORCE_LT(axis,
max_dim, max_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.", "Axis should be less than %d, but received axis is %d.",
max_dim, max_dim,
axis)); axis));
...@@ -630,12 +628,12 @@ void ElemwiseGradComputeWithBroadcast(const CPUContext& ctx, ...@@ -630,12 +628,12 @@ void ElemwiseGradComputeWithBroadcast(const CPUContext& ctx,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
axis, axis,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.", "Axis should be great than or equal to 0, but received axis is %d.",
axis)); axis));
PADDLE_ENFORCE_LT(axis, PADDLE_ENFORCE_LT(axis,
max_dim, max_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.", "Axis should be less than %d, but received axis is %d.",
max_dim, max_dim,
axis)); axis));
......
...@@ -48,7 +48,7 @@ void MaskedSelectKernel(const Context& dev_ctx, ...@@ -48,7 +48,7 @@ void MaskedSelectKernel(const Context& dev_ctx,
DDim out_dim{out_size}; DDim out_dim{out_size};
out->Resize(out_dim); out->Resize(out_dim);
auto out_data = out->mutable_data<T>(paddle::platform::CPUPlace()); auto out_data = out->mutable_data<T>(phi::CPUPlace());
int index = 0; int index = 0;
for (int i = 0; i < mask_size; i++) { for (int i = 0; i < mask_size; i++) {
......
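The CPU MaskedSelectKernel compacts the elements of x whose mask entry is true into a freshly sized 1-D output, exactly the loop sketched here on plain buffers (a standalone restatement, not the kernel itself):

#include <cstddef>
#include <vector>

// Keep x[i] where mask[i] is true, preserving order.
std::vector<float> MaskedSelect(const std::vector<float>& x,
                                const std::vector<bool>& mask) {
  std::vector<float> out;
  for (std::size_t i = 0; i < x.size() && i < mask.size(); ++i) {
    if (mask[i]) out.push_back(x[i]);
  }
  return out;
}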
...@@ -42,12 +42,12 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims, ...@@ -42,12 +42,12 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
axis, axis,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.", "Axis should be great than or equal to 0, but received axis is %d.",
axis)); axis));
PADDLE_ENFORCE_LT(axis, PADDLE_ENFORCE_LT(axis,
max_dim, max_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.", "Axis should be less than %d, but received axis is %d.",
max_dim, max_dim,
axis)); axis));
...@@ -72,7 +72,7 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims, ...@@ -72,7 +72,7 @@ inline void GetBroadcastDimsArrays(const DDim &x_dims,
x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 ||
y_dims_array[i] <= 1, y_dims_array[i] <= 1,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Broadcast dimension mismatch. Operands could " "Broadcast dimension mismatch. Operands could "
"not be broadcast together with the shape of X = [%s] and " "not be broadcast together with the shape of X = [%s] and "
"the shape of Y = [%s]. Received [%d] in X is not equal to " "the shape of Y = [%s]. Received [%d] in X is not equal to "
......
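GetBroadcastDimsArrays enforces the standard broadcast rule: once the shorter shape is aligned at `axis`, every dimension pair must be equal or contain a 1, and the output takes the larger of the two. A self-contained sketch of the resulting shape for the common case where axis equals the rank difference (validation reduced to an empty return; the real code raises InvalidArgument):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Broadcast {2, 3, 4} with {3, 1} -> {2, 3, 4}; empty result on mismatch.
std::vector<int64_t> BroadcastShape(std::vector<int64_t> x,
                                    std::vector<int64_t> y) {
  if (x.size() < y.size()) std::swap(x, y);
  const std::size_t offset = x.size() - y.size();
  std::vector<int64_t> out(x);
  for (std::size_t i = 0; i < y.size(); ++i) {
    const int64_t a = x[offset + i];
    const int64_t b = y[i];
    if (a != b && a != 1 && b != 1) return {};  // not broadcastable
    out[offset + i] = std::max(a, b);
  }
  return out;
}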
...@@ -23,7 +23,7 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) { ...@@ -23,7 +23,7 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
axis >= -rank && axis < rank, axis >= -rank && axis < rank,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d", "The axis is expected to be in range of [%d, %d), but got %d",
-rank, -rank,
rank, rank,
...@@ -42,17 +42,17 @@ static inline phi::DDim ComputeAndCheckShape( ...@@ -42,17 +42,17 @@ static inline phi::DDim ComputeAndCheckShape(
auto out_dims = inputs_dims[0]; auto out_dims = inputs_dims[0];
size_t in_zero_dims_size = out_dims.size(); size_t in_zero_dims_size = out_dims.size();
for (size_t i = 1; i < n; i++) { for (size_t i = 1; i < n; i++) {
PADDLE_ENFORCE_EQ(inputs_dims[i].size(), PADDLE_ENFORCE_EQ(
out_dims.size(), inputs_dims[i].size(),
paddle::platform::errors::InvalidArgument( out_dims.size(),
"The shape of input[0] and input[%d] " phi::errors::InvalidArgument("The shape of input[0] and input[%d] "
"is expected to be equal." "is expected to be equal."
"But received input[0]'s shape = " "But received input[0]'s shape = "
"[%s], input[%d]'s shape = [%s].", "[%s], input[%d]'s shape = [%s].",
i, i,
inputs_dims[0], inputs_dims[0],
i, i,
inputs_dims[i])); inputs_dims[i]));
for (size_t j = 0; j < in_zero_dims_size; j++) { for (size_t j = 0; j < in_zero_dims_size; j++) {
if (j == axis) { if (j == axis) {
if (is_runtime) { if (is_runtime) {
...@@ -71,7 +71,7 @@ static inline phi::DDim ComputeAndCheckShape( ...@@ -71,7 +71,7 @@ static inline phi::DDim ComputeAndCheckShape(
// check all shape in run time // check all shape in run time
PADDLE_ENFORCE_EQ(inputs_dims[0][j], PADDLE_ENFORCE_EQ(inputs_dims[0][j],
inputs_dims[i][j], inputs_dims[i][j],
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The %d-th dimension of input[0] and input[%d] " "The %d-th dimension of input[0] and input[%d] "
"is expected to be equal." "is expected to be equal."
"But received input[0]'s shape = " "But received input[0]'s shape = "
...@@ -92,4 +92,4 @@ static inline phi::DDim ComputeAndCheckShape( ...@@ -92,4 +92,4 @@ static inline phi::DDim ComputeAndCheckShape(
} }
} // namespace funcs } // namespace funcs
} // namespace pten } // namespace phi
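ComputeAxis above folds a possibly negative axis into [0, rank) after the range check. The arithmetic is a single conditional; a standalone restatement of the semantics (not the library source):

#include <cstdint>

// Map axis in [-rank, rank) to [0, rank): -1 becomes rank - 1, and so on.
int64_t NormalizeAxis(int64_t axis, int64_t rank) {
  return axis >= 0 ? axis : axis + rank;
}

// Example: NormalizeAxis(-1, 4) == 3.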
...@@ -21,7 +21,7 @@ limitations under the License. */ ...@@ -21,7 +21,7 @@ limitations under the License. */
namespace phi { namespace phi {
// EigenDim converts paddle::platform::DDim into Eigen::DSizes. // EigenDim converts phi::DDim into Eigen::DSizes.
template <int D> template <int D>
struct EigenDim { struct EigenDim {
using Type = Eigen::DSizes<Eigen::DenseIndex, D>; using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
...@@ -29,7 +29,7 @@ struct EigenDim { ...@@ -29,7 +29,7 @@ struct EigenDim {
static Type From(const DDim& dims) { static Type From(const DDim& dims) {
PADDLE_ENFORCE_EQ(arity(dims), PADDLE_ENFORCE_EQ(arity(dims),
D, D,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input dimension size should be equal to %d, but " "Input dimension size should be equal to %d, but "
"received dimension size is %d.", "received dimension size is %d.",
arity(dims), arity(dims),
...@@ -42,7 +42,7 @@ struct EigenDim { ...@@ -42,7 +42,7 @@ struct EigenDim {
} }
}; };
// Interpret paddle::platform::Tensor as EigenTensor and EigenConstTensor. // Interpret phi::Tensor as EigenTensor and EigenConstTensor.
template <typename T, template <typename T,
size_t D, size_t D,
int MajorType = Eigen::RowMajor, int MajorType = Eigen::RowMajor,
...@@ -86,7 +86,7 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> { ...@@ -86,7 +86,7 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
int rank = tensor.dims().size(); int rank = tensor.dims().size();
PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank), PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank),
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input dimension number(num_col_dims) must be " "Input dimension number(num_col_dims) must be "
"between 0 and %d, but received number is %d.", "between 0 and %d, but received number is %d.",
rank, rank,
...@@ -100,7 +100,7 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> { ...@@ -100,7 +100,7 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
int rank = tensor.dims().size(); int rank = tensor.dims().size();
PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank), PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank),
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input dimension number(num_col_dims) must be " "Input dimension number(num_col_dims) must be "
"between 0 and %d, but received number is %d.", "between 0 and %d, but received number is %d.",
rank, rank,
......
...@@ -343,7 +343,7 @@ inline void get_mid_dims(const DDim &x_dims, ...@@ -343,7 +343,7 @@ inline void get_mid_dims(const DDim &x_dims,
if (x_dims[i + axis] != y_dims[i]) { if (x_dims[i + axis] != y_dims[i]) {
PADDLE_ENFORCE_EQ(y_dims[i] == 1 || x_dims[i + axis] == 1, PADDLE_ENFORCE_EQ(y_dims[i] == 1 || x_dims[i + axis] == 1,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Broadcast dimension mismatch. Operands " "Broadcast dimension mismatch. Operands "
"could not be broadcast together with the shape of " "could not be broadcast together with the shape of "
"X = [%s] and the shape of Y = [%s]. Received [%d] " "X = [%s] and the shape of Y = [%s]. Received [%d] "
...@@ -754,7 +754,7 @@ void ElementwiseKernel(const KPDevice &ctx, ...@@ -754,7 +754,7 @@ void ElementwiseKernel(const KPDevice &ctx,
const int kArity = Traits::arity; const int kArity = Traits::arity;
PADDLE_ENFORCE_EQ(ins.size(), PADDLE_ENFORCE_EQ(ins.size(),
kArity, kArity,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The number of inputs is expected to be equal to the " "The number of inputs is expected to be equal to the "
"arity of functor. But recieved: the number of inputs " "arity of functor. But recieved: the number of inputs "
"is %d, the arity of functor is %d.", "is %d, the arity of functor is %d.",
...@@ -762,7 +762,7 @@ void ElementwiseKernel(const KPDevice &ctx, ...@@ -762,7 +762,7 @@ void ElementwiseKernel(const KPDevice &ctx,
kArity)); kArity));
PADDLE_ENFORCE_EQ(outs->size(), PADDLE_ENFORCE_EQ(outs->size(),
NumOuts, NumOuts,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Number of outputs shall equal to number of functions, " "Number of outputs shall equal to number of functions, "
"but number of outputs is %d, of functions is %d.", "but number of outputs is %d, of functions is %d.",
outs->size(), outs->size(),
...@@ -773,7 +773,7 @@ void ElementwiseKernel(const KPDevice &ctx, ...@@ -773,7 +773,7 @@ void ElementwiseKernel(const KPDevice &ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
(*outs)[i]->dims(), (*outs)[i]->dims(),
(*outs)[0]->dims(), (*outs)[0]->dims(),
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The shape of each output tensor shall be identical yet, " "The shape of each output tensor shall be identical yet, "
"but %dth output tensor`s shape is not.", "but %dth output tensor`s shape is not.",
i)); i));
...@@ -796,7 +796,7 @@ void ElementwiseKernel(const KPDevice &ctx, ...@@ -796,7 +796,7 @@ void ElementwiseKernel(const KPDevice &ctx,
ctx, ins, outs, func); ctx, ins, outs, func);
break; break;
default: { default: {
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
"Unsupported vectorized size: %d !", vec_size)); "Unsupported vectorized size: %d !", vec_size));
break; break;
} }
......
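ElementwiseKernel first checks that the number of inputs matches the functor's arity (Traits::arity) and that the output count matches NumOuts, then dispatches on the vectorization width. A compile-time sketch of how such an arity can be deduced from operator() (the trait here is hypothetical; Paddle derives Traits::arity from its own function-traits header):

// Deduce the arity of a functor's const operator() at compile time.
template <typename F>
struct FunctorArity : FunctorArity<decltype(&F::operator())> {};

template <typename C, typename R, typename... Args>
struct FunctorArity<R (C::*)(Args...) const> {
  static constexpr int value = sizeof...(Args);
};

struct AddFunctor {
  float operator()(float a, float b) const { return a + b; }
};

static_assert(FunctorArity<AddFunctor>::value == 2, "binary functor");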
...@@ -184,7 +184,7 @@ struct TensorSetConstantCPU { ...@@ -184,7 +184,7 @@ struct TensorSetConstantCPU {
: tensor_(tensor), value_(value) {} : tensor_(tensor), value_(value) {}
template <typename T> template <typename T>
void apply() const { void apply() const {
auto cpu = paddle::platform::CPUPlace(); auto cpu = phi::CPUPlace();
auto* begin = tensor_->mutable_data<T>(cpu); auto* begin = tensor_->mutable_data<T>(cpu);
std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_)); std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
} }
...@@ -197,8 +197,7 @@ void set_constant_with_place<paddle::platform::XPUPlace>( ...@@ -197,8 +197,7 @@ void set_constant_with_place<paddle::platform::XPUPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("XPUPlace is not supported"));
paddle::platform::errors::Unimplemented("XPUPlace is not supported"));
} }
template <> template <>
...@@ -206,8 +205,7 @@ void set_constant_with_place<paddle::platform::NPUPlace>( ...@@ -206,8 +205,7 @@ void set_constant_with_place<paddle::platform::NPUPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("NPUPlace is not supported"));
paddle::platform::errors::Unimplemented("NPUPlace is not supported"));
} }
template <> template <>
...@@ -215,8 +213,7 @@ void set_constant_with_place<paddle::platform::NPUPinnedPlace>( ...@@ -215,8 +213,7 @@ void set_constant_with_place<paddle::platform::NPUPinnedPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented("NPUPinnedPlace is not supported"));
"NPUPinnedPlace is not supported"));
} }
template <> template <>
...@@ -224,8 +221,7 @@ void set_constant_with_place<paddle::platform::IPUPlace>( ...@@ -224,8 +221,7 @@ void set_constant_with_place<paddle::platform::IPUPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("IPUPlace is not supported"));
paddle::platform::errors::Unimplemented("IPUPlace is not supported"));
} }
template <> template <>
...@@ -233,12 +229,11 @@ void set_constant_with_place<paddle::platform::CustomPlace>( ...@@ -233,12 +229,11 @@ void set_constant_with_place<paddle::platform::CustomPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("CustomPlace is not supported"));
paddle::platform::errors::Unimplemented("CustomPlace is not supported"));
} }
template <> template <>
void set_constant_with_place<paddle::platform::CPUPlace>( void set_constant_with_place<phi::CPUPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
...@@ -250,8 +245,7 @@ void set_constant_with_place<paddle::platform::MLUPlace>( ...@@ -250,8 +245,7 @@ void set_constant_with_place<paddle::platform::MLUPlace>(
const paddle::platform::DeviceContext& context, const paddle::platform::DeviceContext& context,
paddle::framework::Tensor* tensor, paddle::framework::Tensor* tensor,
float value) { float value) {
PADDLE_THROW( PADDLE_THROW(phi::errors::Unimplemented("MLUPlace is not supported"));
paddle::platform::errors::Unimplemented("MLUPlace is not supported"));
} }
template <> template <>
...@@ -286,7 +280,7 @@ void set_constant(const paddle::platform::DeviceContext& context, ...@@ -286,7 +280,7 @@ void set_constant(const paddle::platform::DeviceContext& context,
// tensor->place().apply_visitor(func); // tensor->place().apply_visitor(func);
paddle::platform::VisitPlace(tensor->place(), func); paddle::platform::VisitPlace(tensor->place(), func);
#else #else
func(paddle::platform::CPUPlace()); func(phi::CPUPlace());
#endif #endif
} }
...@@ -302,7 +296,7 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> { ...@@ -302,7 +296,7 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
vector.numel(), vector.numel(),
size, size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The input vector size" "The input vector size"
" should be equal to the size of each row of input tensor." " should be equal to the size of each row of input tensor."
" Expected vector size=%d, but received %d", " Expected vector size=%d, but received %d",
...@@ -312,7 +306,7 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> { ...@@ -312,7 +306,7 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> {
const char* out_dims_cstr = out_dims.to_str().c_str(); const char* out_dims_cstr = out_dims.to_str().c_str();
PADDLE_ENFORCE_EQ(out_dims, PADDLE_ENFORCE_EQ(out_dims,
in_dims, in_dims,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The output tensor shape should be same as the input" "The output tensor shape should be same as the input"
" tensor shape. Expected output tensor shape: %s," " tensor shape. Expected output tensor shape: %s,"
" but received %s", " but received %s",
......
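RowwiseAdd checks that the vector length equals the row width and that the output shape matches the input, then adds the vector to every row. The same broadcast on plain row-major buffers (a reference sketch; the real functor runs through Eigen on a device context):

#include <cstddef>
#include <vector>

// out[i][j] = in[i][j] + vec[j] for a rows x width row-major matrix.
void RowwiseAddRef(const std::vector<float>& in, const std::vector<float>& vec,
                   std::size_t rows, std::size_t width,
                   std::vector<float>* out) {
  out->resize(rows * width);
  for (std::size_t i = 0; i < rows; ++i) {
    for (std::size_t j = 0; j < width; ++j) {
      (*out)[i * width + j] = in[i * width + j] + vec[j];
    }
  }
}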
...@@ -257,7 +257,7 @@ struct RowwiseAdd<paddle::platform::CUDADeviceContext, T> { ...@@ -257,7 +257,7 @@ struct RowwiseAdd<paddle::platform::CUDADeviceContext, T> {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
vector.numel(), vector.numel(),
size, size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The input vector size" "The input vector size"
" should be equal to the size of each row of input tensor." " should be equal to the size of each row of input tensor."
" Expected vector size=%d, but received %d", " Expected vector size=%d, but received %d",
...@@ -268,7 +268,7 @@ struct RowwiseAdd<paddle::platform::CUDADeviceContext, T> { ...@@ -268,7 +268,7 @@ struct RowwiseAdd<paddle::platform::CUDADeviceContext, T> {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
out_dims, out_dims,
in_dims, in_dims,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The output tensor shape should be same as the input tensor" "The output tensor shape should be same as the input tensor"
" shape. Expected output tensor shape: %s," " shape. Expected output tensor shape: %s,"
" but received %s", " but received %s",
...@@ -303,7 +303,7 @@ void ColwiseSum<paddle::platform::CUDADeviceContext, double>::operator()( ...@@ -303,7 +303,7 @@ void ColwiseSum<paddle::platform::CUDADeviceContext, double>::operator()(
auto size = input.numel() / in_dims[0]; auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector->numel(), PADDLE_ENFORCE_EQ(vector->numel(),
size, size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of input vector" "The size of input vector"
" should be equal to the size of input tensor column" " should be equal to the size of input tensor column"
" dimension. Expected vector size=%d, but received %d", " dimension. Expected vector size=%d, but received %d",
...@@ -339,7 +339,7 @@ void RowwiseSum<paddle::platform::CUDADeviceContext, double>::operator()( ...@@ -339,7 +339,7 @@ void RowwiseSum<paddle::platform::CUDADeviceContext, double>::operator()(
auto size = input.numel() / in_dims[0]; auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector->numel(), PADDLE_ENFORCE_EQ(vector->numel(),
in_dims[0], in_dims[0],
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of input vector" "The size of input vector"
" should be equal to the size of input tensor row" " should be equal to the size of input tensor row"
" dimension. Expected vector size=%d, but received %d", " dimension. Expected vector size=%d, but received %d",
......
...@@ -115,7 +115,7 @@ struct TensorSetConstantXPU { ...@@ -115,7 +115,7 @@ struct TensorSetConstantXPU {
std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_)); std::fill(data_cpu.get(), data_cpu.get() + numel, static_cast<T>(value_));
paddle::memory::Copy(place_, paddle::memory::Copy(place_,
begin, begin,
paddle::platform::CPUPlace(), phi::CPUPlace(),
static_cast<void*>(data_cpu.get()), static_cast<void*>(data_cpu.get()),
numel * sizeof(T)); numel * sizeof(T));
} }
......
...@@ -74,7 +74,7 @@ void ColwiseSum<DeviceContext, T>::operator()( ...@@ -74,7 +74,7 @@ void ColwiseSum<DeviceContext, T>::operator()(
auto size = input.numel() / in_dims[0]; auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(out->numel(), PADDLE_ENFORCE_EQ(out->numel(),
size, size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor column" "should be equal to the size of input tensor column"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
...@@ -102,7 +102,7 @@ class ColwiseSum<paddle::platform::CPUDeviceContext, T> { ...@@ -102,7 +102,7 @@ class ColwiseSum<paddle::platform::CPUDeviceContext, T> {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
out->numel(), out->numel(),
size, size,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor column" "should be equal to the size of input tensor column"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
...@@ -130,15 +130,14 @@ void RowwiseMean<DeviceContext, T>::operator()( ...@@ -130,15 +130,14 @@ void RowwiseMean<DeviceContext, T>::operator()(
const paddle::framework::Tensor& input, const paddle::framework::Tensor& input,
paddle::framework::Tensor* out) { paddle::framework::Tensor* out) {
auto in_dims = input.dims(); auto in_dims = input.dims();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(in_dims.size(),
in_dims.size(), 2U,
2U, phi::errors::InvalidArgument("The rank of input tensor "
paddle::platform::errors::InvalidArgument("The rank of input tensor " "should be 2, but received %d",
"should be 2, but received %d", in_dims.size()));
in_dims.size()));
PADDLE_ENFORCE_EQ(out->numel(), PADDLE_ENFORCE_EQ(out->numel(),
in_dims[0], in_dims[0],
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor row" "should be equal to the size of input tensor row"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
...@@ -161,18 +160,18 @@ class RowwiseMean<paddle::platform::CPUDeviceContext, T> { ...@@ -161,18 +160,18 @@ class RowwiseMean<paddle::platform::CPUDeviceContext, T> {
const paddle::framework::Tensor& input, const paddle::framework::Tensor& input,
paddle::framework::Tensor* out) { paddle::framework::Tensor* out) {
auto& in_dims = input.dims(); auto& in_dims = input.dims();
PADDLE_ENFORCE_EQ(in_dims.size(), PADDLE_ENFORCE_EQ(
2U, in_dims.size(),
paddle::platform::errors::InvalidArgument( 2U,
"The rank of input tensor " phi::errors::InvalidArgument("The rank of input tensor "
"should be 2, but received %d", "should be 2, but received %d",
in_dims.size())); in_dims.size()));
auto height = in_dims[0]; auto height = in_dims[0];
auto size = in_dims[1]; auto size = in_dims[1];
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
out->numel(), out->numel(),
height, height,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor row" "should be equal to the size of input tensor row"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
...@@ -198,15 +197,14 @@ void RowwiseSum<DeviceContext, T>::operator()( ...@@ -198,15 +197,14 @@ void RowwiseSum<DeviceContext, T>::operator()(
const paddle::framework::Tensor& input, const paddle::framework::Tensor& input,
paddle::framework::Tensor* out) { paddle::framework::Tensor* out) {
auto in_dims = input.dims(); auto in_dims = input.dims();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(in_dims.size(),
in_dims.size(), 2U,
2U, phi::errors::InvalidArgument("The rank of input tensor "
paddle::platform::errors::InvalidArgument("The rank of input tensor " "should be 2, but received %d",
"should be 2, but received %d", in_dims.size()));
in_dims.size()));
PADDLE_ENFORCE_EQ(out->numel(), PADDLE_ENFORCE_EQ(out->numel(),
in_dims[0], in_dims[0],
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor row" "should be equal to the size of input tensor row"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
...@@ -229,18 +227,18 @@ class RowwiseSum<paddle::platform::CPUDeviceContext, T> { ...@@ -229,18 +227,18 @@ class RowwiseSum<paddle::platform::CPUDeviceContext, T> {
const paddle::framework::Tensor& input, const paddle::framework::Tensor& input,
paddle::framework::Tensor* out) { paddle::framework::Tensor* out) {
auto& in_dims = input.dims(); auto& in_dims = input.dims();
PADDLE_ENFORCE_EQ(in_dims.size(), PADDLE_ENFORCE_EQ(
2U, in_dims.size(),
paddle::platform::errors::InvalidArgument( 2U,
"The rank of input tensor " phi::errors::InvalidArgument("The rank of input tensor "
"should be 2, but received %d", "should be 2, but received %d",
in_dims.size())); in_dims.size()));
auto height = in_dims[0]; auto height = in_dims[0];
auto size = in_dims[1]; auto size = in_dims[1];
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
out->numel(), out->numel(),
height, height,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of output tensor " "The size of output tensor "
"should be equal to the size of input tensor row" "should be equal to the size of input tensor row"
" dimension. Expected output size=%d, but received %d", " dimension. Expected output size=%d, but received %d",
......
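A minimal reference sketch of the contract the RowwiseSum checks above enforce, assuming a plain row-major float buffer in place of paddle::framework::Tensor (all names here are illustrative, not from the codebase):

#include <cstddef>
#include <vector>

// Mirrors RowwiseSum's shape contract: the input is rank-2
// (height x width) and the output holds exactly `height` elements,
// one per-row sum each.
std::vector<float> RowwiseSumRef(const std::vector<float>& in,
                                 std::size_t height, std::size_t width) {
  std::vector<float> out(height, 0.0f);
  for (std::size_t i = 0; i < height; ++i) {
    for (std::size_t j = 0; j < width; ++j) {
      out[i] += in[i * width + j];
    }
  }
  return out;
}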
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
...@@ -329,7 +328,7 @@ void ConcatImpl(const Context& context, ...@@ -329,7 +328,7 @@ void ConcatImpl(const Context& context,
inputs_data, in_num); inputs_data, in_num);
paddle::memory::Copy(context.GetPlace(), paddle::memory::Copy(context.GetPlace(),
tmp_dev_ins_data->ptr(), tmp_dev_ins_data->ptr(),
paddle::platform::CPUPlace(), phi::CPUPlace(),
restored, restored,
in_num * sizeof(T*), in_num * sizeof(T*),
context.stream()); context.stream());
...@@ -376,7 +375,7 @@ void ConcatImpl(const Context& context, ...@@ -376,7 +375,7 @@ void ConcatImpl(const Context& context,
inputs_col, inputs_col_num); inputs_col, inputs_col_num);
paddle::memory::Copy(context.GetPlace(), paddle::memory::Copy(context.GetPlace(),
tmp_dev_ins_col_data->ptr(), tmp_dev_ins_col_data->ptr(),
paddle::platform::CPUPlace(), phi::CPUPlace(),
restored, restored,
inputs_col_num * sizeof(int64_t), inputs_col_num * sizeof(int64_t),
context.stream()); context.stream());
...@@ -488,7 +487,7 @@ void SplitImpl(const Context& context, ...@@ -488,7 +487,7 @@ void SplitImpl(const Context& context,
outputs_data, o_num); outputs_data, o_num);
paddle::memory::Copy(context.GetPlace(), paddle::memory::Copy(context.GetPlace(),
tmp_dev_outs_data->ptr(), tmp_dev_outs_data->ptr(),
paddle::platform::CPUPlace(), phi::CPUPlace(),
restored, restored,
o_num * sizeof(T*), o_num * sizeof(T*),
context.stream()); context.stream());
...@@ -535,7 +534,7 @@ void SplitImpl(const Context& context, ...@@ -535,7 +534,7 @@ void SplitImpl(const Context& context,
outputs_cols, outputs_cols_num); outputs_cols, outputs_cols_num);
paddle::memory::Copy(context.GetPlace(), paddle::memory::Copy(context.GetPlace(),
tmp_dev_ins_col_data->ptr(), tmp_dev_ins_col_data->ptr(),
paddle::platform::CPUPlace(), phi::CPUPlace(),
restored, restored,
outputs_cols_num * sizeof(int64_t), outputs_cols_num * sizeof(int64_t),
context.stream()); context.stream());
......
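The four hunks above share one pattern: per-input pointers (or column offsets) are assembled on the host, then staged into device memory with paddle::memory::Copy so a single kernel can index every input. A rough standalone sketch of that staging step, with hypothetical names and float in place of the template parameter:

#include <cuda_runtime.h>
#include <vector>

// Copies a host-side table of device data pointers into device memory;
// the caller launches the concat/split kernel with the returned table
// and frees it with cudaFree afterwards.
const float** StagePointerTable(const std::vector<const float*>& inputs,
                                cudaStream_t stream) {
  const float** dev_table = nullptr;
  const size_t bytes = inputs.size() * sizeof(const float*);
  cudaMalloc(reinterpret_cast<void**>(&dev_table), bytes);
  cudaMemcpyAsync(dev_table, inputs.data(), bytes,
                  cudaMemcpyHostToDevice, stream);
  return dev_table;
}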
...@@ -54,7 +54,7 @@ void ConcatKernel(const Context& dev_ctx, ...@@ -54,7 +54,7 @@ void ConcatKernel(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x[i].lod().size(), x[i].lod().size(),
lod_size_0, lod_size_0,
paddle::platform::errors::Unimplemented( phi::errors::Unimplemented(
"The lod level of all input LoDTensors should be same. " "The lod level of all input LoDTensors should be same. "
"Maybe different lod level of input LoDTensors can concat," "Maybe different lod level of input LoDTensors can concat,"
"it is not supported currently. The lod level of %dth input " "it is not supported currently. The lod level of %dth input "
......
...@@ -35,7 +35,7 @@ void Copy(const Context& dev_ctx, ...@@ -35,7 +35,7 @@ void Copy(const Context& dev_ctx,
auto dst_place = dst->place(); auto dst_place = dst->place();
if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) { if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) {
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(phi::errors::InvalidArgument(
"The src and dst tensor are all CPU tensor, you should call copy " "The src and dst tensor are all CPU tensor, you should call copy "
"function in CPU mode.")); "function in CPU mode."));
} }
...@@ -74,13 +74,13 @@ void Copy(const Context& dev_ctx, ...@@ -74,13 +74,13 @@ void Copy(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place), paddle::platform::is_gpu_place(ctx_place),
true, true,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.", "Context place error, excepted GPUPlace, but actually %s.",
ctx_place)); ctx_place));
auto ctx_gpu_place = ctx_place; auto ctx_gpu_place = ctx_place;
PADDLE_ENFORCE_EQ(src_gpu_place, PADDLE_ENFORCE_EQ(src_gpu_place,
ctx_gpu_place, ctx_gpu_place,
paddle::platform::errors::Unavailable( phi::errors::Unavailable(
"Source place and context place do not match, source " "Source place and context place do not match, source "
"place is %s, context place is %s.", "place is %s, context place is %s.",
src_gpu_place, src_gpu_place,
...@@ -98,13 +98,13 @@ void Copy(const Context& dev_ctx, ...@@ -98,13 +98,13 @@ void Copy(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place), paddle::platform::is_gpu_place(ctx_place),
true, true,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.", "Context place error, excepted GPUPlace, but actually %s.",
ctx_place)); ctx_place));
auto ctx_gpu_place = ctx_place; auto ctx_gpu_place = ctx_place;
PADDLE_ENFORCE_EQ(dst_gpu_place, PADDLE_ENFORCE_EQ(dst_gpu_place,
ctx_gpu_place, ctx_gpu_place,
paddle::platform::errors::Unavailable( phi::errors::Unavailable(
"Destination place and context place do not match, " "Destination place and context place do not match, "
"destination place is %s, context place is %s.", "destination place is %s, context place is %s.",
dst_gpu_place, dst_gpu_place,
...@@ -121,14 +121,14 @@ void Copy(const Context& dev_ctx, ...@@ -121,14 +121,14 @@ void Copy(const Context& dev_ctx,
auto ctx_place = dev_ctx.GetPlace(); auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(ctx_place), PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(ctx_place),
true, true,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Device context place mismatch. When copying Tensor " "Device context place mismatch. When copying Tensor "
"data from GPU memory to CUDA Pinned memory, current " "data from GPU memory to CUDA Pinned memory, current "
"device context place should be GPU.")); "device context place should be GPU."));
auto ctx_gpu_place = ctx_place; auto ctx_gpu_place = ctx_place;
PADDLE_ENFORCE_EQ(src_gpu_place, PADDLE_ENFORCE_EQ(src_gpu_place,
ctx_gpu_place, ctx_gpu_place,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The source GPU device and current device context do " "The source GPU device and current device context do "
"not match. The source GPU device number is %d, but " "not match. The source GPU device number is %d, but "
"device context GPU number is %d.", "device context GPU number is %d.",
...@@ -146,14 +146,14 @@ void Copy(const Context& dev_ctx, ...@@ -146,14 +146,14 @@ void Copy(const Context& dev_ctx,
auto ctx_place = dev_ctx.GetPlace(); auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(ctx_place), PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(ctx_place),
true, true,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Device context place mismatch. When copying Tensor " "Device context place mismatch. When copying Tensor "
"data from CUDA Pinned memory to GPU memory, current " "data from CUDA Pinned memory to GPU memory, current "
"device context place should be GPU.")); "device context place should be GPU."));
auto ctx_gpu_place = ctx_place; auto ctx_gpu_place = ctx_place;
PADDLE_ENFORCE_EQ(dst_gpu_place, PADDLE_ENFORCE_EQ(dst_gpu_place,
ctx_gpu_place, ctx_gpu_place,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The target GPU device and current device context do " "The target GPU device and current device context do "
"not match. The target GPU device number is %d, but " "not match. The target GPU device number is %d, but "
"device context GPU number is %d.", "device context GPU number is %d.",
...@@ -172,7 +172,7 @@ void Copy(const Context& dev_ctx, ...@@ -172,7 +172,7 @@ void Copy(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place), paddle::platform::is_gpu_place(ctx_place),
true, true,
paddle::platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.", "Context place error, excepted GPUPlace, but actually %s.",
ctx_place)); ctx_place));
auto stream = auto stream =
...@@ -195,12 +195,12 @@ void Copy(const Context& dev_ctx, ...@@ -195,12 +195,12 @@ void Copy(const Context& dev_ctx,
paddle::memory::Copy( paddle::memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else { } else {
PADDLE_THROW(paddle::platform::errors::Unavailable( PADDLE_THROW(phi::errors::Unavailable(
"Context place dose not match the source and destination place.")); "Context place dose not match the source and destination place."));
} }
} }
} else { } else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(phi::errors::InvalidArgument(
"Place type error. Please check the place of src and dst Tensor.")); "Place type error. Please check the place of src and dst Tensor."));
} }
} }
......
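Condensed, the kernel above dispatches on the (src, dst) place pair: CPU→GPU, GPU→CPU, GPU→GPU, and the two CUDA-pinned paths each verify that the device context's place lies on the GPU side before issuing an async paddle::memory::Copy on the context stream. A stand-in sketch of that structure (Where is a local enum for illustration, not phi::Place):

#include <stdexcept>

enum class Where { CPU, GPU, GPUPinned };

void CopyDispatch(Where src, Where dst) {
  if (src == Where::CPU && dst == Where::CPU) {
    // Rejected above: a CPU-to-CPU copy belongs to the CPU kernel.
    throw std::invalid_argument("call the copy function in CPU mode");
  }
  // Every remaining pair checks that the GPU-side place matches the
  // device context's place, then copies asynchronously on its stream;
  // per the error message above, a GPU->GPU copy requires the context
  // place to match the source or destination device.
}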
...@@ -714,7 +714,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, ...@@ -714,7 +714,7 @@ void CommonGradBroadcastCUDA(const DenseTensor &x,
DX_OP dx_op, DX_OP dx_op,
DY_OP dy_op) { DY_OP dy_op) {
const auto gplace = ctx.GetPlace(); const auto gplace = ctx.GetPlace();
auto cplace = paddle::platform::CPUPlace(); auto cplace = phi::CPUPlace();
const T *x_data = x.data<T>(); const T *x_data = x.data<T>();
const T *y_data = y.data<T>(); const T *y_data = y.data<T>();
const Tout *out_data = out.data<Tout>(); const Tout *out_data = out.data<Tout>();
...@@ -1339,12 +1339,12 @@ void ElemwiseGradComputeWithBroadcast(const GPUContext &ctx, ...@@ -1339,12 +1339,12 @@ void ElemwiseGradComputeWithBroadcast(const GPUContext &ctx,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
axis, axis,
0, 0,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be great than or equal to 0, but received axis is %d.", "Axis should be great than or equal to 0, but received axis is %d.",
axis)); axis));
PADDLE_ENFORCE_LT(axis, PADDLE_ENFORCE_LT(axis,
max_dim, max_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Axis should be less than %d, but received axis is %d.", "Axis should be less than %d, but received axis is %d.",
max_dim, max_dim,
axis)); axis));
......
...@@ -111,9 +111,9 @@ void HistogramKernel(const Context& dev_ctx, ...@@ -111,9 +111,9 @@ void HistogramKernel(const Context& dev_ctx,
DenseTensor input_min_cpu, input_max_cpu; DenseTensor input_min_cpu, input_max_cpu;
paddle::framework::TensorCopySync( paddle::framework::TensorCopySync(
input_min_t, paddle::platform::CPUPlace(), &input_min_cpu); input_min_t, phi::CPUPlace(), &input_min_cpu);
paddle::framework::TensorCopySync( paddle::framework::TensorCopySync(
input_max_t, paddle::platform::CPUPlace(), &input_max_cpu); input_max_t, phi::CPUPlace(), &input_max_cpu);
output_min = input_min_cpu.data<T>()[0]; output_min = input_min_cpu.data<T>()[0];
output_max = input_max_cpu.data<T>()[0]; output_max = input_max_cpu.data<T>()[0];
......
...@@ -59,7 +59,7 @@ void FullLikeKernel(const Context& dev_ctx, ...@@ -59,7 +59,7 @@ void FullLikeKernel(const Context& dev_ctx,
(common_type_value <= (common_type_value <=
static_cast<CommonType>(std::numeric_limits<T>::max())), static_cast<CommonType>(std::numeric_limits<T>::max())),
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The filled value is out of range for target type, " "The filled value is out of range for target type, "
"current kernel type is %s, the range should between %f " "current kernel type is %s, the range should between %f "
"and %f, but now value is %f.", "and %f, but now value is %f.",
......
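The enforcement above widens the fill value to a common type before comparing it against the target dtype's numeric limits. A minimal sketch of the same idea, specialized to double→int8_t purely for illustration:

#include <cstdint>
#include <limits>

// True when v is representable as int8_t; FullLikeKernel performs the
// analogous bounds check for its template type T before filling.
bool FitsInInt8(double v) {
  return v >= static_cast<double>(std::numeric_limits<int8_t>::lowest()) &&
         v <= static_cast<double>(std::numeric_limits<int8_t>::max());
}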
...@@ -38,7 +38,7 @@ static void GetBroadcastFromDims(const int x_ndim, ...@@ -38,7 +38,7 @@ static void GetBroadcastFromDims(const int x_ndim,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x_bd_dims[i] == y_bd_dims[i] || x_bd_dims[i] <= 1 || y_bd_dims[i] <= 1, x_bd_dims[i] == y_bd_dims[i] || x_bd_dims[i] <= 1 || y_bd_dims[i] <= 1,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input(X) and Input(Y) has error dim." "Input(X) and Input(Y) has error dim."
"X_broadcast's shape[%s] must be equal to Y_broadcast's shape[%s]," "X_broadcast's shape[%s] must be equal to Y_broadcast's shape[%s],"
"or X_broadcast's shape[%s] <= 1, or Y_broadcast's shape[%s] <= 1," "or X_broadcast's shape[%s] <= 1, or Y_broadcast's shape[%s] <= 1,"
...@@ -110,7 +110,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -110,7 +110,7 @@ void MatMulFunction(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
M, M,
N, N,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"X's numbers must be equal to Y's numbers," "X's numbers must be equal to Y's numbers,"
"when X/Y's dims =1. But received X has [%d] elements," "when X/Y's dims =1. But received X has [%d] elements,"
"received Y has [%d] elements", "received Y has [%d] elements",
...@@ -135,27 +135,27 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -135,27 +135,27 @@ void MatMulFunction(const Context& dev_ctx,
if (x_ndim == 1) { if (x_ndim == 1) {
const int N = X.numel(); const int N = X.numel();
if (trans_y) { if (trans_y) {
PADDLE_ENFORCE_EQ(y_dims[y_ndim - 1], PADDLE_ENFORCE_EQ(
N, y_dims[y_ndim - 1],
paddle::platform::errors::InvalidArgument( N,
"Input(Y) has error dim." phi::errors::InvalidArgument("Input(Y) has error dim."
"Y'dims[%d] must be equal to %d" "Y'dims[%d] must be equal to %d"
"But received Y'dims[%d] is %d", "But received Y'dims[%d] is %d",
y_ndim - 1, y_ndim - 1,
N, N,
y_ndim - 1, y_ndim - 1,
y_dims[y_ndim - 1])); y_dims[y_ndim - 1]));
} else { } else {
PADDLE_ENFORCE_EQ(y_dims[y_ndim - 2], PADDLE_ENFORCE_EQ(
N, y_dims[y_ndim - 2],
paddle::platform::errors::InvalidArgument( N,
"Input(Y) has error dim." phi::errors::InvalidArgument("Input(Y) has error dim."
"Y'dims[%d] must be equal to %d" "Y'dims[%d] must be equal to %d"
"But received Y'dims[%d] is %d", "But received Y'dims[%d] is %d",
y_ndim - 2, y_ndim - 2,
N, N,
y_ndim - 2, y_ndim - 2,
y_dims[y_ndim - 2])); y_dims[y_ndim - 2]));
} }
std::vector<std::int64_t> out_dims(y_ndim - 1); std::vector<std::int64_t> out_dims(y_ndim - 1);
if (trans_y) { if (trans_y) {
...@@ -213,27 +213,27 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -213,27 +213,27 @@ void MatMulFunction(const Context& dev_ctx,
if (y_ndim == 1) { if (y_ndim == 1) {
const int N = Y.numel(); const int N = Y.numel();
if (trans_x) { if (trans_x) {
PADDLE_ENFORCE_EQ(x_dims[x_ndim - 2], PADDLE_ENFORCE_EQ(
N, x_dims[x_ndim - 2],
paddle::platform::errors::InvalidArgument( N,
"Input(X) has error dim." phi::errors::InvalidArgument("Input(X) has error dim."
"X'dims[%d] must be equal to %d" "X'dims[%d] must be equal to %d"
"But received X'dims[%d] is %d", "But received X'dims[%d] is %d",
x_ndim - 2, x_ndim - 2,
N, N,
x_ndim - 2, x_ndim - 2,
x_dims[x_ndim - 2])); x_dims[x_ndim - 2]));
} else { } else {
PADDLE_ENFORCE_EQ(x_dims[x_ndim - 1], PADDLE_ENFORCE_EQ(
N, x_dims[x_ndim - 1],
paddle::platform::errors::InvalidArgument( N,
"Input(X) has error dim." phi::errors::InvalidArgument("Input(X) has error dim."
"X'dims[%d] must be equal to %d" "X'dims[%d] must be equal to %d"
"But received X'dims[%d] is %d", "But received X'dims[%d] is %d",
x_ndim - 1, x_ndim - 1,
N, N,
x_ndim - 1, x_ndim - 1,
x_dims[x_ndim - 1])); x_dims[x_ndim - 1]));
} }
std::vector<std::int64_t> out_dims(x_ndim - 1); std::vector<std::int64_t> out_dims(x_ndim - 1);
if (trans_x) { if (trans_x) {
...@@ -292,27 +292,27 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -292,27 +292,27 @@ void MatMulFunction(const Context& dev_ctx,
const int M = trans_x ? x_dims[x_ndim - 1] : x_dims[x_ndim - 2]; const int M = trans_x ? x_dims[x_ndim - 1] : x_dims[x_ndim - 2];
const int K = trans_x ? x_dims[x_ndim - 2] : x_dims[x_ndim - 1]; const int K = trans_x ? x_dims[x_ndim - 2] : x_dims[x_ndim - 1];
if (trans_y) { if (trans_y) {
PADDLE_ENFORCE_EQ(y_dims[y_ndim - 1], PADDLE_ENFORCE_EQ(
K, y_dims[y_ndim - 1],
paddle::platform::errors::InvalidArgument( K,
"Input(Y) has error dim." phi::errors::InvalidArgument("Input(Y) has error dim."
"Y'dims[%d] must be equal to %d" "Y'dims[%d] must be equal to %d"
"But received Y'dims[%d] is %d", "But received Y'dims[%d] is %d",
y_ndim - 1, y_ndim - 1,
K, K,
y_ndim - 1, y_ndim - 1,
y_dims[y_ndim - 1])); y_dims[y_ndim - 1]));
} else { } else {
PADDLE_ENFORCE_EQ(y_dims[y_ndim - 2], PADDLE_ENFORCE_EQ(
K, y_dims[y_ndim - 2],
paddle::platform::errors::InvalidArgument( K,
"Input(Y) has error dim." phi::errors::InvalidArgument("Input(Y) has error dim."
"Y'dims[%d] must be equal to %d" "Y'dims[%d] must be equal to %d"
"But received Y'dims[%d] is %d", "But received Y'dims[%d] is %d",
y_ndim - 2, y_ndim - 2,
K, K,
y_ndim - 2, y_ndim - 2,
y_dims[y_ndim - 2])); y_dims[y_ndim - 2]));
} }
const int N = trans_y ? y_dims[y_ndim - 2] : y_dims[y_ndim - 1]; const int N = trans_y ? y_dims[y_ndim - 2] : y_dims[y_ndim - 1];
const int ndim = (std::max)(x_ndim, y_ndim); const int ndim = (std::max)(x_ndim, y_ndim);
...@@ -493,16 +493,16 @@ void MatmulKernel(const Context& dev_ctx, ...@@ -493,16 +493,16 @@ void MatmulKernel(const Context& dev_ctx,
bool transpose_x, bool transpose_x,
bool transpose_y, bool transpose_y,
DenseTensor* out) { DenseTensor* out) {
PADDLE_ENFORCE_NE(phi::product(x.dims()), PADDLE_ENFORCE_NE(
0, phi::product(x.dims()),
paddle::platform::errors::InvalidArgument( 0,
"The Input(X) dims size must not be equal 0," phi::errors::InvalidArgument("The Input(X) dims size must not be equal 0,"
" but reviced dims size is 0. ")); " but reviced dims size is 0. "));
PADDLE_ENFORCE_NE(phi::product(y.dims()), PADDLE_ENFORCE_NE(
0, phi::product(y.dims()),
paddle::platform::errors::InvalidArgument( 0,
"The Input(Y) dims size must not be equal 0," phi::errors::InvalidArgument("The Input(Y) dims size must not be equal 0,"
" but reviced dims size is 0. ")); " but reviced dims size is 0. "));
MatMulFunction<Context, T>(dev_ctx, x, y, out, transpose_x, transpose_y); MatMulFunction<Context, T>(dev_ctx, x, y, out, transpose_x, transpose_y);
} }
......
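In the rank>=2 path above, the contracted dimension K comes from X (its last dim, or second-to-last under trans_x), and Y must expose the same K on its matching axis. A worked check with illustrative names:

#include <vector>

// K is X's contracted dim: the last dim normally, the second-to-last
// when X is transposed.
int ContractedDim(const std::vector<int>& x_dims, bool trans_x) {
  const int n = static_cast<int>(x_dims.size());
  return trans_x ? x_dims[n - 2] : x_dims[n - 1];
}

// Example: X = [2, 3, 4] with trans_x = false gives K = 4, so
// Y = [2, 4, 5] passes with trans_y = false (y_dims[y_ndim - 2] == K),
// while Y = [2, 5, 4] passes only with trans_y = true
// (y_dims[y_ndim - 1] == K).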
...@@ -41,7 +41,7 @@ inline int64_t GetNonZeroNum(const DenseTensor& dense, ...@@ -41,7 +41,7 @@ inline int64_t GetNonZeroNum(const DenseTensor& dense,
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
dims.size(), dims.size(),
sparse_dim, sparse_dim,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"sparse_dim(%d) should be less than or equal to dense.dim(%d)", "sparse_dim(%d) should be less than or equal to dense.dim(%d)",
sparse_dim, sparse_dim,
dims.size())); dims.size()));
...@@ -161,7 +161,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx, ...@@ -161,7 +161,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
bool valid = x_dims.size() == 2 || x_dims.size() == 3; bool valid = x_dims.size() == 2 || x_dims.size() == 3;
PADDLE_ENFORCE_EQ(valid, PADDLE_ENFORCE_EQ(valid,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"SparseCsrTensor only support 2-D or 3-D matrix")); "SparseCsrTensor only support 2-D or 3-D matrix"));
const int64_t non_zero_num = x.nnz(); const int64_t non_zero_num = x.nnz();
if (non_zero_num <= 0) return; if (non_zero_num <= 0) return;
......
...@@ -379,7 +379,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx, ...@@ -379,7 +379,7 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
bool valid = x_dims.size() == 2 || x_dims.size() == 3; bool valid = x_dims.size() == 2 || x_dims.size() == 3;
PADDLE_ENFORCE_EQ(valid, PADDLE_ENFORCE_EQ(valid,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"SparseCsrTensor only support 2-D or 3-D matrix")); "SparseCsrTensor only support 2-D or 3-D matrix"));
const int64_t non_zero_num = x.nnz(); const int64_t non_zero_num = x.nnz();
if (non_zero_num <= 0) return; if (non_zero_num <= 0) return;
......
...@@ -97,7 +97,7 @@ void DenseToSparseCsrKernel(const Context& dev_ctx, ...@@ -97,7 +97,7 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
bool valid = x_dims.size() == 2 || x_dims.size() == 3; bool valid = x_dims.size() == 2 || x_dims.size() == 3;
PADDLE_ENFORCE_EQ(valid, PADDLE_ENFORCE_EQ(valid,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"SparseCsrTensor only support 2-D or 3-D Tensor.")); "SparseCsrTensor only support 2-D or 3-D Tensor."));
const int64_t sparse_dim = x_dims.size() == 2 ? 2 : 3; const int64_t sparse_dim = x_dims.size() == 2 ? 2 : 3;
DenseTensor indices = phi::Empty<T, Context>(dev_ctx); DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
......
...@@ -62,7 +62,7 @@ void Copy(const Context& dev_ctx, ...@@ -62,7 +62,7 @@ void Copy(const Context& dev_ctx,
} }
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else { } else {
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place)); "Copy from %s to %s is not supported.", src_place, dst_place));
} }
} }
......
...@@ -32,13 +32,13 @@ void ScaleKernel(const Context& dev_ctx, ...@@ -32,13 +32,13 @@ void ScaleKernel(const Context& dev_ctx,
DenseTensor* out) { DenseTensor* out) {
out->mutable_data<T>(dev_ctx.GetPlace()); out->mutable_data<T>(dev_ctx.GetPlace());
PADDLE_ENFORCE_EQ(x.dims(), PADDLE_ENFORCE_EQ(
out->dims(), x.dims(),
paddle::platform::errors::InvalidArgument( out->dims(),
"In and out should have the same dim," phi::errors::InvalidArgument("In and out should have the same dim,"
" expected %s, but got %s.", " expected %s, but got %s.",
x.dims().to_str().c_str(), x.dims().to_str().c_str(),
out->dims().to_str().c_str())); out->dims().to_str().c_str()));
using XPUType = typename XPUTypeTrait<T>::Type; using XPUType = typename XPUTypeTrait<T>::Type;
int r = xpu::scale(dev_ctx.x_context(), int r = xpu::scale(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()), reinterpret_cast<const XPUType*>(x.data<T>()),
...@@ -50,7 +50,7 @@ void ScaleKernel(const Context& dev_ctx, ...@@ -50,7 +50,7 @@ void ScaleKernel(const Context& dev_ctx,
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
r, r,
XPU_SUCCESS, XPU_SUCCESS,
paddle::platform::errors::External( phi::errors::External(
"XPU scale kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); "XPU scale kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r]));
} }
......
...@@ -29,8 +29,7 @@ class FancyAllocator : public phi::Allocator { ...@@ -29,8 +29,7 @@ class FancyAllocator : public phi::Allocator {
AllocationPtr Allocate(size_t bytes_size) override { AllocationPtr Allocate(size_t bytes_size) override {
void* data = ::operator new(bytes_size); void* data = ::operator new(bytes_size);
auto* allocation = auto* allocation = new phi::Allocation(data, bytes_size, phi::CPUPlace());
new phi::Allocation(data, bytes_size, paddle::platform::CPUPlace());
return AllocationPtr(allocation, Delete); return AllocationPtr(allocation, Delete);
} }
}; };
......
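The matching Delete callback sits outside this hunk; a plausible counterpart to the ::operator new in Allocate above (a sketch, not the verbatim test code):

// Releases the raw buffer obtained via ::operator new, then the
// phi::Allocation wrapper itself.
static void Delete(phi::Allocation* allocation) {
  ::operator delete(allocation->ptr());
  delete allocation;
}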
...@@ -85,7 +85,7 @@ TEST(dense_tensor, ctor) { ...@@ -85,7 +85,7 @@ TEST(dense_tensor, ctor) {
r = r && (t.dims() == m.dims); r = r && (t.dims() == m.dims);
r = r && (t.dtype() == m.dtype); r = r && (t.dtype() == m.dtype);
r = r && (t.layout() == m.layout); r = r && (t.layout() == m.layout);
r = r && (t.place() == paddle::platform::CPUPlace()); r = r && (t.place() == phi::CPUPlace());
r = r && t.initialized(); r = r && t.initialized();
r = r && t.IsSharedWith(t); r = r && t.IsSharedWith(t);
return r; return r;
......
...@@ -53,7 +53,7 @@ TEST(sparse_coo_tensor, construct) { ...@@ -53,7 +53,7 @@ TEST(sparse_coo_tensor, construct) {
CHECK(sparse.dims() == dense_dims); CHECK(sparse.dims() == dense_dims);
CHECK(sparse.dtype() == DataType::FLOAT32); CHECK(sparse.dtype() == DataType::FLOAT32);
CHECK(sparse.layout() == DataLayout::SPARSE_COO); CHECK(sparse.layout() == DataLayout::SPARSE_COO);
CHECK(sparse.place() == paddle::platform::CPUPlace()); CHECK(sparse.place() == phi::CPUPlace());
} }
TEST(sparse_coo_tensor, other_function) { TEST(sparse_coo_tensor, other_function) {
......
...@@ -133,6 +133,8 @@ ...@@ -133,6 +133,8 @@
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "paddle/utils/string/to_string.h"
namespace paddle { namespace paddle {
namespace string { namespace string {
namespace tinyformat { namespace tinyformat {
......
...@@ -56,5 +56,26 @@ inline std::string to_string(const char* v) { ...@@ -56,5 +56,26 @@ inline std::string to_string(const char* v) {
return std::string(v); return std::string(v);
} }
inline std::ostream& operator<<(std::ostream& os,
const std::vector<std::vector<size_t>>& lod) {
os << "{";
for (auto& v : lod) {
os << "{";
bool is_first = true;
for (auto& i : v) {
if (is_first) {
os << i;
is_first = false;
} else {
os << ", " << i;
}
}
os << "}";
}
os << "}";
return os;
}
} // namespace string } // namespace string
} // namespace paddle } // namespace paddle
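A small usage example for the operator<< added above; the expected output is inferred from the loop, which prints consecutive level vectors back-to-back with no separator between the inner braces:

#include <cstddef>
#include <iostream>
#include <vector>
#include "paddle/utils/string/to_string.h"

int main() {
  using paddle::string::operator<<;  // ADL won't find it for std:: types
  std::vector<std::vector<std::size_t>> lod = {{0, 2, 5}, {0, 1, 3, 5}};
  std::cout << lod << "\n";  // prints {{0, 2, 5}{0, 1, 3, 5}}
  return 0;
}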