Unverified commit bbe879fc, authored by Zhanlue Yang, committed by GitHub

[Unify Tensors PR #2] Replaced pten::LoD with paddle::framework::LoD (#38275)

* Replaced pten::LoD with paddle::framework::LoD

* Overrode CPUVector with CUDAVector

* Refactored paddle::framework::Vector
Parent commit: 6ff3596e
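In effect, this commit changes the pten-side LoD alias from a nested std::vector into a vector of GPU-aware framework::Vector levels. A minimal before/after sketch (the alias lines are taken from the tensor_meta.h hunk below; the usage lines are an illustrative assumption):

// Old pten alias:
//   using LoD = std::vector<std::vector<size_t>>;
// New pten alias (see the tensor_meta.h hunk below):
//   using LoD = std::vector<paddle::framework::Vector<size_t>>;

// Hypothetical usage: construction is unchanged for callers, but each
// level is now a mixed CPU/GPU vector that can sync to device on demand.
pten::LoD lod{{0, 2, 5}};         // one LoD level covering rows [0, 2) and [2, 5)
size_t first_offset = lod[0][1];  // plain CPU-side access still works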
@@ -91,15 +91,16 @@ endif()
 cc_test(copy_same_tensor_test SRCS copy_same_tensor_test.cc DEPS tensor)
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
+cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context)
 if(WITH_GPU)
-  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
 elseif(WITH_ROCM)
-  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
 else()
-  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
+  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS mixed_vector place memory device_context tensor)
 endif()
-cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim mixed_vector place tensor framework_proto version)
 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h"
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace framework {
template <typename T>
void CopyToCPUHelper(std::vector<T> *cpu_, paddle::memory::AllocationPtr *gpu_,
size_t *gpu_memory_size_) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// COPY GPU Data To CPU
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
auto stream = dev_ctx->stream();
void *src = (*gpu_)->ptr();
void *dst = cpu_->data();
paddle::memory::Copy(platform::CPUPlace(), dst,
OptionalCUDAPlace(*gpu_).get(), src, *gpu_memory_size_,
stream);
dev_ctx->Wait();
#endif
}
template <typename T>
void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_,
size_t *gpu_memory_size_,
const platform::Place &place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void *src = cpu_->data();
*gpu_memory_size_ = cpu_->size() * sizeof(T); // sizeof(T)
(*gpu_) = memory::Alloc(place, *gpu_memory_size_);
void *dst = (*gpu_)->ptr();
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(), dst,
platform::CPUPlace(), src, *gpu_memory_size_, stream);
#endif
}
#define INSTANTIATE_VECTOR_FOR_TYPE(__TYPE__) \
template <> \
void Vector<__TYPE__>::VectorData::CopyToCPU() const { \
CopyToCPUHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_); \
} \
\
template <> \
void Vector<__TYPE__>::VectorData::CopyCPUDataToCUDA( \
const platform::Place &place) const { \
CopyCPUDataToCUDAHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_, place); \
}
INSTANTIATE_VECTOR_FOR_TYPE(size_t)
INSTANTIATE_VECTOR_FOR_TYPE(int)
INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
}; // namespace framework
} // namespace paddle
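The macro above pins the out-of-line copy paths to the element types the framework actually uses; for any other T, Vector<T>::VectorData would have no definition of these two methods to link against. As a sketch, INSTANTIATE_VECTOR_FOR_TYPE(size_t) expands to roughly:

template <>
void Vector<size_t>::VectorData::CopyToCPU() const {
  CopyToCPUHelper<size_t>(&cpu_, &gpu_, &gpu_memory_size_);
}

template <>
void Vector<size_t>::VectorData::CopyCPUDataToCUDA(
    const platform::Place &place) const {
  CopyCPUDataToCUDAHelper<size_t>(&cpu_, &gpu_, &gpu_memory_size_, place);
}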
@@ -23,17 +23,21 @@ limitations under the License. */
 #include "glog/logging.h"
 #include "paddle/fluid/framework/details/cow_ptr.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/memory/malloc.h"
-#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 namespace paddle {
 namespace framework {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
+    const paddle::memory::allocation::AllocationPtr &gpu_) {
+  return gpu_ == nullptr
+             ? paddle::none
+             : paddle::optional<platform::CUDAPlace>(
+                   BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
+}
 // Vector<T> implements the std::vector interface, and can get Data or
 // MutableData from any place. The data will be synced implicitly inside.
 template <typename T>
@@ -198,10 +202,7 @@ class Vector {
     std::mutex &Mutex() const { return mtx_; }
     paddle::optional<platform::CUDAPlace> CUDAPlace() const {
-      return gpu_ == nullptr
-                 ? paddle::none
-                 : paddle::optional<platform::CUDAPlace>(
-                       BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
+      return OptionalCUDAPlace(gpu_);
     }
   private:
@@ -212,17 +213,7 @@ class Vector {
       kDirty = 0x10
     };
-    void CopyToCPU() const {
-      // COPY GPU Data To CPU
-      auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-          platform::DeviceContextPool::Instance().Get(gpu_->place()));
-      auto stream = dev_ctx->stream();
-      void *src = gpu_->ptr();
-      void *dst = cpu_.data();
-      paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
-                           gpu_memory_size_, stream);
-      dev_ctx->Wait();
-    }
+    void CopyToCPU() const;
     void MutableCPU() {
       if (IsInCUDA() && IsDirty()) {
@@ -260,17 +251,7 @@ class Vector {
       }
     }
-    void CopyCPUDataToCUDA(const platform::Place &place) const {
-      void *src = cpu_.data();
-      gpu_memory_size_ = cpu_.size() * sizeof(T);
-      gpu_ = memory::Alloc(place, gpu_memory_size_);
-      void *dst = gpu_->ptr();
-      auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-          platform::DeviceContextPool::Instance().Get(place));
-      auto stream = dev_ctx->stream();
-      paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
-                           gpu_memory_size_, stream);
-    }
+    void CopyCPUDataToCUDA(const platform::Place &place) const;
     void ImmutableCPU() const {
       if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
@@ -291,7 +272,7 @@ class Vector {
     bool IsInCPU() const { return flag_ & kDataInCPU; }
     mutable std::vector<T> cpu_;
-    mutable paddle::memory::AllocationPtr gpu_;
+    mutable paddle::memory::allocation::AllocationPtr gpu_;
     mutable size_t gpu_memory_size_{0};
    mutable int flag_;
@@ -465,81 +446,5 @@ class Vector {
   mutable details::COWPtr<VectorData> m_;
 };
-#else  // PADDLE_WITH_CUDA
-template <typename T>
-class CPUVector : public std::vector<T, std::allocator<T>> {
- public:
-  CPUVector() : std::vector<T>() {}
-  CPUVector(size_t count, const T &value = T())  // NOLINT
-      : std::vector<T>(count, value) {}
-  CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
-  CPUVector(const std::vector<T> &other) : std::vector<T>(other) {}  // NOLINT
-  CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
-  CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
-  CPUVector(std::vector<T> &&other)  // NOLINT
-      : std::vector<T>(std::move(other)) {}
-  CPUVector &operator=(const CPUVector &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-  CPUVector &operator=(const std::vector<T> &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-  friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
-    std::stringstream ss;
-    for (auto v : other) {
-      os << v << " ";
-    }
-    return os;
-  }
-  T &operator[](size_t id) { return this->at(id); }
-  const T &operator[](size_t id) const { return this->at(id); }
-  template <typename D>
-  void Extend(const D &begin, const D &end) {
-    this->reserve(this->size() + size_t(end - begin));
-    this->insert(this->end(), begin, end);
-  }
-  const T *CUDAData(platform::Place place) const {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAData() method is not supported in CPU-only version."));
-  }
-  T *CUDAMutableData(platform::Place place) {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAMutableData() method is not supported in CPU-only "
-        "version."));
-  }
-  const T *Data(platform::Place place) const {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable(
-            "Vector::Data() method is not supported when not in CPUPlace."));
-    return this->data();
-  }
-  T *MutableData(platform::Place place) {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable("Vector::MutableData() method is not "
-                                      "supported when not in CPUPlace."));
-    return this->data();
-  }
-  const void *Handle() const { return static_cast<const void *>(this); }
-};
-template <typename T>
-using Vector = CPUVector<T>;
-#endif  // PADDLE_WITH_CUDA
 };  // namespace framework
 }  // namespace paddle
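With the CPU-only branch above deleted, Vector<T> is the single definition on every build; in CPU-only builds the copy helpers in mixed_vector.cc compile to no-ops behind the PADDLE_WITH_CUDA/PADDLE_WITH_HIP guards. A hedged usage sketch of the implicit sync contract (the values and the CUDAPlace index are illustrative assumptions; Data and CUDAData are the accessors referenced in this header):

// int64_t is one of the three explicitly instantiated element types.
paddle::framework::Vector<int64_t> rows({0, 4, 7});
const int64_t *cpu_ptr = rows.Data(paddle::platform::CPUPlace());
#if defined(PADDLE_WITH_CUDA)
// First device access triggers VectorData::CopyCPUDataToCUDA() internally.
const int64_t *gpu_ptr = rows.CUDAData(paddle::platform::CUDAPlace(0));
#endif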
@@ -25,6 +25,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#include "paddle/fluid/platform/device_context.h"
 template <typename T>
 using vec = paddle::framework::Vector<T>;
......
@@ -31,13 +31,9 @@ namespace operators {
 using Tensor = framework::Tensor;
 using SelectedRows = framework::SelectedRows;
 using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 template <typename T>
 using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif
 template <typename T>
 class FilterByInstagKernel : public framework::OpKernel<T> {
......
@@ -33,13 +33,9 @@ namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 template <typename T>
 using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif
 template <typename T>
 class ShuffleBatchKernel : public framework::OpKernel<T> {
......
@@ -8,7 +8,7 @@ endif()
 cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
 cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)
 cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
-cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce)
+cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
 cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
@@ -83,9 +83,7 @@ class DenseTensor : public TensorBase,
   /// \brief Returns the lod of the tensor.
   /// \return The lod of the tensor.
-  const std::vector<std::vector<size_t>>& lod() const noexcept {
-    return meta_.lod;
-  }
+  const LoD& lod() const noexcept { return meta_.lod; }
   /// \brief Returns the data type of the tensor.
   /// \return The data type of the tensor.
......
@@ -27,7 +27,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype,
 DenseTensorMeta::DenseTensorMeta(DataType dtype,
                                  const DDim& dims,
                                  DataLayout layout,
-                                 const std::vector<std::vector<size_t>>& lod)
+                                 const LoD& lod)
     : dims(dims), dtype(dtype), layout(layout), lod(lod) {}
 bool DenseTensorMeta::valid() const noexcept {
......
@@ -22,15 +22,16 @@ limitations under the License. */
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/framework/ddim.h"
 // Note: mixed_vector include many header now, LoD will be
 // used on CUDA device? Can we use small_vector here?
-// #include "paddle/fluid/framework/mixed_vector.h"
+// @zhanlve: Rollback to original LoD for now
+#include "paddle/fluid/framework/mixed_vector.h"
 namespace pten {
 using DDim = paddle::framework::DDim;
-using LoD = std::vector<std::vector<size_t>>;
+using LoD = std::vector<paddle::framework::Vector<size_t>>;
 /// \brief The meta data of dense tensor. Take the structure type
 /// and use all default operations.
 ///
@@ -44,7 +45,7 @@ struct DenseTensorMeta {
   DenseTensorMeta(DataType dtype,
                   const DDim& dims,
                   DataLayout layout,
-                  const std::vector<std::vector<size_t>>& lod);
+                  const LoD& lod);
   /// \brief Test whether the metadata is valid. Does not throw exceptions.
   /// \return Whether the metadata is valid.
......
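To illustrate the new signature, a minimal construction sketch mirroring the updated tests below (the concrete dims/dtype/layout values are illustrative assumptions; the types and the constructor come from this diff):

const pten::DDim dims({2, 1});
const pten::DataType dtype{pten::DataType::FLOAT32};
const pten::DataLayout layout{pten::DataLayout::NCHW};
const pten::LoD lod{{0, 2}};  // each level is a paddle::framework::Vector<size_t>
pten::DenseTensorMeta meta(dtype, dims, layout, lod);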
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "gtest/gtest.h"
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
+#include "paddle/pten/core/tensor_meta.h"
 namespace paddle {
 namespace tests {
@@ -30,7 +31,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
   const DDim dims({2, 1});
   const DataType dtype{DataType::FLOAT32};
   const DataLayout layout{DataLayout::NCHW};
-  const std::vector<std::vector<size_t>> lod{{0, 2}};
+  const pten::LoD lod{{0, 2}};
   DenseTensorMeta meta(dtype, dims, layout, lod);
   auto alloc =
@@ -46,7 +47,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
   CHECK(dense_tensor.lod().size() == lod_tensor.lod().size());
   CHECK(dense_tensor.lod()[0] ==
-        static_cast<std::vector<size_t>>((lod_tensor.lod()[0])));
+        static_cast<paddle::framework::Vector<size_t>>((lod_tensor.lod()[0])));
   CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type()));
   CHECK(dense_tensor.layout() ==
         pten::TransToPtenDataLayout(lod_tensor.layout()));
......
@@ -25,7 +25,7 @@ TEST(dense_tensor, meta) {
   const DataType dtype{DataType::INT8};
   const DataLayout layout{DataLayout::NHWC};
   // TODO(Shixiaowei02): need to check the lod is valid.
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
   DenseTensorMeta meta_0;
   CHECK(!meta_0.valid());
@@ -72,7 +72,7 @@ TEST(dense_tensor, ctor) {
   const DDim dims({1, 2});
   const DataType dtype{DataType::INT8};
   const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
   DenseTensorMeta meta(dtype, dims, layout, lod);
   auto alloc = std::make_shared<FancyAllocator>();
@@ -106,7 +106,7 @@ TEST(dense_tensor, resize) {
   const DDim dims({1, 2});
   const DataType dtype{DataType::INT8};
   const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
   DenseTensorMeta meta(dtype, dims, layout, lod);
   auto alloc = std::make_shared<FancyAllocator>();
@@ -126,7 +126,7 @@ TEST(dense_tensor, shallow_copy) {
   const DDim dims({1, 2});
   const DataType dtype{DataType::INT8};
   const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
   DenseTensorMeta meta(dtype, dims, layout, lod);
   auto alloc = std::make_shared<FancyAllocator>();
......