Unverified commit bbe879fc, authored by Zhanlue Yang, committed by GitHub

[Unify Tensors PR #2] Replaced pten::LoD with paddle::framework::LoD (#38275)

* Replaced pten::LoD with paddle::framework::LoD

* Overrode CPUVector with CUDAVector

* Refactored paddle::framework::Vector
Parent 6ff3596e
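In short, pten::LoD now aliases the same Vector-backed levels that fluid uses, so LoD data can move between pten and fluid (and between CPU and GPU) without element-wise conversion. Below is a minimal sketch of the resulting alias and its call-site behavior — illustration only, not part of the commit; it uses just the alias and the Vector interface visible in the diff that follows:

#include <cstddef>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"

namespace pten {
// After this commit (see tensor_meta.h below), each LoD level is a
// framework::Vector<size_t> rather than a plain std::vector<size_t>.
using LoD = std::vector<paddle::framework::Vector<size_t>>;
}  // namespace pten

int main() {
  // Braced initialization works exactly as it did with the old alias.
  pten::LoD lod{{0, 2, 4}, {0, 1, 3, 5, 7}};
  const std::size_t num_seqs = lod[0].size() - 1;  // 2 top-level sequences
  return num_seqs == 2 ? 0 : 1;
}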
@@ -91,15 +91,16 @@ endif()
cc_test(copy_same_tensor_test SRCS copy_same_tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
+cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context)
if(WITH_GPU)
-  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
elseif(WITH_ROCM)
-  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS place memory device_context tensor)
+  hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor)
else()
-  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
+  cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS mixed_vector place memory device_context tensor)
endif()
-cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim mixed_vector place tensor framework_proto version)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h"
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace framework {
template <typename T>
void CopyToCPUHelper(std::vector<T> *cpu_, paddle::memory::AllocationPtr *gpu_,
size_t *gpu_memory_size_) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Copy GPU data to CPU.
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
auto stream = dev_ctx->stream();
void *src = (*gpu_)->ptr();
void *dst = cpu_->data();
paddle::memory::Copy(platform::CPUPlace(), dst,
OptionalCUDAPlace(*gpu_).get(), src, *gpu_memory_size_,
stream);
dev_ctx->Wait();
#endif
}
template <typename T>
void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_,
size_t *gpu_memory_size_,
const platform::Place &place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void *src = cpu_->data();
*gpu_memory_size_ = cpu_->size() * sizeof(T);  // total bytes to copy
(*gpu_) = memory::Alloc(place, *gpu_memory_size_);
void *dst = (*gpu_)->ptr();
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(), dst,
platform::CPUPlace(), src, *gpu_memory_size_, stream);
#endif
}
#define INSTANTIATE_VECTOR_FOR_TYPE(__TYPE__) \
template <> \
void Vector<__TYPE__>::VectorData::CopyToCPU() const { \
CopyToCPUHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_); \
} \
\
template <> \
void Vector<__TYPE__>::VectorData::CopyCPUDataToCUDA( \
const platform::Place &place) const { \
CopyCPUDataToCUDAHelper<__TYPE__>(&cpu_, &gpu_, &gpu_memory_size_, place); \
}
INSTANTIATE_VECTOR_FOR_TYPE(size_t)
INSTANTIATE_VECTOR_FOR_TYPE(int)
INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
}  // namespace framework
} // namespace paddle
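This file exists so that CopyToCPU and CopyCPUDataToCUDA can be declared in mixed_vector.h but defined out-of-line, with INSTANTIATE_VECTOR_FOR_TYPE stamping out full specializations for the element types Vector actually needs; that keeps device_context.h and the copy machinery out of the header. A self-contained sketch of the same pattern, using a hypothetical Box<T> (not Paddle code):

#include <cstddef>
#include <cstdio>

// "Header" part: the member is declared, but not defined, for generic T.
template <typename T>
struct Box {
  void Sync() const;  // defined only for explicitly specialized types
  T value{};
};

// "Source" part: a helper does the real work...
template <typename T>
void SyncHelper(const T *v) {
  std::printf("synced %zu bytes\n", sizeof(*v));
}

// ...and a macro stamps out one full specialization per supported type,
// mirroring INSTANTIATE_VECTOR_FOR_TYPE above.
#define INSTANTIATE_BOX_FOR_TYPE(__TYPE__) \
  template <>                              \
  void Box<__TYPE__>::Sync() const { SyncHelper(&value); }

INSTANTIATE_BOX_FOR_TYPE(int)
INSTANTIATE_BOX_FOR_TYPE(std::size_t)

int main() {
  Box<int> b;
  b.Sync();  // links against the int specialization
  return 0;
}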
@@ -23,17 +23,21 @@ limitations under the License. */
#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace framework {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
const paddle::memory::allocation::AllocationPtr &gpu_) {
return gpu_ == nullptr
? paddle::none
: paddle::optional<platform::CUDAPlace>(
BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
}
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template <typename T>
@@ -198,10 +202,7 @@ class Vector {
std::mutex &Mutex() const { return mtx_; }
paddle::optional<platform::CUDAPlace> CUDAPlace() const {
-  return gpu_ == nullptr
-             ? paddle::none
-             : paddle::optional<platform::CUDAPlace>(
-                   BOOST_GET_CONST(platform::CUDAPlace, gpu_->place()));
+  return OptionalCUDAPlace(gpu_);
}
private:
@@ -212,17 +213,7 @@ class Vector {
kDirty = 0x10
};
-void CopyToCPU() const {
-  // COPY GPU Data To CPU
-  auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-      platform::DeviceContextPool::Instance().Get(gpu_->place()));
-  auto stream = dev_ctx->stream();
-  void *src = gpu_->ptr();
-  void *dst = cpu_.data();
-  paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
-                       gpu_memory_size_, stream);
-  dev_ctx->Wait();
-}
+void CopyToCPU() const;
void MutableCPU() {
if (IsInCUDA() && IsDirty()) {
@@ -260,17 +251,7 @@ class Vector {
}
}
-void CopyCPUDataToCUDA(const platform::Place &place) const {
-  void *src = cpu_.data();
-  gpu_memory_size_ = cpu_.size() * sizeof(T);
-  gpu_ = memory::Alloc(place, gpu_memory_size_);
-  void *dst = gpu_->ptr();
-  auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
-  auto stream = dev_ctx->stream();
-  paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
-                       gpu_memory_size_, stream);
-}
+void CopyCPUDataToCUDA(const platform::Place &place) const;
void ImmutableCPU() const {
if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or
@@ -291,7 +272,7 @@ class Vector {
bool IsInCPU() const { return flag_ & kDataInCPU; }
mutable std::vector<T> cpu_;
-mutable paddle::memory::AllocationPtr gpu_;
+mutable paddle::memory::allocation::AllocationPtr gpu_;
mutable size_t gpu_memory_size_{0};
mutable int flag_;
@@ -465,81 +446,5 @@ class Vector {
mutable details::COWPtr<VectorData> m_;
};
-#else // PADDLE_WITH_CUDA
-template <typename T>
-class CPUVector : public std::vector<T, std::allocator<T>> {
- public:
-  CPUVector() : std::vector<T>() {}
-  CPUVector(size_t count, const T &value = T())  // NOLINT
-      : std::vector<T>(count, value) {}
-  CPUVector(std::initializer_list<T> init) : std::vector<T>(init) {}
-  CPUVector(const std::vector<T> &other) : std::vector<T>(other) {}  // NOLINT
-  CPUVector(const CPUVector<T> &other) : std::vector<T>(other) {}
-  CPUVector(CPUVector<T> &&other) : std::vector<T>(std::move(other)) {}
-  CPUVector(std::vector<T> &&other)  // NOLINT
-      : std::vector<T>(std::move(other)) {}
-  CPUVector &operator=(const CPUVector &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-  CPUVector &operator=(const std::vector<T> &other) {
-    this->assign(other.begin(), other.end());
-    return *this;
-  }
-  friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
-    std::stringstream ss;
-    for (auto v : other) {
-      os << v << " ";
-    }
-    return os;
-  }
-  T &operator[](size_t id) { return this->at(id); }
-  const T &operator[](size_t id) const { return this->at(id); }
-  template <typename D>
-  void Extend(const D &begin, const D &end) {
-    this->reserve(this->size() + size_t(end - begin));
-    this->insert(this->end(), begin, end);
-  }
-  const T *CUDAData(platform::Place place) const {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAData() method is not supported in CPU-only version."));
-  }
-  T *CUDAMutableData(platform::Place place) {
-    PADDLE_THROW(platform::errors::Unavailable(
-        "Vector::CUDAMutableData() method is not supported in CPU-only "
-        "version."));
-  }
-  const T *Data(platform::Place place) const {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable(
-            "Vector::Data() method is not supported when not in CPUPlace."));
-    return this->data();
-  }
-  T *MutableData(platform::Place place) {
-    PADDLE_ENFORCE_EQ(
-        platform::is_cpu_place(place), true,
-        platform::errors::Unavailable("Vector::MutableData() method is not "
-                                      "supported when not in CPUPlace."));
-    return this->data();
-  }
-  const void *Handle() const { return static_cast<const void *>(this); }
-};
-template <typename T>
-using Vector = CPUVector<T>;
-#endif // PADDLE_WITH_CUDA
}  // namespace framework
} // namespace paddle
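As the header comment above puts it, Vector<T> exposes a std::vector-like interface whose data "will be synced implicitly inside". A rough usage sketch, assuming a CUDA build (push_back, operator[], and CUDAData are all part of the interface shown above):

#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/place.h"

void Demo() {
  paddle::framework::Vector<size_t> rows{0, 2, 5};
  rows.push_back(9);  // ordinary CPU-side mutation

  // First device access goes through the CopyCPUDataToCUDA path.
  const size_t *d_rows = rows.CUDAData(paddle::platform::CUDAPlace(0));

  // A later CPU access after a device-side write would trigger CopyToCPU;
  // this read is served from the cached CPU copy.
  size_t last = rows[3];
  (void)d_rows;
  (void)last;
}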
@@ -25,6 +25,7 @@
#include "gtest/gtest.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#include "paddle/fluid/platform/device_context.h"
template <typename T>
using vec = paddle::framework::Vector<T>;
......
@@ -31,13 +31,9 @@ namespace operators {
using Tensor = framework::Tensor;
using SelectedRows = framework::SelectedRows;
using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <typename T>
using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif
template <typename T>
class FilterByInstagKernel : public framework::OpKernel<T> {
......
@@ -33,13 +33,9 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <typename T>
using Vector = framework::Vector<T>;
-#else
-template <typename T>
-using Vector = framework::CPUVector<T>;
-#endif
template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> {
......
@@ -8,7 +8,7 @@ endif()
cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)
cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
-cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce)
+cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
@@ -83,9 +83,7 @@ class DenseTensor : public TensorBase,
/// \brief Returns the lod of the tensor.
/// \return The lod of the tensor.
-  const std::vector<std::vector<size_t>>& lod() const noexcept {
-    return meta_.lod;
-  }
+  const LoD& lod() const noexcept { return meta_.lod; }
/// \brief Returns the data type of the tensor.
/// \return The data type of the tensor.
......
@@ -27,7 +27,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype,
DenseTensorMeta::DenseTensorMeta(DataType dtype,
                                 const DDim& dims,
                                 DataLayout layout,
-                                 const std::vector<std::vector<size_t>>& lod)
+                                 const LoD& lod)
    : dims(dims), dtype(dtype), layout(layout), lod(lod) {}
bool DenseTensorMeta::valid() const noexcept {
......
@@ -22,15 +22,16 @@ limitations under the License. */
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/ddim.h"
-// Note: mixed_vector include many header now, LoD will be
-// used on CUDA device? Can we use small_vector here?
-// #include "paddle/fluid/framework/mixed_vector.h"
+// @zhanlve: Rollback to original LoD for now
+#include "paddle/fluid/framework/mixed_vector.h"
namespace pten {
using DDim = paddle::framework::DDim;
-using LoD = std::vector<std::vector<size_t>>;
+using LoD = std::vector<paddle::framework::Vector<size_t>>;
/// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations.
///
@@ -44,7 +45,7 @@ struct DenseTensorMeta {
DenseTensorMeta(DataType dtype,
                const DDim& dims,
                DataLayout layout,
-                const std::vector<std::vector<size_t>>& lod);
+                const LoD& lod);
/// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid.
......
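Call sites are mostly untouched by the alias change, as the updated tests below show. For illustration, a sketch of building a DenseTensorMeta against the new alias, using only the constructors visible in this diff:

#include "paddle/pten/core/tensor_meta.h"

void MakeMeta() {
  const pten::DDim dims({2, 1});
  // Same braced init as with the old std::vector<std::vector<size_t>> alias.
  const pten::LoD lod{{0, 2}};
  pten::DenseTensorMeta meta(
      pten::DataType::FLOAT32, dims, pten::DataLayout::NCHW, lod);
  (void)meta;
}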
@@ -15,6 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/pten/api/lib/utils/tensor_utils.h"
+#include "paddle/pten/core/tensor_meta.h"
namespace paddle {
namespace tests {
@@ -30,7 +31,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
const DDim dims({2, 1});
const DataType dtype{DataType::FLOAT32};
const DataLayout layout{DataLayout::NCHW};
-  const std::vector<std::vector<size_t>> lod{{0, 2}};
+  const pten::LoD lod{{0, 2}};
DenseTensorMeta meta(dtype, dims, layout, lod);
auto alloc =
@@ -46,7 +47,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) {
CHECK(dense_tensor.lod().size() == lod_tensor.lod().size());
CHECK(dense_tensor.lod()[0] ==
-        static_cast<std::vector<size_t>>((lod_tensor.lod()[0])));
+        static_cast<paddle::framework::Vector<size_t>>((lod_tensor.lod()[0])));
CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type()));
CHECK(dense_tensor.layout() ==
pten::TransToPtenDataLayout(lod_tensor.layout()));
......
@@ -25,7 +25,7 @@ TEST(dense_tensor, meta) {
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
// TODO(Shixiaowei02): need to check the lod is valid.
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
DenseTensorMeta meta_0;
CHECK(!meta_0.valid());
@@ -72,7 +72,7 @@ TEST(dense_tensor, ctor) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);
auto alloc = std::make_shared<FancyAllocator>();
@@ -106,7 +106,7 @@ TEST(dense_tensor, resize) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);
auto alloc = std::make_shared<FancyAllocator>();
@@ -126,7 +126,7 @@ TEST(dense_tensor, shallow_copy) {
const DDim dims({1, 2});
const DataType dtype{DataType::INT8};
const DataLayout layout{DataLayout::NHWC};
-  const std::vector<std::vector<size_t>> lod{};
+  const LoD lod{};
DenseTensorMeta meta(dtype, dims, layout, lod);
auto alloc = std::make_shared<FancyAllocator>();
......