Unverified commit 7a171e3c, authored by zyfncg, committed by GitHub

[Phi] Remove shared_storage (#42821)

* remove shared_storage

* fix bug

* fix rnn bug
Parent 155fe05b
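
The pattern repeated throughout this diff: `phi::DenseTensor` is now constructed from an allocation plus a `DenseTensorMeta` directly, instead of routing the allocation through the intrusive `SharedStorage` adapter. A minimal before/after sketch of the pattern, assembled from the hunks below (Paddle-internal signatures as shown in this commit, not compiled standalone):

    // Before: wrap the allocation in an intrusive SharedStorage first.
    auto dense_out = std::make_shared<phi::DenseTensor>(
        phi::make_intrusive<paddle::experimental::SharedStorage>(
            paddle::memory::Alloc(place, bytes_size)),
        std::move(tensor_meta));

    // After: hand the allocation to DenseTensor directly.
    auto dense_out = std::make_shared<phi::DenseTensor>(
        paddle::memory::Alloc(place, bytes_size), std::move(tensor_meta));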
@@ -90,9 +90,7 @@ void ScaleAPI(const paddle::experimental::Tensor& x, float scale, float bias,
   size_t bytes_size =
       phi::product(dense_tensor->dims()) * SizeOf(dense_tensor->dtype());
   auto dense_out = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          paddle::memory::Alloc(place, bytes_size)),
-      std::move(tensor_meta));
+      paddle::memory::Alloc(place, bytes_size), std::move(tensor_meta));
   // Handle Device Context
   const paddle::platform::Place& expected_kernel_place =
       Controller::Instance().GetExpectedPlace();
@@ -50,9 +50,7 @@ paddle::experimental::Tensor hook_function(
   auto place = t_dense->place();
   size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
   auto ret_dense = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          paddle::memory::Alloc(place, bytes_size)),
-      std::move(ret_meta));
+      paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));
   float* t_ptr = t_dense->mutable_data<float>(place);
   float* ret_ptr = ret_dense->mutable_data<float>(place);
@@ -46,9 +46,7 @@ paddle::experimental::Tensor hook_function(
   auto place = t_dense->place();
   size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
   auto ret_dense = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          paddle::memory::Alloc(place, bytes_size)),
-      std::move(ret_meta));
+      paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));
   float* t_ptr = t_dense->mutable_data<float>(place);
   float* ret_ptr = ret_dense->mutable_data<float>(place);
@@ -46,9 +46,7 @@ paddle::experimental::Tensor hook_function(
   auto place = t_dense->place();
   size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype());
   auto ret_dense = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          paddle::memory::Alloc(place, bytes_size)),
-      std::move(ret_meta));
+      paddle::memory::Alloc(place, bytes_size), std::move(ret_meta));
   float* t_ptr = t_dense->mutable_data<float>(place);
   float* ret_ptr = ret_dense->mutable_data<float>(place);
@@ -84,7 +84,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
   } else {
     // TODO(dev): we need enhance check for ddims.
     dense_tensor = std::make_shared<phi::DenseTensor>(
-        phi::make_intrusive<paddle::experimental::SharedStorage>(place),
+        std::make_shared<phi::Allocation>(),
         phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                              ddims));
   }
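
Where the old code kept an uninitialized `SharedStorage(place)` as a placeholder, the new code passes a default-constructed `std::make_shared<phi::Allocation>()`. Note the placeholder no longer records a place: the tensor carries only its meta until something actually backs it with memory. A sketch of the replacement, assuming the constructor used in the hunk above:

    // Meta-only tensor: empty allocation now, real memory attached later.
    auto dense_tensor = std::make_shared<phi::DenseTensor>(
        std::make_shared<phi::Allocation>(),
        phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                             ddims));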
@@ -65,14 +65,10 @@ Tensor to_sparse_coo_impl(const Tensor& x, const int64_t sparse_dim) {
   // 5. Prepare outputs
   // create empty SparseCooTensor
-  phi::DenseTensor non_zero_indices(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(indices_meta));
-  phi::DenseTensor non_zero_elements(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(elements_meta));
+  phi::DenseTensor non_zero_indices(std::make_shared<phi::Allocation>(),
+                                    std::move(indices_meta));
+  phi::DenseTensor non_zero_elements(std::make_shared<phi::Allocation>(),
+                                     std::move(elements_meta));
   auto coo = std::make_shared<phi::SparseCooTensor>(
       non_zero_indices, non_zero_elements, x.dims());
@@ -127,18 +123,12 @@ Tensor to_sparse_csr_impl(const Tensor& x) {
   // 5. Prepare outputs
   // create empty SparseCooTensor
-  phi::DenseTensor non_zero_crows(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(crows_meta));
-  phi::DenseTensor non_zero_cols(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(cols_meta));
-  phi::DenseTensor non_zero_elements(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(elements_meta));
+  phi::DenseTensor non_zero_crows(std::make_shared<phi::Allocation>(),
+                                  std::move(crows_meta));
+  phi::DenseTensor non_zero_cols(std::make_shared<phi::Allocation>(),
+                                 std::move(cols_meta));
+  phi::DenseTensor non_zero_elements(std::make_shared<phi::Allocation>(),
+                                     std::move(elements_meta));
   auto csr = std::make_shared<phi::SparseCsrTensor>(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
@@ -192,9 +182,7 @@ Tensor to_dense_impl(const Tensor& x) {
   // 5. Prepare outputs
   // create empty SparseCooTensor
   auto dense_out = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_key.backend())),
-      std::move(dense_meta));
+      std::make_shared<phi::Allocation>(), std::move(dense_meta));
   kernel_context.EmplaceBackOutput(dense_out.get());
   Tensor out;
@@ -65,79 +65,6 @@ class ExternalStorage : public phi::Storage {
   int64_t size_{0};
 };
 
-class SharedStorage : public phi::Storage {
- public:
-  explicit SharedStorage(
-      const std::shared_ptr<paddle::memory::Allocation>& allocation)
-      : Storage(allocation) {
-    CHECK(allocation);
-    place_ = allocation->place();
-    size_ = allocation->size();
-  }
-
-  // In order to be compatible with the original Tensor design and execution
-  // system, we need to allow the uninitialized SharedStorage to exist,
-  // and it can be removed after the compatibility phase is over in the future
-  explicit SharedStorage(const phi::Place& place) { place_ = place; }
-
-  void Realloc(size_t n) override {
-    this->Clear();
-    data_ = paddle::memory::AllocShared(place(), n);
-    size_ = n;
-  }
-
-  static const char* name() { return "SharedStorage"; }
-
-  void Clear() override {
-    data_ = nullptr;
-    size_ = 0;
-  }
-
-  void set_data_shared(
-      const std::shared_ptr<paddle::memory::Allocation>& holder) override {
-    data_ = holder;
-    if (holder) {
-      size_ = holder->size();
-      place_ = holder->place();
-    }
-  }
-
-  std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
-    size_ = 0;
-    place_ = phi::Place();
-    return std::move(data_);
-  }
-
-  size_t size() const noexcept override {
-    return data_ ? data_->size() : size_;
-  }
-  const phi::Place& place() const override {
-    return data_ ? data_->place() : place_;
-  }
-  bool OwnsMemory() const noexcept override { return false; }
-
-  const std::shared_ptr<paddle::memory::Allocation>& GetAllocation() {
-    return data_;
-  }
-
-  // Temporary method: For compatible with fluid Tensor and improve performance
-  void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation) {
-    data_ = allocation;
-    size_ = allocation->size();
-    place_ = allocation->place();
-  }
-
-  // Temporary method: For compatible with fluid Tensor and improve performance
-  void ResetAllocationPlace(const phi::Place& place) { place_ = place; }
-
-  // Temporary method: For compatible with fluid Tensor and improve performance
-  void Reset() { this->Clear(); }
-
- private:
-  phi::Place place_;
-  int64_t size_{0};
-};
-
 class TensorStorage : public paddle::memory::allocation::Allocation {
  public:
   explicit TensorStorage(phi::intrusive_ptr<phi::Storage> storage)
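
The deleted `SharedStorage` never owned its memory (`OwnsMemory()` returns false); it only mirrored a `std::shared_ptr<paddle::memory::Allocation>` plus a cached place and size so that phi tensors could share a fluid tensor's holder. Once `phi::DenseTensor` holds the `shared_ptr<Allocation>` itself, plain reference counting provides the same sharing. A toy, standard-C++ sketch of that ownership model (all names here are illustrative, not Paddle's):

    #include <cstddef>
    #include <memory>

    struct Allocation {  // stand-in for paddle::memory::Allocation
      void* ptr = nullptr;
      size_t size = 0;
    };

    struct Tensor {  // stand-in for phi::DenseTensor
      std::shared_ptr<Allocation> holder;  // held directly, no Storage adapter
      bool initialized() const { return holder && holder->ptr; }
    };

    int main() {
      auto buf = std::make_shared<Allocation>();
      Tensor a{buf};  // two tensors share one buffer purely through
      Tensor b{buf};  // the shared_ptr reference count
      return a.holder.use_count() == 3 ? 0 : 1;  // buf, a, b
    }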
@@ -118,7 +118,7 @@ void GetShuffledInput(const DeviceContext& dev_ctx,
   std::vector<int> perm_axis(input.dims().size());
   GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis);
-  shuffled_input->ResizeAndAllocate(shuffled_dims);
+  shuffled_input->Resize(shuffled_dims);
   dev_ctx.template Alloc<OutT>(shuffled_input);
   phi::funcs::TransposeNormal<DeviceContext, OutT> trans;
@@ -132,10 +132,7 @@ void HandleLargeDim(const DeviceContext& dev_ctx,
                     const std::vector<int64_t>& dims,
                     bool keep_dim) {
   // shuffle the reduced dim to the end
-  phi::DenseTensor shuffled_input = phi::DenseTensor(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(input.place()),
-      input.meta());
+  phi::DenseTensor shuffled_input;
   GetShuffledInput<DeviceContext, OutT>(dev_ctx, input, &shuffled_input, dims);
   // transpose to 2D tensor whose shape is {unreduced, reduced}.
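
The two reduce-kernel hunks above show the replacement for place-carrying scratch tensors: default-construct, `Resize`, then let the device context allocate. A sketch under the same assumptions; `dev_ctx` is a dependent `DeviceContext` object, which is why the call needs the `template` keyword:

    phi::DenseTensor shuffled_input;                // no place, no storage yet
    shuffled_input.Resize(shuffled_dims);           // shape only
    dev_ctx.template Alloc<OutT>(&shuffled_input);  // context supplies the place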
@@ -99,7 +99,7 @@ class RNNDescriptors {
     // ------------------- cudnn dropout descriptors ---------------------
     size_t state_size;
-    bool is_initialized = dropout_state->IsInitialized();
+    bool is_initialized = dropout_state->initialized();
     if (!is_test_ && !is_initialized) {
 #ifdef PADDLE_WITH_HIP
       PADDLE_ENFORCE_GPU_SUCCESS(
@@ -171,24 +171,17 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
   int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
   int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
   const auto place = dev_ctx.GetPlace();
-  DenseTensorMeta crows_meta(
-      DataType::INT64, {batchs * (rows + 1)}, DataLayout::NCHW);
-  DenseTensorMeta cols_meta(DataType::INT64, {non_zero_num}, DataLayout::NCHW);
-  DenseTensorMeta values_meta(
-      x.dtype(), {non_zero_num}, x.non_zero_elements().layout());
-  phi::DenseTensor non_zero_crows(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(crows_meta));
-  phi::DenseTensor non_zero_cols(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(cols_meta));
-  phi::DenseTensor non_zero_elements(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(values_meta));
-  int64_t* csr_crows_data = non_zero_crows.mutable_data<int64_t>(place);
-  int64_t* csr_cols_data = non_zero_cols.mutable_data<int64_t>(place);
-  T* csr_values_data = non_zero_elements.mutable_data<T>(place);
+  phi::DenseTensor non_zero_crows;
+  non_zero_crows.Resize({batchs * (rows + 1)});
+  int64_t* csr_crows_data = dev_ctx.template Alloc<int64_t>(&non_zero_crows);
+  phi::DenseTensor non_zero_cols;
+  non_zero_cols.Resize({non_zero_num});
+  int64_t* csr_cols_data = dev_ctx.template Alloc<int64_t>(&non_zero_cols);
+  phi::DenseTensor non_zero_elements;
+  non_zero_elements.Resize({non_zero_num});
+  T* csr_values_data = dev_ctx.template Alloc<T>(&non_zero_elements);
   const auto& coo_indices = x.non_zero_indices();
   const auto& coo_values = x.non_zero_elements();
@@ -173,20 +173,12 @@ void DenseToSparseCooKernel(const Context& dev_ctx,
   const auto values_dims =
       phi::funcs::sparse::InferDenseDims(x_dims, sparse_dim, non_zero_num);
-  DenseTensorMeta indices_meta(DataType::INT64,
-                               {sparse_dim, static_cast<int64_t>(non_zero_num)},
-                               DataLayout::NCHW);
-  DenseTensorMeta values_meta(x.meta().dtype, values_dims, x.meta().layout);
-  phi::DenseTensor indices(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(indices_meta));
-  phi::DenseTensor values(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(values_meta));
-  int64_t* indices_data = indices.mutable_data<int64_t>(place);
-  T* sparse_data = values.mutable_data<T>(place);
+  phi::DenseTensor indices = phi::Empty<int64_t>(
+      dev_ctx, {sparse_dim, static_cast<int64_t>(non_zero_num)});
+  int64_t* indices_data = indices.data<int64_t>();
+  phi::DenseTensor values;
+  values.Resize(values_dims);
+  T* sparse_data = dev_ctx.template Alloc<T>(&values);
   // 3. calc indices by indexs and get values by indexs
   config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num, 1);
@@ -382,24 +374,13 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
   int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
   int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
-  const auto place = dev_ctx.GetPlace();
-  DenseTensorMeta crows_meta(
-      DataType::INT64, {batchs * (rows + 1)}, DataLayout::NCHW);
-  DenseTensorMeta cols_meta(DataType::INT64, {non_zero_num}, DataLayout::NCHW);
-  DenseTensorMeta values_meta(
-      x.dtype(), {non_zero_num}, x.non_zero_elements().layout());
-  phi::DenseTensor non_zero_crows(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(crows_meta));
-  phi::DenseTensor non_zero_cols(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(cols_meta));
-  phi::DenseTensor non_zero_elements(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-      std::move(values_meta));
-  int64_t* csr_crows_data = non_zero_crows.mutable_data<int64_t>(place);
-  int64_t* csr_cols_data = non_zero_cols.mutable_data<int64_t>(place);
-  T* csr_values_data = non_zero_elements.mutable_data<T>(place);
+  phi::DenseTensor non_zero_crows =
+      phi::Empty<int64_t>(dev_ctx, {batchs * (rows + 1)});
+  phi::DenseTensor non_zero_cols = phi::Empty<int64_t>(dev_ctx, {non_zero_num});
+  phi::DenseTensor non_zero_elements = phi::Empty<T>(dev_ctx, {non_zero_num});
+  int64_t* csr_crows_data = non_zero_crows.data<int64_t>();
+  int64_t* csr_cols_data = non_zero_cols.data<int64_t>();
+  T* csr_values_data = non_zero_elements.data<T>();
   const auto& coo_indices = x.non_zero_indices();
   const auto& coo_values = x.non_zero_elements();
@@ -416,10 +397,8 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
   auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, batchs, 1);
   if (batchs > 1) {
-    DenseTensorMeta batchs_meta(DataType::INT64, {batchs}, DataLayout::NCHW);
-    phi::DenseTensor batchs_offset(
-        phi::make_intrusive<paddle::experimental::SharedStorage>(place),
-        std::move(batchs_meta));
-    int64_t* batchs_offset_ptr = batchs_offset.mutable_data<int64_t>(place);
+    phi::DenseTensor batchs_offset = phi::Empty<int64_t>(dev_ctx, {batchs});
+    int64_t* batchs_offset_ptr = batchs_offset.data<int64_t>();
     GetBatchsOffset<<<config.block_per_grid.x,
                       config.thread_per_block.x,
                       0,
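
On the GPU sparse path the temporaries collapse further into `phi::Empty<T>(dev_ctx, shape)`, which sizes and allocates in one call; `data<T>()` then replaces `mutable_data<T>(place)` because the memory already exists. A sketch assuming the `phi::Empty` overload used in the hunks above:

    phi::DenseTensor non_zero_cols = phi::Empty<int64_t>(dev_ctx, {non_zero_num});
    int64_t* csr_cols_data = non_zero_cols.data<int64_t>();  // already allocated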
@@ -69,10 +69,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
   kernel_context.EmplaceBackAttr(bias);
   kernel_context.EmplaceBackAttr(bias_after_scale);
-  auto dense_out = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_backend)),
-      phi::DenseTensorMeta());
+  auto dense_out = std::make_shared<phi::DenseTensor>();
   phi::MetaTensor meta_out(dense_out.get());
   phi::UnchangedInferMeta(*dense_x, &meta_out);
   kernel_context.EmplaceBackOutput(dense_out.get());
@@ -236,10 +233,7 @@ Tensor scale_switch_case(const Tensor& x,
   auto dense_x = std::dynamic_pointer_cast<phi::DenseTensor>(x.impl());
-  auto dense_out = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(kernel_backend)),
-      phi::DenseTensorMeta());
+  auto dense_out = std::make_shared<phi::DenseTensor>();
   phi::MetaTensor meta_out(dense_out.get());
   phi::UnchangedInferMeta(*dense_x, &meta_out);
@@ -264,10 +264,7 @@ TEST(CustomKernel, custom_kernel_dot) {
   kernel_context.EmplaceBackAttr(fake_attr_int64_vec);
   kernel_context.EmplaceBackAttr(fake_attr_int_vec);
-  auto dense_out = std::make_shared<phi::DenseTensor>(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          phi::TransToPhiPlace(backend)),
-      phi::DenseTensorMeta());
+  auto dense_out = std::make_shared<phi::DenseTensor>();
   phi::MetaTensor meta_out(dense_out.get());
   phi::DotInferMeta(*dense_x, *dense_y, &meta_out);
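
For kernel outputs, even the empty-allocation placeholder disappears: a default-constructed `phi::DenseTensor` suffices, because InferMeta fills in dtype and shape and the kernel allocates on first write. The flow, taken from the hunks above (the InferMeta function varies per op):

    auto dense_out = std::make_shared<phi::DenseTensor>();  // no meta, no memory
    phi::MetaTensor meta_out(dense_out.get());
    phi::UnchangedInferMeta(*dense_x, &meta_out);  // writes meta onto the output
    kernel_context.EmplaceBackOutput(dense_out.get());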