From a7edb3f3292e547e34087002816608995b75a6e9 Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Thu, 27 Jan 2022 13:02:19 +0800 Subject: [PATCH] Add SparseCooTensor and SparseCsrTensor (#38906) * fix bug: 1. atten: set the default value of attn_dropout_rate to None 2. ffn: add activation parameter * for pure fp16 * Add a SparseCsrTensor * remove unused functional * remove const * remove SetMemoberTensor * remove non_zero_nums_, the number of non zero elements of each batch can be obtained from the crows * SparseCooTensor * add SetMember * merge upstream; add SetMember * merge upstream * merge upstream; add newline at end of file * add newline at end of file * remove newline at end of file * remove newline at end of file * stash * user pten::framework::make_ddim * user pten::framework::make_ddim * merge upstream; use the latest mutable_data * merge upstream; use the latest mutable_data * return mutable dense tensor --- paddle/pten/CMakeLists.txt | 2 +- paddle/pten/common/layout.h | 10 + paddle/pten/core/CMakeLists.txt | 2 + paddle/pten/core/kernel_utils.h | 16 ++ paddle/pten/core/sparse_coo_tensor.cc | 107 ++++++++++ paddle/pten/core/sparse_coo_tensor.h | 180 +++++++++++++++++ paddle/pten/core/sparse_csr_tensor.cc | 103 ++++++++++ paddle/pten/core/sparse_csr_tensor.h | 184 ++++++++++++++++++ paddle/pten/kernels/copy_kernel.h | 7 + paddle/pten/kernels/gpu/copy_kernel.cu | 18 ++ paddle/pten/tests/core/CMakeLists.txt | 2 + .../pten/tests/core/test_sparse_coo_tensor.cc | 93 +++++++++ .../pten/tests/core/test_sparse_csr_tensor.cc | 108 ++++++++++ .../incubate/nn/layer/fused_transformer.py | 2 +- 14 files changed, 832 insertions(+), 2 deletions(-) create mode 100644 paddle/pten/core/sparse_coo_tensor.cc create mode 100644 paddle/pten/core/sparse_coo_tensor.h create mode 100644 paddle/pten/core/sparse_csr_tensor.cc create mode 100644 paddle/pten/core/sparse_csr_tensor.h create mode 100644 paddle/pten/tests/core/test_sparse_coo_tensor.cc create mode 100644 
paddle/pten/tests/core/test_sparse_csr_tensor.cc diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 78e86c12cb4..5722993aec0 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -21,7 +21,7 @@ add_subdirectory(ops) add_subdirectory(tests) # make an unity target for compile deps -set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos) +set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor) get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) # keep this message for debug, remove it later if needless message(STATUS "All standard pten kernels: ${pten_kernels}") diff --git a/paddle/pten/common/layout.h b/paddle/pten/common/layout.h index cfcc4f76693..57aa8863cb4 100644 --- a/paddle/pten/common/layout.h +++ b/paddle/pten/common/layout.h @@ -27,6 +27,8 @@ enum class DataLayout { NHWC, NCHW, MKLDNN, + SPARSE_COO, + SPARSE_CSR, NUM_DATA_LAYOUTS, // See Note [ Why we need ALL in basic kernel key member? 
] ALL_LAYOUT = UNDEFINED, @@ -64,6 +66,10 @@ inline DataLayout StringToDataLayout(const std::string& str) { return DataLayout::kAnyLayout; } else if (s == "MKLDNNLAYOUT") { return DataLayout::kMKLDNN; + } else if (s == "SPARSE_COO") { + return DataLayout::SPARSE_COO; + } else if (s == "SPARSE_CSR") { + return DataLayout::SPARSE_CSR; } else { PD_THROW("Unknown data layout type string: ", s, "."); } @@ -79,6 +85,10 @@ inline std::string DataLayoutToString(const DataLayout& layout) { return "Undefined(AnyLayout)"; case DataLayout::kMKLDNN: return "MKLDNN"; + case DataLayout::SPARSE_COO: + return "SPARSE_COO"; + case DataLayout::SPARSE_CSR: + return "SPARSE_CSR"; default: PD_THROW("Unknown Data Layout type ", static_cast(layout), "."); } diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index f83b80fca1f..ab6a9931973 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -22,6 +22,8 @@ cc_library(kernel_context SRCS kernel_context.cc DEPS pten_enforce pten_context) cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS pten_enforce) cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce mixed_vector) cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce mixed_vector) +cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) +cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS convert_utils tensor_meta tensor_base) cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base ) diff --git a/paddle/pten/core/kernel_utils.h b/paddle/pten/core/kernel_utils.h index 85fe2f22836..d48572db5a2 100644 --- a/paddle/pten/core/kernel_utils.h +++ b/paddle/pten/core/kernel_utils.h @@ -20,6 +20,8 @@ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_context.h" #include "paddle/pten/core/kernel_def.h" +#include 
"paddle/pten/core/sparse_coo_tensor.h" +#include "paddle/pten/core/sparse_csr_tensor.h" // See Note [ Why still include the fluid headers? ] #include "paddle/pten/core/enforce.h" @@ -213,6 +215,14 @@ struct KernelImpl { PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor); + + PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(SparseCooTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SparseCooTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SparseCooTensor); + + PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(SparseCsrTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SparseCsrTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SparseCsrTensor); // TODO(chenweihang): adapt SelectedRows // PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRowsTensor); @@ -234,6 +244,12 @@ struct KernelImpl { PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(DenseTensor); + + PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(SparseCooTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(SparseCooTensor); + + PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(SparseCsrTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(SparseCsrTensor); // TODO(chenweihang): adapt SelectedRows // PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(SelectedRowsTensor); diff --git a/paddle/pten/core/sparse_coo_tensor.cc b/paddle/pten/core/sparse_coo_tensor.cc new file mode 100644 index 00000000000..0966fe1d80f --- /dev/null +++ b/paddle/pten/core/sparse_coo_tensor.cc @@ -0,0 +1,107 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/core/sparse_coo_tensor.h" + +namespace pten { + +SparseCooTensor::SparseCooTensor(const DenseTensor& non_zero_indices, + const DenseTensor& non_zero_elements, + const DDim& dims) + : non_zero_indices_(non_zero_indices), + non_zero_elements_(non_zero_elements), + coalesced_(false), + dims_(dims) {} + +SparseCooTensor::SparseCooTensor(DenseTensor&& non_zero_indices, + DenseTensor&& non_zero_elements, + const DDim& dims) + : non_zero_indices_(non_zero_indices), + non_zero_elements_(non_zero_elements), + coalesced_(false), + dims_(dims) {} + +SparseCooTensor::SparseCooTensor(const SparseCooTensor& other) + : non_zero_indices_(other.non_zero_indices_), + non_zero_elements_(other.non_zero_elements_), + dims_(other.dims_) { + this->coalesced_ = other.coalesced_; +} + +SparseCooTensor SparseCooTensor::operator=(const SparseCooTensor& other) { + this->dims_ = other.dims_; + this->non_zero_indices_ = other.non_zero_indices_; + this->non_zero_elements_ = other.non_zero_elements_; + this->coalesced_ = other.coalesced_; + return *this; +} + +int64_t SparseCooTensor::nnz() const { + const auto indices_dims = non_zero_indices_.dims(); + if (indices_dims.size() == 0) { + return 0; + } else if (indices_dims.size() == 1) { + return indices_dims[0]; + } else { + return indices_dims[1]; + } +} + +void SparseCooTensor::Resize(const DDim& dense_dims, + const int64_t sparse_dim, + const int64_t non_zero_num) { + PADDLE_ENFORCE_GE(non_zero_num, + this->nnz(), + paddle::platform::errors::InvalidArgument( + "the non_zero_num must be greater than or equal to the " + 
"origin non_zero_num.")); + PADDLE_ENFORCE_GE(sparse_dim, + 1, + paddle::platform::errors::InvalidArgument( + "the sparse_dim must be greater than or equal 1.")); + PADDLE_ENFORCE_LE( + sparse_dim, + dense_dims.size(), + paddle::platform::errors::InvalidArgument( + "the sparse_dim must be less than or equal dense_dims.")); + + DDim indices_dims = pten::framework::make_ddim({sparse_dim, non_zero_num}); + auto dense_dim = dense_dims.size() - sparse_dim; + DDim values_dims; + if (dense_dim) { + std::vector dense_dim_vec(dense_dim + 1); + dense_dim_vec[0] = non_zero_num; + memcpy(&dense_dim_vec[1], + dense_dims.Get() + sparse_dim, + dense_dim * sizeof(dense_dims[0])); + values_dims = pten::framework::make_ddim(dense_dim_vec); + } else { + values_dims = pten::framework::make_ddim({non_zero_num}); + } + + this->non_zero_indices_.Resize(indices_dims); + this->non_zero_elements_.Resize(values_dims); +} + +void SparseCooTensor::SetMember(const DenseTensor& non_zero_indices, + const DenseTensor& non_zero_elements, + const DDim& dims, + const bool coalesced) { + this->non_zero_indices_ = non_zero_indices; + this->non_zero_elements_ = non_zero_elements; + this->dims_ = dims; + this->coalesced_ = coalesced; +} + +} // namespace pten diff --git a/paddle/pten/core/sparse_coo_tensor.h b/paddle/pten/core/sparse_coo_tensor.h new file mode 100644 index 00000000000..ff4241d3287 --- /dev/null +++ b/paddle/pten/core/sparse_coo_tensor.h @@ -0,0 +1,180 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_base.h" +#include "paddle/pten/core/tensor_meta.h" + +namespace pten { + +/// \brief The SparseCooTensor uses two DenseTensors to represent +/// the non zero elements and the indices of non zero elements of +/// original DenseTensor. +/// where non_zero_elements_ represents the non zero elements of original +/// DenseTensor. +/// non_zero_indices_ represents the indices of non zero elements in original +/// DenseTensor. +class SparseCooTensor : public TensorBase, + public TypeInfoTraits { + public: + /// \brief Create the sparse coo tensor + /// \param non_zero_indices The indices of non zero elements in original dense + /// tensor. + /// \param non_zero_elements The non zero elements of original dense tensor. + /// \param dims The dims of original dense tensor. + SparseCooTensor(const DenseTensor& non_zero_indices, + const DenseTensor& non_zero_elements, + const DDim& dims); + + /// \brief Create the sparse coo tensor + /// \param non_zero_indices The indices of non zero elements in original dense + /// tensor. + /// \param non_zero_elements The non zero elements of original dense tensor. + /// \param dims The dims of original dense tensor. + SparseCooTensor(DenseTensor&& non_zero_indices, + DenseTensor&& non_zero_elements, + const DDim& dims); + + /// \brief SparseCooTensor shallow copy constructor. + SparseCooTensor(const SparseCooTensor& other); + + /// \brief move constructor + SparseCooTensor(SparseCooTensor&& other); + + /// \brief SparseCooTensor shallow copy assignment. + SparseCooTensor operator=(const SparseCooTensor& other); + + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~SparseCooTensor() = default; + + /// \brief Returns the indices of non zero elemetns in original dense tensor. 
+ /// \return The indices of non zero elemetns in original dense tensor. + const DenseTensor& non_zero_indices() const { return non_zero_indices_; } + + /// \brief Returns the non zero elemetns in original dense tensor. + /// \return The non zero elemetns in original dense tensor. + const DenseTensor& non_zero_elements() const { return non_zero_elements_; } + + /// \brief Returns whether the indices has coalesced + /// \return whether the indices has coalesced + bool coalesced() const { return coalesced_; } + + /// \brief Set the coalesced + /// \param coalesced whether the indices has coalesced + void SetCoalesced(const bool coalesced) { coalesced_ = coalesced; } + + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. + static const char* name() { return "SparseCooTensor"; } + + /// \brief Returns the total number of non zero elements in original + /// DenseTensor + int64_t nnz() const; + + /// \brief Return the number of elements contained in original dense tensor + /// \return The number of elements contained in original dense tensor + int64_t numel() const { return product(dims_); } + + /// \brief Returns the dims of the original dense tensor. + /// \return The dims of the original dense tensor. + const DDim& dims() const noexcept override { return dims_; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType dtype() const noexcept override { + return non_zero_elements_.dtype(); + } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const { return DataLayout::SPARSE_COO; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const override { return non_zero_elements_.place(); } + + /// \brief Test whether the non_zero_elements_ metadata is valid. + /// \return Whether the non_zero_elements_ metadata is valid. 
+ bool valid() const noexcept { return non_zero_elements_.valid(); } + + /// \brief Test whether the non_zero_elements_ storage is allocated. + /// return Whether the non_zero_elements_ storage is allocated. + bool initialized() const override { return non_zero_elements_.initialized(); } + + /// \brief resize sparse coo tensor. + /// \param dense_dims The dims of original dense tensor. + /// \param sparse_dim number of sparse dimensions + /// \param non_zero_num The total number of non zero element + void Resize(const DDim& dense_dim, + const int64_t sparse_dim, + const int64_t non_zero_num); + + /// \brief set the member of sparse coo tensor. + /// \param non_zero_indices The indices of non zero elements in original dense + /// tensor. + /// \param non_zero_elements The non zero elements of original dense tensor. + /// \param dims The dims of original dense tensor. + /// \param coalesced whether the indices has coalesced. + void SetMember(const DenseTensor& non_zero_indices, + const DenseTensor& non_zero_elements, + const DDim& dims, + const bool coalesced = false); + + /// \brief Get a mutable pointer of non_zero_indices_. + /// return a mutable pointer of non_zero_indices_. + DenseTensor* mutable_non_zero_indices() { return &non_zero_indices_; } + + /// \brief Get a mutable pointer of non_zero_elements. + /// return a mutable pointer of non_zero_elements. 
+ DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; } + + private: + // save the indices of non zero elements in original dense tensor + DenseTensor non_zero_indices_; + // save the non zero elements of original dense tensor + DenseTensor non_zero_elements_; + /// whether the indices has coalesced + bool coalesced_ = false; + // save the number of non zero elements in each batch + DDim dims_; + /* --------------------------- */ + /* example: non zero element is scalar */ + /* --------------------------- */ + /* + dense_x = [[0, 1, 0, 0], + [2, 0, 0, 3], + [0, 0, 4, 0], + [0, 5, 0, 6]] + dims_ = (4, 4) + non_zero_elements_ = [1, 2, 3, 4, 5 ,6] + non_zero_indices_ = [[0, 1, 1, 2, 3, 3], + [1, 0, 3, 2, 1, 3]] + */ + /* --------------------------- */ + /* example: non zero element is tensor */ + /* --------------------------- */ + /* + dense_x = [[0, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 4, 0], + [0, 0, 0, 0]] + dims_ = (4, 4) + non_zero_elements_ = [[0, 1, 0, 0], [0, 0, 4, 0]] + non_zero_indices_ = [0, 2], + */ +}; + +} // namespace pten diff --git a/paddle/pten/core/sparse_csr_tensor.cc b/paddle/pten/core/sparse_csr_tensor.cc new file mode 100644 index 00000000000..9cb9163cb6f --- /dev/null +++ b/paddle/pten/core/sparse_csr_tensor.cc @@ -0,0 +1,103 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/pten/core/sparse_csr_tensor.h" + +namespace pten { + +inline void check_shape(const DDim& dims) { + bool valid = dims.size() == 2 || dims.size() == 3; + + PADDLE_ENFORCE(valid, + paddle::platform::errors::InvalidArgument( + "the SparseCsrTensor only support 2-D Tensor.")); +} +#define Check(non_zero_crows, non_zero_cols, non_zero_elements, dims) \ + { \ + check_shape(dims); \ + PADDLE_ENFORCE_EQ(dims.size(), \ + 2, \ + paddle::platform::errors::InvalidArgument( \ + "the SparseCsrTensor only support 2-D Tensor.")); \ + PADDLE_ENFORCE_EQ( \ + non_zero_cols.place(), \ + non_zero_crows.place(), \ + paddle::platform::errors::InvalidArgument( \ + "non_zero_crows and non_zero_cols must have the same place.")); \ + PADDLE_ENFORCE_EQ( \ + non_zero_cols.place(), \ + non_zero_elements.place(), \ + paddle::platform::errors::InvalidArgument( \ + "non_zero_cols and non_zero_elements must have the same place.")); \ + } + +SparseCsrTensor::SparseCsrTensor(const DenseTensor& non_zero_crows, + const DenseTensor& non_zero_cols, + const DenseTensor& non_zero_elements, + const DDim& dims) + : non_zero_crows_(non_zero_crows), + non_zero_cols_(non_zero_cols), + non_zero_elements_(non_zero_elements), + dims_(dims) { + Check(non_zero_crows_, non_zero_cols_, non_zero_elements_, dims_); +} + +SparseCsrTensor::SparseCsrTensor(const SparseCsrTensor& other) + : non_zero_crows_(other.non_zero_crows_), + non_zero_cols_(other.non_zero_cols_), + non_zero_elements_(other.non_zero_elements_), + dims_(other.dims_) {} + +SparseCsrTensor& SparseCsrTensor::operator=(const SparseCsrTensor& other) { + this->dims_ = other.dims(); + this->non_zero_crows_ = other.non_zero_crows(); + this->non_zero_cols_ = other.non_zero_cols(); + this->non_zero_elements_ = other.non_zero_elements(); + return *this; +} + +void SparseCsrTensor::Resize(const DDim& dense_dims, + const int64_t non_zero_num) { + PADDLE_ENFORCE(this->initialized(), + paddle::platform::errors::InvalidArgument( + "the 
SparseCsrTensor must be initialized when call Resize " + "function.")); + check_shape(dense_dims); + + int64_t crows_size = dense_dims[0] + 1; + if (dense_dims.size() == 3) { + // batch_size = dims[0] + crows_size = dense_dims[0] * (dense_dims[1] + 1); + } + + DDim crows_dims = pten::framework::make_ddim({crows_size}); + this->non_zero_crows_.Resize(crows_dims); + + DDim col_dims = pten::framework::make_ddim({non_zero_num}); + this->non_zero_cols_.Resize(col_dims); + this->non_zero_elements_.Resize(col_dims); +} + +void SparseCsrTensor::SetMember(const DenseTensor& non_zero_crows, + const DenseTensor& non_zero_cols, + const DenseTensor& non_zero_elements, + const DDim& dims) { + Check(non_zero_crows, non_zero_cols, non_zero_elements, dims); + this->non_zero_crows_ = non_zero_crows; + this->non_zero_cols_ = non_zero_cols; + this->non_zero_elements_ = non_zero_elements; + this->dims_ = dims; +} + +} // namespace pten diff --git a/paddle/pten/core/sparse_csr_tensor.h b/paddle/pten/core/sparse_csr_tensor.h new file mode 100644 index 00000000000..9a3322ba17c --- /dev/null +++ b/paddle/pten/core/sparse_csr_tensor.h @@ -0,0 +1,184 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/pten/core/allocator.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_base.h" +#include "paddle/pten/core/tensor_meta.h" + +namespace pten { + +class CompatibleDenseTensorUtils; + +/// \brief The SparseCsrTensor uses three 1-D DenseTensors to represent +/// the row index , column index and non zero elements of the original +/// DenseTensor. +/// where non_zero_crows_ represents the compressed row index, +/// non_zero_cols_ represents the column index of non zero elements in original +/// DenseTensor, +/// non_zero_elements_ represents the non zero elements of original DenseTensor. +class SparseCsrTensor : public TensorBase, + public TypeInfoTraits { + public: + /// \brief Because sparse csr tensor is a resource handle, we provide a + /// default + /// move constructor to support move semantics. + SparseCsrTensor(SparseCsrTensor&& other) = default; + + /// \brief SparseCsrTensor shallow copy constructor. + SparseCsrTensor(const SparseCsrTensor& other); + + /// \brief create the sparse csr tensor. + /// \param non_zero_crows The compresessed row index of non zero elements in + /// original dense tensor. + /// \param non_zero_cols The column index of non zero elements in original + /// dense tensor. + /// \param non_zero_elements The non zero elements of original dense tensor. + /// \param dims The dims of original dense tensor. + SparseCsrTensor(const DenseTensor& non_zero_crows, + const DenseTensor& non_zero_cols, + const DenseTensor& non_zero_elements, + const DDim& dims); + + /// \brief SparseCsrTensor shallow copy assignment. + SparseCsrTensor& operator=(const SparseCsrTensor& other); + + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~SparseCsrTensor() = default; + + public: + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. 
+ static const char* name() { return "SparseCsrTensor"; } + + /// \brief Returns the compressed row index of non zero elemetns in original + /// dense tensor. + /// \return The compressed row index of non zero elemetns in original dense + /// tensor. + const DenseTensor& non_zero_crows() const { return non_zero_crows_; } + + /// \brief Returns the column index of non zero elemetns in original dense + /// tensor. + /// \return The column index of non zero elemetns in original dense tensor. + const DenseTensor& non_zero_cols() const { return non_zero_cols_; } + + /// \brief Returns the non zero elemetns in original dense tensor. + /// \return The non zero elemetns in original dense tensor. + const DenseTensor& non_zero_elements() const { return non_zero_elements_; } + + /// \brief Return the number of elements contained in original dense tensor + /// \return The number of elements contained in original dense tensor + int64_t numel() const { return product(dims_); } + + /// \brief Returns the dims of the original dense tensor. + /// \return The dims of the original dense tensor. + const DDim& dims() const noexcept override { return dims_; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType dtype() const noexcept override { + return non_zero_elements_.dtype(); + } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const { return DataLayout::SPARSE_CSR; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const override { return non_zero_elements_.place(); } + + /// \brief Test whether the non_zero_elements_ metadata is valid. + /// \return Whether the non_zero_elements_ metadata is valid. + bool valid() const noexcept { return non_zero_elements_.valid(); } + + /// \brief Test whether the non_zero_elements_ storage is allocated. 
+ /// return Whether the non_zero_elements_ storage is allocated. + bool initialized() const override { return non_zero_elements_.initialized(); } + + /// \brief resize sparse csr tensor. + /// \param dense_dims The dims of original dense tensor. + /// \param non_zero_num The total number of non zero element + void Resize(const DDim& dense_dims, const int64_t non_zero_num); + + /// \brief set the member of sparse csr tensor. + /// \param non_zero_crows The compresessed row index of non zero elements in + /// original dense tensor. + /// \param non_zero_cols The column index of non zero elements in original + /// dense tensor. + /// \param non_zero_elements The non zero elements of original dense tensor. + /// \param dims The dims of original dense tensor. + void SetMember(const DenseTensor& non_zero_crows, + const DenseTensor& non_zero_cols, + const DenseTensor& non_zero_elements, + const DDim& dims); + + /// \brief Get a mutable pointer of non_zero_crows. + /// return a mutable pointer of non_zero_crows. + DenseTensor* mutable_non_zero_crows() { return &non_zero_crows_; } + + /// \brief Get a mutable pointer of non_zero_cols. + /// return a mutable pointer of non_zero_cols. + DenseTensor* mutable_non_zero_cols() { return &non_zero_cols_; } + + /// \brief Get a mutable pointer of non_zero_elements. + /// return a mutable pointer of non_zero_elements. 
+ DenseTensor* mutable_non_zero_elements() { return &non_zero_elements_; } + + private: + // save the compressed rows information of non zero elements + DenseTensor non_zero_crows_; + // save the columns information of non zero elements + DenseTensor non_zero_cols_; + // save the non zero elements + DenseTensor non_zero_elements_; + // save the number of non zero elements in each batch + DDim dims_; + /* --------------------------- */ + /* example: 2-D Tensor */ + /* --------------------------- */ + /* + x = [[0, 1, 0, 0], + [2, 0, 0, 3], + [0, 0, 4, 0], + [0, 5, 0, 6]] + dims_ = (4, 4) + non_zero_elements_ = [1, 2, 3, 4, 5 ,6] + non_zero_crows_ = [0, 1, 3, 4, 6] + non_zero_cols_ = [1, 0, 3, 2, 1, 3] + */ + + /* --------------------------- */ + /* example: 3-D Tensor */ + /* the non zero elements of different batch will be concat together */ + /* --------------------------- */ + /* + x = [[[0, 1, 0, 0], + [2, 0, 0, 3], + [0, 0, 4, 0], + [0, 5, 0, 6]], + [[0, 1, 0, 0], + [2, 0, 0, 3], + [0, 0, 4, 0], + [0, 5, 0, 0]]] + dims_ = (2, 4, 4) + non_zero_elements_ = [1, 2, 3, 4, 5 ,6, 1, 2, 3, 4, 5] + non_zero_crows_ = [0, 1, 3, 4, 6, 0, 1, 2, 4, 5] + non_zero_cols_ = [1, 0, 3, 2, 1, 3, 1, 0, 3, 2, 1] + */ +}; + +} // namespace pten diff --git a/paddle/pten/kernels/copy_kernel.h b/paddle/pten/kernels/copy_kernel.h index a481908892e..b2ca18e4d28 100644 --- a/paddle/pten/kernels/copy_kernel.h +++ b/paddle/pten/kernels/copy_kernel.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/sparse_csr_tensor.h" namespace pten { @@ -24,4 +25,10 @@ void Copy(const Context& dev_ctx, bool blocking, DenseTensor* dst); +template +void CopySparse(const Context& dev_ctx, + const SparseCsrTensor& src, + bool blocking, + SparseCsrTensor* dst); + } // namespace pten diff --git a/paddle/pten/kernels/gpu/copy_kernel.cu b/paddle/pten/kernels/gpu/copy_kernel.cu index d2578723158..f540f96ab25 100644 --- a/paddle/pten/kernels/gpu/copy_kernel.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -215,7 +215,25 @@ void Copy(const Context& dev_ctx, } } +template +void CopySparse(const Context& dev_ctx, + const SparseCsrTensor& src, + bool blocking, + SparseCsrTensor* dst) { + Copy(dev_ctx, src.non_zero_crows(), blocking, dst->mutable_non_zero_crows()); + Copy(dev_ctx, src.non_zero_cols(), blocking, dst->mutable_non_zero_cols()); + Copy(dev_ctx, + src.non_zero_elements(), + blocking, + dst->mutable_non_zero_elements()); +} + } // namespace pten PT_REGISTER_GENERAL_KERNEL( copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} +PT_REGISTER_GENERAL_KERNEL(copy_sparse, + GPU, + ALL_LAYOUT, + pten::CopySparse, + ALL_DTYPE) {} diff --git a/paddle/pten/tests/core/CMakeLists.txt b/paddle/pten/tests/core/CMakeLists.txt index 27a0173ef6f..60a0ca28541 100644 --- a/paddle/pten/tests/core/CMakeLists.txt +++ b/paddle/pten/tests/core/CMakeLists.txt @@ -3,6 +3,8 @@ cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) cc_test(test_convert_utils SRCS test_convert_utils.cc DEPS convert_utils) cc_test(test_kernel_factory SRCS test_kernel_factory.cc DEPS kernel_factory scale_kernel) +cc_test(test_sparse_coo_tensor SRCS test_sparse_coo_tensor.cc DEPS dense_tensor sparse_coo_tensor) +cc_test(test_sparse_csr_tensor SRCS test_sparse_csr_tensor.cc DEPS dense_tensor sparse_csr_tensor) cc_test(test_op_utils SRCS test_op_utils.cc DEPS op_compat_infos) 
cc_test(test_pten_device_context SRCS test_device_context.cc DEPS pten_context cpu_context) diff --git a/paddle/pten/tests/core/test_sparse_coo_tensor.cc b/paddle/pten/tests/core/test_sparse_coo_tensor.cc new file mode 100644 index 00000000000..fdec4910e82 --- /dev/null +++ b/paddle/pten/tests/core/test_sparse_coo_tensor.cc @@ -0,0 +1,93 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "gtest/gtest.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/sparse_coo_tensor.h" +#include "paddle/pten/tests/core/allocator.h" + +namespace pten { +namespace tests { + +TEST(sparse_coo_tensor, construct) { + pten::CPUPlace cpu; + auto dense_dims = pten::framework::make_ddim({3, 3}); + std::vector non_zero_data = {1.0, 2.0, 3.0}; + std::vector indices_data = {0, 1, 2, 0, 2, 1}; + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* alloc = fancy_allocator.get(); + auto indices_dims = + pten::framework::make_ddim({2, static_cast(non_zero_data.size())}); + DenseTensorMeta indices_meta(DataType::INT64, indices_dims, DataLayout::NCHW); + DenseTensor indices(alloc, indices_meta); + memcpy(indices.mutable_data(cpu), + &indices_data[0], + indices_data.size() * sizeof(int64_t)); + + auto elements_dims = + pten::framework::make_ddim({static_cast(non_zero_data.size())}); + DenseTensorMeta elements_meta( + DataType::FLOAT32, elements_dims, DataLayout::NCHW); + DenseTensor elements(alloc, 
elements_meta); + + memcpy(elements.mutable_data(cpu), + &non_zero_data[0], + non_zero_data.size() * sizeof(float)); + + SparseCooTensor sparse(indices, elements, dense_dims); + + CHECK(sparse.initialized() == true); + CHECK_EQ(sparse.nnz(), static_cast(non_zero_data.size())); + CHECK_EQ(sparse.numel(), 9); + CHECK(sparse.dims() == dense_dims); + CHECK(sparse.dtype() == DataType::FLOAT32); + CHECK(sparse.layout() == DataLayout::SPARSE_COO); + CHECK(sparse.place() == paddle::platform::CPUPlace()); +} + +TEST(sparse_coo_tensor, other_function) { + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto* alloc = fancy_allocator.get(); + auto dense_dims = pten::framework::make_ddim({4, 4}); + const int non_zero_num = 2; + auto indices_dims = pten::framework::make_ddim({2, non_zero_num}); + DenseTensorMeta indices_meta(DataType::INT64, indices_dims, DataLayout::NCHW); + DenseTensor indices(alloc, indices_meta); + + auto elements_dims = pten::framework::make_ddim({non_zero_num}); + DenseTensorMeta elements_meta( + DataType::FLOAT32, elements_dims, DataLayout::NCHW); + DenseTensor elements(alloc, elements_meta); + + SparseCooTensor coo(indices, elements, dense_dims); + CHECK(coo.initialized()); + CHECK_EQ(coo.dims(), dense_dims); + + // Test Resize + auto dense_dims_3d = pten::framework::make_ddim({2, 4, 4}); + coo.Resize(dense_dims_3d, 1, 3); + CHECK_EQ(coo.nnz(), 3); + + // Test shallow_copy + SparseCooTensor coo2(coo); + CHECK(coo.dims() == coo2.dims()); + + // Test shallow_copy_assignment + SparseCooTensor coo3 = coo2; + CHECK(coo3.dims() == coo2.dims()); +} + +} // namespace tests +} // namespace pten diff --git a/paddle/pten/tests/core/test_sparse_csr_tensor.cc b/paddle/pten/tests/core/test_sparse_csr_tensor.cc new file mode 100644 index 00000000000..d4d498cdf86 --- /dev/null +++ b/paddle/pten/tests/core/test_sparse_csr_tensor.cc @@ -0,0 +1,108 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "gtest/gtest.h" + +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/sparse_csr_tensor.h" +#include "paddle/pten/tests/core/allocator.h" + +namespace pten { +namespace tests { + +TEST(sparse_csr_tensor, construct) { + pten::CPUPlace cpu; + auto dense_dims = pten::framework::make_ddim({3, 3}); + std::vector non_zero_data = {1.0, 2.0, 3.0}; + std::vector crows_data = {0, 1, 1, 3}; + std::vector cols_data = {1, 0, 2}; + + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto alloc = fancy_allocator.get(); + // create non_zero_crows + auto crows_dims = + pten::framework::make_ddim({static_cast(crows_data.size())}); + DenseTensorMeta crows_meta(DataType::INT64, crows_dims, DataLayout::NCHW); + DenseTensor crows(alloc, crows_meta); + memcpy(crows.mutable_data(cpu), + &crows_data[0], + crows_data.size() * sizeof(int64_t)); + + // create non_zero_cols + auto cols_dims = + pten::framework::make_ddim({static_cast(cols_data.size())}); + DenseTensorMeta cols_meta(DataType::INT64, cols_dims, DataLayout::NCHW); + DenseTensor cols(alloc, cols_meta); + memcpy(cols.mutable_data(cpu), + &cols_data[0], + cols_data.size() * sizeof(int64_t)); + + // create non_zero_elements + auto elements_dims = + pten::framework::make_ddim({static_cast(non_zero_data.size())}); + DenseTensorMeta elements_meta( + DataType::FLOAT32, elements_dims, DataLayout::NCHW); + DenseTensor 
elements(alloc, elements_meta); + memcpy(elements.mutable_data(cpu), + &non_zero_data[0], + non_zero_data.size() * sizeof(float)); + + SparseCsrTensor sparse(crows, cols, elements, dense_dims); + + CHECK_EQ(sparse.non_zero_cols().numel(), + static_cast(non_zero_data.size())); + CHECK_EQ(sparse.numel(), 9); + CHECK(sparse.dims() == dense_dims); + CHECK(sparse.dtype() == DataType::FLOAT32); + CHECK(sparse.layout() == DataLayout::SPARSE_CSR); + CHECK(sparse.place() == paddle::platform::CPUPlace()); + CHECK(sparse.initialized() == true); +} + +TEST(sparse_csr_tensor, other_function) { + auto fancy_allocator = std::unique_ptr(new FancyAllocator); + auto alloc = fancy_allocator.get(); + auto dense_dims = pten::framework::make_ddim({4, 4}); + auto crows_dims = pten::framework::make_ddim({dense_dims[0] + 1}); + DenseTensorMeta crows_meta(DataType::INT64, crows_dims, DataLayout::NCHW); + DenseTensor crows(alloc, crows_meta); + + const int64_t non_zero_num = 5; + auto cols_dims = pten::framework::make_ddim({non_zero_num}); + DenseTensorMeta cols_meta(DataType::INT64, cols_dims, DataLayout::NCHW); + DenseTensor cols(alloc, cols_meta); + DenseTensorMeta values_meta(DataType::FLOAT32, cols_dims, DataLayout::NCHW); + DenseTensor values(alloc, values_meta); + + SparseCsrTensor csr(crows, cols, values, dense_dims); + CHECK(csr.initialized()); + CHECK_EQ(csr.dims(), dense_dims); + + // Test Resize + auto dense_dims_3d = pten::framework::make_ddim({2, 4, 4}); + csr.Resize(dense_dims_3d, 2); + CHECK_EQ(csr.non_zero_cols().numel(), 2); + + // Test shallow_copy + SparseCsrTensor csr2(csr); + CHECK(csr.dims() == csr2.dims()); + + // Test shallow_copy_assignment + SparseCsrTensor csr3 = csr2; + CHECK(csr3.dims() == csr2.dims()); +} + +} // namespace tests +} // namespace pten diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index d38e8d1193b..ca14c551754 100644 --- 
a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -75,7 +75,7 @@ class FusedMultiHeadAttention(Layer): embed_dim, num_heads, dropout_rate=0.5, - attn_dropout_rate=0.5, + attn_dropout_rate=None, kdim=None, vdim=None, normalize_before=False, -- GitLab