From e32c43758e9d9b9572b0b16c7576fda8747352c9 Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Tue, 13 Jun 2023 21:42:34 +0800 Subject: [PATCH] Construct dist tensor (#54425) * construct dist tensor * move constructor to header --- paddle/fluid/pybind/auto_parallel_py.cc | 9 +- paddle/fluid/pybind/eager.cc | 210 ++++++++++++++++-- paddle/fluid/pybind/eager_method.cc | 27 +++ paddle/fluid/pybind/eager_properties.cc | 22 ++ paddle/fluid/pybind/eager_utils.cc | 35 +++ paddle/fluid/pybind/eager_utils.h | 15 ++ paddle/fluid/pybind/tensor.cc | 14 ++ paddle/phi/api/include/tensor.h | 7 + paddle/phi/api/lib/kernel_dispatch.h | 4 + paddle/phi/api/lib/tensor.cc | 10 + paddle/phi/core/dense_tensor.h | 6 + .../distributed/auto_parallel/CMakeLists.txt | 10 +- .../distributed/auto_parallel/dist_tensor.cc | 69 ++++++ .../distributed/auto_parallel/dist_tensor.h | 130 +++++++++++ paddle/phi/core/utils/type_info.cc | 8 + python/paddle/tensor/to_string.py | 24 ++ test/auto_parallel/CMakeLists.txt | 1 + test/auto_parallel/test_dist_tensor.py | 54 +++++ 18 files changed, 637 insertions(+), 18 deletions(-) create mode 100644 paddle/phi/core/distributed/auto_parallel/dist_tensor.cc create mode 100644 paddle/phi/core/distributed/auto_parallel/dist_tensor.h create mode 100644 test/auto_parallel/test_dist_tensor.py diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc index fdac30be8f0..1b78d7bd257 100644 --- a/paddle/fluid/pybind/auto_parallel_py.cc +++ b/paddle/fluid/pybind/auto_parallel_py.cc @@ -43,6 +43,8 @@ using phi::distributed::auto_parallel::Machine; using phi::distributed::auto_parallel::ProcessMesh; using phi::distributed::auto_parallel::TensorDistAttr; +PyTypeObject *g_tensor_dist_attr_pytype = nullptr; + static inline const ProcessMesh *get_tensor_process_mesh( const TensorDistAttr &self) { if (self.process_mesh().empty()) { @@ -225,8 +227,11 @@ void BindAutoParallel(py::module *m) { py::arg("memo")) .def("__str__", &DeviceMesh::to_string); - py::class_(*m, "TensorDistAttr") - .def(py::init<>()) + py::class_> py_dist_attr( + *m, "TensorDistAttr"); + g_tensor_dist_attr_pytype = + reinterpret_cast(py_dist_attr.ptr()); + py_dist_attr.def(py::init<>()) .def(py::init([](const VarDesc &var_desc) { auto shape = paddle::distributed::auto_parallel::get_tensor_shape(&var_desc); diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index a732ed0c1ec..adf62731f03 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -41,6 +41,14 @@ limitations under the License. */ #include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/phi/core/string_tensor.h" + +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +using phi::distributed::auto_parallel::DistTensor; +using phi::distributed::auto_parallel::TensorDistAttr; +#endif + namespace paddle { namespace pybind { @@ -60,6 +68,52 @@ PyObject* TensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) { return obj; } +#ifdef PADDLE_WITH_DISTRIBUTE +void EmptyDistTensorInitializer( + TensorObject* self, + const std::string& name, + const paddle::platform::Place& place, + const std::shared_ptr& dist_attr, + bool persistable = false, + int stop_gradient = -1, + framework::proto::VarType::Type dtype = + paddle::framework::proto::VarType::FP32, + const std::vector& dims = {0}) { + auto ddims = phi::make_ddim(dims); + self->tensor.set_name(name); + auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor)); + autograd_meta->SetPersistable(persistable); + if (stop_gradient != -1) { + autograd_meta->SetStopGradient(static_cast(stop_gradient)); + } + + std::shared_ptr dist_tensor = nullptr; + if (dims.size() == 1 && dims[0] == 0) { + std::shared_ptr allocation_ptr = nullptr; + dist_tensor = std::make_shared( + allocation_ptr, + phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype), + ddims), + dist_attr); + } else { + dist_tensor = std::make_shared( + std::make_shared(), + phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype), + ddims), + dist_attr); + } + self->tensor.set_impl(dist_tensor); + + if (!autograd_meta->GetMutableGradNode()) { + autograd_meta->SetGradNode( + std::make_shared(autograd_meta)); + VLOG(3) << "Tensor(" << name + << ") have not GradNode, add GradNodeAccumulation" + << autograd_meta->GradNode() << " for it."; + } +} +#endif + // TODO(jiabin): Overload this once we need more constructor in Python void EmptyTensorInitializer(TensorObject* self, const std::string& name, @@ -82,6 +136,7 @@ void EmptyTensorInitializer(TensorObject* self, // TODO(jiabin): Maybe support LOD later std::shared_ptr dense_tensor = nullptr; if (dims.size() == 1 && dims[0] == 0) { + VLOG(0) << "Create dense tensor with dims[0] equal to 0"; std::shared_ptr allocation_ptr = nullptr; dense_tensor = std::make_shared( allocation_ptr, @@ -129,6 +184,48 @@ void EmptyStringTensorInitializer(TensorObject* self, self->tensor.set_impl(string_tensor); } +#ifdef PADDLE_WITH_DISTRIBUTE +void InitDistTensorWithNumpyValue(TensorObject* self, + const py::object& array, + const paddle::platform::Place& place, + bool zero_copy = false) { + PADDLE_ENFORCE_EQ( + self->tensor.defined(), + true, + paddle::platform::errors::Fatal( + "Calling InitDistTensorWithNumpyValue of Eager Tensor without " + "EmptyDistTensorInitializer is " + "forbidden. Please check your code and make sure you new a " + "eager tensor before init it with NumPy.")); + DistTensor* dist_tensor_ptr = + static_cast(self->tensor.impl().get()); + phi::DenseTensor* impl_ptr = + static_cast(dist_tensor_ptr->mutable_value()); + + if (platform::is_cpu_place(place)) { + SetTensorFromPyArray(impl_ptr, array, place, zero_copy); + } else if (platform::is_xpu_place(place)) { + SetTensorFromPyArray(impl_ptr, array, place, zero_copy); + } else if (platform::is_gpu_place(place)) { + SetTensorFromPyArray( + impl_ptr, array, place, zero_copy); + } else if (platform::is_cuda_pinned_place(place)) { + SetTensorFromPyArray( + impl_ptr, array, place, zero_copy); + } else if (platform::is_custom_place(place)) { + SetTensorFromPyArray( + impl_ptr, array, place, zero_copy); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Place should be one of " + "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace")); + } + + // TODO(dev): dist_tensor meta is not equal to dense tensor meta + dist_tensor_ptr->set_meta(impl_ptr->meta()); +} +#endif + void InitTensorWithNumpyValue(TensorObject* self, const py::object& array, const paddle::platform::Place& place, @@ -143,6 +240,7 @@ void InitTensorWithNumpyValue(TensorObject* self, "eager tensor before init it with NumPy.")); phi::DenseTensor* impl_ptr = static_cast(self->tensor.impl().get()); + if (platform::is_cpu_place(place)) { SetTensorFromPyArray(impl_ptr, array, place, zero_copy); } else if (platform::is_xpu_place(place)) { @@ -186,6 +284,39 @@ void InitStringTensorWithNumpyValue(TensorObject* self, const py::object& obj) { } } +#ifdef PADDLE_WITH_DISTRIBUTE +void InitDistTensorWithTensor( + TensorObject* self, + const paddle::Tensor& src, + const paddle::platform::Place& place, + const std::string& name, + const std::shared_ptr& dist_attr) { + PADDLE_ENFORCE(src.is_dense_tensor(), + paddle::platform::errors::InvalidArgument( + "DistTensor can only initialize by DenseTensor")); + self->tensor.set_name(name); + if (place == src.place()) { + std::shared_ptr tensor = + std::static_pointer_cast(src.impl()); + self->tensor.set_impl(std::make_shared(tensor, dist_attr)); + VLOG(4) << "Same place, do ShareDataWith"; + } else { + std::shared_ptr tensor = + std::static_pointer_cast( + src.copy_to(place, true).impl()); + self->tensor.set_impl(std::make_shared(tensor, dist_attr)); + VLOG(4) << "Different place, do TensorCopy"; + } + if (src.get_autograd_meta()) { + egr::EagerUtils::autograd_meta(&(self->tensor)) + ->SetPersistable( + egr::EagerUtils::unsafe_autograd_meta(src)->Persistable()); + } else { + egr::EagerUtils::autograd_meta(&(self->tensor))->SetPersistable(false); + } +} +#endif + void InitTensorWithTensor(TensorObject* self, const paddle::Tensor& src, const paddle::platform::Place& place, @@ -283,6 +414,25 @@ paddle::platform::Place ParsePlace( return place; } +#ifdef PADDLE_WITH_DISTRIBUTE +std::shared_ptr ParseDistAttrArgs( + std::unordered_map kws_map, + std::unordered_map kw_order_map, + PyObject* args, + bool flag_kwargs, + Py_ssize_t args_num) { + std::shared_ptr dist_attr = nullptr; + if (kw_order_map["dist_attr"] <= args_num) { + dist_attr = CastPyArg2DistAttr( + PyTuple_GET_ITEM(args, kw_order_map["dist_attr"] - 1), + kw_order_map["dist_attr"] - 1); + } else if (flag_kwargs && kws_map["dist_attr"] != NULL) { + dist_attr = CastPyArg2DistAttr(kws_map["dist_attr"], 0); + } + return dist_attr; +} +#endif + // boolean arguments: zero_copy, stop_gradient, persistable int ParseBooleanArgs(std::string key, std::unordered_map kws_map, @@ -347,13 +497,13 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, // kw_order_map's value is the position of the arguments respectively. // If u want to update this constructor with new arguments, // need to update this map and to add or change related code. - std::unordered_map kw_order_map{ - {"value", 1}, - {"place", 2}, - {"persistable", 3}, - {"zero_copy", 4}, - {"name", 5}, - {"stop_gradient", 6}}; + std::unordered_map kw_order_map{{"value", 1}, + {"place", 2}, + {"persistable", 3}, + {"zero_copy", 4}, + {"name", 5}, + {"stop_gradient", 6}, + {"dist_attr", 7}}; py::object numpy_value = py::object(); paddle::platform::Place place = @@ -378,6 +528,18 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, stop_gradient = ParseBooleanArgs( "stop_gradient", kws_map, kw_order_map, args, flag_kwargs, args_num); +#ifdef PADDLE_WITH_DISTRIBUTE + std::shared_ptr dist_attr = + ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num); + + if (dist_attr) { + EmptyDistTensorInitializer( + py_tensor_ptr, act_name, place, dist_attr, persistable, stop_gradient); + InitDistTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy); + return; + } +#endif + EmptyTensorInitializer( py_tensor_ptr, act_name, place, persistable, stop_gradient); InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy); @@ -399,7 +561,7 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, // If u want to update this constructor with new arguments, // need to update this map and to add or change related code. std::unordered_map kw_order_map{ - {"value", 1}, {"place", 2}, {"name", 3}}; + {"value", 1}, {"place", 2}, {"name", 3}, {"dist_attr", 4}}; paddle::platform::Place place = egr::Controller::Instance().GetExpectedPlace(); @@ -408,6 +570,11 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, place = ParsePlace(kws_map, kw_order_map, args, flag_kwargs, args_num); act_name = ParseName(kws_map, kw_order_map, args, flag_kwargs, args_num); +#ifdef PADDLE_WITH_DISTRIBUTE + std::shared_ptr dist_attr = + ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num); +#endif + if (init_by_egr_tensor) { paddle::Tensor src_tensor; if (kw_order_map["value"] <= args_num) { @@ -426,7 +593,16 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, "way.")); } } +#ifdef PADDLE_WITH_DISTRIBUTE + if (dist_attr) { + InitDistTensorWithTensor( + py_tensor_ptr, src_tensor, place, act_name, dist_attr); + } else { + InitTensorWithTensor(py_tensor_ptr, src_tensor, place, act_name); + } +#else InitTensorWithTensor(py_tensor_ptr, src_tensor, place, act_name); +#endif } else { // init by framework tensor phi::DenseTensor src_tensor; @@ -545,7 +721,8 @@ void AutoInitStringTensorByStringTensor( * ** persistable: bool, * ** zero_copy: bool, * ** name: std::string, - * ** stop_gradient: bool) + * ** stop_gradient: bool, + * ** dist_attr: phi::distributed::TensorDistAttr) * 4. * def __init__ ( * ** value: ndarray) @@ -558,7 +735,8 @@ void AutoInitStringTensorByStringTensor( * def __init__ ( * ** tensor: Tensor, * ** place: paddle::platform::Place, - * ** name: std::string) + * ** name: std::string, + * ** dist_attr: phi::distributed::TensorDistAttr) * 7. (multi-place) (should have at least one parameter, one parameter similar * to case 5, zero parameter equals to case 1.) * def __init__ ( @@ -583,6 +761,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { PyObject* kw_dims = NULL; PyObject* kw_dtype = NULL; PyObject* kw_type = NULL; + PyObject* kw_dist_attr = NULL; // the keywords argument static char* kwlist[] = {const_cast("value"), @@ -594,6 +773,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { const_cast("dims"), const_cast("dtype"), const_cast("type"), + const_cast("dist_attr"), NULL}; // 'O' Store a Python object (without any conversion) in a C object pointer, @@ -604,7 +784,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { // which enhance case2, case3, case4, case5, case6, case7. bool flag_ = PyArg_ParseTupleAndKeywords(args, kwargs, - "|OOOOOOOOO", + "|OOOOOOOOOO", kwlist, &kw_value, &kw_place, @@ -614,7 +794,8 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { &kw_stop_gradient, &kw_dims, &kw_dtype, - &kw_type); + &kw_type, + &kw_dist_attr); // helper map std::unordered_map kws_map{ @@ -626,7 +807,8 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { {"stop_gradient", kw_stop_gradient}, {"dims", kw_dims}, {"dtype", kw_dtype}, - {"type", kw_type}}; + {"type", kw_type}, + {"dist_attr", kw_dist_attr}}; PADDLE_ENFORCE_EQ(flag_, true, @@ -636,7 +818,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { "sure you are on the right way. " "The expected arguments as follow: (" "value, place, persistable, zero_copy, " - "name, stop_gradient, dims, dtype, type)")); + "name, stop_gradient, dims, dtype, type, dist_attr)")); PADDLE_ENFORCE_NOT_NULL( self, diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 4c88cf30223..eb0e895cf57 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -62,6 +62,9 @@ typedef SSIZE_T ssize_t; #include "paddle/phi/core/flags.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif PHI_DECLARE_bool(set_to_1d); @@ -796,6 +799,15 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self, auto* tensor = static_cast(self->tensor.impl().get()); VLOG(6) << "tensor: " << tensor->IsInitialized(); return ToPyObject(tensor); + } else if (self->tensor.is_dist_tensor()) { +#ifdef PADDLE_WITH_DISTRIBUTE + auto* tensor = static_cast( + self->tensor.impl().get()); + VLOG(6) << "dist tensor: " << tensor->IsInitialized(); + return ToPyObject(tensor); +#else + RETURN_PY_NONE +#endif } else { RETURN_PY_NONE } @@ -1697,6 +1709,17 @@ static PyObject* tensor_method_is_dense(TensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } +static PyObject* tensor_method_is_dist(TensorObject* self, + PyObject* args, + PyObject* kwargs) { + EAGER_TRY + if (!self->tensor.defined()) { + return ToPyObject(false); + } + return ToPyObject(self->tensor.is_dist_tensor()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + static PyObject* tensor_method_is_sparse(TensorObject* self, PyObject* args, PyObject* kwargs) { @@ -2051,6 +2074,10 @@ PyMethodDef variable_methods[] = { (PyCFunction)(void (*)(void))tensor_method_is_dense, METH_VARARGS | METH_KEYWORDS, NULL}, + {"is_dist", + (PyCFunction)(void (*)(void))tensor_method_is_dist, + METH_VARARGS | METH_KEYWORDS, + NULL}, {"_zero_grads", (PyCFunction)(void (*)(void))tensor__zero_grads, METH_VARARGS | METH_KEYWORDS, diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 1c2ba1b1dee..028d843c4d7 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -158,6 +158,23 @@ int tensor_properties_set_persistable(TensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NEG } +PyObject* tensor_properties_get_dist_attr(TensorObject* self, void* closure) { + EAGER_TRY + if (self->tensor.is_dist_tensor()) { +#ifdef PADDLE_WITH_DISTRIBUTE + phi::distributed::auto_parallel::DistTensor* dist_tensor = + static_cast( + self->tensor.impl().get()); + return ToPyObject(dist_tensor->dist_attr().get()); +#else + RETURN_PY_NONE +#endif + } else { + RETURN_PY_NONE + } + EAGER_CATCH_AND_THROW_RETURN_NULL +} + PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { EAGER_TRY std::vector value; @@ -311,6 +328,11 @@ struct PyGetSetDef variable_properties[] = { // nullptr, // nullptr}, {"place", (getter)tensor_properties_get_place, nullptr, nullptr, nullptr}, + {"dist_attr", + (getter)tensor_properties_get_dist_attr, + nullptr, + nullptr, + nullptr}, {"_place_str", (getter)tensor_properties_get_place_str, nullptr, diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index dd5a06757c3..8c3bf619947 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -59,6 +59,9 @@ extern PyTypeObject* g_customplace_pytype; extern PyTypeObject* g_framework_tensor_pytype; extern PyTypeObject* g_framework_lodtensorarray_pytype; extern PyTypeObject* g_jit_function_pytype; +#ifdef PADDLE_WITH_DISTRIBUTE +extern PyTypeObject* g_tensor_dist_attr_pytype; +#endif int TensorDtype2NumpyDtype(phi::DataType dtype) { switch (dtype) { @@ -540,6 +543,23 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) { return place; } +#ifdef PADDLE_WITH_DISTRIBUTE +using phi::distributed::auto_parallel::TensorDistAttr; +std::shared_ptr CastPyArg2DistAttr(PyObject* obj, + ssize_t arg_pos) { + if (PyObject_IsInstance( + obj, reinterpret_cast(g_tensor_dist_attr_pytype))) { + return ::pybind11::handle(obj).cast>(); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "argument (position %d) must be " + "TensorDistAttr, but got %s", + arg_pos + 1, + reinterpret_cast(obj->ob_type)->tp_name)); + } +} +#endif + phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { if (PyObject_TypeCheck(obj, g_framework_tensor_pytype)) { return ::pybind11::handle(obj).cast(); @@ -838,6 +858,21 @@ PyObject* ToPyObject(const phi::DenseTensor* value) { return obj.ptr(); } +#ifdef PADDLE_WITH_DISTRIBUTE +PyObject* ToPyObject(const phi::distributed::auto_parallel::DistTensor* value) { + auto obj = ::pybind11::cast(value, py::return_value_policy::reference); + obj.inc_ref(); + return obj.ptr(); +} + +PyObject* ToPyObject( + const phi::distributed::auto_parallel::TensorDistAttr* value) { + auto obj = ::pybind11::cast(value, py::return_value_policy::reference); + obj.inc_ref(); + return obj.ptr(); +} +#endif + PyObject* ToPyObject(const phi::SelectedRows* value) { auto obj = ::pybind11::cast(value, py::return_value_policy::reference); obj.inc_ref(); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 8f83e8f880f..3d8e6371d01 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -37,6 +37,11 @@ typedef SSIZE_T ssize_t; #include "paddle/utils/pybind.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif + namespace paddle { class CustomOpKernelContext; namespace framework { @@ -106,6 +111,11 @@ PyObject* ToPyObject(const std::vector>& value, bool return_py_none_if_not_initialize = false); PyObject* ToPyObject(const platform::Place& value); PyObject* ToPyObject(const phi::DenseTensor* value); +#ifdef PADDLE_WITH_DISTRIBUTE +PyObject* ToPyObject(const phi::distributed::auto_parallel::DistTensor* value); +PyObject* ToPyObject( + const phi::distributed::auto_parallel::TensorDistAttr* value); +#endif PyObject* ToPyObject(const phi::SelectedRows* value); PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype); PyObject* ToPyObject(const paddle::framework::proto::VarType& type); @@ -287,6 +297,11 @@ paddle::DataType CastPyArg2DataType(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +#ifdef PADDLE_WITH_DISTRIBUTE +std::shared_ptr +CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos); +#endif + paddle::optional GetOptionalTensorFromArgs( const std::string& op_type, const std::string& arg_name, diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index bdd2a50d96e..ff47d1a2117 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -174,6 +174,9 @@ limitations under the License. */ #include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/autotune/switch_autotune.h" #include "pybind11/stl.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif PHI_DECLARE_bool(use_mkldnn); PHI_DECLARE_bool(use_shm_cache); @@ -1021,6 +1024,17 @@ void BindTensor(pybind11::module &m) { // NOLINT })); #endif +#ifdef PADDLE_WITH_DISTRIBUTE + using phi::distributed::auto_parallel::DistTensor; + py::class_(m, "DistTensor") + .def( + "get_tensor", + [](DistTensor &self) { return self.mutable_value(); }, + py::return_value_policy::reference) + .def("numel", + [](DistTensor &self) -> int64_t { return self.value().numel(); }); +#endif + py::class_(m, "SelectedRows") .def("__init__", [](phi::SelectedRows &instance) { diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index fb4a3c6f7d2..b626df6c670 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -219,6 +219,13 @@ class PADDLE_API Tensor final { */ bool is_dense_tensor() const; + /** + * @brief Determine whether tensor is DistTensor + * + * @return bool + */ + bool is_dist_tensor() const; + /** * @brief Determine whether tensor is SelectedRows * diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index 23b375eaf6e..9d05fb15519 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -28,6 +28,10 @@ limitations under the License. */ #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif + // TODO(chenweihang): split Key, Kernel, Factory into diff files #include "paddle/phi/core/kernel_factory.h" diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index e9c68367b16..e8caf525308 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -34,6 +34,9 @@ limitations under the License. */ #include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/tensor_utils.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif namespace paddle { @@ -128,6 +131,13 @@ DataLayout Tensor::layout() const { return impl_->layout(); } bool Tensor::is_dense_tensor() const { return phi::DenseTensor::classof(impl_.get()); } +bool Tensor::is_dist_tensor() const { +#ifdef PADDLE_WITH_DISTRIBUTE + return phi::distributed::auto_parallel::DistTensor::classof(impl_.get()); +#else + return false; +#endif +} bool Tensor::is_selected_rows() const { return phi::SelectedRows::classof(impl_.get()); } diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index f4f1a7cb25c..2cfdd7493c4 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -29,6 +29,11 @@ limitations under the License. */ namespace phi { class DenseTensorUtils; +namespace distributed { +namespace auto_parallel { +class DistTensor; +} // namespace auto_parallel +} // namespace distributed /// \brief The Dense tensor stores values in a contiguous sequential block /// of memory where all values are represented. Tensors or multi-dimensional @@ -181,6 +186,7 @@ class DenseTensor : public TensorBase, private: friend class DenseTensorUtils; + friend class phi::distributed::auto_parallel::DistTensor; protected: DenseTensorMeta meta_; diff --git a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt index d6e52ca8044..db639bba5f4 100644 --- a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt @@ -1,4 +1,10 @@ proto_library(auto_parallel_proto SRCS auto_parallel.proto) -collect_srcs(core_srcs SRCS device_mesh.cc process_mesh.cc dist_attr.cc - dist_mapper.cc) +collect_srcs( + core_srcs + SRCS + device_mesh.cc + process_mesh.cc + dist_attr.cc + dist_mapper.cc + dist_tensor.cc) diff --git a/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc b/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc new file mode 100644 index 00000000000..a1f052ed512 --- /dev/null +++ b/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" + +namespace phi { +namespace distributed { +namespace auto_parallel { + +void* DistTensor::AllocateFrom(Allocator* allocator, + DataType dtype, + size_t requested_size, + bool fake_alloc) { + return value_->AllocateFrom(allocator, dtype, requested_size, fake_alloc); +} + +const Place& DistTensor::place() const { + PADDLE_ENFORCE_NOT_NULL( + value_->holder_, + phi::errors::PreconditionNotMet( + "Tensor not initialized yet when DenseTensor::place() is called.")); + return value_->holder_->place(); +} + +int64_t DistTensor::numel() const { + if (meta_.is_scalar) { + return 1; + } + return product(meta_.dims); +} + +void DistTensor::set_meta(DenseTensorMeta&& meta) { + PADDLE_ENFORCE_EQ(meta_.valid(), + false, + phi::errors::InvalidArgument( + "Only when the original attribute of Tensor is " + "incomplete, can it be reset.")); + meta_ = std::move(meta); +} + +void DistTensor::set_meta(const DenseTensorMeta& meta) { + PADDLE_ENFORCE_EQ( + meta.valid(), + true, + phi::errors::InvalidArgument( + "Input meta is invalid, please check the meta attribute.")); + meta_.dims = meta.dims; + meta_.dtype = meta.dtype; + meta_.is_scalar = meta.is_scalar; + meta_.layout = meta.layout; + meta_.lod = meta.lod; + meta_.offset = meta.offset; + meta_.use_gpudnn = meta.use_gpudnn; +} + +} // namespace auto_parallel +} // namespace distributed +} // namespace phi diff --git a/paddle/phi/core/distributed/auto_parallel/dist_tensor.h b/paddle/phi/core/distributed/auto_parallel/dist_tensor.h new file mode 100644 index 00000000000..ef14abe0c45 --- /dev/null +++ b/paddle/phi/core/distributed/auto_parallel/dist_tensor.h @@ -0,0 +1,130 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +namespace distributed { +namespace auto_parallel { + +class TensorDistAttr; + +class DistTensor final + : public phi::TensorBase, + public phi::TypeInfoTraits { + public: + /// \brief Construct a dist tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + DistTensor(Allocator* a, + const DenseTensorMeta& meta, + const std::shared_ptr& dist_attr) + : meta_(meta), dist_attr_(dist_attr) { + value_ = std::make_unique(a, meta); + } + + DistTensor(Allocator* a, + DenseTensorMeta&& meta, + const std::shared_ptr& dist_attr) + : meta_(std::move(meta)), dist_attr_(dist_attr) { + value_ = std::make_unique(a, meta); + } + + DistTensor(const std::shared_ptr& holder, + const DenseTensorMeta& meta, + const std::shared_ptr& dist_attr) + : meta_(meta), dist_attr_(dist_attr) { + value_ = std::make_unique(holder, meta); + } + + DistTensor(const std::shared_ptr& dense_tensor, + const std::shared_ptr& dist_attr) + : dist_attr_(dist_attr) { + value_ = std::make_unique(*dense_tensor); + set_meta(dense_tensor->meta()); + } + + ~DistTensor() = default; + + static const char* name() { return "DistTensor"; } + + const DenseTensor& value() const { return *value_; } + + DenseTensor* mutable_value() { return value_.get(); } + + const std::shared_ptr& dist_attr() const { + return dist_attr_; + } + + /// \brief Returns the number of elements contained in tensor. + /// \return The number of elements contained in tensor. + int64_t numel() const override; + + /// \brief Returns the dims of the tensor. + /// \return The dims of the tensor. + const DDim& dims() const override { return meta_.dims; } + + /// \brief Test whether the storage is allocated. + /// \return Whether the storage is allocated. + bool initialized() const override { + return value_->holder_ && value_->holder_->ptr(); + } + + bool IsInitialized() const { return value_->holder_ != nullptr; } + + /// \brief Test whether the metadata is valid. + /// \return Whether the metadata is valid. + bool valid() const override { return meta_.valid(); } + + /// \brief Allocate memory with requested size from allocator. + /// \return The mutable data pointer value of type T. + void* AllocateFrom(Allocator* allocator, + DataType dtype, + size_t requested_size = 0, + bool fake_alloc = false) override; + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType dtype() const override { return meta_.dtype; } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const override { return meta_.layout; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const override; + + const DenseTensorMeta& meta() const noexcept { return meta_; } + + /// \brief Sets the meta information of the tensor. Only when the original + /// attribute of Tensor is incomplete, can it be reset. + /// \param meta The meta information of the tensor. + void set_meta(DenseTensorMeta&& meta); + + void set_meta(const DenseTensorMeta& meta); + + private: + DenseTensorMeta meta_; + std::shared_ptr dist_attr_{nullptr}; + std::unique_ptr value_{nullptr}; +}; + +} // namespace auto_parallel +} // namespace distributed +} // namespace phi diff --git a/paddle/phi/core/utils/type_info.cc b/paddle/phi/core/utils/type_info.cc index 457824820e5..2a554525024 100644 --- a/paddle/phi/core/utils/type_info.cc +++ b/paddle/phi/core/utils/type_info.cc @@ -24,6 +24,9 @@ limitations under the License. */ #include "paddle/phi/core/storage_properties.h" #include "paddle/phi/core/string_tensor.h" #include "paddle/phi/core/tensor_array.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#endif #include "paddle/phi/core/utils/type_info.h" namespace phi { @@ -52,6 +55,11 @@ template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; +#ifdef PADDLE_WITH_DISTRIBUTE +template class TypeInfoTraits; +#endif + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU_KP) template class TypeInfoTraits; diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index 0d8cd9a6b81..a6018084941 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -362,6 +362,27 @@ def sparse_tensor_to_string(tensor, prefix='Tensor'): ) +def dist_tensor_to_string(tensor, prefix='Tensor'): + # TODO(dev): Complete tensor will be printed after reshard + # is ready. + indent = len(prefix) + 1 + dtype = convert_dtype(tensor.dtype) + if tensor.dtype == core.VarDesc.VarType.BF16: + dtype = 'bfloat16' + + _template = "{prefix}(shape={shape}, dtype={dtype}, place={place}, stop_gradient={stop_gradient}, dist_attr={dist_attr},\n{indent}{data})" + return _template.format( + prefix=prefix, + shape=tensor.shape, + dtype=dtype, + place=tensor._place_str, + stop_gradient=tensor.stop_gradient, + dist_attr=tensor.dist_attr, + indent=' ' * indent, + data=None, + ) + + def tensor_to_string(tensor, prefix='Tensor'): indent = len(prefix) + 1 @@ -374,6 +395,9 @@ def tensor_to_string(tensor, prefix='Tensor'): if tensor.is_sparse(): return sparse_tensor_to_string(tensor, prefix) + if tensor.is_dist(): + return dist_tensor_to_string(tensor, prefix) + if not tensor._is_dense_tensor_hold_allocation(): return "Tensor(Not initialized)" else: diff --git a/test/auto_parallel/CMakeLists.txt b/test/auto_parallel/CMakeLists.txt index 61c1b578030..a43b51cc8ac 100644 --- a/test/auto_parallel/CMakeLists.txt +++ b/test/auto_parallel/CMakeLists.txt @@ -142,6 +142,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_dist_saver MODULES test_dist_saver) py_test_modules(test_engine_save_load MODULES test_engine_save_load) py_test_modules(test_rule_based_tuner MODULES test_rule_based_tuner) + py_test_modules(test_dist_tensor MODULES test_dist_tensor) # End of unittests WITH single card WITHOUT timeout endif() diff --git a/test/auto_parallel/test_dist_tensor.py b/test/auto_parallel/test_dist_tensor.py new file mode 100644 index 00000000000..58ebc085004 --- /dev/null +++ b/test/auto_parallel/test_dist_tensor.py @@ -0,0 +1,54 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + + +class TestDistTensor(unittest.TestCase): + def test_dist_tensor_creation(self): + shape = [10, 5] + dist_attr = paddle.fluid.core.TensorDistAttr() + + # create dist tensor using numpy + dist_tensor_with_numpy = paddle.Tensor( + np.ones(shape, dtype=np.float32), dist_attr=dist_attr + ) + + # create dist tensor using tensor + dist_tensor_with_tensor = paddle.Tensor( + paddle.ones(shape), dist_attr=dist_attr + ) + + # create normal tensor + tensor = paddle.ones(shape) + + # test dist tensor properties + self.assertEqual(dist_tensor_with_numpy.shape, shape) + self.assertEqual(dist_tensor_with_tensor.shape, shape) + self.assertEqual(dist_tensor_with_numpy.is_dist(), True) + self.assertEqual(dist_tensor_with_tensor.is_dist(), True) + self.assertEqual(tensor.is_dist(), False) + self.assertEqual( + str(dist_tensor_with_numpy), str(dist_tensor_with_tensor) + ) + self.assertEqual(dist_tensor_with_numpy.dist_attr, dist_attr) + self.assertEqual(dist_tensor_with_tensor.dist_attr, dist_attr) + + +if __name__ == "__main__": + unittest.main() -- GitLab