From c1931beb72ff8429ec44230c014777840fb231b0 Mon Sep 17 00:00:00 2001
From: Jiabin Yang <360788950@qq.com>
Date: Thu, 17 Mar 2022 10:26:08 +0800
Subject: [PATCH] fix copy_ problem by doing it with phi copy (#40521)

* fix copy_ problem by doing it with phi copy

* improve test coverage

* refactor copy with sr kernel
---
 .../data_structure_tests/eager_tensor_test.cc |  53 +++++++++
 paddle/fluid/pybind/eager_method.cc           |  15 ++-
 paddle/fluid/pybind/eager_properties.cc       |   2 +-
 paddle/phi/api/include/tensor.h               |   4 +-
 paddle/phi/api/lib/CMakeLists.txt             |   2 +-
 paddle/phi/api/lib/tensor_method.cc           | 102 ++++++++++++++++--
 .../phi/kernels/selected_rows/copy_kernel.cc  |  49 +++++++++
 .../phi/kernels/selected_rows/copy_kernel.h   |  31 ++++++
 8 files changed, 248 insertions(+), 10 deletions(-)
 create mode 100644 paddle/phi/kernels/selected_rows/copy_kernel.cc
 create mode 100644 paddle/phi/kernels/selected_rows/copy_kernel.h

diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
index 1683f4ed5fb..c8b2d22dcf9 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -17,6 +17,14 @@
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, CPU, ALL_LAYOUT);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(copy_sr, GPU, ALL_LAYOUT);
+#endif
 
 namespace eager_test {
 using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
@@ -151,5 +159,50 @@ TEST(EagerVariable, Constructor) {
   CHECK_EQ(dt3_tmp_ptr[1], 10.0f);
   t4.reset();
   CHECK(t4.defined() == false);
+
+  VLOG(6) << "Check Tensor Copy_";
+  std::vector<int64_t> rows = {1, 2};
+  std::vector<int64_t> dims = {2};
+  paddle::experimental::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2));
+  std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+      ->mutable_value()
+      ->Resize(phi::make_ddim(dims));
+  auto* dt7_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
+                          ->mutable_value()
+                          ->mutable_data<float>(paddle::platform::CPUPlace());
+  dt7_tmp_ptr[0] = 6.0f;
+  dt7_tmp_ptr[1] = 11.0f;
+
+  paddle::experimental::Tensor t8;
+  paddle::experimental::Tensor t5;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  paddle::experimental::Tensor t6;
+  paddle::experimental::Tensor t9;
+  VLOG(6) << "Check Tensor Copy_ Selected Rows";
+  t8.copy_(t7, paddle::platform::CUDAPlace(0), true);
+  t9.copy_(t8, paddle::platform::CPUPlace(), true);
+  auto* dt9_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())
+                          ->value()
+                          .data<float>();
+  CHECK_EQ(dt9_tmp_ptr[0], 6.0f);
+  CHECK_EQ(dt9_tmp_ptr[1], 11.0f);
+  CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())->height(),
+           2);
+
+  VLOG(6) << "Check Tensor Copy_ Dense Tensor";
+  t5.copy_(t3, paddle::platform::CUDAPlace(0), true);
+  t6.copy_(t5, paddle::platform::CPUPlace(), true);
+  auto* dt6_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t6.impl())->data<float>();
+  CHECK_EQ(dt6_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt6_tmp_ptr[1], 10.0f);
+#else
+  t5.copy_(t3, paddle::platform::CPUPlace(), true);
+  auto* dt5_tmp_ptr =
+      std::dynamic_pointer_cast<phi::DenseTensor>(t5.impl())->data<float>();
+  CHECK_EQ(dt5_tmp_ptr[0], 5.0f);
+  CHECK_EQ(dt5_tmp_ptr[1], 10.0f);
+#endif
+  VLOG(6) << "Finish";
 }
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 082ec382c79..7f8fcd351fe 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -226,6 +226,19 @@ static PyObject* tensor_method__copy_to(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* tensor_method_cpu(TensorObject* self, PyObject* args,
+                                   PyObject* kwargs) {
+  EAGER_TRY
+  auto cp_tensor =
+      self->tensor.copy_to(phi::TransToPhiBackend(phi::CPUPlace()), true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
+  egr::EagerUtils::autograd_meta(&cp_tensor)
+      ->SetPersistable(
+          egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
+  return ToPyObject(cp_tensor);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* tensor_method_reconstruct_from_(TensorObject* self,
                                                  PyObject* args,
                                                  PyObject* kwargs) {
@@ -264,7 +277,7 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
         egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
   }
 
-  self->tensor.copy_(src_tensor, blocking);
+  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
 
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index 2572866b8f5..ff8980d727e 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -96,7 +96,7 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
                          "Detected NULL grad"
                          "Please check if you have manually cleared"
                          "the grad inside autograd_meta"));
-  grad->copy_(src, true);
+  grad->copy_(src, self->tensor.inner_place(), true);
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_ZERO
 }
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index c268742fa56..1312710a80f 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -397,7 +397,9 @@ class PADDLE_API Tensor final {
    * @param blocking, Should we copy this in sync way.
    * @return void
    */
-  void copy_(const Tensor& src, const bool blocking);
+  void copy_(const Tensor& src,
+             const phi::Place& target_place,
+             const bool blocking);
   /**
    * @brief Cast datatype from one to another
    *
diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt
index 42bf7a8103f..4cbca072362 100644
--- a/paddle/phi/api/lib/CMakeLists.txt
+++ b/paddle/phi/api/lib/CMakeLists.txt
@@ -148,4 +148,4 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph
 cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl)
 cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl)
 
-cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api)
+cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta)
diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc
index 885e29b27fa..cc797507e68 100644
--- a/paddle/phi/api/lib/tensor_method.cc
+++ b/paddle/phi/api/lib/tensor_method.cc
@@ -19,9 +19,12 @@ limitations under the License. */
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/tensor_base.h"
 
+#include "paddle/phi/api/lib/api_gen_utils.h"
+#include "paddle/phi/api/lib/kernel_dispatch.h"
+#include "paddle/phi/infermeta/unary.h"
+
 namespace paddle {
 namespace experimental {
-
 // declare cast api
 Tensor cast(const Tensor &x, DataType out_dtype);
 Tensor copy_to(const Tensor &x, Backend backend, bool blocking);
@@ -67,12 +70,18 @@ template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<double>>(
 template PADDLE_API Tensor
 Tensor::copy_to<phi::dtype::float16>(const PlaceType &target_place) const;
 
-void Tensor::copy_(const Tensor &src, bool blocking) {
+void Tensor::copy_(const Tensor &src,
+                   const phi::Place &target_place,
+                   bool blocking) {
   if (!src.is_initialized()) {
+    VLOG(8) << "Src is empty, skip copy";
     return;
   }
+  // Prepare copy kernel key and outputs
+  auto kernel_key_set = ParseKernelKeyByInputArgs(src);
+  KernelType kernel_type = ParseKernelTypeByInputArgs(src);
   VLOG(3) << "Deep copy Tensor from " << src.name() << " to " << name();
-  if (defined()) {
+  if (is_initialized()) {
     PADDLE_ENFORCE_EQ(dtype(),
                       src.dtype(),
                       platform::errors::PreconditionNotMet(
@@ -87,10 +96,91 @@ void Tensor::copy_(const Tensor &src, bool blocking) {
                           "Copy cannot be performed!",
                           name(),
                           src.name()));
+    PADDLE_ENFORCE_EQ(target_place,
+                      inner_place(),
+                      platform::errors::PreconditionNotMet(
+                          "The target place %s is different from the place %s "
+                          "that the destination tensor currently holds. "
+                          "Copy cannot be performed!",
+                          target_place.DebugString(),
+                          inner_place().DebugString()));
+    kernel_key_set.backend_set =
+        kernel_key_set.backend_set |
+        BackendSet(phi::TransToPhiBackend(inner_place()));
+  } else {
+    // Deep Copy AutoGrad info from src to self.
+    *autograd_meta_ = *(src.autograd_meta_);
+  }
+
+  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+  auto *dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+
+  if (kernel_type == KernelType::DENSE_TENSOR_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::DenseTensor &,
+                                      phi::Place,
+                                      bool,
+                                      phi::DenseTensor *);
+    SetKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::DenseTensor *>(impl_.get()));
+  } else if (kernel_type == KernelType::SELECTED_ROWS_KENREL) {
+    auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+        "copy_sr", {kernel_backend, kernel_layout, kernel_data_type});
+    VLOG(6) << "copy API kernel key: " << kernel_key;
+    VLOG(6) << "copy API kernel: " << kernel;
+    using kernel_signature = void (*)(const platform::DeviceContext &,
+                                      const phi::SelectedRows &,
+                                      phi::Place,
+                                      bool,
+                                      phi::SelectedRows *);
+    SetSelectedRowsKernelOutput(kernel_backend, this);
+    phi::MetaTensor meta_out(impl_.get());
+    phi::UnchangedInferMeta(
+        MakeMetaTensor(
+            *(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+        &meta_out);
+    auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 (*(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
+                 target_place,
+                 blocking,
+                 static_cast<phi::SelectedRows *>(impl_.get()));
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "We currently only support copying DenseTensor and SelectedRows; "
+        "if you need to copy other tensor types, please raise an issue."));
   }
-  auto copy_tensor =
-      src.copy_to(phi::TransToPhiBackend(src.inner_place()), blocking);
-  set_impl(copy_tensor.impl());
 }
 
 }  // namespace experimental
diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.cc b/paddle/phi/kernels/selected_rows/copy_kernel.cc
new file mode 100644
index 00000000000..cf71ab0583f
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/copy_kernel.cc
@@ -0,0 +1,49 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/selected_rows/copy_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/copy_kernel.h"
+namespace phi {
+namespace sr {
+
+template <typename Context>
+void Copy(const Context& dev_ctx,
+          const SelectedRows& src,
+          Place dst_place,
+          bool blocking,
+          SelectedRows* dst) {
+  if (src.value().Holder() != dst->value().Holder() ||
+      src.value().data() != dst->value().data()) {
+    dst->set_rows(src.rows());
+    dst->set_height(src.height());
+  }
+  phi::Copy<Context>(
+      dev_ctx, src.value(), dst_place, blocking, dst->mutable_value());
+}
+
+}  // namespace sr
+}  // namespace phi
+
+PD_REGISTER_GENERAL_KERNEL(
+    copy_sr, CPU, ALL_LAYOUT, phi::sr::Copy<phi::CPUContext>, ALL_DTYPE) {}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_REGISTER_GENERAL_KERNEL(
+    copy_sr, GPU, ALL_LAYOUT, phi::sr::Copy<phi::GPUContext>, ALL_DTYPE) {}
+#endif
diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.h b/paddle/phi/kernels/selected_rows/copy_kernel.h
new file mode 100644
index 00000000000..4aa848bea2a
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/copy_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/selected_rows.h"
+#include "paddle/phi/core/sparse_csr_tensor.h"
+
+namespace phi {
+namespace sr {
+
+template <typename Context>
+void Copy(const Context& dev_ctx,
+          const SelectedRows& src,
+          Place dst_place,
+          bool blocking,
+          SelectedRows* dst);
+
+}  // namespace sr
+}  // namespace phi
-- 
GitLab