Unverified · Commit c1931beb authored by Jiabin Yang, committed by GitHub

fix copy_ problem by doing it with phi copy (#40521)

* fix copy_ problem by doing it with phi copy

* improve test coverage

* refactor copy with sr kernel
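In short, Tensor::copy_ now takes an explicit target place and dispatches to the phi "copy" kernel for DenseTensor inputs and to the new "copy_sr" kernel for SelectedRows inputs. A minimal sketch of the new call pattern (tensor names are illustrative; the updated eager_tensor_test.cc below exercises the same calls):

    // Sketch only: the three-argument copy_ introduced by this commit.
    paddle::experimental::Tensor dst;
    dst.copy_(src, paddle::platform::CPUPlace(), /*blocking=*/true);
    // dst now holds a deep copy of src on the CPU place.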
Parent b1b24463
@@ -17,6 +17,14 @@
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(copy_sr, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(copy_sr, GPU, ALL_LAYOUT);
#endif
namespace eager_test {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
@@ -151,5 +159,50 @@ TEST(EagerVariable, Constructor) {
CHECK_EQ(dt3_tmp_ptr[1], 10.0f);
t4.reset();
CHECK(t4.defined() == false);
VLOG(6) << "Check Tensor Copy_";
std::vector<int64_t> rows = {1, 2};
std::vector<int64_t> dims = {2};
paddle::experimental::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2));
std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
->mutable_value()
->Resize(phi::make_ddim(dims));
auto* dt7_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
->mutable_value()
->mutable_data<float>(paddle::platform::CPUPlace());
dt7_tmp_ptr[0] = 6.0f;
dt7_tmp_ptr[1] = 11.0f;
paddle::experimental::Tensor t8;
paddle::experimental::Tensor t5;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle::experimental::Tensor t6;
paddle::experimental::Tensor t9;
VLOG(6) << "Check Tensor Copy_ Selected Rows";
t8.copy_(t7, paddle::platform::CUDAPlace(0), true);
t9.copy_(t8, paddle::platform::CPUPlace(), true);
auto* dt9_tmp_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())
->value()
.data<float>();
CHECK_EQ(dt9_tmp_ptr[0], 6.0f);
CHECK_EQ(dt9_tmp_ptr[1], 11.0f);
CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(t9.impl())->height(),
2);
VLOG(6) << "Check Tensor Copy_ Dense Tensor";
t5.copy_(t3, paddle::platform::CUDAPlace(0), true);
t6.copy_(t5, paddle::platform::CPUPlace(), true);
auto* dt6_tmp_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(t6.impl())->data<float>();
CHECK_EQ(dt6_tmp_ptr[0], 5.0f);
CHECK_EQ(dt6_tmp_ptr[1], 10.0f);
#else
t5.copy_(t3, paddle::platform::CPUPlace(), true);
auto* dt5_tmp_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(t5.impl())->data<float>();
CHECK_EQ(dt5_tmp_ptr[0], 5.0f);
CHECK_EQ(dt5_tmp_ptr[1], 10.0f);
#endif
VLOG(6) << "Finish";
}
@@ -226,6 +226,19 @@ static PyObject* tensor_method__copy_to(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_cpu(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
auto cp_tensor =
self->tensor.copy_to(phi::TransToPhiBackend(phi::CPUPlace()), true);
egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
egr::EagerUtils::autograd_meta(&cp_tensor)
->SetPersistable(
egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
return ToPyObject(cp_tensor);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_reconstruct_from_(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
@@ -264,7 +277,7 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
}
-  self->tensor.copy_(src_tensor, blocking);
+  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
<< self->tensor.name();
......
@@ -96,7 +96,7 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
"Detected NULL grad"
"Please check if you have manually cleared"
"the grad inside autograd_meta"));
-  grad->copy_(src, true);
+  grad->copy_(src, self->tensor.inner_place(), true);
return 0;
EAGER_CATCH_AND_THROW_RETURN_ZERO
}
......
@@ -397,7 +397,9 @@ class PADDLE_API Tensor final {
* @param blocking, Should we copy this in sync way.
* @return void
*/
-  void copy_(const Tensor& src, const bool blocking);
+  void copy_(const Tensor& src,
+             const phi::Place& target_place,
+             const bool blocking);
/**
* @brief Cast datatype from one to another
*
......
@@ -148,4 +148,4 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph
cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl)
cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl)
-cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api)
+cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta)
@@ -19,9 +19,12 @@ limitations under the License. */
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/tensor_base.h"
#include "paddle/phi/api/lib/api_gen_utils.h"
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace experimental {
// declare cast api
Tensor cast(const Tensor &x, DataType out_dtype);
Tensor copy_to(const Tensor &x, Backend backend, bool blocking);
@@ -67,12 +70,18 @@ template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<double>>(
template PADDLE_API Tensor
Tensor::copy_to<phi::dtype::float16>(const PlaceType &target_place) const;
-void Tensor::copy_(const Tensor &src, bool blocking) {
+void Tensor::copy_(const Tensor &src,
+                   const phi::Place &target_place,
+                   bool blocking) {
if (!src.is_initialized()) {
VLOG(8) << "Src is empty, skip copy";
return;
}
// Prepare copy kernel key and outputs
auto kernel_key_set = ParseKernelKeyByInputArgs(src);
KernelType kernel_type = ParseKernelTypeByInputArgs(src);
VLOG(3) << "Deep copy Tensor from " << src.name() << " to " << name();
-  if (defined()) {
+  if (is_initialized()) {
PADDLE_ENFORCE_EQ(dtype(),
src.dtype(),
platform::errors::PreconditionNotMet(
......@@ -87,10 +96,91 @@ void Tensor::copy_(const Tensor &src, bool blocking) {
"Copy cannot be performed!",
name(),
src.name()));
    PADDLE_ENFORCE_EQ(target_place,
                      inner_place(),
                      platform::errors::PreconditionNotMet(
                          "Target place and the place of the current tensor "
                          "are different: the target is %s, while the current "
                          "tensor holds %s. Copy cannot be performed!",
                          target_place.DebugString(),
                          inner_place().DebugString()));
kernel_key_set.backend_set =
kernel_key_set.backend_set |
BackendSet(phi::TransToPhiBackend(inner_place()));
} else {
// Deep Copy AutoGrad info from src to self.
*autograd_meta_ = *(src.autograd_meta_);
}
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
auto *dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
Backend kernel_backend = Backend::UNDEFINED;
DataLayout kernel_layout = DataLayout::UNDEFINED;
DataType kernel_data_type = DataType::UNDEFINED;
if (kernel_backend == Backend::UNDEFINED ||
kernel_layout == DataLayout::UNDEFINED ||
kernel_data_type == DataType::UNDEFINED) {
if (kernel_backend == Backend::UNDEFINED) {
kernel_backend = kernel_key.backend();
}
if (kernel_layout == DataLayout::UNDEFINED) {
kernel_layout = kernel_key.layout();
}
if (kernel_data_type == DataType::UNDEFINED) {
kernel_data_type = kernel_key.dtype();
}
}
if (kernel_type == KernelType::DENSE_TENSOR_KENREL) {
auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"copy", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "copy API kernel key: " << kernel_key;
VLOG(6) << "copy API kernel: " << kernel;
using kernel_signature = void (*)(const platform::DeviceContext &,
const phi::DenseTensor &,
phi::Place,
bool,
phi::DenseTensor *);
SetKernelOutput(kernel_backend, this);
phi::MetaTensor meta_out(impl_.get());
phi::UnchangedInferMeta(
MakeMetaTensor(
*(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
&meta_out);
auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
(*(std::static_pointer_cast<phi::DenseTensor>(src.impl_))),
target_place,
blocking,
static_cast<phi::DenseTensor *>(impl_.get()));
} else if (kernel_type == KernelType::SELECTED_ROWS_KENREL) {
auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
"copy_sr", {kernel_backend, kernel_layout, kernel_data_type});
VLOG(6) << "copy API kernel key: " << kernel_key;
VLOG(6) << "copy API kernel: " << kernel;
using kernel_signature = void (*)(const platform::DeviceContext &,
const phi::SelectedRows &,
phi::Place,
bool,
phi::SelectedRows *);
SetSelectedRowsKernelOutput(kernel_backend, this);
phi::MetaTensor meta_out(impl_.get());
phi::UnchangedInferMeta(
MakeMetaTensor(
*(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
&meta_out);
auto *kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
(*(std::static_pointer_cast<phi::SelectedRows>(src.impl_))),
target_place,
blocking,
static_cast<phi::SelectedRows *>(impl_.get()));
} else {
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "Only DenseTensor and SelectedRows copy are supported for now. "
        "Please raise an issue if you need to copy other tensor types."));
}
-  auto copy_tensor =
-      src.copy_to(phi::TransToPhiBackend(src.inner_place()), blocking);
-  set_impl(copy_tensor.impl());
}
} // namespace experimental
......
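Taken together, the two branches above give copy_ the following observable behavior (a hedged sketch, not part of the diff; src and dst are illustrative tensors):

    // Uninitialized destination: autograd meta is deep-copied from src,
    // then the phi copy kernel materializes dst at the target place.
    paddle::experimental::Tensor dst;
    dst.copy_(src, paddle::platform::CPUPlace(), /*blocking=*/true);

    // Already-initialized destination: dtype and place must match src and
    // target_place, or the PADDLE_ENFORCE_EQ checks above raise
    // PreconditionNotMet.
    dst.copy_(src, dst.inner_place(), /*blocking=*/true);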
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/selected_rows/copy_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
namespace phi {
namespace sr {
template <typename Context>
void Copy(const Context& dev_ctx,
const SelectedRows& src,
Place dst_place,
bool blocking,
SelectedRows* dst) {
if (src.value().Holder() != dst->value().Holder() ||
src.value().data() != dst->value().data()) {
dst->set_rows(src.rows());
dst->set_height(src.height());
}
phi::Copy<Context>(
dev_ctx, src.value(), dst_place, blocking, dst->mutable_value());
}
} // namespace sr
} // namespace phi
PD_REGISTER_GENERAL_KERNEL(
copy_sr, CPU, ALL_LAYOUT, phi::sr::Copy<phi::CPUContext>, ALL_DTYPE) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_GENERAL_KERNEL(
copy_sr, GPU, ALL_LAYOUT, phi::sr::Copy<phi::GPUContext>, ALL_DTYPE) {}
#endif
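Since phi::sr::Copy refreshes the rows/height metadata only when src and dst do not already share storage, then forwards the dense value() to the existing phi::Copy kernel, direct use of the new kernel looks like this (a sketch under assumptions: dev_ctx is an initialized phi::CPUContext and src is a populated phi::SelectedRows):

    // Sketch, not part of the commit.
    phi::SelectedRows dst;
    phi::sr::Copy<phi::CPUContext>(
        dev_ctx, src, phi::CPUPlace(), /*blocking=*/true, &dst);
    // dst.rows() == src.rows(); dst.height() == src.height();
    // dst.value() now holds a copy of src.value() on CPUPlace.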
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
namespace phi {
namespace sr {
template <typename Context>
void Copy(const Context& dev_ctx,
const SelectedRows& src,
Place dst_place,
bool blocking,
SelectedRows* dst);
} // namespace sr
} // namespace phi