Unverified  Commit 057cdb95  authored by engineer1109, committed by GitHub

decouple tensor_utils (#50264)

fix X

remove TensorCopy

codestyle

add fluid memory header

fix symbol

fix cmake

fix cmake

fix context

fix header

fix place

fix context

fix context

fix context

fix code

fix custom context

fix custom context

fix copy

fix data_transform

fix style

remove changes of custom

fix scalar
Parent fcb746cb
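The pattern that recurs throughout this diff is the replacement of fluid's `paddle::framework::TensorCopy` / `TensorCopySync` with `phi::Copy`, which takes an explicit device context (fetched from `phi::DeviceContextPool`) and a `blocking` flag, so phi components no longer need `paddle/fluid/framework/tensor_util.h`. Below is a minimal sketch of that call-site migration, mirroring the changes in `int_array.cc` and `scalar.cc`; the helper name `CopyToCPU` is illustrative only and not part of the commit.

```cpp
#include "paddle/phi/backends/all_context.h"  // phi::DeviceContextPool
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"     // phi::Copy

// Illustrative helper: copy a tensor living on any place to CPU the way this
// commit does it. blocking=true mirrors the old TensorCopySync; passing
// false mirrors the asynchronous TensorCopy overloads replaced elsewhere.
phi::DenseTensor CopyToCPU(const phi::DenseTensor& src) {
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
  phi::DeviceContext* dev_ctx = pool.Get(src.place());
  phi::DenseTensor dst;
  phi::Copy(*dev_ctx, src, phi::CPUPlace(), /*blocking=*/true, &dst);
  return dst;
}
```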
@@ -22,8 +22,6 @@ limitations under the License. */
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/transfer_layout_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 namespace paddle {
 namespace experimental {
@@ -169,8 +167,8 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
   VLOG(3) << "DeviceTransform in, src_place " << tensor.place()
           << " dst_place: " << dst_place;
+  auto& pool = phi::DeviceContextPool::Instance();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  auto& pool = paddle::platform::DeviceContextPool::Instance();
   // NOTE(yy): TransDataPlace should wait for computation of input.
   if (!platform::is_cuda_pinned_place(tensor.place())) {
     pool.Get(tensor.place())->Wait();
@@ -188,7 +186,13 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
   // But the embarrassment is that this solution this solution makes training
   // slower.
   phi::DenseTensor out;
-  paddle::framework::TensorCopySync(tensor, dst_place, &out);
+  phi::DeviceContext* dev_ctx;
+  if (dst_place.GetType() != AllocationType::CPU) {
+    dev_ctx = pool.Get(dst_place);
+  } else {
+    dev_ctx = pool.Get(tensor.place());
+  }
+  phi::Copy(*dev_ctx, tensor, dst_place, true, &out);
   return out;
 }
...
@@ -63,7 +63,7 @@ if(WITH_CUSTOM_DEVICE)
   cc_test(
     custom_device_test
     SRCS custom/custom_device_test.cc
-    DEPS phi_backends phi_device_context gradient_accumulator)
+    DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator)
   cc_test(
     capi_test
     SRCS custom/capi_test.cc
...
@@ -15,8 +15,8 @@ endif()
 cc_library(
   scalar
   SRCS scalar.cc
-  DEPS phi_enforce tensor)
+  DEPS phi_enforce phi_tensor_utils)
 cc_library(
   int_array
   SRCS int_array.cc
-  DEPS phi_enforce tensor)
+  DEPS phi_enforce phi_tensor_utils)
@@ -14,8 +14,10 @@ limitations under the License. */
 #include "paddle/phi/common/int_array.h"
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/place.h"
+#include "paddle/phi/core/tensor_utils.h"
 namespace paddle {
 namespace experimental {
@@ -28,7 +30,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
     AssignDataFromTensor(tensor);
   } else {
     phi::DenseTensor tensor_tmp;
-    paddle::framework::TensorCopySync(tensor, CPUPlace(), &tensor_tmp);
+    phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+    auto dev_ctx = pool.Get(tensor.place());
+    phi::Copy(*dev_ctx, tensor, CPUPlace(), true, &tensor_tmp);
     AssignDataFromTensor(tensor_tmp);
   }
 }
@@ -45,8 +49,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
         array_.push_back(*tensor_list[i].template data<int32_t>());
       } else {
         phi::DenseTensor tensor_tmp;
-        paddle::framework::TensorCopySync(
-            tensor_list[i], CPUPlace(), &tensor_tmp);
+        phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+        auto dev_ctx = pool.Get(tensor_list[i].place());
+        phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
         array_.push_back(*tensor_tmp.template data<int32_t>());
       }
       break;
@@ -55,8 +60,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
         array_.push_back(*tensor_list[i].template data<int64_t>());
       } else {
         phi::DenseTensor tensor_tmp;
-        paddle::framework::TensorCopySync(
-            tensor_list[i], CPUPlace(), &tensor_tmp);
+        phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+        auto dev_ctx = pool.Get(tensor_list[i].place());
+        phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
         array_.push_back(*tensor_tmp.template data<int64_t>());
       }
       break;
...
@@ -14,9 +14,11 @@ limitations under the License. */
 #include "paddle/phi/common/scalar.h"
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/tensor_utils.h"
 namespace paddle {
 namespace experimental {
@@ -31,9 +33,11 @@ ScalarBase<phi::DenseTensor>::ScalarBase(const phi::DenseTensor& tensor_in)
                         "now Tensor has `%d` elements",
                         tensor_in.numel()));
   auto cpu_place = phi::CPUPlace();
-  if (!paddle::platform::is_same_place(tensor_in.place(), cpu_place)) {
+  if (tensor_in.place().GetType() != phi::AllocationType::CPU) {
     phi::DenseTensor tensor;
-    framework::TensorCopySync(tensor_in, cpu_place, &tensor);
+    phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+    auto dev_ctx = pool.Get(tensor_in.place());
+    phi::Copy(*dev_ctx, tensor_in, cpu_place, true, &tensor);
     GetDataFromTensor(tensor);
   } else {
     GetDataFromTensor(tensor_in);
...
@@ -36,7 +36,7 @@ void Copy(const Context& dev_ctx,
   const auto& src_place = src.place();
   if (&src == dst) {
-    if (paddle::platform::is_same_place(src_place, dst_place)) {
+    if (src_place.GetType() == dst_place.GetType()) {
       VLOG(6) << "Skip copy the same data(" << src_ptr << ") from " << src_place
               << " to " << dst_place;
     } else {
@@ -54,24 +54,24 @@ void Copy(const Context& dev_ctx,
   dst->Resize(src.dims());
   void* dst_ptr = nullptr;
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     dst_ptr = dev_ctx.HostAlloc(dst, src.dtype());
 #ifdef PADDLE_WITH_MKLDNN
     dst->set_layout(src.layout());
 #endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  } else if (paddle::platform::is_gpu_place(dst_place) ||
-             paddle::platform::is_cuda_pinned_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::GPU ||
+             dst_place.GetType() == AllocationType::GPUPINNED) {
     dst_ptr = dev_ctx.Alloc(
-        dst, src.dtype(), 0, paddle::platform::is_cuda_pinned_place(dst_place));
+        dst, src.dtype(), 0, dst_place.GetType() == AllocationType::GPUPINNED);
 #endif
 #ifdef PADDLE_WITH_XPU
-  } else if (paddle::platform::is_xpu_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::XPU) {
     dst_ptr = dev_ctx.Alloc(dst, src.dtype());
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  } else if (paddle::platform::is_custom_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::CUSTOM) {
     dst_ptr = dev_ctx.Alloc(dst, src.dtype());
 #endif
   }
@@ -98,22 +98,22 @@ void Copy(const Context& dev_ctx,
   VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
   CHECK(dst->layout() == src.layout());
-  if (paddle::platform::is_cpu_place(src_place) &&
-      paddle::platform::is_cpu_place(dst_place)) {
+  if (src_place.GetType() == AllocationType::CPU &&
+      dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  } else if ((paddle::platform::is_cpu_place(src_place) ||
-              paddle::platform::is_cuda_pinned_place(src_place)) &&  // NOLINT
-             (paddle::platform::is_cpu_place(dst_place) ||
-              paddle::platform::is_cuda_pinned_place(dst_place))) {
+  } else if ((src_place.GetType() == AllocationType::CPU ||
+              src_place.GetType() == AllocationType::GPUPINNED) &&  // NOLINT
+             (dst_place.GetType() == AllocationType::CPU ||
+              dst_place.GetType() == AllocationType::GPUPINNED)) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, nullptr);
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
     auto src_gpu_place = src_place;
     auto dst_cpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
@@ -131,14 +131,14 @@ void Copy(const Context& dev_ctx,
             : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
     paddle::memory::Copy(
         dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
-  } else if ((paddle::platform::is_cpu_place(src_place) ||
-              paddle::platform::is_cuda_pinned_place(src_place)) &&  // NOLINT
-             paddle::platform::is_gpu_place(dst_place)) {
+  } else if ((src_place.GetType() == AllocationType::CPU ||
+              src_place.GetType() == AllocationType::GPUPINNED) &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPU) {
     auto src_cpu_place = src_place;
     auto dst_gpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
@@ -156,13 +156,13 @@ void Copy(const Context& dev_ctx,
             : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
     paddle::memory::Copy(
         dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_gpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPU) {
     auto src_gpu_place = src_place;
     auto dst_gpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, excepted GPUPlace, but actually %s.",
@@ -170,20 +170,16 @@ void Copy(const Context& dev_ctx,
     auto stream =
         blocking ? nullptr
                  : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
-    if (paddle::platform::is_same_place(src_place, dst_place)) {
+    if (src_place.GetType() == dst_place.GetType()) {
       paddle::memory::Copy(
           dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
     } else {
-      if (paddle::platform::is_same_place(ctx_place, src_place)) {
+      if (ctx_place.GetType() == src_place.GetType()) {
         paddle::memory::Copy(
             dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
-        paddle::platform::DeviceContextPool::Instance()
-            .Get(src.place())
-            ->Wait();
-      } else if (paddle::platform::is_same_place(ctx_place, dst_place)) {
-        paddle::platform::DeviceContextPool::Instance()
-            .Get(src.place())
-            ->Wait();
+        phi::DeviceContextPool::Instance().Get(src.place())->Wait();
+      } else if (ctx_place.GetType() == dst_place.GetType()) {
+        phi::DeviceContextPool::Instance().Get(src.place())->Wait();
         paddle::memory::Copy(
             dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
       } else {
@@ -191,13 +187,13 @@ void Copy(const Context& dev_ctx,
             "Context place dose not match the source and destination place."));
       }
     }
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cuda_pinned_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPUPINNED) {
    auto src_gpu_place = src_place;
    auto dst_cuda_pinned_place = dst_place;
    auto ctx_place = dev_ctx.GetPlace();
    PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
        true,
        errors::PreconditionNotMet(
            "Context place error, excepted GPUPlace, but actually %s.",
@@ -217,14 +213,14 @@ void Copy(const Context& dev_ctx,
        dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
 #endif
 #ifdef PADDLE_WITH_XPU
-  } else if (paddle::platform::is_xpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::XPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
-  } else if (paddle::platform::is_cpu_place(src_place) &&
-             paddle::platform::is_xpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CPU &&
+             dst_place.GetType() == AllocationType::XPU) {
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
-  } else if (paddle::platform::is_xpu_place(src_place) &&
-             paddle::platform::is_xpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::XPU &&
+             dst_place.GetType() == AllocationType::XPU) {
    if (src_ptr == dst_ptr) {
      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
              << dst_place;
@@ -233,32 +229,26 @@ void Copy(const Context& dev_ctx,
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  } else if (paddle::platform::is_custom_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CUSTOM &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
    auto stream =
        blocking
            ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_cpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_custom_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CUSTOM) {
    auto stream =
        blocking
            ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_custom_place(src_place) &&  // NOLINT
-             paddle::platform::is_custom_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CUSTOM &&  // NOLINT
+             dst_place.GetType() == AllocationType::CUSTOM) {
    auto stream =
        blocking
            ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
    paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
 #endif
  } else {
@@ -435,11 +425,11 @@ void TensorFromVector(const std::vector<T>& src,
   auto dst_ptr = static_cast<void*>(dst->data<T>());
   auto size = src.size() * sizeof(T);
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -450,7 +440,7 @@ void TensorFromVector(const std::vector<T>& src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -461,7 +451,7 @@ void TensorFromVector(const std::vector<T>& src,
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -490,11 +480,11 @@ void TensorFromVector(const std::vector<bool>& src,
   auto dst_ptr = ctx.template Alloc<bool>(dst);
   auto size = src.size() * sizeof(bool);
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
-#ifdef PADDLE_WITH_CUDA
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -505,13 +495,13 @@ void TensorFromVector(const std::vector<bool>& src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     auto stream = reinterpret_cast<const phi::CustomContext&>(ctx).stream();
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -583,11 +573,11 @@ void TensorFromArray(const T* src,
   auto dst_ptr = static_cast<void*>(dst->data<T>());
   auto size = array_size * sizeof(T);
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -598,7 +588,7 @@ void TensorFromArray(const T* src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -609,7 +599,7 @@ void TensorFromArray(const T* src,
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -684,11 +674,11 @@ void TensorToVector(const phi::DenseTensor& src,
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(dst->data());
-  if (paddle::platform::is_cpu_place(src.place())) {
+  if (src.place().GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -699,12 +689,12 @@ void TensorToVector(const phi::DenseTensor& src,
   }
 #endif
 #if defined(PADDLE_WITH_XPU)
-  else if (paddle::platform::is_xpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
   }
@@ -728,11 +718,11 @@ void TensorToVector(const phi::DenseTensor& src,
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(array);
-  if (paddle::platform::is_cpu_place(src.place())) {
+  if (src.place().GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -743,12 +733,12 @@ void TensorToVector(const phi::DenseTensor& src,
   }
 #endif
 #if defined(PADDLE_WITH_XPU)
-  else if (paddle::platform::is_xpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
   }
@@ -805,7 +795,7 @@ void TensorToVector(const phi::DenseTensor& src, std::vector<T>* dst) {
   auto dst_ptr = static_cast<void*>(dst->data());
   PADDLE_ENFORCE_EQ(
-      paddle::platform::is_cpu_place(src.place()),
+      src.place().GetType() == AllocationType::CPU,
       true,
       phi::errors::InvalidArgument(
           "The input tensor should be CPU device, but actually it is in %s.",
@@ -821,12 +811,12 @@ void TensorToVector(const phi::DenseTensor& src, std::vector<bool>* dst) {
   bool* array = new bool[src.numel()];
-  paddle::platform::CPUPlace dst_place{};
+  phi::CPUPlace dst_place{};
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(array);
   PADDLE_ENFORCE_EQ(
-      paddle::platform::is_cpu_place(src.place()),
+      src.place().GetType() == AllocationType::CPU,
       true,
       phi::errors::InvalidArgument(
          "The input tensor should be CPU device, but actually it is in %s.",
@@ -891,7 +881,7 @@ phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
 template <typename T>
 T GetValue(const phi::DenseTensor* x) {
   T value = static_cast<T>(0);
-  if (!paddle::platform::is_cpu_place(x->place())) {
+  if (x->place().GetType() != AllocationType::CPU) {
     phi::DenseTensor cpu_x{};
     phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
     phi::DeviceContext* dev_ctx = pool.Get(x->place());
...
@@ -24,8 +24,6 @@
 #include "paddle/phi/kernels/isfinite_kernel.h"
 #include "paddle/phi/kernels/reduce_all_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 namespace phi {
 // Utils
...
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
@@ -163,7 +162,7 @@ void BatchNormGradRawKernel(const Context& ctx,
   }
   if (d_x && (N * sample_size) == 1 && !use_global_stats) {
-    paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+    phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
     return;
   }
...
@@ -14,7 +14,6 @@
 #include "paddle/phi/kernels/batch_norm_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -106,7 +105,7 @@ void BatchNormKernel(const Context& ctx,
   if ((N * sample_size) == 1) {
     // Only 1 element in normalization dimension,
     // we skip the batch norm calculation, let y = x.
-    paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+    phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     return;
   }
...
@@ -16,7 +16,6 @@
 #include <vector>
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
...
@@ -14,7 +14,6 @@
 #include "paddle/phi/kernels/cross_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
...
@@ -572,7 +572,7 @@ static void Interpolate1DCPUFwd(
   dev_ctx.template Alloc<T>(output);
   if (in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -702,7 +702,7 @@ static void Interpolate2DCPUFwd(
   dev_ctx.template Alloc<T>(output);
   if (in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -897,7 +897,7 @@ static void Interpolate3DCPUFwd(
   dev_ctx.template Alloc<T>(output);
   if (in_d == out_d && in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
...
@@ -23,7 +23,6 @@
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_header.h"
 // See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/memory/memcpy.h"
 #endif
...
@@ -14,7 +14,6 @@
 #pragma once
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -83,8 +82,10 @@ inline std::vector<int> get_new_shape(
     const std::vector<const DenseTensor*>& list_new_shape_tensor) {
   // get tensor from
   std::vector<int> vec_new_shape;
+  auto& pool = phi::DeviceContextPool::Instance();
   for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
     auto tensor = list_new_shape_tensor[i];
+    phi::DeviceContext* dev_ctx = pool.Get(tensor->place());
     PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) ||
                           tensor->dims() == phi::make_ddim({}),
                       true,
@@ -96,15 +97,14 @@ inline std::vector<int> get_new_shape(
 #ifdef PADDLE_WITH_XPU
     if (tensor->place().GetType() == phi::AllocationType::XPU) {
       DenseTensor temp;
-      paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
+      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
       vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
       continue;
     }
 #endif
-    if (paddle::platform::is_gpu_place(tensor->place())) {
+    if (tensor->place().GetType() == phi::AllocationType::GPU) {
       DenseTensor temp;
-      paddle::framework::TensorCopySync(
-          *tensor, paddle::platform::CPUPlace(), &temp);
+      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
       vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
     } else {
       vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
@@ -120,22 +120,24 @@ inline std::vector<T> get_new_data_from_tensor(
   std::vector<T> vec_new_data;
   auto* new_data = new_data_tensor->data<T>();
   DenseTensor cpu_starts_tensor;
+  auto& pool = phi::DeviceContextPool::Instance();
+  phi::DeviceContext* dev_ctx = pool.Get(new_data_tensor->place());
   if (paddle::platform::is_gpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #ifdef PADDLE_WITH_ASCEND_CL
   if (paddle::platform::is_npu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #endif
 #ifdef PADDLE_WITH_XPU
   if (paddle::platform::is_xpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #endif
...
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
...
@@ -19,7 +19,6 @@
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/impl/amp_kernel_impl.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/memory/memory.h"
 namespace phi {
...
@@ -16,7 +16,6 @@
 #include <vector>
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
@@ -86,8 +85,7 @@ void BroadcastTensorsGradKernel(const Context& ctx,
     ctx.template Alloc<T>(output_tensor);
     if (just_copy) {
       // Turns out to be a No-Op, simply copy tensors
-      paddle::framework::TensorCopy(
-          *input_tensor, ctx.GetPlace(), ctx, output_tensor);
+      phi::Copy(ctx, *input_tensor, ctx.GetPlace(), false, output_tensor);
     } else {
       // reduce_sum implementation on CUDA
       funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
...
@@ -29,7 +29,7 @@ namespace cub = hipcub;
 #include <iterator>
 #include <random>
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/tensor_utils.h"
...
@@ -27,8 +27,8 @@
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/core/generator.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -103,7 +103,7 @@ struct OneHotGenerator<GPUContext, T> {
     DenseTensor input_tensor;
     input_tensor.Resize(out->dims());
     ctx.template Alloc<T>(&input_tensor);
-    paddle::framework::TensorCopy(*out, ctx.GetPlace(), &input_tensor);
+    phi::Copy(ctx, *out, ctx.GetPlace(), false, &input_tensor);
     funcs::set_constant(ctx, out, 0.0);
     OneHotCUDAKernel<T, thread_size>
         <<<block_size, thread_size, 0, ctx.stream()>>>(
...
@@ -693,8 +693,7 @@ static void Interpolate1DCUDAFwd(
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
     auto size_data = sizes.data<int>();
     out_w = size_data[0];
   }
@@ -714,7 +713,7 @@ static void Interpolate1DCUDAFwd(
   auto output_data = dev_ctx.template Alloc<T>(output);
   if (in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -834,8 +833,8 @@ static void Interpolate2DCUDAFwd(
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
     auto size_data = sizes.data<int>();
     out_h = size_data[0];
     out_w = size_data[1];
@@ -862,7 +861,7 @@ static void Interpolate2DCUDAFwd(
   auto output_data = dev_ctx.template Alloc<T>(output);
   if (in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -1110,8 +1109,7 @@ static void Interpolate3DCUDAFwd(
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
     auto size_data = sizes.data<int>();
     out_d = size_data[0];
     out_h = size_data[1];
@@ -1144,7 +1142,7 @@ static void Interpolate3DCUDAFwd(
   auto output_data = dev_ctx.template Alloc<T>(output);
   if (in_d == out_d && in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
...
@@ -14,7 +14,6 @@
 #pragma once
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -58,8 +57,7 @@ void MeshgridForward(const Context& ctx,
       view_shape[i] = shape[i];
     DenseTensor reshape_ins_tensor;
-    paddle::framework::TensorCopy(
-        *ins[i], ctx.GetPlace(), ctx, &reshape_ins_tensor);
+    phi::Copy(ctx, *ins[i], ctx.GetPlace(), false, &reshape_ins_tensor);
     DDim out_dims_reshape = phi::make_ddim(view_shape);
     reshape_ins_tensor.Resize(out_dims_reshape);
     DDim out_dims = phi::make_ddim(shape);
...
@@ -18,7 +18,6 @@
 #include <vector>
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"
...