diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc
index c6a773ebe5fc77600dea614129c1163a69c504a7..ad3e85d4696b793ec8a0ac09c94f9fbd17872188 100644
--- a/paddle/phi/api/lib/data_transform.cc
+++ b/paddle/phi/api/lib/data_transform.cc
@@ -22,8 +22,6 @@ limitations under the License. */
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/transfer_layout_kernel.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
-
 namespace paddle {
 namespace experimental {
 
@@ -169,8 +167,8 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
   VLOG(3) << "DeviceTransform in, src_place " << tensor.place()
           << " dst_place: " << dst_place;
 
+  auto& pool = phi::DeviceContextPool::Instance();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  auto& pool = paddle::platform::DeviceContextPool::Instance();
   // NOTE(yy): TransDataPlace should wait for computation of input.
   if (!platform::is_cuda_pinned_place(tensor.place())) {
     pool.Get(tensor.place())->Wait();
@@ -188,7 +186,13 @@
   // But the embarrassment is that this solution makes training
   // slower.
   phi::DenseTensor out;
-  paddle::framework::TensorCopySync(tensor, dst_place, &out);
+  phi::DeviceContext* dev_ctx;
+  if (dst_place.GetType() != AllocationType::CPU) {
+    dev_ctx = pool.Get(dst_place);
+  } else {
+    dev_ctx = pool.Get(tensor.place());
+  }
+  phi::Copy(*dev_ctx, tensor, dst_place, true, &out);
   return out;
 }
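
Here TensorCopySync's implicit context lookup becomes explicit: the pool handle is hoisted above the CUDA-only block because the fallback path now needs it too, and the non-CPU side's context is preferred since only a device context knows the stream on which the memcpy must be ordered. A minimal standalone sketch of the same pattern (helper name is illustrative, not part of the patch):

    #include "paddle/phi/backends/all_context.h"
    #include "paddle/phi/core/tensor_utils.h"

    // Sketch: synchronous copy to an arbitrary place, choosing the context
    // the same way TransDataPlace does above.
    phi::DenseTensor CopyToPlaceSync(const phi::DenseTensor& src,
                                     const phi::Place& dst_place) {
      auto& pool = phi::DeviceContextPool::Instance();
      // Prefer the non-CPU side's context; a plain CPU context carries no
      // device stream to order the transfer on.
      phi::DeviceContext* dev_ctx =
          dst_place.GetType() != phi::AllocationType::CPU
              ? pool.Get(dst_place)
              : pool.Get(src.place());
      phi::DenseTensor out;
      phi::Copy(*dev_ctx, src, dst_place, /*blocking=*/true, &out);
      return out;
    }
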
diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt
index 3b2314b0963cf6ba6f53f7758bbe4c3e9e551fa8..e90cdc9e0663abe516c932d49b4572debe420c4b 100644
--- a/paddle/phi/backends/CMakeLists.txt
+++ b/paddle/phi/backends/CMakeLists.txt
@@ -63,7 +63,7 @@ if(WITH_CUSTOM_DEVICE)
   cc_test(
     custom_device_test
     SRCS custom/custom_device_test.cc
-    DEPS phi_backends phi_device_context gradient_accumulator)
+    DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator)
   cc_test(
     capi_test
     SRCS custom/capi_test.cc
diff --git a/paddle/phi/common/CMakeLists.txt b/paddle/phi/common/CMakeLists.txt
index d96cb3e895a1ae52034d1f243ed8cd3664cc906c..bbe40970f75bc0077eee5b7e4f27611eef297711 100644
--- a/paddle/phi/common/CMakeLists.txt
+++ b/paddle/phi/common/CMakeLists.txt
@@ -15,8 +15,8 @@ endif()
 cc_library(
   scalar
   SRCS scalar.cc
-  DEPS phi_enforce tensor)
+  DEPS phi_enforce phi_tensor_utils)
 cc_library(
   int_array
   SRCS int_array.cc
-  DEPS phi_enforce tensor)
+  DEPS phi_enforce phi_tensor_utils)
diff --git a/paddle/phi/common/int_array.cc b/paddle/phi/common/int_array.cc
index 4aadae48c158fecf3335e2a96b324984eea2cf6c..707d4513df5f3ebf0e2c7700258dec0ae64aad64 100644
--- a/paddle/phi/common/int_array.cc
+++ b/paddle/phi/common/int_array.cc
@@ -14,8 +14,10 @@ limitations under the License. */
 
 #include "paddle/phi/common/int_array.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/place.h"
+#include "paddle/phi/core/tensor_utils.h"
 
 namespace paddle {
 namespace experimental {
 
@@ -28,7 +30,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
     AssignDataFromTensor(tensor);
   } else {
     phi::DenseTensor tensor_tmp;
-    paddle::framework::TensorCopySync(tensor, CPUPlace(), &tensor_tmp);
+    phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+    auto dev_ctx = pool.Get(tensor.place());
+    phi::Copy(*dev_ctx, tensor, CPUPlace(), true, &tensor_tmp);
     AssignDataFromTensor(tensor_tmp);
   }
 }
@@ -45,8 +49,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
         array_.push_back(*tensor_list[i].template data<int32_t>());
       } else {
         phi::DenseTensor tensor_tmp;
-        paddle::framework::TensorCopySync(
-            tensor_list[i], CPUPlace(), &tensor_tmp);
+        phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+        auto dev_ctx = pool.Get(tensor_list[i].place());
+        phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
         array_.push_back(*tensor_tmp.template data<int32_t>());
       }
       break;
@@ -55,8 +60,9 @@
         array_.push_back(*tensor_list[i].template data<int64_t>());
       } else {
         phi::DenseTensor tensor_tmp;
-        paddle::framework::TensorCopySync(
-            tensor_list[i], CPUPlace(), &tensor_tmp);
+        phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+        auto dev_ctx = pool.Get(tensor_list[i].place());
+        phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
         array_.push_back(*tensor_tmp.template data<int64_t>());
       }
       break;
diff --git a/paddle/phi/common/scalar.cc b/paddle/phi/common/scalar.cc
index b558739418d71111d60d6a44e609c66fa7666056..1b161d9ac6088e65abf204458ee000a92ff0bf64 100644
--- a/paddle/phi/common/scalar.cc
+++ b/paddle/phi/common/scalar.cc
@@ -14,9 +14,11 @@ limitations under the License. */
 
 #include "paddle/phi/common/scalar.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/tensor_utils.h"
 
 namespace paddle {
 namespace experimental {
 
@@ -31,9 +33,11 @@ ScalarBase<phi::DenseTensor>::ScalarBase(const phi::DenseTensor& tensor_in)
           "now Tensor has `%d` elements",
           tensor_in.numel()));
   auto cpu_place = phi::CPUPlace();
-  if (!paddle::platform::is_same_place(tensor_in.place(), cpu_place)) {
+  if (tensor_in.place().GetType() != phi::AllocationType::CPU) {
     phi::DenseTensor tensor;
-    framework::TensorCopySync(tensor_in, cpu_place, &tensor);
+    phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
+    auto dev_ctx = pool.Get(tensor_in.place());
+    phi::Copy(*dev_ctx, tensor_in, cpu_place, true, &tensor);
     GetDataFromTensor(tensor);
   } else {
     GetDataFromTensor(tensor_in);
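
The Scalar and IntArray constructors above share one pattern: a device tensor is staged through a blocking copy to CPU before its value is read. A minimal sketch of that staging step (helper name is illustrative, not from the patch):

    #include "paddle/phi/backends/all_context.h"
    #include "paddle/phi/core/tensor_utils.h"

    // Blocking device-to-host staging, as used by the Scalar/IntArray
    // constructors above (illustrative helper).
    phi::DenseTensor StageToCpu(const phi::DenseTensor& t) {
      if (t.place().GetType() == phi::AllocationType::CPU) {
        return t;  // already host-resident; no copy needed
      }
      phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
      phi::DeviceContext* dev_ctx = pool.Get(t.place());  // source device's context
      phi::DenseTensor cpu_tensor;
      phi::Copy(*dev_ctx, t, phi::CPUPlace(), /*blocking=*/true, &cpu_tensor);
      return cpu_tensor;
    }
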
diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc
index 379558b0b5de626afe984134cea444da20595088..79f4388c096bdef6c1702aaa21fecac64e3cdda8 100644
--- a/paddle/phi/core/tensor_utils.cc
+++ b/paddle/phi/core/tensor_utils.cc
@@ -36,7 +36,7 @@ void Copy(const Context& dev_ctx,
   const auto& src_place = src.place();
 
   if (&src == dst) {
-    if (paddle::platform::is_same_place(src_place, dst_place)) {
+    if (src_place.GetType() == dst_place.GetType()) {
       VLOG(6) << "Skip copy the same data(" << src_ptr << ") from "
               << src_place << " to " << dst_place;
     } else {
@@ -54,24 +54,24 @@
   dst->Resize(src.dims());
 
   void* dst_ptr = nullptr;
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     dst_ptr = dev_ctx.HostAlloc(dst, src.dtype());
 #ifdef PADDLE_WITH_MKLDNN
     dst->set_layout(src.layout());
 #endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  } else if (paddle::platform::is_gpu_place(dst_place) ||
-             paddle::platform::is_cuda_pinned_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::GPU ||
+             dst_place.GetType() == AllocationType::GPUPINNED) {
     dst_ptr = dev_ctx.Alloc(
-        dst, src.dtype(), 0, paddle::platform::is_cuda_pinned_place(dst_place));
+        dst, src.dtype(), 0, dst_place.GetType() == AllocationType::GPUPINNED);
 #endif
 #ifdef PADDLE_WITH_XPU
-  } else if (paddle::platform::is_xpu_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::XPU) {
     dst_ptr = dev_ctx.Alloc(dst, src.dtype());
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  } else if (paddle::platform::is_custom_place(dst_place)) {
+  } else if (dst_place.GetType() == AllocationType::CUSTOM) {
     dst_ptr = dev_ctx.Alloc(dst, src.dtype());
 #endif
   }
@@ -98,22 +98,22 @@
   VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
   CHECK(dst->layout() == src.layout());
 
-  if (paddle::platform::is_cpu_place(src_place) &&
-      paddle::platform::is_cpu_place(dst_place)) {
+  if (src_place.GetType() == AllocationType::CPU &&
+      dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  } else if ((paddle::platform::is_cpu_place(src_place) ||
-              paddle::platform::is_cuda_pinned_place(src_place)) &&  // NOLINT
-             (paddle::platform::is_cpu_place(dst_place) ||
-              paddle::platform::is_cuda_pinned_place(dst_place))) {
+  } else if ((src_place.GetType() == AllocationType::CPU ||
+              src_place.GetType() == AllocationType::GPUPINNED) &&  // NOLINT
+             (dst_place.GetType() == AllocationType::CPU ||
+              dst_place.GetType() == AllocationType::GPUPINNED)) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, nullptr);
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
     auto src_gpu_place = src_place;
     auto dst_cpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, expected GPUPlace, but actually %s.",
@@ -131,14 +131,14 @@
                  : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
     paddle::memory::Copy(
         dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
-  } else if ((paddle::platform::is_cpu_place(src_place) ||
-              paddle::platform::is_cuda_pinned_place(src_place)) &&  // NOLINT
-             paddle::platform::is_gpu_place(dst_place)) {
+  } else if ((src_place.GetType() == AllocationType::CPU ||
+              src_place.GetType() == AllocationType::GPUPINNED) &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPU) {
     auto src_cpu_place = src_place;
     auto dst_gpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, expected GPUPlace, but actually %s.",
@@ -156,13 +156,13 @@
                  : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
     paddle::memory::Copy(
         dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_gpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPU) {
     auto src_gpu_place = src_place;
     auto dst_gpu_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, expected GPUPlace, but actually %s.",
@@ -170,20 +170,16 @@
     auto stream =
         blocking ? nullptr
                 : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
-    if (paddle::platform::is_same_place(src_place, dst_place)) {
+    if (src_place.GetType() == dst_place.GetType()) {
       paddle::memory::Copy(
           dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
     } else {
-      if (paddle::platform::is_same_place(ctx_place, src_place)) {
+      if (ctx_place.GetType() == src_place.GetType()) {
         paddle::memory::Copy(
             dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
-        paddle::platform::DeviceContextPool::Instance()
-            .Get(src.place())
-            ->Wait();
-      } else if (paddle::platform::is_same_place(ctx_place, dst_place)) {
-        paddle::platform::DeviceContextPool::Instance()
-            .Get(src.place())
-            ->Wait();
+        phi::DeviceContextPool::Instance().Get(src.place())->Wait();
+      } else if (ctx_place.GetType() == dst_place.GetType()) {
+        phi::DeviceContextPool::Instance().Get(src.place())->Wait();
         paddle::memory::Copy(
             dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
       } else {
@@ -191,13 +187,13 @@
         PADDLE_THROW(errors::Unavailable(
             "Context place does not match the source and destination place."));
       }
     }
-  } else if (paddle::platform::is_gpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cuda_pinned_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::GPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::GPUPINNED) {
     auto src_gpu_place = src_place;
     auto dst_cuda_pinned_place = dst_place;
     auto ctx_place = dev_ctx.GetPlace();
     PADDLE_ENFORCE_EQ(
-        paddle::platform::is_gpu_place(ctx_place),
+        ctx_place.GetType() == AllocationType::GPU,
         true,
         errors::PreconditionNotMet(
             "Context place error, expected GPUPlace, but actually %s.",
@@ -217,14 +213,14 @@
         dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
 #endif
 #ifdef PADDLE_WITH_XPU
-  } else if (paddle::platform::is_xpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::XPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
-  } else if (paddle::platform::is_cpu_place(src_place) &&
-             paddle::platform::is_xpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CPU &&
+             dst_place.GetType() == AllocationType::XPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
-  } else if (paddle::platform::is_xpu_place(src_place) &&
-             paddle::platform::is_xpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::XPU &&
+             dst_place.GetType() == AllocationType::XPU) {
     if (src_ptr == dst_ptr) {
       VLOG(3) << "Skip copy the same data async from " << src_place << " to "
               << dst_place;
@@ -233,32 +229,26 @@ void Copy(const Context& dev_ctx,
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  } else if (paddle::platform::is_custom_place(src_place) &&  // NOLINT
-             paddle::platform::is_cpu_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CUSTOM &&  // NOLINT
+             dst_place.GetType() == AllocationType::CPU) {
     auto stream =
         blocking
            ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_cpu_place(src_place) &&  // NOLINT
-             paddle::platform::is_custom_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CPU &&  // NOLINT
+             dst_place.GetType() == AllocationType::CUSTOM) {
     auto stream =
         blocking
            ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_custom_place(src_place) &&  // NOLINT
-             paddle::platform::is_custom_place(dst_place)) {
+  } else if (src_place.GetType() == AllocationType::CUSTOM &&  // NOLINT
+             dst_place.GetType() == AllocationType::CUSTOM) {
     auto stream =
         blocking
           ? nullptr
-            : reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
-                  dev_ctx)
-                  .stream();
+            : reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
 #endif
   } else {
@@ -435,11 +425,11 @@ void TensorFromVector(const std::vector<T>& src,
   auto dst_ptr = static_cast<void*>(dst->data<T>());
   auto size = src.size() * sizeof(T);
 
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -450,7 +440,7 @@ void TensorFromVector(const std::vector<T>& src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -461,7 +451,7 @@ void TensorFromVector(const std::vector<T>& src,
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -490,11 +480,11 @@ void TensorFromVector(const std::vector<bool>& src,
   auto dst_ptr = ctx.template Alloc<bool>(dst);
   auto size = src.size() * sizeof(bool);
 
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
-#ifdef PADDLE_WITH_CUDA
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -505,13 +495,13 @@ void TensorFromVector(const std::vector<bool>& src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     auto stream = reinterpret_cast<const phi::CustomContext&>(ctx).stream();
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -583,11 +573,11 @@ void TensorFromArray(const T* src,
   auto dst_ptr = static_cast<void*>(dst->data<T>());
   auto size = array_size * sizeof(T);
 
-  if (paddle::platform::is_cpu_place(dst_place)) {
+  if (dst_place.GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -598,7 +588,7 @@ void TensorFromArray(const T* src,
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -609,7 +599,7 @@ void TensorFromArray(const T* src,
   }
 #endif
 #ifdef PADDLE_WITH_XPU
-  else if (paddle::platform::is_xpu_place(dst_place)) {  // NOLINT
+  else if (dst_place.GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
   }
 #endif
@@ -684,11 +674,11 @@ void TensorToVector(const phi::DenseTensor& src,
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(dst->data());
 
-  if (paddle::platform::is_cpu_place(src.place())) {
+  if (src.place().GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -699,12 +689,12 @@ void TensorToVector(const phi::DenseTensor& src,
   }
 #endif
 #if defined(PADDLE_WITH_XPU)
-  else if (paddle::platform::is_xpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
   }
@@ -728,11 +718,11 @@ void TensorToVector(const phi::DenseTensor& src,
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(array);
 
-  if (paddle::platform::is_cpu_place(src.place())) {
+  if (src.place().GetType() == AllocationType::CPU) {
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  else if (paddle::platform::is_gpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::GPU) {  // NOLINT
     paddle::memory::Copy(
         dst_place,
         dst_ptr,
@@ -743,12 +733,12 @@ void TensorToVector(const phi::DenseTensor& src,
   }
 #endif
 #if defined(PADDLE_WITH_XPU)
-  else if (paddle::platform::is_xpu_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::XPU) {  // NOLINT
     paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
   }
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
-  else if (paddle::platform::is_custom_place(src.place())) {  // NOLINT
+  else if (src.place().GetType() == AllocationType::CUSTOM) {  // NOLINT
     paddle::memory::Copy(
         dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
   }
@@ -805,7 +795,7 @@ void TensorToVector(const phi::DenseTensor& src,
                     std::vector<T>* dst) {
   auto dst_ptr = static_cast<void*>(dst->data());
   PADDLE_ENFORCE_EQ(
-      paddle::platform::is_cpu_place(src.place()),
+      src.place().GetType() == AllocationType::CPU,
       true,
       phi::errors::InvalidArgument(
          "The input tensor should be CPU device, but actually it is in %s.",
@@ -821,12 +811,12 @@ void TensorToVector(const phi::DenseTensor& src,
                     std::vector<bool>* dst) {
   bool* array = new bool[src.numel()];
 
-  paddle::platform::CPUPlace dst_place{};
+  phi::CPUPlace dst_place{};
   dst->resize(src.numel());
   auto dst_ptr = static_cast<void*>(array);
 
   PADDLE_ENFORCE_EQ(
-      paddle::platform::is_cpu_place(src.place()),
+      src.place().GetType() == AllocationType::CPU,
       true,
       phi::errors::InvalidArgument(
           "The input tensor should be CPU device, but actually it is in %s.",
@@ -891,7 +881,7 @@ phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
 template <typename T>
 T GetValue(const phi::DenseTensor* x) {
   T value = static_cast<T>(0);
-  if (!paddle::platform::is_cpu_place(x->place())) {
+  if (x->place().GetType() != AllocationType::CPU) {
     phi::DenseTensor cpu_x{};
     phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
     phi::DeviceContext* dev_ctx = pool.Get(x->place());
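
One behavioral nuance in the Copy changes above: paddle::platform::is_same_place compared the full Place (allocation type plus device id), while the replacement compares only GetType(). In the GPU-to-GPU branch this appears to make the cross-device else-path unreachable, since two GPU places always have equal types. A small illustration of the difference (standalone, not part of the patch):

    #include "paddle/phi/common/place.h"

    // GetType() equality is coarser than full Place equality.
    void PlaceComparisonSketch() {
      phi::Place gpu0(phi::AllocationType::GPU, 0);
      phi::Place gpu1(phi::AllocationType::GPU, 1);

      bool same_type = gpu0.GetType() == gpu1.GetType();  // true: both GPU
      bool same_place = gpu0 == gpu1;                     // false: device ids differ
      (void)same_type;
      (void)same_place;
    }
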
diff --git a/paddle/phi/kernels/cpu/amp_kernel.cc b/paddle/phi/kernels/cpu/amp_kernel.cc
index 23048ba337df888f8aafa957c473840bec8d45fa..7625339042589c89e670e50392d9c251c4aed32c 100644
--- a/paddle/phi/kernels/cpu/amp_kernel.cc
+++ b/paddle/phi/kernels/cpu/amp_kernel.cc
@@ -24,8 +24,6 @@
 #include "paddle/phi/kernels/isfinite_kernel.h"
 #include "paddle/phi/kernels/reduce_all_kernel.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
-
 namespace phi {
 
 // Utils
diff --git a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
index 49555410f99201ebec6adf8b0708c8f0ab4f8b9f..c2da486e9f7521b21f2953977738d52c2e5ddf87 100644
--- a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
@@ -163,7 +162,7 @@ void BatchNormGradRawKernel(const Context& ctx,
   }
 
   if (d_x && (N * sample_size) == 1 && !use_global_stats) {
-    paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+    phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
     return;
   }
diff --git a/paddle/phi/kernels/cpu/batch_norm_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_kernel.cc
index 332df1d9f137ebf82db97af1dad24d56d85d8c91..8768b78c6ff07abee91b6648a955159f31533d09 100644
--- a/paddle/phi/kernels/cpu/batch_norm_kernel.cc
+++ b/paddle/phi/kernels/cpu/batch_norm_kernel.cc
@@ -14,7 +14,6 @@
 
 #include "paddle/phi/kernels/batch_norm_kernel.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -106,7 +105,7 @@ void BatchNormKernel(const Context& ctx,
   if ((N * sample_size) == 1) {
     // Only 1 element in normalization dimension,
     // we skip the batch norm calculation, let y = x.
-    paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+    phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     return;
   }
diff --git a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc
index 0d549ae46e2170ef202a323417c073a9b631e2e9..ba257be5e2c4e5fab43f75f4cb3f7a69cc419da0 100644
--- a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc
@@ -16,7 +16,6 @@
 
 #include <vector>
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
diff --git a/paddle/phi/kernels/cpu/cross_kernel.cc b/paddle/phi/kernels/cpu/cross_kernel.cc
index a37efa2d3ccdbf8ad8a385fbe51c72c5162862ea..55e25ffca4c8ca3141586b69e0e57d14d526e95f 100644
--- a/paddle/phi/kernels/cpu/cross_kernel.cc
+++ b/paddle/phi/kernels/cpu/cross_kernel.cc
@@ -14,7 +14,6 @@
 
 #include "paddle/phi/kernels/cross_kernel.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
diff --git a/paddle/phi/kernels/cpu/interpolate_kernel.cc b/paddle/phi/kernels/cpu/interpolate_kernel.cc
index 13a97b4f5644e50bdc892956d3c4efc41cffb096..1cdde3a7b1e1a1220e091bdc136f187dcbaeb870 100644
--- a/paddle/phi/kernels/cpu/interpolate_kernel.cc
+++ b/paddle/phi/kernels/cpu/interpolate_kernel.cc
@@ -572,7 +572,7 @@ static void Interpolate1DCPUFwd(
   dev_ctx.template Alloc<T>(output);
 
   if (in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -702,7 +702,7 @@ static void Interpolate2DCPUFwd(
   dev_ctx.template Alloc<T>(output);
 
   if (in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -897,7 +897,7 @@ static void Interpolate3DCPUFwd(
   dev_ctx.template Alloc<T>(output);
 
   if (in_d == out_d && in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
     return;
   }
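
The blocking flag preserves the old helpers' semantics: TensorCopySync maps to blocking = true (the host waits for completion), TensorCopy maps to blocking = false (the copy is only enqueued on the context's stream). A hedged illustration of the correspondence (wrapper is a sketch, not from the patch):

    // Sketch of the two blocking modes of phi::Copy.
    template <typename Context>
    void CopyLikeOldHelpers(const Context& dev_ctx,
                            const phi::DenseTensor& x,
                            phi::DenseTensor* sync_out,
                            phi::DenseTensor* async_out) {
      // TensorCopySync(x, place, out): host waits until the copy finishes.
      phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), /*blocking=*/true, sync_out);
      // TensorCopy(x, place, out): only enqueued; later work queued on the
      // same context is still correctly ordered after the copy.
      phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), /*blocking=*/false, async_out);
    }
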
diff --git a/paddle/phi/kernels/funcs/adam_functors.h b/paddle/phi/kernels/funcs/adam_functors.h
index 4edc83ca30a28f6aa233f58bb9710f7a62870df6..e508c11030a64ed2b95d7b95a1eaf7e7ac585ada 100644
--- a/paddle/phi/kernels/funcs/adam_functors.h
+++ b/paddle/phi/kernels/funcs/adam_functors.h
@@ -23,7 +23,6 @@
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_header.h"
 // See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/memory/memcpy.h"
 #endif
 
diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h
index 53b0577fc29d776a38a927d095f4bf3db88a0fdf..2a11be43b5df696a02fac6942293f5448c315bc1 100644
--- a/paddle/phi/kernels/funcs/interpolate_function.h
+++ b/paddle/phi/kernels/funcs/interpolate_function.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -83,8 +82,10 @@ inline std::vector<int> get_new_shape(
     const std::vector<const DenseTensor*>& list_new_shape_tensor) {
   // get tensor from
   std::vector<int> vec_new_shape;
+  auto& pool = phi::DeviceContextPool::Instance();
   for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
     auto tensor = list_new_shape_tensor[i];
+    phi::DeviceContext* dev_ctx = pool.Get(tensor->place());
     PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) ||
                           tensor->dims() == phi::make_ddim({}),
                       true,
@@ -96,15 +97,14 @@
 #ifdef PADDLE_WITH_XPU
     if (tensor->place().GetType() == phi::AllocationType::XPU) {
       DenseTensor temp;
-      paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
+      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
       vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
       continue;
     }
 #endif
-    if (paddle::platform::is_gpu_place(tensor->place())) {
+    if (tensor->place().GetType() == phi::AllocationType::GPU) {
       DenseTensor temp;
-      paddle::framework::TensorCopySync(
-          *tensor, paddle::platform::CPUPlace(), &temp);
+      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
       vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
     } else {
       vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
@@ -120,22 +120,24 @@ inline std::vector<T> get_new_data_from_tensor(
   std::vector<T> vec_new_data;
   auto* new_data = new_data_tensor->data<T>();
   DenseTensor cpu_starts_tensor;
+  auto& pool = phi::DeviceContextPool::Instance();
+  phi::DeviceContext* dev_ctx = pool.Get(new_data_tensor->place());
   if (paddle::platform::is_gpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #ifdef PADDLE_WITH_ASCEND_CL
   if (paddle::platform::is_npu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #endif
 #ifdef PADDLE_WITH_XPU
   if (paddle::platform::is_xpu_place(new_data_tensor->place())) {
-    paddle::framework::TensorCopySync(
-        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
+    phi::Copy(
+        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
 #endif
diff --git a/paddle/phi/kernels/funcs/math_function.h b/paddle/phi/kernels/funcs/math_function.h
index 6f1cac49352e3a97420a1ed341ccc1119b4674bd..7a4143c875c5fcce6df7c5e93039fac60624f844 100644
--- a/paddle/phi/kernels/funcs/math_function.h
+++ b/paddle/phi/kernels/funcs/math_function.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
diff --git a/paddle/phi/kernels/gpu/amp_kernel.cu b/paddle/phi/kernels/gpu/amp_kernel.cu
index 919663a75e6cc9efb7e57c377a19d0a9fcec4ff0..a17f698d431e24c419c7a9ad45177af0c3c9f69f 100644
--- a/paddle/phi/kernels/gpu/amp_kernel.cu
+++ b/paddle/phi/kernels/gpu/amp_kernel.cu
@@ -19,7 +19,6 @@
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/impl/amp_kernel_impl.h"
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/memory/memory.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu b/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
index a9cc8f591be7c3cb2e9122280fc99c684283f7fd..7acfd33e94a9a415d892440956a071479a9c4665 100644
--- a/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
@@ -16,7 +16,6 @@
 
 #include <vector>
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
@@ -86,8 +85,7 @@ void BroadcastTensorsGradKernel(const Context& ctx,
     ctx.template Alloc<T>(output_tensor);
     if (just_copy) {
       // Turns out to be a No-Op, simply copy tensors
-      paddle::framework::TensorCopy(
-          *input_tensor, ctx.GetPlace(), ctx, output_tensor);
+      phi::Copy(ctx, *input_tensor, ctx.GetPlace(), false, output_tensor);
     } else {
       // reduce_sum implementation on CUDA
       funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
diff --git a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
index a98fdfaa8fc10a12eb1bc8bc2f6053361909d4fc..698ec44e6123bd357c8606486da750d98a1aca18 100644
--- a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
+++ b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
@@ -29,7 +29,7 @@ namespace cub = hipcub;
 #endif
 
 #include <iterator>
 #include <random>
 
-#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/tensor_utils.h"
diff --git a/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu b/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
index 072c38b1303070b1bbd56ac43a761ae7858867b6..dcbf003281f24a8897d94390a5b81bd369e1f3e5 100644
--- a/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
+++ b/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
@@ -27,8 +27,8 @@ namespace cub = hipcub;
 #endif
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/core/generator.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -103,7 +103,7 @@ struct OneHotGenerator<GPUContext, T> {
     DenseTensor input_tensor;
     input_tensor.Resize(out->dims());
     ctx.template Alloc<T>(&input_tensor);
-    paddle::framework::TensorCopy(*out, ctx.GetPlace(), &input_tensor);
+    phi::Copy(ctx, *out, ctx.GetPlace(), false, &input_tensor);
     funcs::set_constant(ctx, out, 0.0);
     OneHotCUDAKernel<T, thread_size>
         <<<block_size, thread_size, 0, ctx.stream()>>>(
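
In the GPU kernels above, phi::Copy with blocking=false enqueues the copy on the context's own stream, so a subsequent kernel launch on the same stream (like OneHotCUDAKernel on ctx.stream()) is ordered after the copy without an explicit synchronization. A sketch of that pattern under those assumptions (kernel and helper names are illustrative):

    // Async copy and kernel launch share ctx.stream(); the stream's FIFO
    // ordering means the kernel reads the copied values, no ctx.Wait() needed.
    template <typename T>
    __global__ void ScaleInPlace(T* p, int64_t n) {
      int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) p[i] *= static_cast<T>(2);
    }

    template <typename T>
    void CopyThenScale(const phi::GPUContext& ctx,
                       const phi::DenseTensor& src,
                       phi::DenseTensor* dst) {
      dst->Resize(src.dims());
      ctx.template Alloc<T>(dst);
      phi::Copy(ctx, src, ctx.GetPlace(), /*blocking=*/false, dst);  // enqueued
      int64_t n = dst->numel();
      int threads = 256;
      int blocks = static_cast<int>((n + threads - 1) / threads);
      ScaleInPlace<T><<<blocks, threads, 0, ctx.stream()>>>(dst->data<T>(), n);
    }
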
diff --git a/paddle/phi/kernels/gpu/interpolate_kernel.cu b/paddle/phi/kernels/gpu/interpolate_kernel.cu
index 2510ff8a5453a598ee1c24f4c71dead6beea5edf..9aa5d55201c0b97088078bfe0f3a7f2ae9f6ba08 100644
--- a/paddle/phi/kernels/gpu/interpolate_kernel.cu
+++ b/paddle/phi/kernels/gpu/interpolate_kernel.cu
@@ -693,8 +693,7 @@ static void Interpolate1DCUDAFwd(
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
     auto size_data = sizes.data<int>();
     out_w = size_data[0];
   }
@@ -714,7 +713,7 @@
   auto output_data = dev_ctx.template Alloc<T>(output);
 
   if (in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -834,8 +833,8 @@
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
+
     auto size_data = sizes.data<int>();
     out_h = size_data[0];
     out_w = size_data[1];
@@ -862,7 +861,7 @@
   auto output_data = dev_ctx.template Alloc<T>(output);
 
   if (in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
@@ -1110,8 +1109,7 @@
   }
   if (out_size) {
     DenseTensor sizes;
-    paddle::framework::TensorCopySync(
-        *out_size, paddle::platform::CPUPlace(), &sizes);
+    phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
     auto size_data = sizes.data<int>();
     out_d = size_data[0];
     out_h = size_data[1];
@@ -1144,7 +1142,7 @@
   auto output_data = dev_ctx.template Alloc<T>(output);
 
   if (in_d == out_d && in_h == out_h && in_w == out_w) {
-    paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
+    phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
     return;
   }
diff --git a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h
index e66632498f67029a8abda3ea499573f32272fc19..dfe162a270a9b59a8b12ec789c0ee63c447ac071 100644
--- a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h
+++ b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -58,8 +57,7 @@ void MeshgridForward(const Context& ctx,
       view_shape[i] = shape[i];
 
       DenseTensor reshape_ins_tensor;
-      paddle::framework::TensorCopy(
-          *ins[i], ctx.GetPlace(), ctx, &reshape_ins_tensor);
+      phi::Copy(ctx, *ins[i], ctx.GetPlace(), false, &reshape_ins_tensor);
       DDim out_dims_reshape = phi::make_ddim(view_shape);
       reshape_ins_tensor.Resize(out_dims_reshape);
       DDim out_dims = phi::make_ddim(shape);
diff --git a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
index f1e8497004520f74f045cf1aa4ea4590f33656b2..c405061adbf5a829ebb450ddf01bd5450c614e06 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
@@ -18,7 +18,6 @@
 
 #include <math.h>
 
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/float16.h"