Unverified · Commit 057cdb95 authored by engineer1109, committed by GitHub

decouple tensor_utils (#50264)

fix X

remove TensorCopy

codestyle

add fluid memory header

fix symbol

fix cmake

fix cmake

fix context

fix header

fix place

fix context

fix context

fix context

fix code

fix custom context

fix custom context

fix copy

fix data_transform

fix style

remove changes of custom

fix scalar
Parent: fcb746cb
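The recurring pattern in this commit: call sites that used paddle::framework::TensorCopySync (or TensorCopy) from fluid now call phi::Copy, fetching a phi::DeviceContext from phi::DeviceContextPool. Below is a minimal sketch of that pattern, built only from calls that appear in the hunks of this commit; the helper name CopyToCpuSync is illustrative and not part of the change itself.

// Sketch of the TensorCopySync -> phi::Copy migration applied throughout this commit.
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"

// Hypothetical helper: synchronously copy any tensor to host memory.
phi::DenseTensor CopyToCpuSync(const phi::DenseTensor& src) {
  phi::DenseTensor dst;
  // Old: paddle::framework::TensorCopySync(src, phi::CPUPlace(), &dst);
  phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
  phi::DeviceContext* dev_ctx = pool.Get(src.place());  // context that owns the source device's stream
  phi::Copy(*dev_ctx, src, phi::CPUPlace(), /*blocking=*/true, &dst);  // blocking => synchronous copy
  return dst;
}

The same replacement shows up in data_transform.cc, int_array.cc, scalar.cc, and the kernels touched below, which is why those targets now depend on phi_tensor_utils instead of the fluid tensor library.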
......@@ -22,8 +22,6 @@ limitations under the License. */
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/transfer_layout_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle {
namespace experimental {
......@@ -169,8 +167,8 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
VLOG(3) << "DeviceTransform in, src_place " << tensor.place()
<< " dst_place: " << dst_place;
auto& pool = phi::DeviceContextPool::Instance();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto& pool = paddle::platform::DeviceContextPool::Instance();
// NOTE(yy): TransDataPlace should wait for computation of input.
if (!platform::is_cuda_pinned_place(tensor.place())) {
pool.Get(tensor.place())->Wait();
......@@ -188,7 +186,13 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor,
// But the embarrassment is that this solution makes training
// slower.
phi::DenseTensor out;
paddle::framework::TensorCopySync(tensor, dst_place, &out);
phi::DeviceContext* dev_ctx;
if (dst_place.GetType() != AllocationType::CPU) {
dev_ctx = pool.Get(dst_place);
} else {
dev_ctx = pool.Get(tensor.place());
}
phi::Copy(*dev_ctx, tensor, dst_place, true, &out);
return out;
}
......
......@@ -63,7 +63,7 @@ if(WITH_CUSTOM_DEVICE)
cc_test(
custom_device_test
SRCS custom/custom_device_test.cc
DEPS phi_backends phi_device_context gradient_accumulator)
DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator)
cc_test(
capi_test
SRCS custom/capi_test.cc
......
......@@ -15,8 +15,8 @@ endif()
cc_library(
scalar
SRCS scalar.cc
DEPS phi_enforce tensor)
DEPS phi_enforce phi_tensor_utils)
cc_library(
int_array
SRCS int_array.cc
DEPS phi_enforce tensor)
DEPS phi_enforce phi_tensor_utils)
......@@ -14,8 +14,10 @@ limitations under the License. */
#include "paddle/phi/common/int_array.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace experimental {
......@@ -28,7 +30,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
AssignDataFromTensor(tensor);
} else {
phi::DenseTensor tensor_tmp;
paddle::framework::TensorCopySync(tensor, CPUPlace(), &tensor_tmp);
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(tensor.place());
phi::Copy(*dev_ctx, tensor, CPUPlace(), true, &tensor_tmp);
AssignDataFromTensor(tensor_tmp);
}
}
......@@ -45,8 +49,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
array_.push_back(*tensor_list[i].template data<int32_t>());
} else {
phi::DenseTensor tensor_tmp;
paddle::framework::TensorCopySync(
tensor_list[i], CPUPlace(), &tensor_tmp);
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(tensor_list[i].place());
phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
array_.push_back(*tensor_tmp.template data<int32_t>());
}
break;
......@@ -55,8 +60,9 @@ IntArrayBase<phi::DenseTensor>::IntArrayBase(
array_.push_back(*tensor_list[i].template data<int64_t>());
} else {
phi::DenseTensor tensor_tmp;
paddle::framework::TensorCopySync(
tensor_list[i], CPUPlace(), &tensor_tmp);
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(tensor_list[i].place());
phi::Copy(*dev_ctx, tensor_list[i], CPUPlace(), true, &tensor_tmp);
array_.push_back(*tensor_tmp.template data<int64_t>());
}
break;
......
......@@ -14,9 +14,11 @@ limitations under the License. */
#include "paddle/phi/common/scalar.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace experimental {
......@@ -31,9 +33,11 @@ ScalarBase<phi::DenseTensor>::ScalarBase(const phi::DenseTensor& tensor_in)
"now Tensor has `%d` elements",
tensor_in.numel()));
auto cpu_place = phi::CPUPlace();
if (!paddle::platform::is_same_place(tensor_in.place(), cpu_place)) {
if (tensor_in.place().GetType() != phi::AllocationType::CPU) {
phi::DenseTensor tensor;
framework::TensorCopySync(tensor_in, cpu_place, &tensor);
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(tensor_in.place());
phi::Copy(*dev_ctx, tensor_in, cpu_place, true, &tensor);
GetDataFromTensor(tensor);
} else {
GetDataFromTensor(tensor_in);
......
......@@ -36,7 +36,7 @@ void Copy(const Context& dev_ctx,
const auto& src_place = src.place();
if (&src == dst) {
if (paddle::platform::is_same_place(src_place, dst_place)) {
if (src_place.GetType() == dst_place.GetType()) {
VLOG(6) << "Skip copy the same data(" << src_ptr << ") from " << src_place
<< " to " << dst_place;
} else {
......@@ -54,24 +54,24 @@ void Copy(const Context& dev_ctx,
dst->Resize(src.dims());
void* dst_ptr = nullptr;
if (paddle::platform::is_cpu_place(dst_place)) {
if (dst_place.GetType() == AllocationType::CPU) {
dst_ptr = dev_ctx.HostAlloc(dst, src.dtype());
#ifdef PADDLE_WITH_MKLDNN
dst->set_layout(src.layout());
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
} else if (paddle::platform::is_gpu_place(dst_place) ||
paddle::platform::is_cuda_pinned_place(dst_place)) {
} else if (dst_place.GetType() == AllocationType::GPU ||
dst_place.GetType() == AllocationType::GPUPINNED) {
dst_ptr = dev_ctx.Alloc(
dst, src.dtype(), 0, paddle::platform::is_cuda_pinned_place(dst_place));
dst, src.dtype(), 0, dst_place.GetType() == AllocationType::GPUPINNED);
#endif
#ifdef PADDLE_WITH_XPU
} else if (paddle::platform::is_xpu_place(dst_place)) {
} else if (dst_place.GetType() == AllocationType::XPU) {
dst_ptr = dev_ctx.Alloc(dst, src.dtype());
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (paddle::platform::is_custom_place(dst_place)) {
} else if (dst_place.GetType() == AllocationType::CUSTOM) {
dst_ptr = dev_ctx.Alloc(dst, src.dtype());
#endif
}
......@@ -98,22 +98,22 @@ void Copy(const Context& dev_ctx,
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
CHECK(dst->layout() == src.layout());
if (paddle::platform::is_cpu_place(src_place) &&
paddle::platform::is_cpu_place(dst_place)) {
if (src_place.GetType() == AllocationType::CPU &&
dst_place.GetType() == AllocationType::CPU) {
paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
} else if ((paddle::platform::is_cpu_place(src_place) ||
paddle::platform::is_cuda_pinned_place(src_place)) && // NOLINT
(paddle::platform::is_cpu_place(dst_place) ||
paddle::platform::is_cuda_pinned_place(dst_place))) {
} else if ((src_place.GetType() == AllocationType::CPU ||
src_place.GetType() == AllocationType::GPUPINNED) && // NOLINT
(dst_place.GetType() == AllocationType::CPU ||
dst_place.GetType() == AllocationType::GPUPINNED)) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, nullptr);
} else if (paddle::platform::is_gpu_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::GPU && // NOLINT
dst_place.GetType() == AllocationType::CPU) {
auto src_gpu_place = src_place;
auto dst_cpu_place = dst_place;
auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place),
ctx_place.GetType() == AllocationType::GPU,
true,
errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
......@@ -131,14 +131,14 @@ void Copy(const Context& dev_ctx,
: reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
paddle::memory::Copy(
dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else if ((paddle::platform::is_cpu_place(src_place) ||
paddle::platform::is_cuda_pinned_place(src_place)) && // NOLINT
paddle::platform::is_gpu_place(dst_place)) {
} else if ((src_place.GetType() == AllocationType::CPU ||
src_place.GetType() == AllocationType::GPUPINNED) && // NOLINT
dst_place.GetType() == AllocationType::GPU) {
auto src_cpu_place = src_place;
auto dst_gpu_place = dst_place;
auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place),
ctx_place.GetType() == AllocationType::GPU,
true,
errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
......@@ -156,13 +156,13 @@ void Copy(const Context& dev_ctx,
: reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
paddle::memory::Copy(
dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
} else if (paddle::platform::is_gpu_place(src_place) && // NOLINT
paddle::platform::is_gpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::GPU && // NOLINT
dst_place.GetType() == AllocationType::GPU) {
auto src_gpu_place = src_place;
auto dst_gpu_place = dst_place;
auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place),
ctx_place.GetType() == AllocationType::GPU,
true,
errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
......@@ -170,20 +170,16 @@ void Copy(const Context& dev_ctx,
auto stream =
blocking ? nullptr
: reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
if (paddle::platform::is_same_place(src_place, dst_place)) {
if (src_place.GetType() == dst_place.GetType()) {
paddle::memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else {
if (paddle::platform::is_same_place(ctx_place, src_place)) {
if (ctx_place.GetType() == src_place.GetType()) {
paddle::memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
paddle::platform::DeviceContextPool::Instance()
.Get(src.place())
->Wait();
} else if (paddle::platform::is_same_place(ctx_place, dst_place)) {
paddle::platform::DeviceContextPool::Instance()
.Get(src.place())
->Wait();
phi::DeviceContextPool::Instance().Get(src.place())->Wait();
} else if (ctx_place.GetType() == dst_place.GetType()) {
phi::DeviceContextPool::Instance().Get(src.place())->Wait();
paddle::memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else {
......@@ -191,13 +187,13 @@ void Copy(const Context& dev_ctx,
"Context place dose not match the source and destination place."));
}
}
} else if (paddle::platform::is_gpu_place(src_place) && // NOLINT
paddle::platform::is_cuda_pinned_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::GPU && // NOLINT
dst_place.GetType() == AllocationType::GPUPINNED) {
auto src_gpu_place = src_place;
auto dst_cuda_pinned_place = dst_place;
auto ctx_place = dev_ctx.GetPlace();
PADDLE_ENFORCE_EQ(
paddle::platform::is_gpu_place(ctx_place),
ctx_place.GetType() == AllocationType::GPU,
true,
errors::PreconditionNotMet(
"Context place error, excepted GPUPlace, but actually %s.",
......@@ -217,14 +213,14 @@ void Copy(const Context& dev_ctx,
dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
#endif
#ifdef PADDLE_WITH_XPU
} else if (paddle::platform::is_xpu_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::XPU && // NOLINT
dst_place.GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else if (paddle::platform::is_cpu_place(src_place) &&
paddle::platform::is_xpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::CPU &&
dst_place.GetType() == AllocationType::XPU) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else if (paddle::platform::is_xpu_place(src_place) &&
paddle::platform::is_xpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::XPU &&
dst_place.GetType() == AllocationType::XPU) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
......@@ -233,32 +229,26 @@ void Copy(const Context& dev_ctx,
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (paddle::platform::is_custom_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::CUSTOM && // NOLINT
dst_place.GetType() == AllocationType::CPU) {
auto stream =
blocking
? nullptr
: reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
dev_ctx)
.stream();
: reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
} else if (paddle::platform::is_cpu_place(src_place) && // NOLINT
paddle::platform::is_custom_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::CPU && // NOLINT
dst_place.GetType() == AllocationType::CUSTOM) {
auto stream =
blocking
? nullptr
: reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
dev_ctx)
.stream();
: reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
} else if (paddle::platform::is_custom_place(src_place) && // NOLINT
paddle::platform::is_custom_place(dst_place)) {
} else if (src_place.GetType() == AllocationType::CUSTOM && // NOLINT
dst_place.GetType() == AllocationType::CUSTOM) {
auto stream =
blocking
? nullptr
: reinterpret_cast<const paddle::platform::CustomDeviceContext&>(
dev_ctx)
.stream();
: reinterpret_cast<const phi::CustomContext&>(dev_ctx).stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
#endif
} else {
......@@ -435,11 +425,11 @@ void TensorFromVector(const std::vector<T>& src,
auto dst_ptr = static_cast<void*>(dst->data<T>());
auto size = src.size() * sizeof(T);
if (paddle::platform::is_cpu_place(dst_place)) {
if (dst_place.GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
else if (paddle::platform::is_gpu_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::GPU) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -450,7 +440,7 @@ void TensorFromVector(const std::vector<T>& src,
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (paddle::platform::is_custom_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::CUSTOM) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -461,7 +451,7 @@ void TensorFromVector(const std::vector<T>& src,
}
#endif
#ifdef PADDLE_WITH_XPU
else if (paddle::platform::is_xpu_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::XPU) { // NOLINT
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#endif
......@@ -490,11 +480,11 @@ void TensorFromVector(const std::vector<bool>& src,
auto dst_ptr = ctx.template Alloc<bool>(dst);
auto size = src.size() * sizeof(bool);
if (paddle::platform::is_cpu_place(dst_place)) {
if (dst_place.GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (paddle::platform::is_gpu_place(dst_place)) { // NOLINT
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
else if (dst_place.GetType() == AllocationType::GPU) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -505,13 +495,13 @@ void TensorFromVector(const std::vector<bool>& src,
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (paddle::platform::is_custom_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::CUSTOM) { // NOLINT
auto stream = reinterpret_cast<const phi::CustomContext&>(ctx).stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
}
#endif
#ifdef PADDLE_WITH_XPU
else if (paddle::platform::is_xpu_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::XPU) { // NOLINT
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#endif
......@@ -583,11 +573,11 @@ void TensorFromArray(const T* src,
auto dst_ptr = static_cast<void*>(dst->data<T>());
auto size = array_size * sizeof(T);
if (paddle::platform::is_cpu_place(dst_place)) {
if (dst_place.GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
else if (paddle::platform::is_gpu_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::GPU) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -598,7 +588,7 @@ void TensorFromArray(const T* src,
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (paddle::platform::is_custom_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::CUSTOM) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -609,7 +599,7 @@ void TensorFromArray(const T* src,
}
#endif
#ifdef PADDLE_WITH_XPU
else if (paddle::platform::is_xpu_place(dst_place)) { // NOLINT
else if (dst_place.GetType() == AllocationType::XPU) { // NOLINT
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
#endif
......@@ -684,11 +674,11 @@ void TensorToVector(const phi::DenseTensor& src,
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(dst->data());
if (paddle::platform::is_cpu_place(src.place())) {
if (src.place().GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
else if (paddle::platform::is_gpu_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::GPU) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -699,12 +689,12 @@ void TensorToVector(const phi::DenseTensor& src,
}
#endif
#if defined(PADDLE_WITH_XPU)
else if (paddle::platform::is_xpu_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::XPU) { // NOLINT
paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (paddle::platform::is_custom_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::CUSTOM) { // NOLINT
paddle::memory::Copy(
dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
}
......@@ -728,11 +718,11 @@ void TensorToVector(const phi::DenseTensor& src,
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(array);
if (paddle::platform::is_cpu_place(src.place())) {
if (src.place().GetType() == AllocationType::CPU) {
paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
else if (paddle::platform::is_gpu_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::GPU) { // NOLINT
paddle::memory::Copy(
dst_place,
dst_ptr,
......@@ -743,12 +733,12 @@ void TensorToVector(const phi::DenseTensor& src,
}
#endif
#if defined(PADDLE_WITH_XPU)
else if (paddle::platform::is_xpu_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::XPU) { // NOLINT
paddle::memory::Copy(dst_place, dst_ptr, src.place(), src_ptr, size);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else if (paddle::platform::is_custom_place(src.place())) { // NOLINT
else if (src.place().GetType() == AllocationType::CUSTOM) { // NOLINT
paddle::memory::Copy(
dst_place, dst_ptr, src.place(), src_ptr, size, nullptr);
}
......@@ -805,7 +795,7 @@ void TensorToVector(const phi::DenseTensor& src, std::vector<T>* dst) {
auto dst_ptr = static_cast<void*>(dst->data());
PADDLE_ENFORCE_EQ(
paddle::platform::is_cpu_place(src.place()),
src.place().GetType() == AllocationType::CPU,
true,
phi::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
......@@ -821,12 +811,12 @@ void TensorToVector(const phi::DenseTensor& src, std::vector<bool>* dst) {
bool* array = new bool[src.numel()];
paddle::platform::CPUPlace dst_place{};
phi::CPUPlace dst_place{};
dst->resize(src.numel());
auto dst_ptr = static_cast<void*>(array);
PADDLE_ENFORCE_EQ(
paddle::platform::is_cpu_place(src.place()),
src.place().GetType() == AllocationType::CPU,
true,
phi::errors::InvalidArgument(
"The input tensor should be CPU device, but actually it is in %s.",
......@@ -891,7 +881,7 @@ phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
template <typename T>
T GetValue(const phi::DenseTensor* x) {
T value = static_cast<T>(0);
if (!paddle::platform::is_cpu_place(x->place())) {
if (x->place().GetType() != AllocationType::CPU) {
phi::DenseTensor cpu_x{};
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
phi::DeviceContext* dev_ctx = pool.Get(x->place());
......
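In the Copy kernel above, the blocking flag decides whether a transfer uses the device context's stream: a blocking copy resolves the stream to nullptr and returns only after the data has arrived, while a non-blocking copy is enqueued on the GPUContext stream and must be synchronized before the result is read. A hedged sketch of the two call styles under a GPU build; gpu_tensor and the local names are illustrative, not from the commit.

// Assumed usage of phi::Copy's blocking flag (GPU build); gpu_tensor is an
// illustrative phi::DenseTensor already resident on a GPU place.
phi::DeviceContext* dev_ctx =
    phi::DeviceContextPool::Instance().Get(gpu_tensor.place());
phi::DenseTensor host_copy;
phi::DenseTensor device_copy;
// blocking = true: the stream argument becomes nullptr, so the call returns
// only after the bytes have landed on the host.
phi::Copy(*dev_ctx, gpu_tensor, phi::CPUPlace(), /*blocking=*/true, &host_copy);
// blocking = false: the transfer is enqueued on dev_ctx's stream; the host must
// wait on that stream before touching device_copy's data.
phi::Copy(*dev_ctx, gpu_tensor, gpu_tensor.place(), /*blocking=*/false, &device_copy);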
......@@ -24,8 +24,6 @@
#include "paddle/phi/kernels/isfinite_kernel.h"
#include "paddle/phi/kernels/reduce_all_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace phi {
// Utils
......
......@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_kernel.h"
......@@ -163,7 +162,7 @@ void BatchNormGradRawKernel(const Context& ctx,
}
if (d_x && (N * sample_size) == 1 && !use_global_stats) {
paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
return;
}
......
......@@ -14,7 +14,6 @@
#include "paddle/phi/kernels/batch_norm_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
......@@ -106,7 +105,7 @@ void BatchNormKernel(const Context& ctx,
if ((N * sample_size) == 1) {
// Only 1 element in normalization dimension,
// we skip the batch norm calculation, let y = x.
paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
phi::Copy(ctx, x, ctx.GetPlace(), false, y);
return;
}
......
......@@ -16,7 +16,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
......
......@@ -14,7 +14,6 @@
#include "paddle/phi/kernels/cross_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
......
......@@ -572,7 +572,7 @@ static void Interpolate1DCPUFwd(
dev_ctx.template Alloc<T>(output);
if (in_w == out_w) {
paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
return;
}
......@@ -702,7 +702,7 @@ static void Interpolate2DCPUFwd(
dev_ctx.template Alloc<T>(output);
if (in_h == out_h && in_w == out_w) {
paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
return;
}
......@@ -897,7 +897,7 @@ static void Interpolate3DCPUFwd(
dev_ctx.template Alloc<T>(output);
if (in_d == out_d && in_h == out_h && in_w == out_w) {
paddle::framework::TensorCopy(x, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, output);
return;
}
......
......@@ -23,7 +23,6 @@
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_header.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#endif
......
......@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
......@@ -83,8 +82,10 @@ inline std::vector<int> get_new_shape(
const std::vector<const DenseTensor*>& list_new_shape_tensor) {
// get tensor from
std::vector<int> vec_new_shape;
auto& pool = phi::DeviceContextPool::Instance();
for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
auto tensor = list_new_shape_tensor[i];
phi::DeviceContext* dev_ctx = pool.Get(tensor->place());
PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) ||
tensor->dims() == phi::make_ddim({}),
true,
......@@ -96,15 +97,14 @@ inline std::vector<int> get_new_shape(
#ifdef PADDLE_WITH_XPU
if (tensor->place().GetType() == phi::AllocationType::XPU) {
DenseTensor temp;
paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
continue;
}
#endif
if (paddle::platform::is_gpu_place(tensor->place())) {
if (tensor->place().GetType() == phi::AllocationType::GPU) {
DenseTensor temp;
paddle::framework::TensorCopySync(
*tensor, paddle::platform::CPUPlace(), &temp);
phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
} else {
vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
......@@ -120,22 +120,24 @@ inline std::vector<T> get_new_data_from_tensor(
std::vector<T> vec_new_data;
auto* new_data = new_data_tensor->data<T>();
DenseTensor cpu_starts_tensor;
auto& pool = phi::DeviceContextPool::Instance();
phi::DeviceContext* dev_ctx = pool.Get(new_data_tensor->place());
if (paddle::platform::is_gpu_place(new_data_tensor->place())) {
paddle::framework::TensorCopySync(
*new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
phi::Copy(
*dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
new_data = cpu_starts_tensor.data<T>();
}
#ifdef PADDLE_WITH_ASCEND_CL
if (paddle::platform::is_npu_place(new_data_tensor->place())) {
paddle::framework::TensorCopySync(
*new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
phi::Copy(
*dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
new_data = cpu_starts_tensor.data<T>();
}
#endif
#ifdef PADDLE_WITH_XPU
if (paddle::platform::is_xpu_place(new_data_tensor->place())) {
paddle::framework::TensorCopySync(
*new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
phi::Copy(
*dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
new_data = cpu_starts_tensor.data<T>();
}
#endif
......
......@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
......
......@@ -19,7 +19,6 @@
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/impl/amp_kernel_impl.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memory.h"
namespace phi {
......
......@@ -16,7 +16,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
......@@ -86,8 +85,7 @@ void BroadcastTensorsGradKernel(const Context& ctx,
ctx.template Alloc<T>(output_tensor);
if (just_copy) {
// Turns out to be a No-Op, simply copy tensors
paddle::framework::TensorCopy(
*input_tensor, ctx.GetPlace(), ctx, output_tensor);
phi::Copy(ctx, *input_tensor, ctx.GetPlace(), false, output_tensor);
} else {
// reduce_sum implementation on CUDA
funcs::ReduceKernel<T, T, kps::AddFunctor, kps::IdentityFunctor<T>>(
......
......@@ -29,7 +29,7 @@ namespace cub = hipcub;
#include <iterator>
#include <random>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/tensor_utils.h"
......
......@@ -27,8 +27,8 @@
namespace cub = hipcub;
#endif
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/generator.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/distribution_helper.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......@@ -103,7 +103,7 @@ struct OneHotGenerator<GPUContext, T> {
DenseTensor input_tensor;
input_tensor.Resize(out->dims());
ctx.template Alloc<T>(&input_tensor);
paddle::framework::TensorCopy(*out, ctx.GetPlace(), &input_tensor);
phi::Copy(ctx, *out, ctx.GetPlace(), false, &input_tensor);
funcs::set_constant(ctx, out, 0.0);
OneHotCUDAKernel<T, thread_size>
<<<block_size, thread_size, 0, ctx.stream()>>>(
......
......@@ -693,8 +693,7 @@ static void Interpolate1DCUDAFwd(
}
if (out_size) {
DenseTensor sizes;
paddle::framework::TensorCopySync(
*out_size, paddle::platform::CPUPlace(), &sizes);
phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
auto size_data = sizes.data<int>();
out_w = size_data[0];
}
......@@ -714,7 +713,7 @@ static void Interpolate1DCUDAFwd(
auto output_data = dev_ctx.template Alloc<T>(output);
if (in_w == out_w) {
paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
return;
}
......@@ -834,8 +833,8 @@ static void Interpolate2DCUDAFwd(
}
if (out_size) {
DenseTensor sizes;
paddle::framework::TensorCopySync(
*out_size, paddle::platform::CPUPlace(), &sizes);
phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
auto size_data = sizes.data<int>();
out_h = size_data[0];
out_w = size_data[1];
......@@ -862,7 +861,7 @@ static void Interpolate2DCUDAFwd(
auto output_data = dev_ctx.template Alloc<T>(output);
if (in_h == out_h && in_w == out_w) {
paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
return;
}
......@@ -1110,8 +1109,7 @@ static void Interpolate3DCUDAFwd(
}
if (out_size) {
DenseTensor sizes;
paddle::framework::TensorCopySync(
*out_size, paddle::platform::CPUPlace(), &sizes);
phi::Copy(dev_ctx, *out_size, phi::CPUPlace(), true, &sizes);
auto size_data = sizes.data<int>();
out_d = size_data[0];
out_h = size_data[1];
......@@ -1144,7 +1142,7 @@ static void Interpolate3DCUDAFwd(
auto output_data = dev_ctx.template Alloc<T>(output);
if (in_d == out_d && in_h == out_h && in_w == out_w) {
paddle::framework::TensorCopy(input, dev_ctx.GetPlace(), output);
phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output);
return;
}
......
......@@ -14,7 +14,6 @@
#pragma once
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
......@@ -58,8 +57,7 @@ void MeshgridForward(const Context& ctx,
view_shape[i] = shape[i];
DenseTensor reshape_ins_tensor;
paddle::framework::TensorCopy(
*ins[i], ctx.GetPlace(), ctx, &reshape_ins_tensor);
phi::Copy(ctx, *ins[i], ctx.GetPlace(), false, &reshape_ins_tensor);
DDim out_dims_reshape = phi::make_ddim(view_shape);
reshape_ins_tensor.Resize(out_dims_reshape);
DDim out_dims = phi::make_ddim(shape);
......
......@@ -18,7 +18,6 @@
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
......