From 5a000900bc4babaafb3ff67bfcc42a0db6cf3da7 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 18 Nov 2021 23:12:18 -0600 Subject: [PATCH] [PTen] Add copy_to and to method for Tensor (#37262) * add copy_to and to method for Tensor * polish msg format * fix details error * fix copy_to test compile failed * fix typo --- paddle/pten/api/CMakeLists.txt | 2 +- paddle/pten/api/all.h | 2 + paddle/pten/api/include/tensor.h | 13 +- paddle/pten/api/include/utils.h | 28 +++++ paddle/pten/api/lib/CMakeLists.txt | 1 + paddle/pten/api/lib/ext_compat_utils.cc | 18 ++- paddle/pten/api/lib/ext_compat_utils.h | 5 +- paddle/pten/api/lib/tensor.cc | 18 +-- paddle/pten/api/lib/utils.cc | 71 +++++++++++ paddle/pten/kernels/cpu/manipulation.cc | 4 +- paddle/pten/kernels/cpu/utils.cc | 6 +- paddle/pten/kernels/cpu/utils.h | 5 +- paddle/pten/kernels/cuda/utils.cu | 42 +++---- paddle/pten/kernels/cuda/utils.h | 2 +- paddle/pten/kernels/xpu/manipulation.cc | 4 +- paddle/pten/kernels/xpu/utils.cc | 1 + paddle/pten/kernels/xpu/utils.h | 1 + paddle/pten/tests/api/CMakeLists.txt | 5 +- paddle/pten/tests/api/test_to_api.cc | 116 ++++++++++++++++++ .../pten/tests/kernels/test_copy_dev_api.cc | 2 +- 20 files changed, 299 insertions(+), 47 deletions(-) create mode 100644 paddle/pten/api/include/utils.h create mode 100644 paddle/pten/api/lib/utils.cc create mode 100644 paddle/pten/tests/api/test_to_api.cc diff --git a/paddle/pten/api/CMakeLists.txt b/paddle/pten/api/CMakeLists.txt index 387da3bc68f..09df2c01fd9 100644 --- a/paddle/pten/api/CMakeLists.txt +++ b/paddle/pten/api/CMakeLists.txt @@ -1,3 +1,3 @@ add_subdirectory(lib) -cc_library(pten_api SRCS all.cc DEPS linalg_api math_api creation_api manipulation_api) +cc_library(pten_api SRCS all.cc DEPS linalg_api math_api creation_api manipulation_api utils_api) diff --git a/paddle/pten/api/all.h b/paddle/pten/api/all.h index e4d079de9d4..a55d3332bcd 100644 --- a/paddle/pten/api/all.h +++ b/paddle/pten/api/all.h @@ -30,6 +30,7 @@ limitations under the License. */ #include "paddle/pten/api/include/manipulation.h" #include "paddle/pten/api/include/math.h" #include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/api/include/utils.h" // pten common headers #include "paddle/pten/common/backend.h" @@ -51,3 +52,4 @@ PT_DECLARE_API(Creation); PT_DECLARE_API(Linalg); PT_DECLARE_API(Manipulation); PT_DECLARE_API(Math); +PT_DECLARE_API(Utils); diff --git a/paddle/pten/api/include/tensor.h b/paddle/pten/api/include/tensor.h index 255a8c844e9..982ec29f2be 100644 --- a/paddle/pten/api/include/tensor.h +++ b/paddle/pten/api/include/tensor.h @@ -31,6 +31,7 @@ using gpuStream_t = hipStream_t; #include "paddle/pten/api/ext/dll_decl.h" #include "paddle/pten/api/ext/place.h" +#include "paddle/pten/common/backend.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/common/layout.h" @@ -317,9 +318,11 @@ class PD_DLL_DECL Tensor final { /** * @brief Copy the current Tensor data to the specified device - * and return the new Tensor. - * It's usually used to set the input tensor data. - * This is a deprecated method and may be removed in the future! + * and return the new Tensor. It's usually used to set the input tensor data. + * Note: The Tensor's `copy_to` method is deprecated since version 2.3, and + * will be removed in version 2.4, please use `to` method instead. reason: + * copying a Tensor to another device does not need to specify the + * data type template argument * * @tparam T * @param target_place, the target place of which the tensor will copy to. @@ -334,7 +337,9 @@ class PD_DLL_DECL Tensor final { * @param place, the target place of which the tensor will copy to. * @return Tensor */ - Tensor to(const PlaceType& place) const; + // TODO(chenweihang): replace Backend by new Place, may be append dtype and + // layout arguments in the future + Tensor to(Backend backend, bool blocking) const; /** * @brief Cast datatype from one to another diff --git a/paddle/pten/api/include/utils.h b/paddle/pten/api/include/utils.h new file mode 100644 index 00000000000..1a73e2ac96f --- /dev/null +++ b/paddle/pten/api/include/utils.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/common/backend.h" + +namespace paddle { +namespace experimental { + +// TODO(chenweihang): Replace backend by place when place is ready +// TODO(chenweihang): Add layout and dtype argument if needed +PD_DLL_DECL Tensor to(const Tensor& x, Backend backend, bool blocking); + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index 2f605e491da..f30a3c89eb6 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -18,3 +18,4 @@ cc_library(math_api SRCS math.cc DEPS pten_tensor pten kernel_dispatch) cc_library(linalg_api SRCS linalg.cc DEPS pten_tensor pten kernel_dispatch) cc_library(creation_api SRCS creation.cc DEPS pten_tensor pten kernel_dispatch) cc_library(manipulation_api SRCS manipulation.cc DEPS pten_tensor pten kernel_dispatch) +cc_library(utils_api SRCS utils.cc DEPS pten_tensor pten kernel_dispatch) diff --git a/paddle/pten/api/lib/ext_compat_utils.cc b/paddle/pten/api/lib/ext_compat_utils.cc index 3aef1257072..b7250d15794 100644 --- a/paddle/pten/api/lib/ext_compat_utils.cc +++ b/paddle/pten/api/lib/ext_compat_utils.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -platform::Place ConvertExtPlaceToInnerPlace(const PlaceType& p) { +platform::Place ConvertExtPlaceToInnerPlace(PlaceType p) { if (p == PlaceType::kCPU) { return platform::Place(platform::CPUPlace()); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -50,5 +50,21 @@ PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p) { return PlaceType::kUNK; } +Backend ConvertExtPlaceToBackend(PlaceType p) { + switch (p) { + case PlaceType::kCPU: + return Backend::CPU; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + case PlaceType::kGPU: + return Backend::CUDA; +#endif + default: + PADDLE_THROW( + platform::errors::Unimplemented("Unsupported place type `%s` when " + "casting enum place to backend.", + static_cast(p))); + } +} + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/ext_compat_utils.h b/paddle/pten/api/lib/ext_compat_utils.h index 3a4ea20ff88..1e8c83f9afc 100644 --- a/paddle/pten/api/lib/ext_compat_utils.h +++ b/paddle/pten/api/lib/ext_compat_utils.h @@ -16,13 +16,16 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/pten/api/ext/place.h" +#include "paddle/pten/common/backend.h" namespace paddle { namespace experimental { -platform::Place ConvertExtPlaceToInnerPlace(const PlaceType& p); +platform::Place ConvertExtPlaceToInnerPlace(PlaceType p); PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p); +Backend ConvertExtPlaceToBackend(PlaceType p); + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index cf89bafaabe..6eae7dc3708 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/pten/api/include/utils.h" #include "paddle/pten/api/lib/ext_compat_utils.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/api/lib/utils/storage.h" @@ -279,10 +280,12 @@ gpuStream_t Tensor::stream() const { template Tensor Tensor::copy_to(const PlaceType &target_place) const { - PADDLE_THROW(platform::errors::Unimplemented( - "The copy_to operation is not supported now, " - "and it will be implemented by calling the copy kernel later.")); - return Tensor(); + LOG(WARNING) << "The Tensor's `copy_to` method is deprecated since version " + "2.3, and will be removed in version 2.4, please use `to` " + "method instead. " + "reason: copying a Tensor to another device does not need " + "to specify the data type template argument."; + return to(ConvertExtPlaceToBackend(target_place), /*blocking=*/false); } template PD_DLL_DECL Tensor @@ -308,11 +311,8 @@ template PD_DLL_DECL Tensor Tensor::copy_to>( template PD_DLL_DECL Tensor Tensor::copy_to(const PlaceType &target_place) const; -Tensor Tensor::to(const PlaceType &target_place) const { - PADDLE_THROW(platform::errors::Unimplemented( - "The to operation is not supported now, " - "and it will be implemented by calling the copy kernel later.")); - return Tensor(); +Tensor Tensor::to(Backend backend, bool blocking) const { + return experimental::to(*this, backend, blocking); } Tensor Tensor::cast(const DataType &target_type) const { diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc new file mode 100644 index 00000000000..a09e1b2c9e1 --- /dev/null +++ b/paddle/pten/api/lib/utils.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/api/include/utils.h" + +#include + +#include "glog/logging.h" + +#include "paddle/pten/api/include/registry.h" +#include "paddle/pten/api/lib/kernel_dispatch.h" +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/include/core.h" +#include "paddle/pten/include/infershape.h" + +namespace paddle { +namespace experimental { + +PD_DLL_DECL Tensor to(const Tensor& x, Backend backend, bool blocking) { + // 1. Get kernel signature and kernel + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend); + auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); + auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( + "copy", kernel_key); + + VLOG(0) << "to API kernel key: " << kernel_key; + VLOG(0) << "to API kernel: " << kernel; + + // 2. Get Device Context + auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); + auto kernel_context = pten::KernelContext(dev_ctx); + + // 3. Auto data transform + auto dense_x = std::dynamic_pointer_cast(x.impl()); + kernel_context.EmplaceBackInput(dense_x); + kernel_context.EmplaceBackAttr(blocking); + + // 4. InferMeta + auto out_meta = UnchangedInferShape(dense_x->meta()); + + // 5. Prepare outputs + const auto allocator = + std::make_shared( + pten::TransToFluidPlace(backend)); + auto dense_out = std::make_shared(allocator, out_meta); + kernel_context.EmplaceBackOutput(dense_out); + Tensor out; + out.set_impl(dense_out); + + // 6. Call kernel + kernel(&kernel_context); + + return out; +} + +} // namespace experimental +} // namespace paddle + +PT_REGISTER_API(Utils); diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc index cc2826c77b7..79b2c96dcac 100644 --- a/paddle/pten/kernels/cpu/manipulation.cc +++ b/paddle/pten/kernels/cpu/manipulation.cc @@ -26,7 +26,7 @@ void Flatten(const CPUContext& dev_ctx, int stop_axis, DenseTensor* out) { auto out_dims = out->dims(); - pten::Copy(dev_ctx, x, out); + pten::Copy(dev_ctx, x, false, out); out->Resize(out_dims); } @@ -51,7 +51,7 @@ void ReshapeFromVectorValImpl(const CPUContext& dev_ctx, bool set_lod) { auto out_meta = InferShapeFromVecValue(x.meta(), shape); if (&x != out) { - pten::Copy(dev_ctx, x, out); + pten::Copy(dev_ctx, x, false, out); } if (set_lod) { out->Resize(out_meta.dims, out_meta.lod); diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/utils.cc index e089eabb0e5..b462ef70c2f 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/utils.cc @@ -19,7 +19,11 @@ limitations under the License. */ namespace pten { -void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { +// NOTE(chenweihang): blocking is useless in cpu kernel +void Copy(const CPUContext& dev_ctx, + const DenseTensor& src, + bool blocking, + DenseTensor* dst) { auto* src_ptr = src.data(); const auto& src_place = src.place(); const auto& dst_place = dst->place(); diff --git a/paddle/pten/kernels/cpu/utils.h b/paddle/pten/kernels/cpu/utils.h index 38f601b4cf9..527346f005f 100644 --- a/paddle/pten/kernels/cpu/utils.h +++ b/paddle/pten/kernels/cpu/utils.h @@ -23,6 +23,9 @@ namespace pten { using CPUContext = paddle::platform::CPUDeviceContext; -void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst); +void Copy(const CPUContext& dev_ctx, + const DenseTensor& src, + bool blocking, + DenseTensor* dst); } // namespace pten diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/cuda/utils.cu index 04cf1413cba..24da650d1f3 100644 --- a/paddle/pten/kernels/cuda/utils.cu +++ b/paddle/pten/kernels/cuda/utils.cu @@ -22,7 +22,7 @@ namespace pten { void Copy(const CUDAContext& dev_ctx, const DenseTensor& src, - bool is_sync, + bool blocking, DenseTensor* dst) { auto* src_ptr = src.data(); const auto& src_place = src.place(); @@ -97,10 +97,10 @@ void Copy(const CUDAContext& dev_ctx, src_gpu_place, ctx_gpu_place)); auto stream = - is_sync ? nullptr - : reinterpret_cast( - dev_ctx) - .stream(); + blocking ? nullptr + : reinterpret_cast( + dev_ctx) + .stream(); paddle::memory::Copy( dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else if (paddle::platform::is_cpu_place(src_place) && // NOLINT @@ -125,10 +125,10 @@ void Copy(const CUDAContext& dev_ctx, dst_gpu_place, ctx_gpu_place)); auto stream = - is_sync ? nullptr - : reinterpret_cast( - dev_ctx) - .stream(); + blocking ? nullptr + : reinterpret_cast( + dev_ctx) + .stream(); paddle::memory::Copy( dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream); } else if (paddle::platform::is_gpu_place(src_place) && // NOLINT @@ -155,10 +155,10 @@ void Copy(const CUDAContext& dev_ctx, src_gpu_place.device, ctx_gpu_place.device)); auto stream = - is_sync ? nullptr - : reinterpret_cast( - dev_ctx) - .stream(); + blocking ? nullptr + : reinterpret_cast( + dev_ctx) + .stream(); paddle::memory::Copy( dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else if (paddle::platform::is_cuda_pinned_place(src_place) && // NOLINT @@ -185,10 +185,10 @@ void Copy(const CUDAContext& dev_ctx, dst_gpu_place.device, ctx_gpu_place.device)); auto stream = - is_sync ? nullptr - : reinterpret_cast( - dev_ctx) - .stream(); + blocking ? nullptr + : reinterpret_cast( + dev_ctx) + .stream(); paddle::memory::Copy( dst_gpu_place, dst_ptr, src_cuda_pinned_place, src_ptr, size, stream); } else if (paddle::platform::is_gpu_place(src_place) && // NOLINT @@ -205,10 +205,10 @@ void Copy(const CUDAContext& dev_ctx, "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto stream = - is_sync ? nullptr - : reinterpret_cast( - dev_ctx) - .stream(); + blocking ? nullptr + : reinterpret_cast( + dev_ctx) + .stream(); if (paddle::platform::is_same_place(src_place, dst_place)) { paddle::memory::Copy( dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); diff --git a/paddle/pten/kernels/cuda/utils.h b/paddle/pten/kernels/cuda/utils.h index cc24628ee3a..d375c1cea23 100644 --- a/paddle/pten/kernels/cuda/utils.h +++ b/paddle/pten/kernels/cuda/utils.h @@ -28,7 +28,7 @@ using CUDAContext = paddle::platform::CUDADeviceContext; void Copy(const CUDAContext& dev_ctx, const DenseTensor& src, - bool is_sync, + bool blocking, DenseTensor* dst); } // namespace pten diff --git a/paddle/pten/kernels/xpu/manipulation.cc b/paddle/pten/kernels/xpu/manipulation.cc index e23c7b2c6d4..2a726e1cb25 100644 --- a/paddle/pten/kernels/xpu/manipulation.cc +++ b/paddle/pten/kernels/xpu/manipulation.cc @@ -26,7 +26,7 @@ void Flatten(const XPUContext& dev_ctx, int stop_axis, DenseTensor* out) { auto out_dims = out->dims(); - pten::Copy(dev_ctx, x, out); + pten::Copy(dev_ctx, x, false, out); out->Resize(out_dims); } @@ -59,7 +59,7 @@ void ReshapeFromVectorVal(const XPUContext& dev_ctx, out->Resize(out_meta.dims); return; } - pten::Copy(dev_ctx, x, out); + pten::Copy(dev_ctx, x, false, out); out->Resize(out_meta.dims); } diff --git a/paddle/pten/kernels/xpu/utils.cc b/paddle/pten/kernels/xpu/utils.cc index 9bfe493f5ff..329dc2baf87 100644 --- a/paddle/pten/kernels/xpu/utils.cc +++ b/paddle/pten/kernels/xpu/utils.cc @@ -21,6 +21,7 @@ namespace pten { void Copy(const XPUDeviceContext& dev_ctx, const DenseTensor& src, + bool blocking, DenseTensor* dst) { auto* src_ptr = src.data(); auto* dst_ptr = dst->mutable_data(); diff --git a/paddle/pten/kernels/xpu/utils.h b/paddle/pten/kernels/xpu/utils.h index c92812ed688..6e34502eb23 100644 --- a/paddle/pten/kernels/xpu/utils.h +++ b/paddle/pten/kernels/xpu/utils.h @@ -27,6 +27,7 @@ using XPUDeviceContext = paddle::platform::XPUDeviceContext; void Copy(const XPUDeviceContext& dev_ctx, const DenseTensor& src, + bool blocking, DenseTensor* dst); } // namespace pten diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index 9e688d8200b..fdff473ddbb 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -1,7 +1,7 @@ if(WITH_ROCM) - hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor glog) + hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor utils_api glog) else() - cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor glog) + cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor utils_api glog) endif() cc_test(test_pten_exception SRCS test_pten_exception.cc DEPS gtest) @@ -15,4 +15,5 @@ cc_test(test_fill_api SRCS test_fill_api.cc DEPS pten_tensor pten_api pten_api_u cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_reshape_api SRCS test_reshape_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils) diff --git a/paddle/pten/tests/api/test_to_api.cc b/paddle/pten/tests/api/test_to_api.cc new file mode 100644 index 00000000000..278735033e3 --- /dev/null +++ b/paddle/pten/tests/api/test_to_api.cc @@ -0,0 +1,116 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/pten/api/include/utils.h" + +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/kernel_registry.h" + +PT_DECLARE_MODULE(UtilsCPU); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PT_DECLARE_MODULE(UtilsCUDA); +#endif + +namespace pten { +namespace tests { + +namespace framework = paddle::framework; +using DDim = paddle::framework::DDim; + +paddle::experimental::Tensor CreateInputTensor() { + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + auto dense_x = std::make_shared( + alloc, + pten::DenseTensorMeta(pten::DataType::INT64, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x->mutable_data(); + + for (int64_t i = 0; i < 12; ++i) { + dense_x_data[i] = i; + } + + return paddle::experimental::Tensor(dense_x); +} + +void CheckOutputResult(const paddle::experimental::Tensor& out) { + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.dims()[1], 4); + ASSERT_EQ(out.is_cpu(), true); + ASSERT_EQ(out.type(), pten::DataType::INT64); + ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); + ASSERT_EQ(out.initialized(), true); + + for (int64_t i = 0; i < 12; ++i) { + ASSERT_EQ(out.data()[i], i); + } +} + +TEST(API, to) { + // 1. create tensor + auto x = CreateInputTensor(); + +// 2. test API +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + auto tmp = paddle::experimental::to(x, pten::Backend::CUDA, false); + auto out = paddle::experimental::to(tmp, pten::Backend::CPU, true); +#else + auto out = paddle::experimental::to(x, pten::Backend::CPU, false); +#endif + + // 3. check result + CheckOutputResult(out); +} + +TEST(Tensor, to) { + // 1. create tensor + auto x = CreateInputTensor(); + +// 2. test API +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + auto tmp = x.to(pten::Backend::CUDA, false); + auto out = tmp.to(pten::Backend::CPU, true); +#else + auto out = x.to(pten::Backend::CPU, false); +#endif + + // 3. check result + CheckOutputResult(out); +} + +TEST(Tensor, copy_to) { + // 1. create tensor + auto x = CreateInputTensor(); + +// 2. test API +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + auto tmp = x.copy_to(paddle::PlaceType::kGPU); + auto out = tmp.copy_to(paddle::PlaceType::kCPU); +#else + auto out = x.copy_to(paddle::PlaceType::kCPU); +#endif + + // 3. check result + CheckOutputResult(out); +} + +} // namespace tests +} // namespace pten diff --git a/paddle/pten/tests/kernels/test_copy_dev_api.cc b/paddle/pten/tests/kernels/test_copy_dev_api.cc index 322280ad560..16192be0e1c 100644 --- a/paddle/pten/tests/kernels/test_copy_dev_api.cc +++ b/paddle/pten/tests/kernels/test_copy_dev_api.cc @@ -54,7 +54,7 @@ TEST(DEV_API, copy) { // 2. test API auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* dev_ctx = pool.GetByPlace(paddle::platform::CPUPlace()); - pten::Copy(*dev_ctx, *(dense_src.get()), dense_dst.get()); + pten::Copy(*dev_ctx, *(dense_src.get()), false, dense_dst.get()); // 3. check result for (int64_t i = 0; i < dense_src->numel(); i++) { -- GitLab