diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index a82965303af14f96a1ef6ce0168af2cce5615c32..cd81d3e482981d2ea22cd15c86de036b00e8dda5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -21,13 +21,6 @@ #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy_sr, CPU, ALL_LAYOUT); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy_sr, GPU, ALL_LAYOUT); -#endif - namespace eager_test { using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; class AutogradMetaTest : public AbstractAutogradMeta { @@ -212,7 +205,8 @@ TEST(EagerVariable, Constructor) { TEST(EagerVariable, DataLayout) { paddle::experimental::Tensor tensor; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1, 1, 1}), + phi::DenseTensorMeta(phi::DataType::FLOAT32, + phi::make_ddim({1, 1, 1, 1}), paddle::experimental::DataLayout::UNDEFINED); std::shared_ptr dt = std::make_shared( std::make_unique( diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index c6d4514fa8e33b0ab772819ca4babc21676a38c4..c91ac93897cd3d64d48332f5c2ec490a928689ee 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -30,7 +30,6 @@ #include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); namespace egr { @@ -44,9 +43,12 @@ TEST(Backward, SingleNodeEmptyGrad) { // Create Target Tensor paddle::experimental::Tensor target_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); paddle::experimental::Tensor leaf_tensor; { @@ -92,17 +94,24 @@ TEST(Backward, SingleNodeCustomGrad) { paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); // Create Target Tensor - paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); std::vector grad_tensors; // Create Grad Tensor paddle::experimental::Tensor grad_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 10.0 /*value*/, + false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor)); paddle::experimental::Tensor leaf_tensor; @@ -157,9 +166,13 @@ TEST(Backward, LinearNodes) { paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); // Create Target Tensor - paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); paddle::experimental::Tensor leaf_tensor; @@ -229,25 +242,39 @@ TEST(Backward, WithAccumulation) { // Create Target Tensor std::vector target_tensors; - paddle::experimental::Tensor tensor0 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); - paddle::experimental::Tensor tensor1 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor0 = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); + paddle::experimental::Tensor tensor1 = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor0)); target_tensors.emplace_back(std::move(tensor1)); // Create Grad Tensor std::vector grad_tensors; paddle::experimental::Tensor grad_tensor0 = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 5.0 /*value*/, + false /*is_leaf*/); paddle::experimental::Tensor grad_tensor1 = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 10.0 /*value*/, + false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor0)); grad_tensors.emplace_back(std::move(grad_tensor1)); diff --git a/paddle/fluid/eager/tests/task_tests/grad_test.cc b/paddle/fluid/eager/tests/task_tests/grad_test.cc index 8d6c4d7843fb2263dff7ce06133b01ed011b8163..30c0e92511a7c595f9390c42a70098e584c883b5 100644 --- a/paddle/fluid/eager/tests/task_tests/grad_test.cc +++ b/paddle/fluid/eager/tests/task_tests/grad_test.cc @@ -29,7 +29,6 @@ #include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); namespace egr { @@ -43,15 +42,21 @@ TEST(Grad, SingleNodeEmptyGrad) { // Create Target Tensor (output) paddle::experimental::Tensor output_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); // Create input tensor const paddle::experimental::Tensor leaf_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + true /*is_leaf*/); { // Create Scale Node @@ -103,23 +108,33 @@ TEST(Grad, SingleNodeCustomGrad) { paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); // Create Target Tensor - paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); std::vector grad_tensors; // Create Grad Tensor paddle::experimental::Tensor grad_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 10.0 /*value*/, + false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor)); paddle::experimental::Tensor leaf_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + true /*is_leaf*/); { // Create Scale Node @@ -172,15 +187,22 @@ TEST(Grad, LinearNodes) { paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); // Create Target Tensor - paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); paddle::experimental::Tensor leaf_tensor = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, true /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + true /*is_leaf*/); { // Create Node0 auto node0_ptr = std::make_shared(1, 1); @@ -247,25 +269,39 @@ TEST(Grad, WithAccumulation) { // Create Target Tensor std::vector target_tensors; - paddle::experimental::Tensor tensor0 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); - paddle::experimental::Tensor tensor1 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor tensor0 = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); + paddle::experimental::Tensor tensor1 = + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 1.0 /*value*/, + false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor0)); target_tensors.emplace_back(std::move(tensor1)); // Create Grad Tensor std::vector grad_tensors; paddle::experimental::Tensor grad_tensor0 = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 5.0 /*value*/, + false /*is_leaf*/); paddle::experimental::Tensor grad_tensor1 = - egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), phi::DataType::FLOAT32, - phi::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + egr_utils_api::CreateTensorWithValue(ddim, + paddle::platform::CPUPlace(), + phi::DataType::FLOAT32, + phi::DataLayout::NCHW, + 10.0 /*value*/, + false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor0)); grad_tensors.emplace_back(std::move(grad_tensor1)); diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc index 881c0602920c5921241dc42132ae63edc7f83ece..ef35d254c57a1ec49d75459572e3d08a993c9faa 100644 --- a/paddle/fluid/jit/layer_test.cc +++ b/paddle/fluid/jit/layer_test.cc @@ -21,7 +21,7 @@ #include "paddle/fluid/imperative/tracer.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/fluid/jit/layer.h" diff --git a/paddle/fluid/operators/transpose_op.cu.h b/paddle/fluid/operators/transpose_op.cu.h index f9d91fec4c3f6a5ae88ae8fcac021601a1a78738..1b90ad2c313849e6ed0cff0b6ce966442a850fd4 100644 --- a/paddle/fluid/operators/transpose_op.cu.h +++ b/paddle/fluid/operators/transpose_op.cu.h @@ -20,9 +20,9 @@ limitations under the License. */ #include "paddle/fluid/platform/fast_divmod.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/autotune/auto_tune_base.h" #include "paddle/phi/kernels/autotune/cache.h" -#include "paddle/phi/kernels/copy_kernel.h" namespace paddle { namespace operators { @@ -41,7 +41,9 @@ struct GreaterThan { // Value can be decided in compile time. template -constexpr bool CheckProperTileSize(int tile_long, int tile_short, int size_T, +constexpr bool CheckProperTileSize(int tile_long, + int tile_short, + int size_T, FUN op) { return (size_T == 16 && ((tile_long == INT_32 && op(tile_short, 4)) || (tile_long == 2 * INT_32 && op(tile_short, 4)) || @@ -79,7 +81,8 @@ constexpr bool CheckNonLongTileSize(int tile_long, int tile_short, int size_T) { // Use SM to do data transfer, load a tile into SM then store out. // All tile read and write are colascing, so can speedup memory copy template -__global__ void TilingSwapDim1And2(const T* __restrict__ input, Dim3 input_dims, +__global__ void TilingSwapDim1And2(const T* __restrict__ input, + Dim3 input_dims, T* __restrict__ output) { assert(blockDim.x == NumThreads); assert(blockDim.y == 1); @@ -218,12 +221,14 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, Dim3 input_dims, template bool SelectProperTileSize(std::vector>* tiles) { PADDLE_ENFORCE_LE( - TSIZE, 16, + TSIZE, + 16, platform::errors::InvalidArgument( "The tile size should smaller than 16, but received is:%d.", TSIZE)); PADDLE_ENFORCE_EQ( - (TSIZE & (TSIZE - 1)), 0, + (TSIZE & (TSIZE - 1)), + 0, platform::errors::InvalidArgument( "Data types should be powers of 2, but reived size is:%d.", TSIZE)); @@ -269,29 +274,37 @@ struct SystemElemType<16> { }; template -void LaunchNarrowDims2TransposeKernel(const phi::GPUContext& d, int tile_size_i, - int tile_size_j, int total_tiles_count, - const T* input, const Dim3& input_dims, +void LaunchNarrowDims2TransposeKernel(const phi::GPUContext& d, + int tile_size_i, + int tile_size_j, + int total_tiles_count, + const T* input, + const Dim3& input_dims, T* output) { constexpr int NumThreads = tile_long; if (tile_size_i <= tile_long && tile_size_j <= tile_short) { TilingSwapDim1And2 - <<>>(input, input_dims, - output); + <<>>( + input, input_dims, output); } else { TilingSwapDim1And2 - <<>>(input, input_dims, - output); + <<>>( + input, input_dims, output); } } template struct NarrowDims2TransposeDispatch { - static void DoTranspose(const phi::GPUContext& d, int tile_size_i, - int tile_size_j, int total_tiles_count, - const T* input, const Dim3& input_dims, T* output) { + static void DoTranspose(const phi::GPUContext& d, + int tile_size_i, + int tile_size_j, + int total_tiles_count, + const T* input, + const Dim3& input_dims, + T* output) { PADDLE_ENFORCE_EQ( - (tile_long & (tile_long - 1)), 0, + (tile_long & (tile_long - 1)), + 0, platform::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2." " But received value is:%d.", @@ -302,7 +315,12 @@ struct NarrowDims2TransposeDispatch { if (request_satisfied) { LaunchNarrowDims2TransposeKernel( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); return; } @@ -312,11 +330,21 @@ struct NarrowDims2TransposeDispatch { if (long_side_request_not_satisfied) { NarrowDims2TransposeDispatch::DoTranspose( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); } else { NarrowDims2TransposeDispatch::DoTranspose( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); } } @@ -325,14 +353,22 @@ struct NarrowDims2TransposeDispatch { // If Not long tile size, goto this function when compile. template struct NarrowDims2TransposeDispatch< - T, tile_long, tile_short, - typename std::enable_if< - CheckNonLongTileSize(tile_long, tile_short, sizeof(T)), void>::type> { - static void DoTranspose(const phi::GPUContext& d, int tile_size_i, - int tile_size_j, int total_tiles_count, - const T* input, const Dim3& input_dims, T* output) { + T, + tile_long, + tile_short, + typename std::enable_if::type> { + static void DoTranspose(const phi::GPUContext& d, + int tile_size_i, + int tile_size_j, + int total_tiles_count, + const T* input, + const Dim3& input_dims, + T* output) { PADDLE_ENFORCE_EQ( - (tile_long & (tile_long - 1)), 0, + (tile_long & (tile_long - 1)), + 0, platform::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2." " But received value is:%d.", @@ -343,13 +379,23 @@ struct NarrowDims2TransposeDispatch< if (request_satisfied) { LaunchNarrowDims2TransposeKernel( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); return; } NarrowDims2TransposeDispatch::DoTranspose( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); } }; @@ -357,34 +403,49 @@ struct NarrowDims2TransposeDispatch< // If long tile size, goto this function when compile. template struct NarrowDims2TransposeDispatch< - T, tile_long, tile_short, + T, + tile_long, + tile_short, typename std::enable_if::type> { - static void DoTranspose(const phi::GPUContext& d, int tile_size_i, - int tile_size_j, int total_tiles_count, - const T* input, const Dim3& input_dims, T* output) { + static void DoTranspose(const phi::GPUContext& d, + int tile_size_i, + int tile_size_j, + int total_tiles_count, + const T* input, + const Dim3& input_dims, + T* output) { PADDLE_ENFORCE_EQ( - (tile_long & (tile_long - 1)), 0, + (tile_long & (tile_long - 1)), + 0, platform::errors::InvalidArgument( "The length of the longer side of the tile should be power of 2," " but received is:%d.", tile_long)); LaunchNarrowDims2TransposeKernel( - d, tile_size_i, tile_size_j, total_tiles_count, input, input_dims, + d, + tile_size_i, + tile_size_j, + total_tiles_count, + input, + input_dims, output); } }; template -void SwapDim1And2InNarrow(const phi::GPUContext& d, const T* input, - const Dim3& input_dims, T* output, +void SwapDim1And2InNarrow(const phi::GPUContext& d, + const T* input, + const Dim3& input_dims, + T* output, const int kMinTileSize) { // First get available tile sizes for the data type requested as backups std::vector> tile_sele; auto ret = SelectProperTileSize(&tile_sele); PADDLE_ENFORCE_EQ( - ret, true, + ret, + true, platform::errors::InvalidArgument( "SelectProperTileSize should return true, but return value is:%d.", ret)); @@ -451,16 +512,22 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, const T* input, using ElemType = typename SystemElemType::type; NarrowDims2TransposeDispatch::DoTranspose( - d, select_tile_size_i, select_tile_size_j, total_tiles_count, - reinterpret_cast(input), input_dims, + d, + select_tile_size_i, + select_tile_size_j, + total_tiles_count, + reinterpret_cast(input), + input_dims, reinterpret_cast(output)); } // This is for case that cannot do coalescing read and write. // Or input is too small to split into tiles. template -__global__ void TransposeSimpleKernel(int nthreads, const T* __restrict__ input, - Dim3 input_dims, T* __restrict__ output) { +__global__ void TransposeSimpleKernel(int nthreads, + const T* __restrict__ input, + Dim3 input_dims, + T* __restrict__ output) { Dim3 output_dims; output_dims[pos0] = input_dims[0]; output_dims[pos1] = input_dims[1]; @@ -482,8 +549,10 @@ __global__ void TransposeSimpleKernel(int nthreads, const T* __restrict__ input, // Here suppose convert all tensor to dim3, so just change dim1 and 2. template -void SendSwapDim1And2InTranspose(const phi::GPUContext& d, const T* input, - const Dim3& input_dims, T* output) { +void SendSwapDim1And2InTranspose(const phi::GPUContext& d, + const T* input, + const Dim3& input_dims, + T* output) { // Suppose tile size > 16 static const int kMinTileSize = 16; static const int kMinNarrowTileSize = 96; @@ -508,8 +577,8 @@ void SendSwapDim1And2InTranspose(const phi::GPUContext& d, const T* input, input_dims_aligned[0] * input_dims_aligned[1] * input_dims_aligned[2]; TilingSwapDim1And2 - <<>>(input, input_dims, - output); + <<>>( + input, input_dims, output); } else if (narrow_tile) { // If input shape is like Rect, such as 2X100, use Narrow tile size. @@ -529,8 +598,10 @@ void SendSwapDim1And2InTranspose(const phi::GPUContext& d, const T* input, template struct SwapDim1And2InTranspose { typedef phi::GPUContext Device; - void operator()(const Device& d, const T* in, - const std::vector& combined_dims, T* out) { + void operator()(const Device& d, + const T* in, + const std::vector& combined_dims, + T* out) { Dim3 input_dims = {static_cast(combined_dims[0]), static_cast(combined_dims[1]), static_cast(combined_dims[2])}; @@ -541,8 +612,10 @@ struct SwapDim1And2InTranspose { template struct SwapDim0And2InTranspose { typedef phi::GPUContext Device; - void operator()(const Device& d, const T* in, - const std::vector& combined_dims, T* out) { + void operator()(const Device& d, + const T* in, + const std::vector& combined_dims, + T* out) { Dim3 input_dims = {static_cast(combined_dims[0]), static_cast(combined_dims[1]), static_cast(combined_dims[2])}; @@ -562,11 +635,13 @@ inline void CombineTransposeDim3(const framework::DDim& shape, const std::vector& perm, std::vector* new_perm, framework::DDim* new_dims) { - PADDLE_ENFORCE_EQ(shape.size(), perm.size(), + PADDLE_ENFORCE_EQ(shape.size(), + perm.size(), platform::errors::InvalidArgument( " shape should have the save dim with perm, but" " received shape size is:%d, perm size is:%d.", - shape.size(), perm.size())); + shape.size(), + perm.size())); std::vector dim_vec; if (shape.size() == 1) { @@ -614,8 +689,10 @@ inline void CombineTransposeDim3(const framework::DDim& shape, template struct TransposeSimple { - static bool run(const phi::GPUContext& ctx, const Tensor& in, - const std::vector perm, Tensor* out) { + static bool run(const phi::GPUContext& ctx, + const Tensor& in, + const std::vector perm, + Tensor* out) { // First reduce the dimensions of the input tensor if possible. std::vector new_perm; framework::DDim new_dims; @@ -805,7 +882,8 @@ __global__ void VectorizedPermuteKernel(PermuteParams params, // A general kernel for normal case, only support vectorized write. template __global__ void GeneralPermuteKernel(PermuteParams params, - const T* __restrict__ src, T* dst, + const T* __restrict__ src, + T* dst, const size_t main_cnt, const size_t tail_cnt, const size_t offset) { @@ -859,10 +937,12 @@ __global__ void GeneralPermuteKernel(PermuteParams params, // A Gerneral permute method that drectly find the dst data // coordinate in the source data. template -inline void LaunchPermuteKernel(const phi::GPUContext& ctx, const IndexT count, +inline void LaunchPermuteKernel(const phi::GPUContext& ctx, + const IndexT count, const PermuteType perm_type, const std::vector& dims, - const std::vector& perm, const T* src, + const std::vector& perm, + const T* src, T* dst) { size_t main_count = count / VecSize; auto params = PermuteParams(dims, perm); @@ -871,15 +951,13 @@ inline void LaunchPermuteKernel(const phi::GPUContext& ctx, const IndexT count, if (perm_type == PermuteType::kNormalPermute) { size_t tail_count = count - main_count * VecSize; size_t offset = count - tail_count; - GeneralPermuteKernel< - T, IndexT, VecSize, - Rank><<>>( - params, src, dst, main_count, tail_count, offset); + GeneralPermuteKernel + <<>>( + params, src, dst, main_count, tail_count, offset); } else { - VectorizedPermuteKernel< - T, IndexT, VecSize, - Rank><<>>( - params, main_count, src, dst); + VectorizedPermuteKernel + <<>>( + params, main_count, src, dst); } } @@ -889,12 +967,13 @@ inline void LaunchPermuteRankDispatch(const phi::GPUContext& ctx, const PermuteType perm_type, const std::vector& dims, const std::vector& perm, - const T* src, T* dst) { -#define CALL_DISPATCH_RANK(rank) \ - case rank: { \ - LaunchPermuteKernel(ctx, count, perm_type, dims, \ - perm, src, dst); \ - break; \ + const T* src, + T* dst) { +#define CALL_DISPATCH_RANK(rank) \ + case rank: { \ + LaunchPermuteKernel( \ + ctx, count, perm_type, dims, perm, src, dst); \ + break; \ } switch (dims.size()) { @@ -915,7 +994,9 @@ inline void LaunchPermuteRankDispatch(const phi::GPUContext& ctx, // https://developer.nvidia.com/blog/efficient-matrix-transpose-cuda-cc/ template __global__ void BatchTransposeKernel(const T* __restrict__ src_data, - T* dst_data, IndexT rows, IndexT cols) { + T* dst_data, + IndexT rows, + IndexT cols) { using VecT = phi::AlignedVector; __shared__ VecT tile[kTileSize][kShareCol]; @@ -961,10 +1042,13 @@ __global__ void BatchTransposeKernel(const T* __restrict__ src_data, // With the byte limitation of shared_memory, the VecSize shall be restricted // for the type whose byte-size is less than 8. -template 8 ? 1 : Size)> inline void LaunchTransposeKernel(const phi::GPUContext& ctx, - const std::vector& dims, const T* src, + const std::vector& dims, + const T* src, T* dst) { auto rank = dims.size(); IndexT num_batches = (rank == 2) ? 1 : dims[0]; @@ -976,9 +1060,8 @@ inline void LaunchTransposeKernel(const phi::GPUContext& ctx, dim3 blocks(num_tile_cols, num_tile_rows, num_batches); dim3 threads(kTileSize, kBlockRows, 1); - BatchTransposeKernel<<>>( - src, dst, rows, cols); + BatchTransposeKernel + <<>>(src, dst, rows, cols); } template @@ -987,16 +1070,18 @@ inline void LaunchWithDispatchVecSize(const phi::GPUContext& ctx, const PermuteType perm_type, const std::vector& dims, const std::vector& perm, - const T* src, T* dst, IndexT count) { -#define CALL_DISPATCH_VEC_SIZE(vec_size) \ - case vec_size: { \ - if (perm_type == PermuteType::kTranspose) { \ - LaunchTransposeKernel(ctx, dims, src, dst); \ - } else { \ - LaunchPermuteRankDispatch(ctx, count, perm_type, \ - dims, perm, src, dst); \ - } \ - break; \ + const T* src, + T* dst, + IndexT count) { +#define CALL_DISPATCH_VEC_SIZE(vec_size) \ + case vec_size: { \ + if (perm_type == PermuteType::kTranspose) { \ + LaunchTransposeKernel(ctx, dims, src, dst); \ + } else { \ + LaunchPermuteRankDispatch( \ + ctx, count, perm_type, dims, perm, src, dst); \ + } \ + break; \ } switch (vec_size) { @@ -1014,45 +1099,64 @@ inline void LaunchWithDispatchVecSize(const phi::GPUContext& ctx, template inline void LaunchWithDispatchIndex(const phi::GPUContext& ctx, - const size_t count, const int vec_size, + const size_t count, + const int vec_size, const PermuteType perm_type, const std::vector& dims, - const std::vector& perm, const T* src, + const std::vector& perm, + const T* src, T* dst) { if (count < std::numeric_limits::max()) { - LaunchWithDispatchVecSize(ctx, vec_size, perm_type, dims, perm, - src, dst, + LaunchWithDispatchVecSize(ctx, + vec_size, + perm_type, + dims, + perm, + src, + dst, static_cast(count)); } else { int64_t cnt = static_cast(count); - LaunchWithDispatchVecSize(ctx, vec_size, perm_type, dims, perm, - src, dst, + LaunchWithDispatchVecSize(ctx, + vec_size, + perm_type, + dims, + perm, + src, + dst, static_cast(count)); } } template -inline void SimplifyThenLaunch(const int rank, const DeviceContext& ctx, - const Tensor& in, Tensor* out, +inline void SimplifyThenLaunch(const int rank, + const DeviceContext& ctx, + const Tensor& in, + Tensor* out, const std::vector& perm) { int sm_count = ctx.GetSMCount(); auto src_dims = phi::vectorize(in.dims()); - auto simplifier = DimsSimplifier(sm_count, rank, perm, src_dims, - in.data(), out->data()); + auto simplifier = DimsSimplifier( + sm_count, rank, perm, src_dims, in.data(), out->data()); if (simplifier.GetPermType() == PermuteType::kCopy) { // If perm is [0,1,2,3], then just operate a DtoD copy. phi::Copy(ctx, in, ctx.GetPlace(), false, out); } else { - LaunchWithDispatchIndex( - ctx, simplifier.GetCount(), simplifier.GetVecSize(), - simplifier.GetPermType(), simplifier.GetDims(), simplifier.GetPerm(), - in.data(), out->data()); + LaunchWithDispatchIndex(ctx, + simplifier.GetCount(), + simplifier.GetVecSize(), + simplifier.GetPermType(), + simplifier.GetDims(), + simplifier.GetPerm(), + in.data(), + out->data()); } } template -size_t GetTransposeKey(const int rank, const Tensor& in, +size_t GetTransposeKey(const int rank, + const Tensor& in, const std::vector& perm) { auto in_shape = phi::vectorize(in.dims()); return phi::autotune::GetKey( @@ -1060,15 +1164,19 @@ size_t GetTransposeKey(const int rank, const Tensor& in, } template -void TransposeGPUKernelDriver(const phi::GPUContext& dev_ctx, const int rank, +void TransposeGPUKernelDriver(const phi::GPUContext& dev_ctx, + const int rank, const Tensor& in, - const std::vector& perm, Tensor* out) { + const std::vector& perm, + Tensor* out) { PADDLE_ENFORCE_LT( - rank, phi::DDim::kMaxRank, + rank, + phi::DDim::kMaxRank, platform::errors::OutOfRange( "The maximum dimension rank of " "tensor is expected to be less than %d, but here is %d.", - phi::DDim::kMaxRank, rank)); + phi::DDim::kMaxRank, + rank)); auto ret = TransposeSimple::run(dev_ctx, in, perm, out); if (!ret) { diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 4803616812cd01f76142542b8a52cd672bbfe5da..4dafc7a7ee57977bd49b22d666776dd06b1b4f8a 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -19,8 +19,8 @@ limitations under the License. */ #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cast_kernel.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/transfer_layout_kernel.h" #include "paddle/fluid/framework/tensor_util.h" diff --git a/paddle/phi/api/lib/tensor_copy.cc b/paddle/phi/api/lib/tensor_copy.cc index fb18a3b05c77e4b3f7b12ab59418f45ceb0bdf56..5b0bb52daaea30b6506ad081f32aacfd9ad5a6a7 100644 --- a/paddle/phi/api/lib/tensor_copy.cc +++ b/paddle/phi/api/lib/tensor_copy.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/infermeta/unary.h" namespace paddle { @@ -31,10 +32,7 @@ void copy(const Tensor& src, const Place& place, bool blocking, Tensor* dst) { kernel_key_set.backend_set | BackendSet(phi::TransToPhiBackend(place)); auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); - VLOG(6) << "copy API kernel key: " << kernel_key; - auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - "copy", kernel_key); - VLOG(6) << "copy API kernel: " << kernel; + VLOG(6) << "start copy. "; auto target_place = phi::TransToPhiPlace(kernel_key.backend()); auto& pool = paddle::experimental::DeviceContextPool::Instance(); @@ -47,14 +45,9 @@ void copy(const Tensor& src, const Place& place, bool blocking, Tensor* dst) { phi::MetaTensor meta_out(kernel_out); phi::UnchangedInferMeta(*dense_x, &meta_out); - using kernel_signature = void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - phi::Place, - bool, - phi::DenseTensor*); + phi::Copy(*dev_ctx, *dense_x, place, blocking, kernel_out); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, *dense_x, place, blocking, kernel_out); + VLOG(6) << "copy finished. "; } } // namespace experimental diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc index fbeeb3332eadb9cf6319d1442fcf99746e7422d7..2ead95e11b7eb210e3d4add501e8258ca7af4763 100644 --- a/paddle/phi/api/lib/tensor_method.cc +++ b/paddle/phi/api/lib/tensor_method.cc @@ -19,9 +19,11 @@ limitations under the License. */ #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/tensor_base.h" +#include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/api/include/sparse_api.h" #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/api/lib/kernel_dispatch.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/infermeta/unary.h" // clang-format off @@ -113,9 +115,15 @@ void Tensor::copy_(const Tensor &src, // Deep Copy AutoGrad info from src to self. *autograd_meta_ = *(src.autograd_meta_); } - + kernel_key_set.backend_set = + kernel_key_set.backend_set | + BackendSet(phi::TransToPhiBackend(target_place)); auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); - auto *dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); + auto place = phi::TransToPhiPlace(kernel_key.backend()); + auto& pool = paddle::experimental::DeviceContextPool::Instance(); + auto* dev_ctx = pool.GetMutable( + place.GetType() == target_place.GetType() ? target_place : place); + Backend kernel_backend = Backend::UNDEFINED; DataLayout kernel_layout = DataLayout::UNDEFINED; DataType kernel_data_type = DataType::UNDEFINED; @@ -135,49 +143,29 @@ void Tensor::copy_(const Tensor &src, } if (kernel_type == KernelType::DENSE_TENSOR_KENREL) { - auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - "copy", {kernel_backend, kernel_layout, kernel_data_type}); - VLOG(6) << "copy API kernel key: " << kernel_key; - VLOG(6) << "copy API kernel: " << kernel; - using kernel_signature = void (*)(const platform::DeviceContext &, - const phi::DenseTensor &, - phi::Place, - bool, - phi::DenseTensor *); SetKernelOutput(kernel_backend, this); phi::MetaTensor meta_out(impl_.get()); phi::UnchangedInferMeta( MakeMetaTensor( *(std::static_pointer_cast(src.impl_))), &meta_out); - auto *kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - (*(std::static_pointer_cast(src.impl_))), - target_place, - blocking, - static_cast(impl_.get())); + phi::Copy(*dev_ctx, + (*(std::static_pointer_cast(src.impl_))), + target_place, + blocking, + static_cast(impl_.get())); } else if (kernel_type == KernelType::SELECTED_ROWS_KENREL) { - auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - "copy_sr", {kernel_backend, kernel_layout, kernel_data_type}); - VLOG(6) << "copy API kernel key: " << kernel_key; - VLOG(6) << "copy API kernel: " << kernel; - using kernel_signature = void (*)(const platform::DeviceContext &, - const phi::SelectedRows &, - phi::Place, - bool, - phi::SelectedRows *); SetSelectedRowsKernelOutput(kernel_backend, this); phi::MetaTensor meta_out(impl_.get()); phi::UnchangedInferMeta( MakeMetaTensor( *(std::static_pointer_cast(src.impl_))), &meta_out); - auto *kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - (*(std::static_pointer_cast(src.impl_))), - target_place, - blocking, - static_cast(impl_.get())); + phi::Copy(*dev_ctx, + (*(std::static_pointer_cast(src.impl_))), + target_place, + blocking, + static_cast(impl_.get())); } else if (kernel_type == KernelType::SPARSE_COO_KERNEL) { auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( "copy_sparse_coo", {kernel_backend, kernel_layout, kernel_data_type}); diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index 8b180a2c2aeff82b06cffb1d430bca9740c62d6e..d7ffa1b82f15101ccbb1692c7346e17363224a62 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -81,3 +81,43 @@ if(WITH_MKLDNN) add_dependencies(dense_tensor mkldnn) add_dependencies(tensor_base mkldnn) endif() + +if(WITH_GPU) + nv_library( + phi_tensor_utils + SRCS tensor_utils.cc + DEPS cpu_context + gpu_context + dense_tensor + selected_rows + malloc + memcpy + device_context) +elseif(WITH_ROCM) + hip_library( + phi_tensor_utils + SRCS tensor_utils.cc + DEPS cpu_context + gpu_context + dense_tensor + selected_rows + malloc + memcpy + device_context) +elseif(WITH_XPU_KP) + xpu_library( + phi_tensor_utils + SRCS tensor_utils.cc + DEPS cpu_context + xpu_context + dense_tensor + selected_rows + malloc + memcpy + device_context) +else() + cc_library( + phi_tensor_utils + SRCS tensor_utils.cc + DEPS cpu_context dense_tensor selected_rows malloc memcpy device_context) +endif() diff --git a/paddle/phi/kernels/gpu/copy_kernel.cu b/paddle/phi/core/tensor_utils.cc similarity index 63% rename from paddle/phi/kernels/gpu/copy_kernel.cu rename to paddle/phi/core/tensor_utils.cc index 16eff5b26e38a06950166079dc46d77c5a61a57c..f6743a0c1849b2350b67810840b5737a6387b153 100644 --- a/paddle/phi/kernels/gpu/copy_kernel.cu +++ b/paddle/phi/core/tensor_utils.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/data_type.h" @@ -43,30 +43,49 @@ void Copy(const Context& dev_ctx, void* dst_ptr = nullptr; if (paddle::platform::is_cpu_place(dst_place)) { dst_ptr = dev_ctx.HostAlloc(dst, src.dtype()); - } else if (paddle::platform::is_cuda_pinned_place(dst_place)) { - // now we only can use mutable_data to Alloc pinned memory here, - // dev_ctx can not alloc pinned memory now - dst_ptr = dst->mutable_data(dst_place, src.dtype()); - } else { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (paddle::platform::is_gpu_place(dst_place) || + paddle::platform::is_cuda_pinned_place(dst_place)) { dst_ptr = dev_ctx.Alloc( dst, src.dtype(), 0, paddle::platform::is_cuda_pinned_place(dst_place)); +#endif + +#ifdef PADDLE_WITH_XPU + } else if (paddle::platform::is_xpu_place(dst_place)) { + dst_ptr = dev_ctx.Alloc(dst, src.dtype()); +#endif + } + + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); + if (UNLIKELY(size) == 0) { + return; } + PADDLE_ENFORCE_EQ( + dst->place(), + dst_place, + phi::errors::Unavailable( + "The Dst Tensor's place and dst_place do not match, Tensor's place " + "place is %s, dst_place is %s.", + dst->place(), + dst_place)); + if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " << dst_place; return; } VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; - CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); - - if ((paddle::platform::is_cpu_place(src_place) || - paddle::platform::is_cuda_pinned_place(src_place)) && // NOLINT - (paddle::platform::is_cpu_place(dst_place) || - paddle::platform::is_cuda_pinned_place(dst_place))) { + if (paddle::platform::is_cpu_place(src_place) && + paddle::platform::is_cpu_place(dst_place)) { + paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if ((paddle::platform::is_cpu_place(src_place) || + paddle::platform::is_cuda_pinned_place(src_place)) && // NOLINT + (paddle::platform::is_cpu_place(dst_place) || + paddle::platform::is_cuda_pinned_place(dst_place))) { paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, nullptr); } else if (paddle::platform::is_gpu_place(src_place) && // NOLINT paddle::platform::is_cpu_place(dst_place)) { @@ -176,13 +195,87 @@ void Copy(const Context& dev_ctx, : reinterpret_cast(dev_ctx).stream(); paddle::memory::Copy( dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream); +#endif + } +#ifdef PADDLE_WITH_XPU + else if (paddle::platform::is_xpu_place(src_place) && // NOLINT + paddle::platform::is_cpu_place(dst_place)) { + paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + } else if (paddle::platform::is_cpu_place(src_place) && + paddle::platform::is_xpu_place(dst_place)) { + paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + } else if (paddle::platform::is_xpu_place(src_place) && + paddle::platform::is_xpu_place(dst_place)) { + if (src_ptr == dst_ptr) { + VLOG(3) << "Skip copy the same data async from " << src_place << " to " + << dst_place; + return; + } + paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); } else { - PADDLE_THROW(phi::errors::InvalidArgument( - "Place type error. Please check the place of src and dst Tensor.")); + PADDLE_THROW(phi::errors::Unimplemented( + "Copy from %s to %s is not supported.", src_place, dst_place)); } +#endif } -} // namespace phi +template +void Copy(const Context& dev_ctx, + const SelectedRows& src, + Place dst_place, + bool blocking, + SelectedRows* dst) { + if (src.value().Holder() != dst->value().Holder() || + src.value().data() != dst->value().data()) { + dst->set_rows(src.rows()); + dst->set_height(src.height()); + } + Copy( + dev_ctx, src.value(), dst_place, blocking, dst->mutable_value()); +} + +template void Copy(const CPUContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); + +template void Copy(const DeviceContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); + +template void Copy(const CPUContext& dev_ctx, + const SelectedRows& src, + Place dst_place, + bool blocking, + SelectedRows* dst); +template void Copy(const DeviceContext& dev_ctx, + const SelectedRows& src, + Place dst_place, + bool blocking, + SelectedRows* dst); -PD_REGISTER_GENERAL_KERNEL( - copy, GPU, ALL_LAYOUT, phi::Copy, ALL_DTYPE) {} +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +template void Copy(const GPUContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); +template void Copy(const GPUContext& dev_ctx, + const SelectedRows& src, + Place dst_place, + bool blocking, + SelectedRows* dst); +#endif + +#ifdef PADDLE_WITH_XPU +template void Copy(const XPUContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); +#endif + +} // namespace phi diff --git a/paddle/phi/core/tensor_utils.h b/paddle/phi/core/tensor_utils.h index abf8aeff4d3ab047809bad8ba902075824cf263e..1c490fd53931c7f5f168dca6a576a015a1aae99f 100644 --- a/paddle/phi/core/tensor_utils.h +++ b/paddle/phi/core/tensor_utils.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/tensor_meta.h" namespace phi { @@ -70,4 +71,18 @@ class DenseTensorUtils { } }; +template +void Copy(const Context& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); + +template +void Copy(const Context& dev_ctx, + const SelectedRows& src, + Place dst_place, + bool blocking, + SelectedRows* dst); + } // namespace phi diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index ad71823e3c019a59d75ca926765d55861b877109..1611c89667cc10857c5bc399125360fa9fbaf325 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -29,7 +29,8 @@ set(COMMON_KERNEL_DEPS arg_map_context convert_utils lod_utils - custom_kernel) + custom_kernel + phi_tensor_utils) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function diff --git a/paddle/phi/kernels/assign_kernel.cc b/paddle/phi/kernels/assign_kernel.cc index 3d8e4db08bba15349b42f1b5294c5121e97b297c..16e9bb384b5f3f78e97203c760646fe3fe7df634 100644 --- a/paddle/phi/kernels/assign_kernel.cc +++ b/paddle/phi/kernels/assign_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/utils/optional.h" namespace phi { diff --git a/paddle/phi/kernels/autotune/auto_tune_test.cu b/paddle/phi/kernels/autotune/auto_tune_test.cu index 8701a0572fcd82ce758f94273196f64462995e80..d80790dbf2c15ad2a24c7f5fa2fd5cbf95ab0f0a 100644 --- a/paddle/phi/kernels/autotune/auto_tune_test.cu +++ b/paddle/phi/kernels/autotune/auto_tune_test.cu @@ -20,8 +20,8 @@ #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/tensor_meta.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/autotune/auto_tune_base.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" namespace tune = phi::autotune; diff --git a/paddle/phi/kernels/copy_kernel.h b/paddle/phi/kernels/copy_kernel.h deleted file mode 100644 index 21b59d8d11b8d5806e16fc08015311454b8b7518..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/copy_kernel.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void Copy(const Context& dev_ctx, - const DenseTensor& src, - Place dst_place, - bool blocking, - DenseTensor* dst); -} // namespace phi diff --git a/paddle/phi/kernels/cpu/adam_kernel.cc b/paddle/phi/kernels/cpu/adam_kernel.cc index 339d690310f45cdc5ec8c3e52d305b45a24b1f47..03e2a539640ea6720c41f9158c76d3396d32736f 100644 --- a/paddle/phi/kernels/cpu/adam_kernel.cc +++ b/paddle/phi/kernels/cpu/adam_kernel.cc @@ -20,7 +20,7 @@ #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" DECLARE_int32(inner_op_parallelism); diff --git a/paddle/phi/kernels/cpu/copy_kernel.cc b/paddle/phi/kernels/cpu/copy_kernel.cc deleted file mode 100644 index fa11fd05bf1d656a075b996f8688d755b28cc034..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/cpu/copy_kernel.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/kernels/copy_kernel.h" - -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/compat/convert_utils.h" -#include "paddle/phi/core/kernel_registry.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/memory/memcpy.h" - -namespace phi { - -// NOTE(chenweihang): blocking is useless in cpu kernel -template -void Copy(const Context& dev_ctx, - const DenseTensor& src, - Place dst_place, - bool blocking, - DenseTensor* dst) { - auto* src_ptr = src.data(); - const auto& src_place = src.place(); - - VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " - << src_place; - - dst->Resize(src.dims()); - auto* dst_ptr = dev_ctx.HostAlloc(dst, src.dtype()); - - if (src_ptr == dst_ptr) { - VLOG(3) << "Skip copy the same data async from " << src_place << " to " - << src_place; - return; - } - VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; - CHECK(dst->layout() == src.layout()); - - auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); - - if (paddle::platform::is_cpu_place(src_place)) { - paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size); - } -} - -} // namespace phi - -PD_REGISTER_GENERAL_KERNEL( - copy, CPU, ALL_LAYOUT, phi::Copy, ALL_DTYPE) {} diff --git a/paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc b/paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc index 021fdac225330814e6bd1d25b9f5a061b9b75207..305a9accc490f8cd7ff2db63da751554d725e95a 100644 --- a/paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/axis_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/cross_entropy_kernel.cc b/paddle/phi/kernels/cpu/cross_entropy_kernel.cc index bd3eb3eb754c3959a831c0529a2aa6fe09c0916f..27675fa8b5a548fdc230aa358afc9015fd1a75ab 100644 --- a/paddle/phi/kernels/cpu/cross_entropy_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_entropy_kernel.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/axis_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/softmax_kernel.h" diff --git a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc index b6541ec0e681847d2d45da5ff70fba4075562d8e..a0e2611f92cfcfeea27985ab4b2bdb708555a64c 100644 --- a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/elementwise_grad.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc index ee384cc75193c1b6fed3361ead0c635a8790068c..287d41b545515d30d610c984b5b25c1fd91f7cb6 100644 --- a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/elementwise_grad.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc index 6055541c805f0adef86b50e2944d821ea952dcbc..4cef9fef460be2fe500432f79ebc55fb4aadec44 100644 --- a/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/elementwise_grad.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/index_select_impl.h b/paddle/phi/kernels/cpu/index_select_impl.h index 163174580ff785910cc749711b2f917391a691ff..24b561c03366e90fe6381a8fce6e0f924f4bb21e 100644 --- a/paddle/phi/kernels/cpu/index_select_impl.h +++ b/paddle/phi/kernels/cpu/index_select_impl.h @@ -15,7 +15,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc b/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc index de7dfd167b76d03993d10e341edc748505982d7e..386d41984b0ad8f763de8a1a1375d34e43f469dd 100644 --- a/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc index 081a32b4f245b5e988ddfe4c99a8202f870d37af..58d69cb3454e7577d391e6dcf7bcab1f4afea9be 100644 --- a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc @@ -22,7 +22,7 @@ #endif #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" diff --git a/paddle/phi/kernels/cpu/mode_grad_kernel.cc b/paddle/phi/kernels/cpu/mode_grad_kernel.cc index ca813c1757eacce24ecea8687b7b80bd43c5e8f9..05675cf1ab4119d0bda77104da491cba045d7e32 100644 --- a/paddle/phi/kernels/cpu/mode_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/mode_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/mode.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc b/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc index e94d09e0337f27df2ee228b0b10e817a6f192803..ca92fcee12144baf3dd9d416fc5465ea8a756d69 100644 --- a/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/put_along_axis_grad_kernel.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/put_along_axis_kernel.cc b/paddle/phi/kernels/cpu/put_along_axis_kernel.cc index 83c9a915ee6357c4462f64b1e193e546973560ce..a297843b0c7cddd5d07f5cba71ccc279ebb7784f 100644 --- a/paddle/phi/kernels/cpu/put_along_axis_kernel.cc +++ b/paddle/phi/kernels/cpu/put_along_axis_kernel.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/rnn_functor.h b/paddle/phi/kernels/cpu/rnn_functor.h index 911814647d6c03855b9268b3a6766a8be3811539..e6139b45272e9cea1f21e7678e805b83bbc854c5 100644 --- a/paddle/phi/kernels/cpu/rnn_functor.h +++ b/paddle/phi/kernels/cpu/rnn_functor.h @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/operators/utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc index 1cd4add7d50e6da06c016475f434fd646e22effa..b4ec6652eb9757324eea60b9d7832a342436c497 100644 --- a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/rnn_functor.h" #include "paddle/phi/kernels/funcs/activation_functor.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/cpu/rnn_kernel.cc b/paddle/phi/kernels/cpu/rnn_kernel.cc index e2e784b2943ccd42e61634f944abb22607d3c325..c46bba8c23f3af72bcaccfe49de339548a7a52b3 100644 --- a/paddle/phi/kernels/cpu/rnn_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/rnn_functor.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/full_kernel.h" diff --git a/paddle/phi/kernels/cpu/scatter_grad_kernel.cc b/paddle/phi/kernels/cpu/scatter_grad_kernel.cc index f09015f24a136a023988b5241da4d298e6643d08..9fb1136e766e67682f0ef9eef177417dbc49e338 100644 --- a/paddle/phi/kernels/cpu/scatter_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/gather.h" #include "paddle/phi/kernels/funcs/scatter.h" diff --git a/paddle/phi/kernels/cpu/scatter_kernel.cc b/paddle/phi/kernels/cpu/scatter_kernel.cc index 7032c3bb5a3357f062873c05cec40d4a8267c68f..2c3e8a2f31d0987da44bf8417a787c87d3e6ae18 100644 --- a/paddle/phi/kernels/cpu/scatter_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/scatter.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc b/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc index 7c3665c5d2e2ef5ebef91d5a1d320ed61d7f6be3..844e6370caf733a4ee689d1a3d6eba4e0e245832 100644 --- a/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/gather.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc b/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc index 31e2f4c716122dacb945c3e3871f77f344e1b6dd..dcdec2343fbbb70bfe057490a446caf548a7b504 100644 --- a/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/scatter.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc b/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc index c98a098aa0e6fe462caae458cc3e6372496349fa..ae6bb5ae4fc558b3ea3915f46ead2eb696ef3692 100644 --- a/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc +++ b/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc @@ -21,7 +21,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" diff --git a/paddle/phi/kernels/flatten_grad_kernel.cc b/paddle/phi/kernels/flatten_grad_kernel.cc index 73d963f606e3f1a26125dce66b0298c213c8bdff..031f4afe98b42e93c16824a605a16087dd98bc27 100644 --- a/paddle/phi/kernels/flatten_grad_kernel.cc +++ b/paddle/phi/kernels/flatten_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/flatten_kernel.cc b/paddle/phi/kernels/flatten_kernel.cc index 006d3438288c1e3c6fa02069bf2fec99ccdf6469..58ba3d70a345c364c0f3e60e6c2dc80968634e87 100644 --- a/paddle/phi/kernels/flatten_kernel.cc +++ b/paddle/phi/kernels/flatten_kernel.cc @@ -16,8 +16,8 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/infermeta/unary.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/mode.h b/paddle/phi/kernels/funcs/mode.h index 3bd6c19545e16c1bca2881c4804db43af2b0b227..632b0ce7e151041e71eb5b25ad19ff4f11d68a56 100644 --- a/paddle/phi/kernels/funcs/mode.h +++ b/paddle/phi/kernels/funcs/mode.h @@ -35,7 +35,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/funcs/strided_slice.h b/paddle/phi/kernels/funcs/strided_slice.h index c39a9694e18e57ca139d12bd49168f611553ac5e..4d045bdeb596c8f55e9ca5c90000f66d7116b565 100644 --- a/paddle/phi/kernels/funcs/strided_slice.h +++ b/paddle/phi/kernels/funcs/strided_slice.h @@ -20,7 +20,7 @@ #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/adam_kernel.cu b/paddle/phi/kernels/gpu/adam_kernel.cu index 1322428270db5435b671c21b96f200e0806ea402..59aa4cf597e86606b759810c87eb1af11eea5c5c 100644 --- a/paddle/phi/kernels/gpu/adam_kernel.cu +++ b/paddle/phi/kernels/gpu/adam_kernel.cu @@ -24,7 +24,7 @@ #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/gpu/adamw_kernel.cu b/paddle/phi/kernels/gpu/adamw_kernel.cu index cead67fd39a21f664b5013cba847bf84bdf26b7e..9ce4d229f10c625337e159b731de248d6943fe8b 100644 --- a/paddle/phi/kernels/gpu/adamw_kernel.cu +++ b/paddle/phi/kernels/gpu/adamw_kernel.cu @@ -24,7 +24,7 @@ #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu index 9ea0d7c5393c37cf51bd37be86a45c4b3432cc64..858191c44ee5d36abe2b41e4d346a776a74517a3 100644 --- a/paddle/phi/kernels/gpu/arange_kernel.cu +++ b/paddle/phi/kernels/gpu/arange_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/range_function.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu index 94d91cbcbbd28aacb3c8dba1820d414967a313b4..5d40304c5e0c669d1af3b22bb9a79ecb6b34f0ef 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu @@ -28,8 +28,8 @@ namespace cub = hipcub; #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/axis_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu index 75a4658ee7dadb6ba7cace55cad6801a00ade081..1a4559d5cd6b58c274b1e7aee565e818d8d39ef2 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu @@ -28,8 +28,8 @@ namespace cub = hipcub; #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/axis_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu index 517fbcba158b8dfb65b69c22650d49a96e9389b1..26ddb68c4b1ba63da0e5bf711332c4dc52f862f9 100644 --- a/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu @@ -19,7 +19,7 @@ #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/gpu/elementwise_grad.h" #include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h index 9c1ced3c1bd11fba2c6f36dcbeb416af09b47cbf..e8f01be89737045efdef5c6cbce3a57e9f9acee1 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad.h +++ b/paddle/phi/kernels/gpu/elementwise_grad.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/phi/common/place.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_grad_base.h" #include "paddle/phi/kernels/funcs/reduce_function.h" diff --git a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu index 3e7430fd84eafbbea8c52384e10b79f6c028ab5a..4921cf884c4e4af17a2eaea9ce1f7cbfccd9e44b 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu @@ -19,7 +19,7 @@ #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/gpu/elementwise_grad.h" #include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu index 2edf7a132ed7d92cefa46193cda097aa9da11ac7..376b2ec8424a98c8a94db0d1b2276a07b8de55a0 100644 --- a/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu @@ -17,7 +17,7 @@ #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/elementwise_grad_kernel.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/gpu/elementwise_grad.h" diff --git a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu index d7ea2340afc7d36b85217a5bb4260e931888372a..35ac4233f374e400bd63398502ea02c327899783 100644 --- a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu @@ -17,7 +17,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/gpu/instance_norm_utils.h" diff --git a/paddle/phi/kernels/gpu/linspace_kernel.cu b/paddle/phi/kernels/gpu/linspace_kernel.cu index 66a3f833d276a9c1644f513f92af809140aa48a5..9db11381cbc150984606fe24bdcd4a61b70908d8 100644 --- a/paddle/phi/kernels/gpu/linspace_kernel.cu +++ b/paddle/phi/kernels/gpu/linspace_kernel.cu @@ -17,7 +17,7 @@ #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/logspace_kernel.cu b/paddle/phi/kernels/gpu/logspace_kernel.cu index 95a196fe1b224110a1f76724b26005d4f6212a1c..b5e4904fdcf69a16590b180720f8df4293869586 100644 --- a/paddle/phi/kernels/gpu/logspace_kernel.cu +++ b/paddle/phi/kernels/gpu/logspace_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/data_type_transform.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/psroi_pool_grad_kernel.cu b/paddle/phi/kernels/gpu/psroi_pool_grad_kernel.cu index 8b58340efd5edbe4b1ba99dca3020a48caf3d1bf..6ecaaef1870a1a41e4f5dae6a210bbfa2fb07b44 100644 --- a/paddle/phi/kernels/gpu/psroi_pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/psroi_pool_grad_kernel.cu @@ -19,7 +19,7 @@ #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/psroi_pool_kernel.h" diff --git a/paddle/phi/kernels/gpu/psroi_pool_kernel.cu b/paddle/phi/kernels/gpu/psroi_pool_kernel.cu index e0b17a55933b704264c18ff9978d4fa9e28cb531..a8fed022f91048bbace85db439b933fed8a191ce 100644 --- a/paddle/phi/kernels/gpu/psroi_pool_kernel.cu +++ b/paddle/phi/kernels/gpu/psroi_pool_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/fluid/memory/memory.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu index f553da361f1fe825a17962ed2ac5c9463b509f6b..62c93a989e518bfc85013d638023e93466c542df 100644 --- a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu @@ -19,7 +19,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/put_along_axis_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_kernel.cu index d363c0c28364c065117fe53967234484871979af..b4fde608b1e7883ffc37cfaaff22aac108549790 100644 --- a/paddle/phi/kernels/gpu/put_along_axis_kernel.cu +++ b/paddle/phi/kernels/gpu/put_along_axis_kernel.cu @@ -19,7 +19,7 @@ #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/scatter_grad_kernel.cu b/paddle/phi/kernels/gpu/scatter_grad_kernel.cu index 7f93fd0a9058c88127ab6443417b2cd3f8ef1fec..1750ad2a3ae5d6a4816751935ef60fc1c78720bf 100644 --- a/paddle/phi/kernels/gpu/scatter_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/scatter_grad_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/gather.cu.h" #include "paddle/phi/kernels/funcs/scatter.cu.h" diff --git a/paddle/phi/kernels/gpu/scatter_kernel.cu b/paddle/phi/kernels/gpu/scatter_kernel.cu index af8919bec41cedba82c146924db18468ff2c4997..a088754381d457dc3498927c5a615c33698ec34a 100644 --- a/paddle/phi/kernels/gpu/scatter_kernel.cu +++ b/paddle/phi/kernels/gpu/scatter_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/scatter.cu.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu b/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu index 66b373f3b2891e5f61610ff61bc4d4e5ff9647ea..135c683bedb5a556cec84a75a42055855e22e780 100644 --- a/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/gather.cu.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu b/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu index a7b8bebd38ce261a543651b3512e47569aa6366d..563b8868ad38c7e6821299bff2b0c7b802b1ab45 100644 --- a/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu +++ b/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/scatter.cu.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h index c300b6d3f3daaec0bfd5891d1e968ad92063f4f9..84a24449b3a1c60b3dec5ae962214b1228438d7a 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h @@ -22,7 +22,7 @@ #include "paddle/phi/backends/gpu/gpu_helper.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/gpu/reduce.h" diff --git a/paddle/phi/kernels/gpu/top_k_kernel.cu b/paddle/phi/kernels/gpu/top_k_kernel.cu index e5038e0f3be7099d0d0ccbd6a225e12e6289769f..e0b7bba50d6e9bc3200b69e176f58062289dbd37 100644 --- a/paddle/phi/kernels/gpu/top_k_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_kernel.cu @@ -17,7 +17,7 @@ #include "paddle/fluid/operators/top_k_function_cuda.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/gather.cu.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu index 2f24a44c232035df5827bdeebab1e43d1acdc507..3d44c9af03c07c33593ef583590ce03bcd8cdfbc 100644 --- a/paddle/phi/kernels/gpu/unique_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_kernel.cu @@ -28,7 +28,7 @@ #include "paddle/fluid/framework/tensor_util.h" // TensorToVector() #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/unique_functor.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu b/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu index dc04c69ec70d5a3a6d31a9e03cd91e8237544a1f..224651326d7626a088e76d092349bf917727a97a 100644 --- a/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu +++ b/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu @@ -33,7 +33,7 @@ namespace cub = hipcub; #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" diff --git a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h index f68a3e596299f252d10754ebb8b8c11da8b8dd19..22bb4973ea44675175c34185feaf096a124f07c6 100644 --- a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h @@ -17,7 +17,6 @@ #include "paddle/phi/kernels/cholesky_solve_grad_kernel.h" #include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" diff --git a/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h index 1cc8acc21f352638d59717997b8aa4f56cf656e5..562ff25317ec9b523de5a3ce8964dd36672c52de 100644 --- a/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h +++ b/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h @@ -16,7 +16,6 @@ #include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h index d9c3333fc24cbe98104c2d01ff8dd162273657b6..248305b7fc0c9415330ef884f737db876895f7e4 100644 --- a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/determinant_grad_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index 4b4a75727a55cfb8d998b8640d63d0b7254e05f2..da74280b2674dcc91df488b18e21e4dc16b51cb6 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/phi/common/complex.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" diff --git a/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h index 6ef282d470333e8099b668e5dd7d2e4c68beff3e..998c54e77fe01300a2befe9ac70f1311929bada3 100644 --- a/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/impl/expand_as_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h index a4fc7157eeaf8196bbae0fb3d849e3fe4c775f66..31cb87da25f65517ba8b83ca4d5f7782d895cbf7 100644 --- a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/impl/expand_kernel_impl.h" diff --git a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h index e5e7f785b8127ca5473c69d856848885533ff8be..e66632498f67029a8abda3ea499573f32272fc19 100644 --- a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h +++ b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/meshgrid_kernel.h" diff --git a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h index 40543645b01d1635de7e0c7a67fd106eef2a561c..de930734be6122245073c8f283281dda284b45ad 100644 --- a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h @@ -16,7 +16,7 @@ #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/impl/set_value_kernel_impl.h b/paddle/phi/kernels/impl/set_value_kernel_impl.h index 4859a7348e5be05a5dade37c62fdb22a59e0037b..a0f594e9d5859f7e8a68efe34c06d3111fec0441 100644 --- a/paddle/phi/kernels/impl/set_value_kernel_impl.h +++ b/paddle/phi/kernels/impl/set_value_kernel_impl.h @@ -17,7 +17,7 @@ #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/impl/size_kernel_impl.h b/paddle/phi/kernels/impl/size_kernel_impl.h index 7b781dba3ad2365de3c0f6ba52a746243300e573..f9757bc4477569247c1aac11fd6523cca8945951 100644 --- a/paddle/phi/kernels/impl/size_kernel_impl.h +++ b/paddle/phi/kernels/impl/size_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h b/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h index c74aa5c7243f3ccd24c2e44042ea88826637b6a5..1e3dfd66ece805deefa9570d1f6c1e5600bd7a10 100644 --- a/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { template diff --git a/paddle/phi/kernels/impl/squeeze_kernel_impl.h b/paddle/phi/kernels/impl/squeeze_kernel_impl.h index bb1627d40925fcb229d3cb0ba1f54eb1b7d10010..b4c94d619cc2aa5a509def783fa756900e83e51d 100644 --- a/paddle/phi/kernels/impl/squeeze_kernel_impl.h +++ b/paddle/phi/kernels/impl/squeeze_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" namespace phi { diff --git a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h index 9e56e50534d19bec29b6ee9fdc05ae4a1210dd41..05f9139b1485b637f4ebc1d3368411e8120bf92e 100644 --- a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h @@ -16,7 +16,7 @@ #include #include -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/tile_grad_kernel.h" diff --git a/paddle/phi/kernels/impl/triangular_solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/triangular_solve_grad_kernel_impl.h index 3ea75b036a5a203b30a10ea2b7113067151d9ae3..8faca812a0218bf2bd92592597f0ad4d7bf92ca7 100644 --- a/paddle/phi/kernels/impl/triangular_solve_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/triangular_solve_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h b/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h index 54b332ea4c898d10b63037375805d07f04ab2e63..ff45ec49b7c5d002eca94b187d719fa86c386cf9 100644 --- a/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { template diff --git a/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h b/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h index 02110d631fb4def572d8c49c662d4df77d9c233f..4f81fa6c423414170cf8ee0db92e1d10e72da50d 100644 --- a/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h +++ b/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h @@ -14,7 +14,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" namespace phi { diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h index 6c792507c6f778ac09c94c74e5b068317ee695e2..c8f8d28ce11ed3bbda5cd96b6cd8aff6fdf2f0af 100644 --- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h +++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h @@ -20,7 +20,7 @@ #include "paddle/fluid/operators/math/sequence_scale.h" #include "paddle/phi/backends/dynload/warpctc.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/utils/optional.h" diff --git a/paddle/phi/kernels/reshape_grad_kernel.cc b/paddle/phi/kernels/reshape_grad_kernel.cc index 35f85ba86aa34e1cc7e98a9296b319d660294d48..c4b92c4f760a26089dc7a92462266200a0bbf8ca 100644 --- a/paddle/phi/kernels/reshape_grad_kernel.cc +++ b/paddle/phi/kernels/reshape_grad_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/reshape_kernel.cc b/paddle/phi/kernels/reshape_kernel.cc index a723ea19d3456aa66d9d73e594ad22a590b6a4b2..632a63c9ab7ffda212c685885961fa2a924b9455 100644 --- a/paddle/phi/kernels/reshape_kernel.cc +++ b/paddle/phi/kernels/reshape_kernel.cc @@ -16,8 +16,8 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/infermeta/unary.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" namespace phi { diff --git a/paddle/phi/kernels/reverse_kernel.cc b/paddle/phi/kernels/reverse_kernel.cc index c6c2781a07bf6af06707c6fe4bcc884b9454c8c4..d89e68e7389fd2e6f70dba3f72fa15ceb2b6d20c 100644 --- a/paddle/phi/kernels/reverse_kernel.cc +++ b/paddle/phi/kernels/reverse_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.cc b/paddle/phi/kernels/selected_rows/copy_kernel.cc deleted file mode 100644 index cf71ab0583f6120e7bf10f26f00024b27a56ca79..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/selected_rows/copy_kernel.cc +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/kernels/selected_rows/copy_kernel.h" - -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/bfloat16.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" -namespace phi { -namespace sr { - -template -void Copy(const Context& dev_ctx, - const SelectedRows& src, - Place dst_place, - bool blocking, - SelectedRows* dst) { - if (src.value().Holder() != dst->value().Holder() || - src.value().data() != dst->value().data()) { - dst->set_rows(src.rows()); - dst->set_height(src.height()); - } - phi::Copy( - dev_ctx, src.value(), dst_place, blocking, dst->mutable_value()); -} - -} // namespace sr -} // namespace phi - -PD_REGISTER_GENERAL_KERNEL( - copy_sr, CPU, ALL_LAYOUT, phi::sr::Copy, ALL_DTYPE) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_GENERAL_KERNEL( - copy_sr, GPU, ALL_LAYOUT, phi::sr::Copy, ALL_DTYPE) {} -#endif diff --git a/paddle/phi/kernels/selected_rows/copy_kernel.h b/paddle/phi/kernels/selected_rows/copy_kernel.h deleted file mode 100644 index 4aa848bea2a717ffcda4dff562ec56a702b7dbc5..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/selected_rows/copy_kernel.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/core/selected_rows.h" -#include "paddle/phi/core/sparse_csr_tensor.h" - -namespace phi { -namespace sr { - -template -void Copy(const Context& dev_ctx, - const SelectedRows& src, - Place dst_place, - bool blocking, - SelectedRows* dst); - -} // namespace sr -} // namespace phi diff --git a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc index d96c707538e41fbd5f20950d31ddfe9283bf514d..ba5d6feb48f58375ca166d3541bafbf0d3b4082e 100644 --- a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc +++ b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" namespace phi { diff --git a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu index 842e05fe58e683bab89f145a17f6efdd43aca7a0..9aecbb8e99cd81d4ad6612aa0dc02d405bb98c35 100644 --- a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu +++ b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu index 6e0123d2fcaf96f38e0db29179d0a8ced7f7d71b..e04784c2620d5d6367bca649f9b3445edc09a609 100644 --- a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu +++ b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu @@ -24,7 +24,7 @@ #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/sparse/copy_kernel.cc b/paddle/phi/kernels/sparse/copy_kernel.cc index 705c19e020c84f7fff04ebef0129b1928047ec98..76726f0ffcce02ad8b9f1e425e9f8a3cbddaf980 100644 --- a/paddle/phi/kernels/sparse/copy_kernel.cc +++ b/paddle/phi/kernels/sparse/copy_kernel.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { namespace sparse { diff --git a/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc index 5a981fb8df350c300f9410220db361a3e9e02296..a675853ac47c1d987469e4b78f3028706bf16f88 100644 --- a/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/sparse/cpu/convolution.h" diff --git a/paddle/phi/kernels/sparse/cpu/full_kernel.cc b/paddle/phi/kernels/sparse/cpu/full_kernel.cc index 3c8be16626202f8f33fe4460c50886ca36965ca5..b848751deb9c83aea0f4f2864c4f1f93d70cf5d4 100644 --- a/paddle/phi/kernels/sparse/cpu/full_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/full_kernel.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc index 37579ae85640ddda33a13255030c296c8f9a53cd..cf2acd85573331adc954acb852c6d31890e80238 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_mask_kernel.cc @@ -18,8 +18,8 @@ limitations under the License. */ #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/sparse/flatten_indices.h" diff --git a/paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc index fdf8e5aa7ebf2e80edbb36f62be8d76cef8c244d..64c843c07a6ef4a6198fe6c38af66faa2bbb48bf 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_pool_grad_kernel.cc @@ -15,8 +15,8 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/funcs/sparse/convolution.h" diff --git a/paddle/phi/kernels/sparse/empty_kernel.cc b/paddle/phi/kernels/sparse/empty_kernel.cc index 4b7a5fe615af5093ce2f0682477967e675914a85..2d04f9352147b22df342b975a5bc276381d66a83 100644 --- a/paddle/phi/kernels/sparse/empty_kernel.cc +++ b/paddle/phi/kernels/sparse/empty_kernel.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { namespace sparse { diff --git a/paddle/phi/kernels/sparse/gpu/convolution.cu.h b/paddle/phi/kernels/sparse/gpu/convolution.cu.h index 24a7387d4fe1972786c9dfdf5b15ebc37b989901..d56575cddbfe263f6cc11b07ffa46551001943c2 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution.cu.h +++ b/paddle/phi/kernels/sparse/gpu/convolution.cu.h @@ -23,7 +23,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/sparse/utils.cu.h" diff --git a/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu index 805c417b8dbc1fc70cfb3965212a65edeb2b5c32..1f82f2ff93e9658d0f0877c8c4fd915e129e7853 100644 --- a/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/convolution_grad_kernel.cu @@ -20,8 +20,8 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/scatter.cu.h" diff --git a/paddle/phi/kernels/sparse/gpu/full_kernel.cu b/paddle/phi/kernels/sparse/gpu/full_kernel.cu index 500217d6edc502aa53b6b6f71700185b06390eea..a3dc5a9534bbbbda5a85618a6d57b1e43b058305 100644 --- a/paddle/phi/kernels/sparse/gpu/full_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/full_kernel.cu @@ -17,7 +17,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" namespace phi { diff --git a/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu index 8bc162eaae25348582504954ab5393c075fd519a..d5c128fea6f2949a84bd9b1e9e104eca82197ee5 100644 --- a/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" diff --git a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu index df5a4b57520d65a1e0fdc6632fbc0d7bf9309ecc..9357bbd2ad08386ab42a7375829ea3bf22eba4f3 100644 --- a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu @@ -23,7 +23,7 @@ limitations under the License. */ #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" diff --git a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu index 2153d9dfe6853b3971f4697ce27c60f59aeb2d11..21d6850bdc4aa8b41a3e9e289777e5d260300cf2 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_mask_kernel.cu @@ -21,8 +21,8 @@ limitations under the License. */ #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/sparse/flatten_indices.cu.h" diff --git a/paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu index 669ecb017dc02e3b1352f7ac250981adb4c2b737..5fe6e68c1e83f978ada43a6db697006cdd5bc6b9 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_pool_grad_kernel.cu @@ -18,8 +18,8 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" diff --git a/paddle/phi/kernels/sparse/unary_grad_kernel.cc b/paddle/phi/kernels/sparse/unary_grad_kernel.cc index 1fd3ef271129903370fd76942e67d28d91d62806..cd844532e938f08a0b066cac11fc534b02fb1460 100644 --- a/paddle/phi/kernels/sparse/unary_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/unary_grad_kernel.cc @@ -19,8 +19,8 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/activation_grad_kernel.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #define DEFINE_SPARSE_UNARY_GRAD_KERNEL(DenseKernelFunc) \ diff --git a/paddle/phi/kernels/sparse/unary_kernel.cc b/paddle/phi/kernels/sparse/unary_kernel.cc index e02d7757664faa6c0bd920eb1c8bcdcee92113e3..2999536b34ee96bbb120722dbb929eb168734b2a 100644 --- a/paddle/phi/kernels/sparse/unary_kernel.cc +++ b/paddle/phi/kernels/sparse/unary_kernel.cc @@ -19,8 +19,8 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/activation_kernel.h" -#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #define DEFINE_SPARSE_UNARY_KERNEL(DenseKernelFunc) \ diff --git a/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu b/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu index c49b41e0d3f29b69beb4b93c2d1c2095edc1f37b..fb9d32264b00f67398f171f54632404c32f8cf5b 100644 --- a/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu +++ b/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/pstring.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/strings/gpu/copy_utils.h" diff --git a/paddle/phi/kernels/xpu/copy_kernel.cc b/paddle/phi/kernels/xpu/copy_kernel.cc deleted file mode 100644 index fb931ef18a85668ce49d02dc9730cbf3b1436113..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/xpu/copy_kernel.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/kernels/copy_kernel.h" - -#include "paddle/phi/backends/xpu/xpu_context.h" -#include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/compat/convert_utils.h" -#include "paddle/phi/core/kernel_registry.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/memory/memcpy.h" - -namespace phi { - -template -void Copy(const Context& dev_ctx, - const DenseTensor& src, - Place dst_place, - bool blocking, - DenseTensor* dst) { - auto* src_ptr = src.data(); - void* dst_ptr = nullptr; - - dst->Resize(src.dims()); - if (paddle::platform::is_cpu_place(dst_place)) { - dst_ptr = dev_ctx.HostAlloc(dst, src.dtype()); - } else { - dst_ptr = dev_ctx.Alloc(dst, src.dtype()); - } - const auto& src_place = src.place(); - - if (src_ptr == dst_ptr && src_place == dst_place) { - VLOG(3) << "Skip copy the same data async from " << src_place << " to " - << dst_place; - return; - } - VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; - - VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " - << dst_place; - - CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); - - if (paddle::platform::is_xpu_place(src_place) && // NOLINT - paddle::platform::is_cpu_place(dst_place)) { - paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); - } else if (paddle::platform::is_cpu_place(src_place) && - paddle::platform::is_xpu_place(dst_place)) { - paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); - } else if (paddle::platform::is_xpu_place(src_place) && - paddle::platform::is_xpu_place(dst_place)) { - if (src_ptr == dst_ptr) { - VLOG(3) << "Skip copy the same data async from " << src_place << " to " - << dst_place; - return; - } - paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); - } else { - PADDLE_THROW(phi::errors::Unimplemented( - "Copy from %s to %s is not supported.", src_place, dst_place)); - } -} - -} // namespace phi - -PD_REGISTER_GENERAL_KERNEL( - copy, XPU, ALL_LAYOUT, phi::Copy, ALL_DTYPE) {} diff --git a/paddle/phi/tests/api/test_data_transform.cc b/paddle/phi/tests/api/test_data_transform.cc index 7e8204ea6c7a2a0086e9dd5495857357d2a9115e..36f4b19e566b97cda8e0db97c7f9ad2d2a9d4e07 100644 --- a/paddle/phi/tests/api/test_data_transform.cc +++ b/paddle/phi/tests/api/test_data_transform.cc @@ -29,7 +29,6 @@ PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); #endif namespace paddle { diff --git a/paddle/phi/tests/api/test_fill_api.cc b/paddle/phi/tests/api/test_fill_api.cc index cae56fd6634454ad9d47dd94c620c253ce0f0723..58f74321f493186db99b1df465e4d9497b5e37ad 100644 --- a/paddle/phi/tests/api/test_fill_api.cc +++ b/paddle/phi/tests/api/test_fill_api.cc @@ -22,7 +22,6 @@ limitations under the License. */ #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); namespace paddle { namespace tests { diff --git a/paddle/phi/tests/api/test_matmul_api.cc b/paddle/phi/tests/api/test_matmul_api.cc index c54c5398280e127694d78b15ad8d4eb60e5def6c..ff8bd8bfff6041a04363babf0c2172d13eb845a2 100644 --- a/paddle/phi/tests/api/test_matmul_api.cc +++ b/paddle/phi/tests/api/test_matmul_api.cc @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/phi/tests/api/test_pten_tensor.cc b/paddle/phi/tests/api/test_pten_tensor.cc index 590717b8d7b77a1937ff897339e7ab7430a024f8..049aa1c355a17d95ba920b529f3f1e09b7a0b5ab 100644 --- a/paddle/phi/tests/api/test_pten_tensor.cc +++ b/paddle/phi/tests/api/test_pten_tensor.cc @@ -17,12 +17,6 @@ #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/core/kernel_registry.h" -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); -#endif - namespace paddle { namespace tests { diff --git a/paddle/phi/tests/api/test_scale_api.cc b/paddle/phi/tests/api/test_scale_api.cc index 2795ebcf28611680c27189927d7d9414abb5b4f9..a4999cf0907a12c1f2254f4cc4c85680bc14605a 100644 --- a/paddle/phi/tests/api/test_scale_api.cc +++ b/paddle/phi/tests/api/test_scale_api.cc @@ -25,7 +25,6 @@ limitations under the License. */ PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(scale_sr, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); namespace paddle { namespace tests { diff --git a/paddle/phi/tests/api/test_to_api.cc b/paddle/phi/tests/api/test_to_api.cc index dcf433482516ffe9e44060f9d58d5c56625d07a8..1580dd08f7cd11109aa8f2b9e38b3c74bf486788 100644 --- a/paddle/phi/tests/api/test_to_api.cc +++ b/paddle/phi/tests/api/test_to_api.cc @@ -21,11 +21,6 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); -#endif - namespace paddle { namespace tests { diff --git a/paddle/phi/tests/common/test_int_array.cc b/paddle/phi/tests/common/test_int_array.cc index 30ad7cdd74c590375730bd6d08f6ebc588e0ab44..c97eac38b1360ad5ffe4a41eb6d03f176331a0d8 100644 --- a/paddle/phi/tests/common/test_int_array.cc +++ b/paddle/phi/tests/common/test_int_array.cc @@ -22,10 +22,8 @@ limitations under the License. */ #include "paddle/phi/kernels/full_kernel.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); #endif namespace phi { diff --git a/paddle/phi/tests/common/test_scalar.cu b/paddle/phi/tests/common/test_scalar.cu index 89b41ef1e583fb135d5626c2aaff001bcb544c6d..50b9e198da08b4578a2c208526060e0eddbb8299 100644 --- a/paddle/phi/tests/common/test_scalar.cu +++ b/paddle/phi/tests/common/test_scalar.cu @@ -25,8 +25,6 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); - namespace phi { namespace tests { diff --git a/paddle/phi/tests/kernels/test_copy_dev_api.cc b/paddle/phi/tests/kernels/test_copy_dev_api.cc index 9eba14ebc81a89d975c1c1436fd5ebf50133e4aa..1c9b17ed613e435cafda162a833c7a8e79cb1e06 100644 --- a/paddle/phi/tests/kernels/test_copy_dev_api.cc +++ b/paddle/phi/tests/kernels/test_copy_dev_api.cc @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" namespace phi { namespace tests { diff --git a/paddle/phi/tests/kernels/test_flatten_dev_api.cc b/paddle/phi/tests/kernels/test_flatten_dev_api.cc index 23ee9869c0e51bca4857be71a8464df894a77237..fb1cdee7e5fba4cd3ce063aa16e0bc4f6239c59b 100644 --- a/paddle/phi/tests/kernels/test_flatten_dev_api.cc +++ b/paddle/phi/tests/kernels/test_flatten_dev_api.cc @@ -23,16 +23,6 @@ limitations under the License. */ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/flatten_kernel.h" -PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); -#endif - -#ifdef PADDLE_WITH_XPU -PD_DECLARE_KERNEL(copy, XPU, ALL_LAYOUT); -#endif - namespace phi { namespace tests { diff --git a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc index b7d56cb0d2b0686bd303cf6e239191a520aeb0a2..bb84690cd07ee4f7e37a1fd0bcfa19c2c8e2bda0 100644 --- a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h" #include "paddle/phi/kernels/sparse/convolution_kernel.h" diff --git a/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc index 5640da399f4e50749c3524cded13e8002567b998..7d7cd1ceaf57ed1bcaeadee1c6eb22061731abd8 100644 --- a/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h" #include "paddle/phi/kernels/sparse/sparse_pool_kernel.h" diff --git a/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc index 0c1a7bbb3d80681b12660a627cf9f51709afcff1..d4f1d6efb5d93d090a690f53727c95658528e02c 100644 --- a/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" namespace phi {