From 35d7d1f0a2873593f77ee0fe1b1cefa86ba08e6f Mon Sep 17 00:00:00 2001
From: Huang Jiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Wed, 8 Feb 2023 10:43:29 +0800
Subject: [PATCH] move mixed_vector (#50282)

---
 .../fluid/distributed/collective/reducer.cc |  6 +--
 .../distributed/ps/service/brpc_utils.cc |  2 +-
 .../fluid/distributed/test/brpc_utils_test.cc |  8 +--
 paddle/fluid/framework/CMakeLists.txt | 20 -------
 paddle/fluid/framework/data_feed.cc |  2 +-
 paddle/fluid/framework/data_type_test.cc |  1 +
 paddle/fluid/framework/dlpack_tensor.cc |  1 +
 paddle/fluid/framework/dlpack_tensor_test.cc |  1 +
 paddle/fluid/framework/eigen_test.cc |  5 +-
 paddle/fluid/framework/fleet/heter_wrapper.cc |  4 +-
 paddle/fluid/framework/lod_tensor.h |  4 +-
 paddle/fluid/framework/lod_tensor_test.cu |  4 +-
 paddle/fluid/framework/tensor.h |  4 +-
 paddle/fluid/imperative/all_reduce.cc |  8 +--
 paddle/fluid/imperative/gloo_context.cc |  4 +-
 paddle/fluid/operators/assign_op_test.cc |  2 +-
 paddle/fluid/operators/ctc_align_op.cu |  2 +-
 paddle/fluid/operators/cvm_op.cu |  2 +-
 .../fluid/operators/detection/box_clip_op.cu |  2 +-
 .../detection/collect_fpn_proposals_op.cu |  2 +-
 .../detection/generate_proposals_op.cu |  2 +-
 .../operators/detection/target_assign_op.h |  4 +-
 paddle/fluid/operators/filter_by_instag_op.cu | 16 +++---
 paddle/fluid/operators/filter_by_instag_op.h |  4 +-
 .../fused/fused_embedding_seq_pool_op.h |  2 +-
 .../operators/fused/fused_seqpool_cvm_op.cu | 12 ++---
 .../fused/mkldnn/multi_gru_mkldnn_op.cc |  4 +-
 paddle/fluid/operators/gru_op.cc |  2 +-
 paddle/fluid/operators/gru_op.cu.cc |  2 +-
 paddle/fluid/operators/gru_op.h |  4 +-
 paddle/fluid/operators/lookup_table_op.cu |  4 +-
 paddle/fluid/operators/lookup_table_v2_op.cu |  4 +-
 paddle/fluid/operators/lstm_op.h |  6 +--
 paddle/fluid/operators/lstmp_op.h |  6 +--
 paddle/fluid/operators/math/beam_search.cu |  4 +-
 .../fluid/operators/math/sequence_padding.cc |  2 +-
 .../fluid/operators/math/sequence_padding.cu |  4 +-
 .../fluid/operators/math/sequence_padding.h |  6 +--
 .../fluid/operators/math/sequence_pooling.cu |  4 +-
 paddle/fluid/operators/optimizers/ftrl_op.h |  2 +-
 paddle/fluid/operators/optimizers/sgd_op.cu |  2 +-
 paddle/fluid/operators/row_conv_op.cc |  4 +-
 paddle/fluid/operators/row_conv_op.cu |  8 +--
 .../sequence_ops/sequence_enumerate_op.cu |  2 +-
 .../sequence_ops/sequence_erase_op.cu |  2 +-
 .../sequence_ops/sequence_expand_as_op.cu | 15 +++---
 .../sequence_ops/sequence_expand_as_op.h | 36 ++++++-------
 .../sequence_ops/sequence_expand_op.cu | 37 +++++++------
 .../sequence_ops/sequence_expand_op.h | 52 +++++++++---------
 .../sequence_ops/sequence_reverse_op.h |  2 +-
 .../sequence_ops/sequence_softmax_op.cu |  8 +--
 .../sequence_ops/sequence_softmax_op.h | 15 +++---
 .../sequence_topk_avg_pooling_op.h |  2 +-
 paddle/fluid/operators/shuffle_batch_op.h |  4 +-
 paddle/fluid/operators/tdm_child_op.h |  2 +-
 paddle/fluid/operators/tdm_sampler_op.h |  2 +-
 paddle/fluid/pybind/tensor.cc |  2 +-
 paddle/phi/core/CMakeLists.txt |  5 ++
 .../framework => phi/core}/mixed_vector.cc | 29 +++++-----
 .../framework => phi/core}/mixed_vector.h | 53 ++++++++++---------
 .../phi/kernels/cpu/edit_distance_kernel.cc |  6 +--
 .../kernels/funcs/selected_rows_functor.cc |  8 +--
 .../kernels/funcs/selected_rows_functor.cu | 22 ++++----
 paddle/phi/kernels/funcs/sequence2batch.cc |  2 +-
 paddle/phi/kernels/funcs/sequence2batch.cu |  4 +-
 paddle/phi/kernels/funcs/sequence2batch.h |  2 +-
 paddle/phi/kernels/funcs/sequence_scale.cu |  2 +-
 paddle/phi/kernels/gpu/adagrad_kernel.cu |  4 +-
 .../phi/kernels/gpu/edit_distance_kernel.cu |  4 +-
 .../phi/kernels/gpu/embedding_grad_kernel.cu |  6 +--
 paddle/phi/kernels/gpu/sgd_kernel.cu |  4 +-
 .../phi/kernels/impl/momentum_kernel_impl.h |  2 +-
 paddle/phi/kernels/impl/rmsprop_kernel_impl.h |  2 +-
 paddle/phi/kernels/impl/warpctc_kernel_impl.h |  6 +--
 .../kernels/selected_rows/cpu/adam_kernel.cc |  2 +-
 .../kernels/selected_rows/gpu/adam_kernel.cu |  2 +-
 .../kernels/selected_rows/gpu/adamw_kernel.cu |  2 +-
 .../hsigmoid_loss_grad_kernel.cc |  4 +-
 .../selected_rows/impl/lamb_kernel_impl.h |  2 +-
 paddle/phi/tests/core/CMakeLists.txt | 17 ++++++
 .../tests/core/test_mixed_vector.cc} | 10 ++--
 .../tests/core/test_mixed_vector.cu} | 27 +++++----
 tools/parallel_UT_rule.py |  4 +-
 83 files changed, 299 insertions(+), 305 deletions(-)
 rename paddle/{fluid/framework => phi/core}/mixed_vector.cc (79%)
 rename paddle/{fluid/framework => phi/core}/mixed_vector.h (87%)
 rename paddle/{fluid/framework/mixed_vector_test.cc => phi/tests/core/test_mixed_vector.cc} (88%)
 rename paddle/{fluid/framework/mixed_vector_test.cu => phi/tests/core/test_mixed_vector.cu} (80%)
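The change is almost entirely mechanical: framework::Vector<T> and framework::MixVector<T> become phi::Vector<T> and phi::MixVector<T>, and includes of paddle/fluid/framework/mixed_vector.h become paddle/phi/core/mixed_vector.h. The sketch below is illustrative only (the function and variable names are not from this patch); it shows the post-move spelling of a typical call site, assuming nothing beyond the Vector alias and the MixVector::MutableData accessor that appear in the hunks that follow.

    #include "paddle/phi/core/mixed_vector.h"

    // Hypothetical post-patch call site: the host-side vector stays a
    // phi::Vector<T> (a plain std::vector<T>), and phi::MixVector<T> is a
    // non-owning wrapper handing out a pointer valid for the given place.
    void TouchLodOnPlace(const phi::Place &place) {
      phi::Vector<size_t> lod{0, 2, 5};      // cumulative offsets, as in a LoD
      phi::MixVector<size_t> mix_lod(&lod);  // wraps the caller's vector
      size_t *data = mix_lod.MutableData(place);  // CPU or device pointer
      (void)data;  // kernels receive this pointer, as the hunks below do
    }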
diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc
index 379bc57d559..9bc230b1650 100644
--- a/paddle/fluid/distributed/collective/reducer.cc
+++ b/paddle/fluid/distributed/collective/reducer.cc
@@ -1113,7 +1113,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   const auto &rank_ = process_group_->GetRank();
   const auto &size_ = process_group_->GetSize();
 
-  framework::Vector<int64_t> rows_num_vector(size_);
+  phi::Vector<int64_t> rows_num_vector(size_);
   rows_num_vector[rank_] = static_cast<int64_t>(src_rows.size());
 
   Tensor rows_num_tensor = paddle::experimental::empty(
@@ -1183,7 +1183,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   }
   process_group_->AllGather(in, out)->Synchronize();
 
-  framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+  phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
   auto *dst_rows_dense_tensor =
       std::dynamic_pointer_cast(dst_rows_tensor.impl())
           .get();
@@ -1262,7 +1262,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   Tensor dst_rows_tensor =
       paddle::experimental::concat(rows_tensors, phi::Scalar(0));
 
-  framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+  phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
   auto *dst_rows_dense_tensor =
       std::dynamic_pointer_cast(dst_rows_tensor.impl())
          .get();
diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc
index f07b66e1e90..f1493ece37b 100644
--- a/paddle/fluid/distributed/ps/service/brpc_utils.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -236,7 +236,7 @@ void DeserializeLodTensor(framework::Variable* var,
   framework::LoD lod;
   for (int i = 0; i < msg.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < msg.lod(i).lod_data_size(); ++j) {
       v.push_back(msg.lod(i).lod_data(j));
     }
diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc
index 3614dedeeb4..4ff9f2709b8 100644
--- a/paddle/fluid/distributed/test/brpc_utils_test.cc
+++ b/paddle/fluid/distributed/test/brpc_utils_test.cc
@@ -39,7 +39,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor1 = var1->GetMutable();
   tensor1->Resize(phi::make_ddim({512, 8, 4, 2}));
   framework::LoD lod1;
-  lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
+  lod1.push_back(phi::Vector<size_t>({1, 3, 8}));
   tensor1->set_lod(lod1);
   tensor1->mutable_data(*place);
   phi::funcs::set_constant(ctx, tensor1, 31.9);
@@ -49,7 +49,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor2 = var2->GetMutable();
   tensor2->Resize(phi::make_ddim({1000, 64}));
   framework::LoD lod2;
-  lod2.push_back(framework::Vector<size_t>({1, 1}));
+  lod2.push_back(phi::Vector<size_t>({1, 1}));
   tensor2->set_lod(lod2);
   tensor2->mutable_data(*place);
   phi::funcs::set_constant(ctx, tensor2, 100);
@@ -98,7 +98,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var1 = scope_recv.FindVar("x1");
   auto* tensor1 = var1->GetMutable();
   EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2}));
-  // EXPECT_EQ(tensor1->lod(), framework::Vector<size_t>({1, 3, 8}));
+  // EXPECT_EQ(tensor1->lod(), phi::Vector<size_t>({1, 3, 8}));
   auto* tensor_data1 = const_cast(tensor1->data());
   int tensor_numel1 = 512 * 8 * 4 * 2;
   for (int i = 0; i < tensor_numel1; ++i)
@@ -108,7 +108,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var2 = scope_recv.FindVar("x2");
   auto* tensor2 = var2->GetMutable();
   EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64}));
-  // EXPECT_EQ(tensor2->lod(), framework::Vector<size_t>({1, 1}));
+  // EXPECT_EQ(tensor2->lod(), phi::Vector<size_t>({1, 1}));
   auto* tensor_data2 = const_cast(tensor2->data());
   int tensor_numel2 = 1000 * 64;
   for (int i = 0; i < tensor_numel2; ++i) EXPECT_EQ(tensor_data2[i], 100);
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index b86d921b9c5..2400a81958c 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -162,27 +162,7 @@ cc_test(
   eigen_test
   SRCS eigen_test.cc
   DEPS tensor)
-cc_library(
-  mixed_vector
-  SRCS mixed_vector.cc
-  DEPS device_context place memory)
-if(WITH_GPU)
-  nv_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-elseif(WITH_ROCM)
-  hip_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-else()
-  cc_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc
-    DEPS mixed_vector place memory device_context tensor)
-endif()
 cc_library(
   lod_tensor
   SRCS lod_tensor.cc
diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc
index 471183aaa9a..9e7ff1612b3 100644
--- a/paddle/fluid/framework/data_feed.cc
+++ b/paddle/fluid/framework/data_feed.cc
@@ -2815,7 +2815,7 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
     LoD& lod = (*feed->mutable_lod());
     lod.resize(1);
     lod[0].resize(offset_cols_size);
-    paddle::framework::MixVector<size_t> mixv_lod(&lod[0]);
+    phi::MixVector<size_t> mixv_lod(&lod[0]);
     memcpy(mixv_lod.MutableData(platform::CPUPlace()),
            off_start_ptr,
            offset_cols_size * sizeof(size_t));
diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc
index 9f36bd46368..8454b8e4171 100644
--- a/paddle/fluid/framework/data_type_test.cc
+++ b/paddle/fluid/framework/data_type_test.cc
@@ -18,6 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/place.h"
 
 TEST(DataType, float16) {
   using paddle::platform::CPUPlace;
diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index 9bf34850665..7300ac8083a 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/place.h"
"paddle/fluid/platform/place.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc index 0ccc5bb4ad1..f6b28b0a22e 100644 --- a/paddle/fluid/framework/dlpack_tensor_test.cc +++ b/paddle/fluid/framework/dlpack_tensor_test.cc @@ -18,6 +18,7 @@ #include #include "paddle/fluid/platform/device/gpu/gpu_info.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/eigen_test.cc b/paddle/fluid/framework/eigen_test.cc index 1ce55c8a8de..4771922986b 100644 --- a/paddle/fluid/framework/eigen_test.cc +++ b/paddle/fluid/framework/eigen_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/eigen.h" - #include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/platform/place.h" + #include "paddle/phi/core/ddim.h" namespace paddle { diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index 9d83d519d09..2cae0721aef 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -158,7 +158,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, LoD lod; for (int i = 0; i < req_var.lod_level(); ++i) { - framework::Vector v; + phi::Vector v; for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) { v.push_back(req_var.lod(i).lod_data(j)); } @@ -203,7 +203,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, LoD lod; for (int i = 0; i < req_var.lod_level(); ++i) { - framework::Vector v; + phi::Vector v; for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) { v.push_back(req_var.lod(i).lod_data(j)); } diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 78d4c47cb45..68aa8fceee9 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -21,12 +21,12 @@ limitations under the License. 
 #include
 #include
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace framework {
@@ -54,7 +54,7 @@ void MergeLoDTensor(phi::DenseTensor* target,
  * 0 2 4 7
  * 0 2 5 7 10 12 15 20
  */
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 std::string LoDToString(const LoD& lod);
diff --git a/paddle/fluid/framework/lod_tensor_test.cu b/paddle/fluid/framework/lod_tensor_test.cu
index f4054e580f6..cabd73598a0 100644
--- a/paddle/fluid/framework/lod_tensor_test.cu
+++ b/paddle/fluid/framework/lod_tensor_test.cu
@@ -31,7 +31,7 @@ TEST(LoD, data) {
   lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
 
   auto& v = lod[0];
-  paddle::framework::MixVector<size_t> mix_vector_v(&v);
+  phi::MixVector<size_t> mix_vector_v(&v);
   paddle::platform::CUDAPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
@@ -69,7 +69,7 @@ TEST(DenseTensor, LoDInGPU) {
   EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
 
   auto lod = lod_tensor.lod();
-  paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));
+  phi::MixVector<size_t> mix_vector(&(lod[0]));
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 451a2309892..d581838f6db 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -15,15 +15,15 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 namespace paddle {
 namespace framework {
 
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 }  // namespace framework
 }  // namespace paddle
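Both lod_tensor.h and tensor.h keep the same LoD shape; only the element type is respelled over phi::Vector. A standalone sketch of the two-level example quoted in the comment above ("0 2 4 7" over "0 2 5 7 10 12 15 20"); the alias is written out locally so the snippet compiles outside the Paddle tree:

    #include <cstddef>
    #include <vector>

    // phi::Vector<T> is just an alias for std::vector<T> (see the
    // mixed_vector.h hunk at the end of this patch).
    namespace phi {
    template <typename T>
    using Vector = std::vector<T>;
    }  // namespace phi

    using LoD = std::vector<phi::Vector<size_t>>;

    // Offsets are cumulative: level 0 says sequence i spans
    // [lod[0][i], lod[0][i+1]) among the level-1 entries.
    LoD MakeTwoLevelLoD() {
      LoD lod(2);
      lod[0] = {0, 2, 4, 7};
      lod[1] = {0, 2, 5, 7, 10, 12, 15, 20};
      return lod;
    }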
diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc
index f7cc7bc85ff..d5f03924e28 100644
--- a/paddle/fluid/imperative/all_reduce.cc
+++ b/paddle/fluid/imperative/all_reduce.cc
@@ -104,10 +104,10 @@ static void AllReduce(const phi::SelectedRows &src,
   // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
   // but we can use other ways to implement is in the future
   const auto &src_rows = src.rows();
-  framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
+  phi::Vector<int64_t> rows_num_vector(strategy.nranks_);
   rows_num_vector[strategy.local_rank_] = static_cast<int64_t>(src_rows.size());
   // CUDAMutableData use CalStream
-  paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
+  phi::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
   auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
   VLOG(4) << "start dev_ctx->wait";
   if (!use_calc_stream) {
@@ -138,9 +138,9 @@ static void AllReduce(const phi::SelectedRows &src,
 
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);
 
   auto *dst_tensor = dst->mutable_value();
diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc
index e41ba185e38..c59aea5fdea 100644
--- a/paddle/fluid/imperative/gloo_context.cc
+++ b/paddle/fluid/imperative/gloo_context.cc
@@ -158,9 +158,9 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
           << ", height: " << src.height();
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const int64_t *src_rows_ptr = mixv_src_rows.Data(place);
 
   auto *dst_tensor = dst->mutable_value();
diff --git a/paddle/fluid/operators/assign_op_test.cc b/paddle/fluid/operators/assign_op_test.cc
index 500c1c17a16..cc6c915c09a 100644
--- a/paddle/fluid/operators/assign_op_test.cc
+++ b/paddle/fluid/operators/assign_op_test.cc
@@ -98,7 +98,7 @@ TEST(AssignOp, AssignSelectedRows) {
   assign_functor(input);
 
   auto& out_selected_row = output.Get();
-  const paddle::framework::Vector<int64_t>& out_rows = out_selected_row.rows();
+  const phi::Vector<int64_t>& out_rows = out_selected_row.rows();
   EXPECT_EQ(rows.size(), out_rows.size());
   for (size_t i = 0; i < rows.size(); ++i) {
     EXPECT_EQ(rows[i], out_rows[i]);
diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu
index cef3cf25ff6..aa65ac62abf 100644
--- a/paddle/fluid/operators/ctc_align_op.cu
+++ b/paddle/fluid/operators/ctc_align_op.cu
@@ -129,7 +129,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel {
     // merge elements and delete blank
     T* output_data = output->mutable_data({num_tokens, 1}, ctx.GetPlace());
 
-    paddle::framework::MixVector<size_t> mixv_input_lod(&input_lod[level]);
+    phi::MixVector<size_t> mixv_input_lod(&input_lod[level]);
     MergeAndDelCudaKernel
         <<<1, 1, 0, stream>>>(num_tokens,
                               tokens,
diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu
index 400e025f820..dd90f09b29d 100644
--- a/paddle/fluid/operators/cvm_op.cu
+++ b/paddle/fluid/operators/cvm_op.cu
@@ -166,7 +166,7 @@ class CVMGradCUDAKernel : public framework::OpKernel {
         lod[lod.size() - 1],
         platform::errors::PreconditionNotMet(
             "Output(X@GRAD)'s dim[0] must be equal to last element of lod"));
-    paddle::framework::MixVector<size_t> mixv_lod(&lod);
+    phi::MixVector<size_t> mixv_lod(&lod);
     CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) /
                                PADDLE_CUDA_NUM_THREADS,
                            PADDLE_CUDA_NUM_THREADS,
diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu
index 79f3b18b2df..4a1558de906 100644
--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@@ -59,7 +59,7 @@ class GPUBoxClipKernel : public framework::OpKernel {
     auto stream = dev_ctx.stream();
     const size_t batch_size = lod.back().size() - 1;
     T *output_data = output->mutable_data(dev_ctx.GetPlace());
-    paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
+    phi::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
     GPUBoxClip<<>>(
         input->data(),
         mix_vector.CUDAMutableData(dev_ctx.GetPlace()),
diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
index 29cf8da067f..3f9a55225ca 100644
--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
@@ -19,7 +19,6 @@ namespace cub = hipcub;
 #include
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
@@ -28,6 +27,7 @@ namespace cub = hipcub;
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu
index 5d7a034c28a..d24cbcb81d0 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cu
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -18,10 +18,10 @@ limitations under the License. */
 #include
 #include
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/detection/bbox_util.cu.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
diff --git a/paddle/fluid/operators/detection/target_assign_op.h b/paddle/fluid/operators/detection/target_assign_op.h
index 26e5d90ab70..3319dffd226 100644
--- a/paddle/fluid/operators/detection/target_assign_op.h
+++ b/paddle/fluid/operators/detection/target_assign_op.h
@@ -121,7 +121,7 @@ class TargetAssignKernel : public framework::OpKernel {
     auto x_lod = x->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
+    phi::MixVector<size_t> mixv_x_lod(&x_lod);
     size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace());
 #else
     size_t* x_lod_data = x_lod.data();
@@ -155,7 +155,7 @@ class TargetAssignKernel : public framework::OpKernel {
     const int* neg_idx_data = neg_indices->data();
     auto neg_lod = neg_indices->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    paddle::framework::MixVector<size_t> mixv_neg_lod(&neg_lod);
+    phi::MixVector<size_t> mixv_neg_lod(&neg_lod);
     size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace());
 #else
     size_t* neg_lod_data = neg_lod.data();
diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu
index c07a69177b8..0b57cb1a77a 100644
--- a/paddle/fluid/operators/filter_by_instag_op.cu
+++ b/paddle/fluid/operators/filter_by_instag_op.cu
@@ -30,11 +30,11 @@
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #if defined(PADDLE_WITH_CUDA)
 namespace cg = cooperative_groups;
@@ -46,7 +46,7 @@ namespace operators {
 using SelectedRows = phi::SelectedRows;
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 #define WARP_SIZE 32
 #define MAX_WARP_NUM 32
@@ -376,7 +376,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel {
     }
 
     const size_t x2_lods_size = x2_lods.size() - 1;
-    paddle::framework::MixVector<size_t> mixv_x2_lods(&x2_lods);
+    phi::MixVector<size_t> mixv_x2_lods(&x2_lods);
     size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place);
 
@@ -401,7 +401,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel {
       }
     }
 
-    paddle::framework::MixVector<size_t> mixv_x1_lods(&x1_lods);
+    phi::MixVector<size_t> mixv_x1_lods(&x1_lods);
     size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place);
     auto* x1_data = x1->data();
@@ -433,12 +433,12 @@ class FilterByInstagGPUKernel : public framework::OpKernel {
     Vector<size_t> out_lods(x2_lods_size + 1, 0);
     Vector<size_t> map_lods(x2_lods_size + 1, 0);
 
-    paddle::framework::MixVector<size_t> mixv_out_lods(&out_lods);
-    paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+    phi::MixVector<size_t> mixv_out_lods(&out_lods);
+    phi::MixVector<size_t> mixv_map_lods(&map_lods);
     // thrust::device_vector out_idx(1);
     Vector<size_t> out_idx(1, 0);
-    paddle::framework::MixVector<size_t> mixv_out_idx(&out_idx);
+    phi::MixVector<size_t> mixv_out_idx(&out_idx);
 
     size_t* out_idx_data = mixv_out_idx.CUDAMutableData(gpu_place);
     size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place);
@@ -500,7 +500,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel {
     } else {
       Vector<size_t> map_lods(2, 0);
-      paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+      phi::MixVector<size_t> mixv_map_lods(&map_lods);
       thrust::device_ptr map_data_ptr(map_data);
       map_data_ptr[0] = 0;
diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h
index 95e6611d935..28d0208ba89 100644
--- a/paddle/fluid/operators/filter_by_instag_op.h
+++ b/paddle/fluid/operators/filter_by_instag_op.h
@@ -23,16 +23,16 @@
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 using SelectedRows = phi::SelectedRows;
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 template <typename T>
 class FilterByInstagKernel : public framework::OpKernel {
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
index 9fa62a37045..bc8821ef929 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -256,7 +256,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel {
       auto lod = ids->lod()[0];
       int64_t out_width = d_output->dims()[1];
 
-      framework::Vector<int64_t> *new_rows = d_table->mutable_rows();
+      phi::Vector<int64_t> *new_rows = d_table->mutable_rows();
       new_rows->resize(ids_num);
       std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t));
diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
index 98c2fc9896f..5c7d6fd0745 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
@@ -14,16 +14,16 @@
 
 #include
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 #define CUDA_KERNEL_LOOP(i, n)                                 \
   for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
@@ -441,7 +441,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel {
     int embedding_size = inputs[0]->numel() / inputs[0]->dims()[0];
     int batch_size = -1;
-    std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
+    std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
 
     for (size_t i = 0; i < slot_size; ++i) {
       const auto *input = inputs[i];
@@ -480,7 +480,7 @@
       }
       output_data[i] = reinterpret_cast(
          dev_ctx.Alloc(output, output->numel() * sizeof(T)));
-      mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+      mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
       lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
       seqpool_outputs[i].Resize({batch_size, embedding_size});
       seqpool_output_data[i] = reinterpret_cast(dev_ctx.Alloc(
@@ -527,7 +527,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel {
     int embedding_size = in_grads[0]->numel() / in_grads[0]->dims()[0];
     int batch_size = -1;
-    std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
+    std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
 
     for (size_t i = 0; i < slot_size; ++i) {
       auto *in_grad = in_grads[i];
@@ -563,7 +563,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel {
       in_grads_data[i] = reinterpret_cast(
           dev_ctx.Alloc(in_grad, in_grad->numel() * sizeof(T)));
-      mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+      mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
       lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
       cvm_data[i] = reinterpret_cast(cvm->data());
     }
diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
index 3ce6d18bde4..01b12af1a22 100644
--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -17,11 +17,11 @@ limitations under the License. */
 #include
 
 #include "dnnl.hpp"  // NOLINT
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/fused/multi_gru_op.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
@@ -678,7 +678,7 @@ class MultiGRUHandler {
   const std::vector biases_;
   phi::DenseTensor* hidden_;
   std::vector attrs_;
-  const paddle::framework::Vector<size_t>& x_lod_;
+  const phi::Vector<size_t>& x_lod_;
 };
 
 template
diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc
index ed7dfa03494..2d58438dbf3 100644
--- a/paddle/fluid/operators/gru_op.cc
+++ b/paddle/fluid/operators/gru_op.cc
@@ -372,7 +372,7 @@ class GRUCPUKernel : public framework::OpKernel {
         const_cast(weight_data + 2 * frame_size * frame_size);
 
     phi::DenseTensor ordered_h0;
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       // Since the batch computing for GRU reorders the input sequences
diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc
index 53006c55f6b..0d3686bb495 100644
--- a/paddle/fluid/operators/gru_op.cu.cc
+++ b/paddle/fluid/operators/gru_op.cu.cc
@@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel {
         const_cast(weight_data + 2 * frame_size * frame_size);
 
     phi::DenseTensor ordered_h0;
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       // Since the batch computing for GRU reorders the input sequences
diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h
index 286bf9fe273..760a33a161c 100644
--- a/paddle/fluid/operators/gru_op.h
+++ b/paddle/fluid/operators/gru_op.h
@@ -28,7 +28,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index_lod,
+                             phi::Vector<size_t> index_lod,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor row_shuffle;
@@ -79,7 +79,7 @@ class GRUGradKernel : public framework::OpKernel {
 
     phi::DenseTensor ordered_h0, ordered_h0_grad;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       ReorderInitState(
diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu
index 1052e5117e4..32946d65785 100644
--- a/paddle/fluid/operators/lookup_table_op.cu
+++ b/paddle/fluid/operators/lookup_table_op.cu
@@ -169,12 +169,12 @@ class LookupTableGradCUDAKernel : public framework::OpKernel {
       auto stream = dev_ctx.stream();
       // copy GPU memory to CPU pinned memory
-      framework::Vector<int64_t> new_rows;
+      phi::Vector<int64_t> new_rows;
       new_rows.resize(ids_num);
       auto gpu_place = context.GetPlace();
 
       // TODO(yuyang18): Strange code here.
-      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+      phi::MixVector<int64_t> mixv_new_rows(&new_rows);
       memory::Copy(gpu_place,
                    mixv_new_rows.CUDAMutableData(context.GetPlace()),
                    gpu_place,
diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu
index a3d8c91d862..11c35293ebe 100644
--- a/paddle/fluid/operators/lookup_table_v2_op.cu
+++ b/paddle/fluid/operators/lookup_table_v2_op.cu
@@ -159,11 +159,11 @@ struct LookupTableV2GradCUDAFunctor {
       dim3 threads(128, 8);
       dim3 grids(8, 1);
       auto stream = dev_ctx.stream();
-      framework::Vector<int64_t> new_rows;
+      phi::Vector<int64_t> new_rows;
       new_rows.resize(ids_num);
       auto gpu_place = context_.GetPlace();
 
-      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+      phi::MixVector<int64_t> mixv_new_rows(&new_rows);
       if (!std::is_same::value) {
         InputTypeConvert<<>>(
             ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h
index d5ced3edd2a..cba58781565 100644
--- a/paddle/fluid/operators/lstm_op.h
+++ b/paddle/fluid/operators/lstm_op.h
@@ -27,7 +27,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index_lod,
+                             phi::Vector<size_t> index_lod,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor row_shuffle;
@@ -95,7 +95,7 @@ class LSTMKernel : public framework::OpKernel {
     lstm_value.prev_state_value = nullptr;
     phi::DenseTensor ordered_c0;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (cell_t0) {
       // Since the batch computing for LSTM reorders the input sequence
@@ -236,7 +236,7 @@ class LSTMGradKernel : public framework::OpKernel {
     // ordered_h0_g/c0_g is the reordered gradient of hidden/cell
     // initialization.
     phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (c0) {
       ReorderInitState(
diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h
index c26a421966e..3272ca84d8a 100644
--- a/paddle/fluid/operators/lstmp_op.h
+++ b/paddle/fluid/operators/lstmp_op.h
@@ -70,7 +70,7 @@ class _ClipGradFunctor {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index,
+                             phi::Vector<size_t> index,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor row_shuffle;
@@ -158,7 +158,7 @@ class LSTMPKernel : public framework::OpKernel {
     phi::DenseTensor ordered_c0;
     phi::DenseTensor ordered_h0;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (cell_t0) {
       // Since the batch computing for LSTMP reorders the input sequence
@@ -350,7 +350,7 @@ class LSTMPGradKernel : public framework::OpKernel {
     // initialization.
     phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (c0) {
       ReorderInitState(
diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu
index 400f10558e1..b9f5624db73 100644
--- a/paddle/fluid/operators/math/beam_search.cu
+++ b/paddle/fluid/operators/math/beam_search.cu
@@ -446,8 +446,8 @@ class BeamSearchFunctor {
     framework::LoD selected_lod(2);
     selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end());
     selected_lod[1].resize(scores->dims()[0] + 1);
-    paddle::framework::MixVector<size_t> mix_vector(&selected_lod[1]);
-    paddle::framework::MixVector<size_t> mixv_abs(&abs_lod[level]);
+    phi::MixVector<size_t> mix_vector(&selected_lod[1]);
+    phi::MixVector<size_t> mixv_abs(&abs_lod[level]);
     size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace());
 
     if (num_seqs == 1) {
diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc
index 6f53246abf9..17239669836 100644
--- a/paddle/fluid/operators/math/sequence_padding.cc
+++ b/paddle/fluid/operators/math/sequence_padding.cc
@@ -28,7 +28,7 @@ namespace math {
 template <typename T>
 void CopyValidData(phi::DenseTensor* dst_tensor,
                    const phi::DenseTensor* src_tensor,
-                   const framework::Vector<size_t>& seq_offsets,
+                   const phi::Vector<size_t>& seq_offsets,
                    int pad_seq_len,
                    int step_width,
                    bool norm_by_len,
diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu
index f1f2785fe0a..c9810f22402 100644
--- a/paddle/fluid/operators/math/sequence_padding.cu
+++ b/paddle/fluid/operators/math/sequence_padding.cu
@@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor {
     T* pad_data = pad_tensor->data();
     const T* pad_value_data = pad_value.data();
 
-    paddle::framework::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
+    phi::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
     SequencePaddingKernel<<>>(
         pad_data,
         seq_data,
@@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor {
     const T* pad_data = pad_tensor.data();
     T* seq_data = seq_tensor->data();
 
-    paddle::framework::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
+    phi::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
     SequencePaddingKernel<<>>(
         seq_data,
         pad_data,
diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h
index 9e7db9f03ed..ec59309f04f 100644
--- a/paddle/fluid/operators/math/sequence_padding.h
+++ b/paddle/fluid/operators/math/sequence_padding.h
@@ -29,7 +29,7 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
 enum CopyType { kSeqToPad, kPadToSeq };
 
 inline static size_t MaximumSequenceLength(
-    const framework::Vector<size_t>& seq_offset) {
+    const phi::Vector<size_t>& seq_offset) {
   size_t seq_num = seq_offset.size() - 1;
   size_t max_seq_len = 0;
   for (size_t i = 0; i < seq_num; ++i) {
@@ -39,7 +39,7 @@ inline static size_t MaximumSequenceLength(
 }
 
 inline static size_t TotalSequenceLength(
-    const framework::Vector<size_t>& seq_offset) {
+    const phi::Vector<size_t>& seq_offset) {
   size_t seq_num = seq_offset.size() - 1;
   size_t total_seq_len = 0;
   for (size_t i = 0; i < seq_num; ++i) {
@@ -50,7 +50,7 @@ inline static size_t TotalSequenceLength(
 
 inline static void CheckDims(const framework::DDim& seq_tensor_dims,
                              const framework::DDim& pad_tensor_dims,
-                             const framework::Vector<size_t>& seq_offset,
+                             const phi::Vector<size_t>& seq_offset,
                              int64_t padded_seq_len,
                              int64_t step_width,
                              const PadLayout& layout) {
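The helpers just above only change their parameter type; the offset arithmetic is untouched. For reference, a standalone restatement of that arithmetic on a plain std::vector (which is what phi::Vector<T> aliases); the function name here is not from the patch:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Same logic as MaximumSequenceLength above. Example: seq_offset =
    // {0, 3, 3, 8} encodes sequence lengths 3, 0 and 5, so the result is 5.
    size_t MaxSeqLen(const std::vector<size_t> &seq_offset) {
      size_t max_len = 0;
      for (size_t i = 0; i + 1 < seq_offset.size(); ++i) {
        max_len = std::max(max_len, seq_offset[i + 1] - seq_offset[i]);
      }
      return max_len;
    }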
diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu
index 530b68bbfbb..e56f0025a0e 100644
--- a/paddle/fluid/operators/math/sequence_pooling.cu
+++ b/paddle/fluid/operators/math/sequence_pooling.cu
@@ -203,7 +203,7 @@ class SequencePoolFunctor {
     const size_t item_dim = output->numel() / output->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1);
-    paddle::framework::MixVector<size_t> mix_vector(&lod);
+    phi::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_kernel>
           <<>>(
@@ -421,7 +421,7 @@ class SequencePoolGradFunctor {
     const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1);
-    paddle::framework::MixVector<size_t> mix_vector(&lod);
+    phi::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_grad_kernel>
           <<>>(
diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h
index 99e210ce51e..6feabd46a2d 100644
--- a/paddle/fluid/operators/optimizers/ftrl_op.h
+++ b/paddle/fluid/operators/optimizers/ftrl_op.h
@@ -197,7 +197,7 @@ class FTRLOpKernel : public framework::OpKernel {
           ctx.template device_context(), *grad, merged_grad);
 
       auto* merged_rows = merged_grad->mutable_rows();
-      paddle::framework::MixVector<int64_t> mixv_merged_rows(merged_rows);
+      phi::MixVector<int64_t> mixv_merged_rows(merged_rows);
       const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace());
       auto row_numel = static_cast(merged_grad->value().dims()[1]);
       auto row_height = static_cast(merged_grad->rows().size());
diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu
index 79cce5abdb0..ee7b0f4c7e4 100644
--- a/paddle/fluid/operators/optimizers/sgd_op.cu
+++ b/paddle/fluid/operators/optimizers/sgd_op.cu
@@ -164,7 +164,7 @@ class SGDOpKernel : public framework::OpKernel {
       int thread_x = kThreadsPerBlock;
       int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount();
       int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-      paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+      phi::MixVector<int64_t> mixv_in_rows(&in_rows);
       SparseSGDFunctorKernel<<
diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc
--- a/paddle/fluid/operators/row_conv_op.cc
+++ b/paddle/fluid/operators/row_conv_op.cc
@@ class RowConvKernel : public framework::OpKernel {
     } else {
       batch_size = x->lod()[0].size() - 1;
     }
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int input_dim = 0;
     int timesteps = 0;
     if (is_tensor) {
@@ -231,7 +231,7 @@ class RowConvGradKernel : public framework::OpKernel {
     } else {
       batch_size = x->lod()[0].size() - 1;
     }
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = 0;
     int input_dim = 0;
     if (is_tensor) {
diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu
index 81f140b36fc..931e34d2f3e 100644
--- a/paddle/fluid/operators/row_conv_op.cu
+++ b/paddle/fluid/operators/row_conv_op.cu
@@ -338,7 +338,7 @@ class RowConvKernel : public framework::OpKernel {
       batch_size = X->lod()[0].size() - 1;
     }
     int input_dim = 0;
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = X->dims()[1];
     if (is_tensor) {
       for (int i = 0; i < batch_size + 1; i++) {
@@ -352,7 +352,7 @@ class RowConvKernel : public framework::OpKernel {
     int num_sequence = batch_indices.size() - 1;
     int future_context = Filter->dims()[0];
-    paddle::framework::MixVector<size_t> mix_vector(&batch_indices);
+    phi::MixVector<size_t> mix_vector(&batch_indices);
     size_t *idx = mix_vector.CUDAMutableData(context.GetPlace());
     auto stream = context.cuda_device_context().stream();
@@ -397,7 +397,7 @@ class RowConvGradKernel : public framework::OpKernel {
     }
 
     int input_dim = 0;
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = X->dims()[1];
     if (is_tensor) {
       for (int i = 0; i < batch_size + 1; i++) {
@@ -411,7 +411,7 @@ class RowConvGradKernel : public framework::OpKernel {
     // int input_dim = X->dims()[1];
     int num_sequence = batch_indices.size() - 1;
     int future_context = Filter->dims()[0];
-    paddle::framework::MixVector<size_t> mixv_batch_indices(&batch_indices);
+    phi::MixVector<size_t> mixv_batch_indices(&batch_indices);
     size_t *idx = mixv_batch_indices.CUDAMutableData(context.GetPlace());
     auto &device_ctx = context.cuda_device_context();
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
index ee69333f924..fc78007338c 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
@@ -76,7 +76,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel {
     out->Resize({in_dims[0], win_size});
     auto out_data = out->mutable_data(context.GetPlace());
     // Copy LoD to GPU
-    paddle::framework::MixVector<size_t> mixv_lod0(&lod0);
+    phi::MixVector<size_t> mixv_lod0(&lod0);
     const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace());
     // Calc output tensor
     CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
index b573df956df..5a07c7f3924 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
@@ -97,7 +97,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel {
     // Copy LoD to GPU
     auto last_lod = lod[lod.size() - 1];
     auto lod_len = last_lod.size();
-    paddle::framework::MixVector<size_t> mixv_last_lod(&last_lod);
+    phi::MixVector<size_t> mixv_last_lod(&last_lod);
     const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace());
     // Calc output LoD
     thrust::device_vector dev_out_lod(lod_len);
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
index d5fecace6d7..cd01e37fdcf 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
@@ -65,11 +65,10 @@ static __global__ void sequence_expand_as_grad_kernel(
 
 template <typename T>
 struct SequenceExpandAsFunctor<phi::GPUContext, T> {
-  void operator()(
-      const phi::GPUContext &context,
-      const phi::DenseTensor &x,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *out) {
+  void operator()(const phi::GPUContext &context,
+                  const phi::DenseTensor &x,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *out) {
     int height = x.dims()[0];
     int width = phi::product(x.dims()) / height;
 
@@ -84,7 +83,7 @@ struct SequenceExpandAsFunctor {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_expand_as_kernel<<>>(
         x.data(),
         mixv_ref_lod.CUDAData(context.GetPlace()),
@@ -98,7 +97,7 @@
 template <typename T>
 struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext &context,
                   const phi::DenseTensor &dout,
-                  const framework::Vector<size_t> &ref_lod, /*expand based lod*/
+                  const phi::Vector<size_t> &ref_lod, /*expand based lod*/
                   phi::DenseTensor *dx) {
     int height = dx->dims()[0];
     int width = phi::product(dx->dims()) / height;
 
@@ -114,7 +113,7 @@ struct SequenceExpandAsGradFunctor {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_expand_as_grad_kernel<<
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h
@@ template <typename DeviceContext, typename T>
 struct SequenceExpandAsFunctor {
-  void operator()(
-      const DeviceContext &ctx,
-      const phi::DenseTensor &x,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *out);
+  void operator()(const DeviceContext &ctx,
+                  const phi::DenseTensor &x,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandAsGradFunctor {
-  void operator()(
-      const DeviceContext &ctx,
-      const phi::DenseTensor &dout,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *dx);
+  void operator()(const DeviceContext &ctx,
+                  const phi::DenseTensor &dout,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *dx);
 };
 
 template <typename T>
 struct SequenceExpandAsFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext &context,
-      const phi::DenseTensor &x,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *out) {
+  void operator()(const phi::CPUContext &context,
+                  const phi::DenseTensor &x,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *out) {
     int64_t height = x.dims()[0];
     int64_t width = phi::product(x.dims()) / height;
 
@@ -122,11 +119,10 @@ class SequenceExpandAsKernel : public framework::OpKernel {
  *
  */
 template <typename T>
 struct SequenceExpandAsGradFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext &context,
-      const phi::DenseTensor &dout,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *dx) {
+  void operator()(const phi::CPUContext &context,
+                  const phi::DenseTensor &dout,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *dx) {
     int64_t height = dx->dims()[0];
     int64_t width = phi::product(dx->dims()) / height;
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
index e4ebd47878c..0272637b84b 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
@@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data,
   }
 }
 
-void GetOutputOffset(const framework::Vector<size_t>& x_lod,
-                     const framework::Vector<size_t>& ref_lod,
-                     framework::Vector<size_t>* out_offset) {
+void GetOutputOffset(const phi::Vector<size_t>& x_lod,
+                     const phi::Vector<size_t>& ref_lod,
+                     phi::Vector<size_t>* out_offset) {
   size_t offset = 0;
   int lod_size = static_cast(x_lod.size());
   for (int i = 0; i < static_cast(x_lod.size()); ++i) {
@@ -99,8 +99,8 @@
 template <typename T>
 static int ExpandByMemoryCopy(const phi::GPUContext& context,
                               const LoDTensor& x,
                               LoDTensor* out,
-                              const framework::Vector<size_t>& x_lod,
-                              const framework::Vector<size_t>& ref_lod,
+                              const phi::Vector<size_t>& x_lod,
+                              const phi::Vector<size_t>& ref_lod,
                               bool do_copy) {
   auto out_data = out->data();
   auto x_data = x.data();
@@ -143,12 +143,11 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context,
 
 template <typename T>
 struct SequenceExpandFunctor<phi::GPUContext, T> {
-  void operator()(
-      const phi::GPUContext& context,
-      const LoDTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* out) {
+  void operator()(const phi::GPUContext& context,
+                  const LoDTensor& x,
+                  const phi::Vector<size_t>& x_lod, /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  LoDTensor* out) {
     int num_copys =
         ExpandByMemoryCopy(context, x, out, x_lod, ref_lod, false);
     // Sometimes direct copies will be faster, this maybe need deeply analysis.
@@ -157,7 +156,7 @@ struct SequenceExpandFunctor {
     } else {
       int x_item_length = x.numel() / x.dims()[0];
       size_t x_lod_size = x_lod.size();
-      framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
+      phi::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
       GetOutputOffset(x_lod, ref_lod, &out_offset);
 
       for (size_t i = 0; i < x_lod_size; ++i) {
@@ -167,7 +166,7 @@ struct SequenceExpandFunctor {
         out_offset[2 * x_lod_size + i] = ref_lod[i];
       }
 
-      paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
+      phi::MixVector<size_t> mixv_out_offset(&out_offset);
       const size_t* out_offset_data =
          mixv_out_offset.CUDAData(context.GetPlace());
       const size_t* x_lod_data = out_offset_data + x_lod_size;
@@ -197,11 +196,11 @@
 template <typename T>
 struct SequenceExpandGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const LoDTensor& dout,
-                  const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-                  const framework::Vector<size_t>& ref_lod, /*expand based lod*/
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand based lod*/
                   LoDTensor* dx) {
     int x_item_length = phi::product(dx->dims()) / dx->dims()[0];
-    framework::Vector<size_t> out_offset(x_lod.size());
+    phi::Vector<size_t> out_offset(x_lod.size());
     GetOutputOffset(x_lod, ref_lod, &out_offset);
 
     int thread_x = std::min(32, std::max(static_cast(ref_lod.size()), 16));
@@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor {
     int block_x = static_cast(ref_lod.size());
     dim3 block_size(thread_x, thread_y, thread_z);
     dim3 grid_size(block_x, 1);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
-    paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
-    paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_x_lod(&x_lod);
+    phi::MixVector<size_t> mixv_out_offset(&out_offset);
     sequence_expand_grad_kernel<<>>(
         dout.data(),
         mixv_ref_lod.CUDAData(context.GetPlace()),
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
index 1366fe87ab3..7a7a6f7b3e7 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -29,32 +29,29 @@ using EigenMatrix = phi::EigenMatrix;
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandGradFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx);
 };
 
 template <typename T>
 struct SequenceExpandFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out) {
     int out_offset = 0;
     int x_item_length = x.numel() / x.dims()[0];
     auto out_data = out->data();
@@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel {
     }
 
     // x lod level is at most 1.
-    framework::Vector<size_t> out_lod;
+    phi::Vector<size_t> out_lod;
     if (x_lod.size() == 1) {
       out_lod.push_back(0);
       int out_offset = 0;
@@ -130,7 +127,7 @@
       auto& ref_lod = *out->mutable_lod();
       ref_lod[0] = out_lod;
     }
-    framework::Vector<size_t> ref_x_lod;
+    phi::Vector<size_t> ref_x_lod;
     if (x->lod().size() == 1) {
       ref_x_lod = x->lod()[0];
     } else {
@@ -161,12 +158,11 @@ class SequenceExpandKernel
  *
  */
 template <typename T>
 struct SequenceExpandGradFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx) {
     int dout_offset = 0;
     for (size_t i = 1; i < ref_lod.size(); ++i) {
       int repeat_num = ref_lod[i] - ref_lod[i - 1];
@@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel {
       return;
     }
 
-    framework::Vector<size_t> ref_x_lod;
-    framework::Vector<size_t> ref_lod = y_lod[ref_level];
+    phi::Vector<size_t> ref_x_lod;
+    phi::Vector<size_t> ref_lod = y_lod[ref_level];
     if (x->lod().size() == 1) {
       ref_x_lod = x->lod()[0];
     } else {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h
index bc95daa1422..24e0ee2d11a 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h
@@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto xlod = x.lod()[0];
-      paddle::framework::MixVector<size_t> mixv_xlod(&xlod);
+      phi::MixVector<size_t> mixv_xlod(&xlod);
       lod = mixv_xlod.CUDAData(ctx.GetPlace());
     } else {
 #endif
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu
index e58cff60aea..3ff7793a195 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu
@@ -124,7 +124,7 @@
 template <typename T>
 struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext &context,
                   const LoDTensor &x,
-                  const framework::Vector<size_t> &ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t> &ref_lod, /*referenced lod*/
                   LoDTensor *out) {
     int height = ref_lod.size() - 1;
 
@@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor {
     dim3 block_size(thread_x);
     dim3 grid_size(max_blocks);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_softmax_kernel
         <<>>(
             x.data(),
@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor {
   void operator()(const phi::GPUContext &context,
                   const LoDTensor &dout,
                   const LoDTensor &out,
-                  const framework::Vector<size_t> &ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t> &ref_lod, /*referenced lod*/
                   LoDTensor *dx) {
     size_t height = ref_lod.size() - 1;
 
@@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor {
     dim3 block_size(thread_x);
     dim3 grid_size(max_blocks);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_softmax_grad_kernel
         <<>>(
             dout.data(),
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
index 03036a0babf..dfa5919dc0b 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
@@ -21,11 +21,10 @@ namespace operators {
 
 template <typename DeviceContext, typename T>
 struct SequenceSoftmaxFunctor {
-  void operator()(
-      const DeviceContext &ctx,
-      const phi::DenseTensor &x,
-      const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      phi::DenseTensor *out);
+  void operator()(const DeviceContext &ctx,
+                  const phi::DenseTensor &x,
+                  const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor *out);
 };
 
 template <typename DeviceContext, typename T>
@@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor {
   void operator()(const DeviceContext &ctx,
                   const phi::DenseTensor &dout,
                   const phi::DenseTensor &out,
-                  const framework::Vector<size_t> &ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t> &ref_lod, /*referenced lod*/
                   phi::DenseTensor *dx);
 };
 
@@ -41,7 +40,7 @@
 template <typename T>
 struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext &ctx,
                   const phi::DenseTensor &x,
-                  const framework::Vector<size_t> &ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t> &ref_lod, /*referenced lod*/
                   phi::DenseTensor *out) {
     size_t height = ref_lod.size() - 1;
     const T *in_data = x.data();
@@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor {
   void operator()(const phi::CPUContext &ctx,
                   const phi::DenseTensor &dout,
                   const phi::DenseTensor &out,
-                  const framework::Vector<size_t> &ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t> &ref_lod, /*referenced lod*/
                   phi::DenseTensor *dx) {
     size_t height = ref_lod.size() - 1;
diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
index 5de8c56f3c6..bb7cd5c1b0b 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
@@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel {
     auto pos_data = pos->mutable_data(context.GetPlace());
 
     int offset = 0;
-    framework::Vector<size_t> vec_out_lod;
+    phi::Vector<size_t> vec_out_lod;
     vec_out_lod.reserve(batch_size + 1);
     for (int i = 0; i <= batch_size; ++i) {
       offset = row_lod[i];
diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h
index 4bc1289bf46..fbf961ab225 100644
--- a/paddle/fluid/operators/shuffle_batch_op.h
+++ b/paddle/fluid/operators/shuffle_batch_op.h
@@ -25,16 +25,16 @@
 
 #include "glog/logging.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/timer.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 template <typename T>
 class ShuffleBatchKernel : public framework::OpKernel {
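Several operator files above (filter_by_instag_op.h/.cu, fused_seqpool_cvm_op.cu, shuffle_batch_op.h) keep a file-local alias so their call sites are untouched by the move. A standalone sketch of that pattern; the NumSequences helper is illustrative, not from the patch:

    #include <cstddef>
    #include <vector>

    namespace phi {
    template <typename T>
    using Vector = std::vector<T>;  // what mixed_vector.h declares post-move
    }  // namespace phi

    // Only the alias target changes (framework::Vector -> phi::Vector), so
    // every use of Vector<size_t> in the file compiles unchanged.
    template <typename T>
    using Vector = phi::Vector<T>;

    size_t NumSequences(const Vector<size_t> &lod0) {
      // lod0 holds cumulative offsets; N + 1 offsets bound N sequences.
      return lod0.empty() ? 0 : lod0.size() - 1;
    }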
diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h
index b41453b849b..0064567887e 100644
--- a/paddle/fluid/operators/tdm_child_op.h
+++ b/paddle/fluid/operators/tdm_child_op.h
@@ -22,8 +22,8 @@
 #include
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h
index 1ba0e2c66be..1b0e92b4d9b 100644
--- a/paddle/fluid/operators/tdm_sampler_op.h
+++ b/paddle/fluid/operators/tdm_sampler_op.h
@@ -22,9 +22,9 @@
 #include
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/sampler.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc
index 570920022e8..aa42a932357 100644
--- a/paddle/fluid/pybind/tensor.cc
+++ b/paddle/fluid/pybind/tensor.cc
@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) {  // NOLINT
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
             self.set_rows(rows);
 #else
-            Vector<int64_t> new_rows(rows);
+            std::vector<int64_t> new_rows(rows);
             self.set_rows(new_rows);
 #endif
           })
diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt
index e47e3a731c4..85df40cc47c 100644
--- a/paddle/phi/core/CMakeLists.txt
+++ b/paddle/phi/core/CMakeLists.txt
@@ -114,6 +114,11 @@ cc_library(
   SRCS custom_kernel.cc
   DEPS kernel_factory)
 
+cc_library(
+  mixed_vector
+  SRCS mixed_vector.cc
+  DEPS device_context place memory)
+
 # Will remove once we implemented MKLDNN_Tensor
 if(WITH_MKLDNN)
   add_dependencies(dense_tensor mkldnn)
diff --git a/paddle/fluid/framework/mixed_vector.cc b/paddle/phi/core/mixed_vector.cc
similarity index 79%
rename from paddle/fluid/framework/mixed_vector.cc
rename to paddle/phi/core/mixed_vector.cc
index c3c3581a6a7..eb1e34e7b23 100644
--- a/paddle/fluid/framework/mixed_vector.cc
+++ b/paddle/phi/core/mixed_vector.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #include
 #include
@@ -22,28 +22,26 @@ limitations under the License. */
 #include
 
 #include "glog/logging.h"
-#include "paddle/fluid/framework/details/cow_ptr.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <typename T>
 void CopyToCPUHelper(std::vector<T> *cpu_,
-                     paddle::memory::AllocationPtr *gpu_,
+                     phi::Allocator::AllocationPtr *gpu_,
                      size_t *gpu_memory_size_) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // COPY GPU Data To CPU
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
+      phi::DeviceContextPool::Instance().Get((*gpu_)->place()));
   auto stream = dev_ctx->stream();
   void *src = (*gpu_)->ptr();
   void *dst = cpu_->data();
-  paddle::memory::Copy(platform::CPUPlace(),
+  paddle::memory::Copy(phi::CPUPlace(),
                        dst,
                        OptionalCUDAPlace(*gpu_).get(),
                        src,
@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_,
 
 template <typename T>
 void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
-                             paddle::memory::AllocationPtr *gpu_,
+                             phi::Allocator::AllocationPtr *gpu_,
                              size_t *gpu_memory_size_,
-                             const platform::Place &place) {
+                             const phi::Place &place) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   void *src = cpu_->data();
   *gpu_memory_size_ = cpu_->size() * sizeof(T);  // sizeof(T)
-  (*gpu_) = memory::Alloc(place, *gpu_memory_size_);
+  (*gpu_) = paddle::memory::Alloc(place, *gpu_memory_size_);
   void *dst = (*gpu_)->ptr();
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   auto stream = dev_ctx->stream();
   paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(),
                        dst,
-                       platform::CPUPlace(),
+                       phi::CPUPlace(),
                        src,
                        *gpu_memory_size_,
                        stream);
@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
                                                                               \
   template <>                                                                 \
   void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                    \
-      const platform::Place &place) const {                                   \
+      const phi::Place &place) const {                                        \
     CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
   }
 
@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t)
 INSTANTIATE_VECTOR_FOR_TYPE(int)
 INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi
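Both helpers above follow one pattern: resolve the device context for a place through phi's global pool, then issue an asynchronous copy on that context's stream. A sketch of the same pattern in isolation, using only calls that appear in this file (CopyHostToDevice and its parameters are hypothetical; assumes a CUDA/HIP build):

    #include "paddle/fluid/memory/malloc.h"
    #include "paddle/fluid/memory/memcpy.h"
    #include "paddle/phi/backends/all_context.h"

    void CopyHostToDevice(const void* cpu_buf, size_t bytes, phi::GPUPlace place) {
      auto gpu_buf = paddle::memory::Alloc(place, bytes);
      auto* dev_ctx = static_cast<phi::GPUContext*>(
          phi::DeviceContextPool::Instance().Get(place));
      // Async copy on the pooled context's stream; the caller must keep
      // cpu_buf alive (or synchronize) until the stream drains.
      paddle::memory::Copy(place, gpu_buf->ptr(), phi::CPUPlace(), cpu_buf,
                           bytes, dev_ctx->stream());
    }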
diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/phi/core/mixed_vector.h
similarity index 87%
rename from paddle/fluid/framework/mixed_vector.h
rename to paddle/phi/core/mixed_vector.h
index f94bff41472..d25a646608d 100644
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/phi/core/mixed_vector.h
@@ -22,20 +22,22 @@ limitations under the License. */
 #include
 
 #include "glog/logging.h"
-#include "paddle/fluid/memory/allocation/allocator.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/allocator.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <typename T>
 using Vector = std::vector<T>;
 
-inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
-    const paddle::memory::allocation::AllocationPtr &gpu_) {
+inline paddle::optional<phi::Place> OptionalCUDAPlace(
+    const phi::Allocator::AllocationPtr &gpu_) {
   return gpu_ == nullptr
              ? paddle::none
-             : paddle::optional<platform::CUDAPlace>(gpu_->place());
+             : paddle::optional<phi::Place>(gpu_->place());
 }
 
 // Vector<T> implements the std::vector interface, and can get Data or
@@ -146,18 +148,18 @@ class MixVector {
   }
 
   // get cuda ptr. immutable
-  const T *CUDAData(platform::Place place) const {
+  const T *CUDAData(phi::Place place) const {
     PADDLE_ENFORCE_EQ(
-        platform::is_gpu_place(place),
+        place.GetType() == phi::AllocationType::GPU,
         true,
-        platform::errors::Unavailable(
+        phi::errors::Unavailable(
             "Place mismatch, CUDA Data must be on CUDA place."));
     ImmutableCUDA(place);
     return reinterpret_cast<const T *>(gpu_->ptr());
   }
 
   // get cuda ptr. mutable
-  T *CUDAMutableData(platform::Place place) {
+  T *CUDAMutableData(phi::Place place) {
     const T *ptr = CUDAData(place);
     flag_ = kDirty | kDataInCUDA;
     return const_cast<T *>(ptr);
@@ -178,7 +180,7 @@ class MixVector {
 
   std::mutex &Mutex() const { return mtx_; }
 
-  paddle::optional<platform::CUDAPlace> CUDAPlace() const {
+  paddle::optional<phi::Place> CUDAPlace() const {
     return OptionalCUDAPlace(gpu_);
   }
 
@@ -199,7 +201,7 @@ class MixVector {
   void CopyToCPU() const;
 
-  void ImmutableCUDA(platform::Place place) const {
+  void ImmutableCUDA(phi::Place place) const {
     if (IsDirty()) {
       if (IsInCPU()) {
         CopyCPUDataToCUDA(place);
@@ -207,7 +209,7 @@ class MixVector {
         SetFlag(kDataInCUDA);
       } else if (IsInCUDA() && !(place == gpu_->place())) {
         PADDLE_THROW(
-            platform::errors::Unavailable("Unexpected data place mismatch."));
+            phi::errors::Unavailable("Unexpected data place mismatch."));
         // Still dirty
       } else {
         // Dirty && DataInCUDA && Device is same
@@ -220,7 +222,7 @@ class MixVector {
         SetFlag(kDataInCUDA);
       } else if (!(place == gpu_->place())) {
         PADDLE_THROW(
-            platform::errors::Unavailable("Unexpected data place mismatch."));
+            phi::errors::Unavailable("Unexpected data place mismatch."));
       } else {
         // Not Dirty && DataInCUDA && Device is same
         // Do nothing.
       }
     }
   }
 
-  void CopyCPUDataToCUDA(const platform::Place &place) const;
+  void CopyCPUDataToCUDA(const phi::Place &place) const;
 
   void ImmutableCPU() const {
     if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
@@ -249,7 +251,7 @@ class MixVector {
   bool IsInCPU() const { return flag_ & kDataInCPU; }
 
   std::vector<T> *cpu_;
-  mutable paddle::memory::allocation::AllocationPtr gpu_;
+  mutable phi::Allocator::AllocationPtr gpu_;
   mutable size_t gpu_memory_size_{0};
   mutable int flag_;
 
@@ -332,9 +334,9 @@ class MixVector {
   }
 
   // get cuda ptr. immutable
-  const T *CUDAData(platform::Place place) const {
+  const T *CUDAData(phi::Place place) const {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -348,9 +350,9 @@ class MixVector {
   }
 
   // get cuda ptr. mutable
-  T *CUDAMutableData(platform::Place place) {
+  T *CUDAMutableData(phi::Place place) {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -372,8 +374,8 @@ class MixVector {
   void reserve(size_t size) { m_->reserve(size); }
 
   // the unify method to access CPU or CUDA data. immutable.
-  const T *Data(platform::Place place) const {
-    if (platform::is_gpu_place(place)) {
+  const T *Data(phi::Place place) const {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAData(place);
     } else {
       return data();
     }
   }
 
   // the unify method to access CPU or CUDA data. mutable.
-  T *MutableData(platform::Place place) {
-    if (platform::is_gpu_place(place)) {
+  T *MutableData(phi::Place place) {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAMutableData(place);
     } else {
       return data();
     }
@@ -397,5 +399,4 @@ class MixVector {
   mutable std::unique_ptr<VectorData> m_;
 };
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi
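With the header in its new home, call sites wrap an ordinary phi::Vector (now a plain std::vector alias) in a MixVector only for the duration of a device access. A usage sketch built from the interface above (Example is a hypothetical function; assumes a GPU build):

    #include "paddle/phi/core/mixed_vector.h"

    void Example(phi::GPUPlace gpu) {
      phi::Vector<size_t> lod = {0, 3, 5};  // ordinary host-side vector
      phi::MixVector<size_t> mixv(&lod);    // non-owning wrapper
      // Data() dispatches on the place: GPU -> CUDAData(), CPU -> data().
      const size_t* dev_ptr = mixv.Data(gpu);
      // ... launch a kernel that reads dev_ptr ...
      (void)dev_ptr;
      // MutableData()/CUDAMutableData() additionally mark the GPU copy
      // dirty, so a later CPU read triggers a copy back.
    }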
diff --git a/paddle/phi/kernels/cpu/edit_distance_kernel.cc b/paddle/phi/kernels/cpu/edit_distance_kernel.cc
index 735086ba0ed..190bc3fa552 100644
--- a/paddle/phi/kernels/cpu/edit_distance_kernel.cc
+++ b/paddle/phi/kernels/cpu/edit_distance_kernel.cc
@@ -14,10 +14,10 @@
 
 #include "paddle/phi/kernels/edit_distance_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 
 namespace phi {
@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx,
   int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum);
   auto batch_size = hyps.dims()[0];
 
-  paddle::framework::Vector<int64_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<int64_t> ref_lod(batch_size + 1);
+  phi::Vector<int64_t> hyp_lod(batch_size + 1);
+  phi::Vector<int64_t> ref_lod(batch_size + 1);
   bool use_length = hypslength.get_ptr() != nullptr;
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc
index fb087660612..e2a3513a428 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cc
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/phi/backends/onednn/axpy_handler.h"
@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
     auto* in2_value = input2->mutable_value();
 
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
 
     auto in1_place = input1.place();
@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
     std::vector<int64_t> in2_rows;
     in2_rows.reserve(in2_rows.size() + size);
     for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
-      const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows();
+      const phi::Vector<int64_t>& in_rows = (*iter)->rows();
       in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
     }
     input2->set_rows(in2_rows);
@@ -646,7 +646,7 @@ struct MergeAdd<phi::CPUContext, T> {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
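MergeAdd's effect, independent of device, is to combine duplicate rows of a SelectedRows by summing their value slices. A reference implementation of just that semantics on plain containers (MergeAddRef is a hypothetical stand-in, not the phi functor):

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // rows[i] names the logical row of values[i*width .. (i+1)*width).
    // Returns (unique sorted rows, summed values) in matching order.
    std::pair<std::vector<int64_t>, std::vector<float>> MergeAddRef(
        const std::vector<int64_t>& rows,
        const std::vector<float>& values,
        int64_t width) {
      std::map<int64_t, std::vector<float>> acc;  // ordered map: sorted result
      for (size_t i = 0; i < rows.size(); ++i) {
        auto& slot = acc[rows[i]];
        slot.resize(width, 0.f);
        for (int64_t j = 0; j < width; ++j) slot[j] += values[i * width + j];
      }
      std::pair<std::vector<int64_t>, std::vector<float>> out;
      for (auto& kv : acc) {
        out.first.push_back(kv.first);
        out.second.insert(out.second.end(), kv.second.begin(), kv.second.end());
      }
      return out;
    }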
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cu b/paddle/phi/kernels/funcs/selected_rows_functor.cu
index 8f409466e19..91f186415fe 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cu
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cu
@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
                       input2.height()));
     output->set_height(in1_height);
 
-    paddle::framework::Vector<int64_t> in1_rows(input1.rows());
+    phi::Vector<int64_t> in1_rows(input1.rows());
     auto& in2_rows = input2.rows();
     std::vector<int64_t> out_rows;
     out_rows.reserve(in1_rows.size() + in2_rows.size());
@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
            in1_data,
@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
     auto* in2_value = input2->mutable_value();
 
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     if (in1_rows.size()) {
       mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
     }
@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddToTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data,
@@ -378,7 +378,7 @@ struct MergeAddImpl<phi::GPUContext, T> {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
@@ -386,7 +386,7 @@ struct MergeAddImpl<phi::GPUContext, T> {
     phi::SelectedRows& out = *output;
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
 
     auto input_width = input.value().dims()[1];
@@ -407,8 +407,8 @@ struct MergeAddImpl<phi::GPUContext, T> {
     dim3 threads(block_size, 1);
     dim3 grid1(input_rows.size(), 1);
 
-    paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-    paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+    phi::MixVector<int64_t> mix_vector_input(&input_rows);
+    phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
     MergeAddKernel<<<grid1, threads, 0, context.stream()>>>(
         input_data,
         mix_vector_input.CUDAData(context.GetPlace()),
@@ -459,7 +459,7 @@ struct MergeAddImpl<phi::GPUContext, T> {
     }
     std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
                                         merged_row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
     out.set_rows(merge_rows);
     out.set_height(input_height);
@@ -485,8 +485,8 @@ struct MergeAddImpl<phi::GPUContext, T> {
       auto& input_rows = input->rows();
       dim3 grid1(input_rows.size(), 1);
 
-      paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-      paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+      phi::MixVector<int64_t> mix_vector_input(&input_rows);
+      phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
       MergeAddKernel<<<grid1, threads, 0, context.stream()>>>(
           input_data,
           mix_vector_input.CUDAData(context.GetPlace()),
diff --git a/paddle/phi/kernels/funcs/sequence2batch.cc b/paddle/phi/kernels/funcs/sequence2batch.cc
index 302dd6ec6ac..11a687cdeaf 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.cc
+++ b/paddle/phi/kernels/funcs/sequence2batch.cc
@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> {
  public:
   void operator()(const phi::CPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     size_t* index = index_lod.data();
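CopyMatrixRowsFunctor's contract, which the CPU loop above and the CUDA kernel below both satisfy: with is_src_index true, output row i is gathered from source row index_lod[i]; with it false, source row i is scattered to output row index_lod[i]. A plain-array sketch (hypothetical helper, float standing in for T):

    #include <cstring>
    #include <vector>

    void CopyMatrixRows(const float* src, float* dst, size_t width,
                        const std::vector<size_t>& index_lod,
                        bool is_src_index) {
      for (size_t i = 0; i < index_lod.size(); ++i) {
        const float* from = src + (is_src_index ? index_lod[i] : i) * width;
        float* to = dst + (is_src_index ? i : index_lod[i]) * width;
        std::memcpy(to, from, width * sizeof(float));
      }
    }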
diff --git a/paddle/phi/kernels/funcs/sequence2batch.cu b/paddle/phi/kernels/funcs/sequence2batch.cu
index 6c8ec9bca01..f743f1b33dd 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.cu
+++ b/paddle/phi/kernels/funcs/sequence2batch.cu
@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
  public:
   void operator()(const phi::GPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     auto src_dims = src.dims();
@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
     dim3 threads(128, 8);
     dim3 grid(8, 1);
     auto stream = context.stream();
-    paddle::framework::MixVector<size_t> mix_index_lod(&index_lod);
+    phi::MixVector<size_t> mix_index_lod(&index_lod);
     CopyMatrixRowsKernel<T, 128, 8><<<grid, threads, 0, stream>>>(
         src_data,
         dst_data,
diff --git a/paddle/phi/kernels/funcs/sequence2batch.h b/paddle/phi/kernels/funcs/sequence2batch.h
index 4fba1d0709f..31fbad6d2f3 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.h
+++ b/paddle/phi/kernels/funcs/sequence2batch.h
@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor {
   // The indexed rows are based on the input index.
   void operator()(const DeviceContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index);
 };
diff --git a/paddle/phi/kernels/funcs/sequence_scale.cu b/paddle/phi/kernels/funcs/sequence_scale.cu
index 06d8db04be6..8feea62a3d7 100644
--- a/paddle/phi/kernels/funcs/sequence_scale.cu
+++ b/paddle/phi/kernels/funcs/sequence_scale.cu
@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> {
     const size_t seq_width = seq->numel() / seq->dims()[0];
     auto abs_offset_lod = paddle::framework::ToAbsOffset(lod);
     T* seq_data = context.template Alloc<T>(seq);
-    paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
+    phi::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
 
 #ifdef PADDLE_WITH_HIP
     hipLaunchKernelGGL(
diff --git a/paddle/phi/kernels/gpu/adagrad_kernel.cu b/paddle/phi/kernels/gpu/adagrad_kernel.cu
index 53169c8bcfb..8f3d7a3c558 100644
--- a/paddle/phi/kernels/gpu/adagrad_kernel.cu
+++ b/paddle/phi/kernels/gpu/adagrad_kernel.cu
@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
     auto grad_merge = merge_func(context, grad);
     auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
-    paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
+    phi::Vector<int64_t> merge_rows(grad_merge.rows());
     // 2. m += g_m * g_m
     auto grad_square = SquareSelectedRows(context, grad_merge);
@@ -104,7 +104,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid2(1, merge_rows.size());
-    paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
+    phi::MixVector<int64_t> mixv_merge_rows(&merge_rows);
     SparseAdagradFunctorKernel <<(ctx).stream();
diff --git a/paddle/phi/kernels/gpu/edit_distance_kernel.cu b/paddle/phi/kernels/gpu/edit_distance_kernel.cu
-  paddle::framework::Vector<int64_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<int64_t> ref_lod(batch_size + 1);
+  phi::Vector<int64_t> hyp_lod(batch_size + 1);
+  phi::Vector<int64_t> ref_lod(batch_size + 1);
   bool use_length = hypslength.get_ptr() != nullptr;
diff --git a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
index 8bb00f07592..bad7019a608 100644
--- a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/kernels/embedding_grad_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/embedding_util.h"
 
@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor {
   dim3 threads(128, 8);
   dim3 grids(8, 1);
   auto stream = dev_ctx_.stream();
-  paddle::framework::Vector<int64_t> new_rows;
+  phi::Vector<int64_t> new_rows;
   new_rows.resize(ids_num);
   auto gpu_place = dev_ctx_.GetPlace();
 
-  paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+  phi::MixVector<int64_t> mixv_new_rows(&new_rows);
   if (!std::is_same<T, int64_t>::value) {
     InputTypeConvert<<<grids, threads, 0, stream>>>(
         ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
diff --git a/paddle/phi/kernels/gpu/sgd_kernel.cu b/paddle/phi/kernels/gpu/sgd_kernel.cu
index b7cf9e5badc..73115a58fa9 100644
--- a/paddle/phi/kernels/gpu/sgd_kernel.cu
+++ b/paddle/phi/kernels/gpu/sgd_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/kernels/sgd_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_helper.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace phi {
 
@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel(
   int thread_x = kThreadsPerBlock;
   int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
   int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-  paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+  phi::MixVector<int64_t> mixv_in_rows(&in_rows);
   SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
       in_data,
       mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
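The sparse optimizer paths in this patch all reduce to the same shape: merge the gradient's duplicate rows, expose the merged row indices to the device via MixVector, then update only those parameter rows. The SGD case as a CPU reference loop (SparseSgdRef and its raw-array arguments are hypothetical):

    #include <cstdint>
    #include <vector>

    // param is row-major [height, width]; grad_values is row-major
    // [grad_rows.size(), width], already merged so rows are unique.
    void SparseSgdRef(float* param, int64_t width, float lr,
                      const std::vector<int64_t>& grad_rows,
                      const std::vector<float>& grad_values) {
      for (size_t i = 0; i < grad_rows.size(); ++i) {
        float* p = param + grad_rows[i] * width;
        const float* g = grad_values.data() + i * width;
        for (int64_t j = 0; j < width; ++j) p[j] -= lr * g[j];
      }
    }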
diff --git a/paddle/phi/kernels/impl/momentum_kernel_impl.h b/paddle/phi/kernels/impl/momentum_kernel_impl.h
index f3e22e5d944..332787740af 100644
--- a/paddle/phi/kernels/impl/momentum_kernel_impl.h
+++ b/paddle/phi/kernels/impl/momentum_kernel_impl.h
@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx,
   merge_func(ctx, grad, merged_grad);
 
   auto* grad_merge_rows = merged_grad->mutable_rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
   int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size();
   funcs::ForRange<Context> for_range(ctx, param.numel());
diff --git a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
index f2a56ff6b8e..a0cb0a887b6 100644
--- a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+++ b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx,
     funcs::ForRange<Context> for_range(ctx, limit);
 
     auto &grad_merge_rows = merged_grad->rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
     const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
 
     auto &merged_tensor = merged_grad->value();
diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
index d2fc934c71b..baabf8465b7 100644
--- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h
+++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx,
                    DenseTensor* loss,
                    DenseTensor* warpctcgrad) {
   size_t num_sequences, sequence_width, max_sequence_length;
-  paddle::framework::Vector<size_t> logits_lod;
-  paddle::framework::Vector<size_t> label_lod;
+  phi::Vector<size_t> logits_lod;
+  phi::Vector<size_t> label_lod;
   if (logits_length.is_initialized() && labels_length.is_initialized()) {
     num_sequences = logits.dims()[1];
     sequence_width = logits.dims()[2];
@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx,
                             paddle::operators::math::TotalSequenceLength(label_lod)),
                         1});
   dev_ctx.template HostAlloc<int>(&warpctc_label);
-  std::vector<paddle::framework::Vector<size_t>> lod;
+  std::vector<phi::Vector<size_t>> lod;
   lod.push_back(label_lod);
   warpctc_label.set_lod(lod);
 
diff --git a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
index b58bcd02580..b9bc98425c9 100644
--- a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel(
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
index a4b3f14306d..81e3a33c359 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel(
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
index 90c95492eee..f1e84970045 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel(
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
index 2512304944e..9a4fd216cd5 100644
--- a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
@@ -14,9 +14,9 @@
 
 #include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
 
 namespace phi {
@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
   PADDLE_ENFORCE_NOT_NULL(
       path.get_ptr(),
       errors::NotFound("Custom tree must be set for sparse mode!"));
-  paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
+  phi::Vector<int64_t> real_rows = PathToRows(*path);
   w_grad->set_rows(real_rows);
   // Build a map of id -> row_index to speed up finding the index of one id
   w_grad->set_height(w.dims()[0]);
diff --git a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
index cac9ef79721..0e948589f8b 100644
--- a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx,
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
   if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt
index 3dc29205375..26c7e3b4389 100644
--- a/paddle/phi/tests/core/CMakeLists.txt
+++ b/paddle/phi/tests/core/CMakeLists.txt
@@ -70,3 +70,20 @@ cc_test(
   test_tensor_array
   SRCS test_tensor_array.cc
   DEPS tensor_array)
+
+if(WITH_GPU)
+  nv_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+elseif(WITH_ROCM)
+  hip_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+else()
+  cc_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc
+    DEPS mixed_vector place memory device_context tensor)
+endif()
diff --git a/paddle/fluid/framework/mixed_vector_test.cc b/paddle/phi/tests/core/test_mixed_vector.cc
similarity index 88%
rename from paddle/fluid/framework/mixed_vector_test.cc
rename to paddle/phi/tests/core/test_mixed_vector.cc
index 6b39d80a43a..14b8c542225 100644
--- a/paddle/fluid/framework/mixed_vector_test.cc
+++ b/paddle/phi/tests/core/test_mixed_vector.cc
@@ -12,7 +12,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #include "glog/logging.h"
 #include "gtest/gtest-message.h"
@@ -21,7 +21,7 @@
 #include "gtest/gtest_pred_impl.h"
 
 template <typename T>
-using vec = paddle::framework::Vector<T>;
+using vec = phi::Vector<T>;
 
 TEST(mixed_vector, CPU_VECTOR) {
   vec<int> tmp;
@@ -44,7 +44,7 @@ TEST(mixed_vector, CPU_VECTOR) {
 }
 
 TEST(mixed_vector, InitWithCount) {
-  paddle::framework::Vector<int> vec(10, 10);
+  phi::Vector<int> vec(10, 10);
   for (int i = 0; i < 10; ++i) {
     ASSERT_EQ(vec[i], 10);
   }
@@ -58,7 +58,7 @@ TEST(mixed_vector, ForEach) {
 }
 
 TEST(mixed_vector, Reserve) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.reserve(1);
   vec.push_back(0);
   vec.push_back(0);
@@ -66,7 +66,7 @@ TEST(mixed_vector, Reserve) {
 }
 
 TEST(mixed_vector, Resize) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.resize(1);
   vec.push_back(0);
   vec.push_back(0);
diff --git a/paddle/fluid/framework/mixed_vector_test.cu b/paddle/phi/tests/core/test_mixed_vector.cu
similarity index 80%
rename from paddle/fluid/framework/mixed_vector_test.cu
rename to paddle/phi/tests/core/test_mixed_vector.cu
index 61d256ffb22..92467d44013 100644
--- a/paddle/fluid/framework/mixed_vector_test.cu
+++ b/paddle/phi/tests/core/test_mixed_vector.cu
@@ -23,13 +23,14 @@
 #include "glog/logging.h"
 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/mixed_vector.h"
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 template <typename T>
-using vec = paddle::framework::MixVector<T>;
-using gpuStream_t = paddle::gpuStream_t;
+using vec = phi::MixVector<T>;
+using gpuStream_t = phi::gpuStream_t;
 
 static __global__ void multiply_10(int* ptr) {
   for (int i = 0; i < 10; ++i) {
@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) {
   }
 }
 
-gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) {
+gpuStream_t GetCUDAStream(phi::GPUPlace place) {
   return reinterpret_cast<const phi::GPUContext*>(
-             paddle::platform::DeviceContextPool::Instance().Get(place))
+             phi::DeviceContextPool::Instance().Get(place))
       ->stream();
 }
 
@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu(0);
+  phi::GPUPlace gpu(0);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) {
 }
 
 TEST(mixed_vector, MultiGPU) {
-  if (paddle::platform::GetGPUDeviceCount() < 2) {
+  if (phi::backends::gpu::GetGPUDeviceCount() < 2) {
     LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple "
                     "GPUs in your machine.";
     return;
@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu0(0);
-  paddle::platform::SetDeviceId(0);
+  phi::GPUPlace gpu0(0);
+  phi::backends::gpu::SetDeviceId(0);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) {
 #else
   multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
 #endif
-  paddle::platform::CUDAPlace gpu1(1);
+  phi::GPUPlace gpu1(1);
   auto* gpu1_ptr = tmp.MutableData(gpu1);
-  paddle::platform::SetDeviceId(1);
+  phi::backends::gpu::SetDeviceId(1);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(
diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py
index f5c57a312d8..8d22fd6c249 100755
--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_mix_precision_all_reduce_fuse',
     'test_spp_op',
     'test_op_converter',
-    'mixed_vector_test',
+    'test_mixed_vector',
     'test_roi_align_op',
     'test_pad_constant_like',
     'test_mul_op',
@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [
    'device_context_test',
    'test_reference_count_pass_last_lived_ops',
    'copy_same_tensor_test',
-    'mixed_vector_test',
+    'test_mixed_vector',
    'op_registry_test',
    'test_prepare_op',
    'data_device_transform_test',
-- 
GitLab