diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc
index 379bc57d5594e7feb55769e5d0a3049cf45fce5b..9bc230b16503028275c63f82fc19a4366019ccc5 100644
--- a/paddle/fluid/distributed/collective/reducer.cc
+++ b/paddle/fluid/distributed/collective/reducer.cc
@@ -1113,7 +1113,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   const auto &rank_ = process_group_->GetRank();
   const auto &size_ = process_group_->GetSize();
 
-  framework::Vector<int64_t> rows_num_vector(size_);
+  phi::Vector<int64_t> rows_num_vector(size_);
   rows_num_vector[rank_] = static_cast(src_rows.size());
 
   Tensor rows_num_tensor = paddle::experimental::empty(
@@ -1183,7 +1183,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   }
   process_group_->AllGather(in, out)->Synchronize();
 
-  framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+  phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
   auto *dst_rows_dense_tensor =
       std::dynamic_pointer_cast(dst_rows_tensor.impl())
           .get();
@@ -1262,7 +1262,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
     Tensor dst_rows_tensor =
         paddle::experimental::concat(rows_tensors, phi::Scalar(0));
-    framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+    phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
     auto *dst_rows_dense_tensor =
         std::dynamic_pointer_cast(dst_rows_tensor.impl())
             .get();
diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc
index f07b66e1e90b9df7e99befd026d73dd8319bd46e..f1493ece37b4b17addfe89111afa1ccc7dab3d40 100644
--- a/paddle/fluid/distributed/ps/service/brpc_utils.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -236,7 +236,7 @@ void DeserializeLodTensor(framework::Variable* var,
   framework::LoD lod;
   for (int i = 0; i < msg.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < msg.lod(i).lod_data_size(); ++j) {
       v.push_back(msg.lod(i).lod_data(j));
     }
diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc
index 3614dedeeb45e5ca39005e6ce3ca5b5ac2dcaba1..4ff9f2709b81c0b257d31ccb5517078de17f4ec3 100644
--- a/paddle/fluid/distributed/test/brpc_utils_test.cc
+++ b/paddle/fluid/distributed/test/brpc_utils_test.cc
@@ -39,7 +39,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor1 = var1->GetMutable();
   tensor1->Resize(phi::make_ddim({512, 8, 4, 2}));
   framework::LoD lod1;
-  lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
+  lod1.push_back(phi::Vector<size_t>({1, 3, 8}));
   tensor1->set_lod(lod1);
   tensor1->mutable_data(*place);
   phi::funcs::set_constant(ctx, tensor1, 31.9);
@@ -49,7 +49,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor2 = var2->GetMutable();
   tensor2->Resize(phi::make_ddim({1000, 64}));
   framework::LoD lod2;
-  lod2.push_back(framework::Vector<size_t>({1, 1}));
+  lod2.push_back(phi::Vector<size_t>({1, 1}));
   tensor2->set_lod(lod2);
   tensor2->mutable_data(*place);
   phi::funcs::set_constant(ctx, tensor2, 100);
@@ -98,7 +98,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var1 = scope_recv.FindVar("x1");
   auto* tensor1 = var1->GetMutable();
   EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2}));
-  // EXPECT_EQ(tensor1->lod(), framework::Vector<size_t>({1, 3, 8}));
+  // EXPECT_EQ(tensor1->lod(), phi::Vector<size_t>({1, 3, 8}));
   auto* tensor_data1 = const_cast(tensor1->data());
   int tensor_numel1 = 512 * 8 * 4 * 2;
   for (int i = 0; i < tensor_numel1; ++i)
@@ -108,7 +108,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var2 = scope_recv.FindVar("x2");
   auto* tensor2 = var2->GetMutable();
   EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64}));
-  // EXPECT_EQ(tensor2->lod(), framework::Vector<size_t>({1, 1}));
+  // EXPECT_EQ(tensor2->lod(), phi::Vector<size_t>({1, 1}));
   auto* tensor_data2 = const_cast(tensor2->data());
   int tensor_numel2 = 1000 * 64;
   for (int i = 0; i < tensor_numel2; ++i) EXPECT_EQ(tensor_data2[i], 100);
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index b86d921b9c533fa11cb6145ea247acfeaf4864c1..2400a81958cf5a372f0e010af9ed17bb6dc370f8 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -162,27 +162,7 @@ cc_test(
   eigen_test
   SRCS eigen_test.cc
   DEPS tensor)
-cc_library(
-  mixed_vector
-  SRCS mixed_vector.cc
-  DEPS device_context place memory)
-if(WITH_GPU)
-  nv_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-elseif(WITH_ROCM)
-  hip_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-else()
-  cc_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc
-    DEPS mixed_vector place memory device_context tensor)
-endif()
 cc_library(
   lod_tensor
   SRCS lod_tensor.cc
diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc
index 471183aaa9ab2977cbb73131e3512ea318aff978..9e7ff1612b360dd09cf424ff28031f7c85d09b9a 100644
--- a/paddle/fluid/framework/data_feed.cc
+++ b/paddle/fluid/framework/data_feed.cc
@@ -2815,7 +2815,7 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
       LoD& lod = (*feed->mutable_lod());
       lod.resize(1);
       lod[0].resize(offset_cols_size);
-      paddle::framework::MixVector<size_t> mixv_lod(&lod[0]);
+      phi::MixVector<size_t> mixv_lod(&lod[0]);
       memcpy(mixv_lod.MutableData(platform::CPUPlace()),
              off_start_ptr,
             offset_cols_size * sizeof(size_t));
diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc
index 9f36bd46368906b13ab2622fd184ce3bf52d69c8..8454b8e4171e7535e3af042b3d540997df5e0518 100644
--- a/paddle/fluid/framework/data_type_test.cc
+++ b/paddle/fluid/framework/data_type_test.cc
@@ -18,6 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/place.h"
 
 TEST(DataType, float16) {
   using paddle::platform::CPUPlace;
diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index 9bf348506655b281804f113de014d9c113496f99..7300ac8083a78465c818f22ed15a825523405a6f 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -15,6 +15,7 @@
 
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc
index 0ccc5bb4ad1a4453cc0539fc8193311a11bc7cff..f6b28b0a22ebc2919d85b7e3b207870f64988107 100644
--- a/paddle/fluid/framework/dlpack_tensor_test.cc
+++ b/paddle/fluid/framework/dlpack_tensor_test.cc
@@ -18,6 +18,7 @@
 #include
 
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/eigen_test.cc b/paddle/fluid/framework/eigen_test.cc
index 1ce55c8a8de2bb20b8a02d39ec2bfb084c2a7409..4771922986b62e4c4966124fdc96892ba32fb1c5 100644
--- a/paddle/fluid/framework/eigen_test.cc
+++ b/paddle/fluid/framework/eigen_test.cc
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/eigen.h"
-
 #include
 
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/platform/place.h"
+
 #include "paddle/phi/core/ddim.h"
 
 namespace paddle {
diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc
index 9d83d519d09b4c84e20ebb002f4a8980c1e9a41e..2cae0721aefa9989333b5263895efad1ca6235ff 100644
--- a/paddle/fluid/framework/fleet/heter_wrapper.cc
+++ b/paddle/fluid/framework/fleet/heter_wrapper.cc
@@ -158,7 +158,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
 
   LoD lod;
   for (int i = 0; i < req_var.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
       v.push_back(req_var.lod(i).lod_data(j));
     }
@@ -203,7 +203,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
 
   LoD lod;
   for (int i = 0; i < req_var.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
       v.push_back(req_var.lod(i).lod_data(j));
     }
diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h
index 78d4c47cb451de6db1496223f24757d890126e51..68aa8fceee96d40e3b9fdcce537955260b64b4e5 100644
--- a/paddle/fluid/framework/lod_tensor.h
+++ b/paddle/fluid/framework/lod_tensor.h
@@ -21,12 +21,12 @@ limitations under the License. */
 #include
 #include
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace framework {
@@ -54,7 +54,7 @@ void MergeLoDTensor(phi::DenseTensor* target,
  * 0 2 4 7
  * 0 2 5 7 10 12 15 20
  */
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 std::string LoDToString(const LoD& lod);
diff --git a/paddle/fluid/framework/lod_tensor_test.cu b/paddle/fluid/framework/lod_tensor_test.cu
index f4054e580f65495faa277ec3d849f758a49afa60..cabd73598a015c585a01025131830876b7e78bd2 100644
--- a/paddle/fluid/framework/lod_tensor_test.cu
+++ b/paddle/fluid/framework/lod_tensor_test.cu
@@ -31,7 +31,7 @@ TEST(LoD, data) {
   lod.push_back(std::vector({0, 1, 6, 8, 10, 11}));
 
   auto& v = lod[0];
-  paddle::framework::MixVector<size_t> mix_vector_v(&v);
+  phi::MixVector<size_t> mix_vector_v(&v);
   paddle::platform::CUDAPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
@@ -69,7 +69,7 @@ TEST(DenseTensor, LoDInGPU) {
   EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
 
   auto lod = lod_tensor.lod();
-  paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));
+  phi::MixVector<size_t> mix_vector(&(lod[0]));
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 451a2309892f849d5e33fd368fade5a3b11baa2b..d581838f6dbd22785d17b46a2ee74c47381a76d5 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -15,15 +15,15 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 namespace paddle {
 namespace framework {
 
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 }  // namespace framework
 }  // namespace paddle
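The lod_tensor.h and tensor.h hunks above carry the central type change: the LoD alias is now spelled in terms of phi::Vector<size_t>, which the renamed header (last hunks of this patch) defines as a plain std::vector. A minimal sketch of what client code looks like after the change (a hypothetical standalone snippet, not part of the patch):

    #include <vector>
    #include "paddle/phi/core/mixed_vector.h"

    void LoDSketch() {
      // One LoD level: offsets {0, 2, 4, 7} describe three sequences of
      // lengths 2, 2 and 3, matching the example in the lod_tensor.h comment.
      std::vector<phi::Vector<size_t>> lod;
      lod.push_back(phi::Vector<size_t>({0, 2, 4, 7}));
      size_t len_of_seq1 = lod[0][2] - lod[0][1];  // == 2
      (void)len_of_seq1;
    }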
diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc
index f7cc7bc85ff921bdf9d8867c7476cef7563d794b..d5f03924e28c17270913e193ac9d5f2bd36d0c29 100644
--- a/paddle/fluid/imperative/all_reduce.cc
+++ b/paddle/fluid/imperative/all_reduce.cc
@@ -104,10 +104,10 @@ static void AllReduce(const phi::SelectedRows &src,
   // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
   // but we can use other ways to implement is in the future
   const auto &src_rows = src.rows();
-  framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
+  phi::Vector<int64_t> rows_num_vector(strategy.nranks_);
   rows_num_vector[strategy.local_rank_] = static_cast(src_rows.size());
   // CUDAMutableData use CalStream
-  paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
+  phi::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
   auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
   VLOG(4) << "start dev_ctx->wait";
   if (!use_calc_stream) {
@@ -138,9 +138,9 @@ static void AllReduce(const phi::SelectedRows &src,
 
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);
 
   auto *dst_tensor = dst->mutable_value();
diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc
index e41ba185e38f6ca372f75ef7f36844fadb7ea042..c59aea5fdeaa048d1ecd61695a4b7d4accda0f2d 100644
--- a/paddle/fluid/imperative/gloo_context.cc
+++ b/paddle/fluid/imperative/gloo_context.cc
@@ -158,9 +158,9 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
           << ", height: " << src.height();
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const int64_t *src_rows_ptr = mixv_src_rows.Data(place);
 
   auto *dst_tensor = dst->mutable_value();
diff --git a/paddle/fluid/operators/assign_op_test.cc b/paddle/fluid/operators/assign_op_test.cc
index 500c1c17a16b721080310bd5b0e755c2c81d0f12..cc6c915c09a40c3e3e2d155c7971f0abae8a5604 100644
--- a/paddle/fluid/operators/assign_op_test.cc
+++ b/paddle/fluid/operators/assign_op_test.cc
@@ -98,7 +98,7 @@ TEST(AssignOp, AssignSelectedRows) {
   assign_functor(input);
 
   auto& out_selected_row = output.Get();
-  const paddle::framework::Vector<int64_t>& out_rows = out_selected_row.rows();
+  const phi::Vector<int64_t>& out_rows = out_selected_row.rows();
   EXPECT_EQ(rows.size(), out_rows.size());
   for (size_t i = 0; i < rows.size(); ++i) {
     EXPECT_EQ(rows[i], out_rows[i]);
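The all_reduce.cc and gloo_context.cc hunks above show the pattern that recurs through the operator changes below: a phi::MixVector temporarily wraps a plain phi::Vector and serves out a pointer on the requested place, migrating data between host and device on demand. A hedged sketch of the shape of such a call site (the method names are the ones used throughout this patch; the kernel launch is elided):

    #include "paddle/phi/core/mixed_vector.h"

    void RowsSketch(const phi::Place &place, phi::Vector<int64_t> *rows) {
      phi::MixVector<int64_t> mix_rows(rows);  // wraps the vector, no copy yet
      // Device-visible, writable pointer; data is copied to `place` on demand.
      // The host-side counterparts used elsewhere in this patch are
      // MutableData(place) and Data(place).
      int64_t *dev_ptr = mix_rows.CUDAMutableData(place);
      (void)dev_ptr;
      // ... launch a kernel that writes through dev_ptr (omitted) ...
    }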
cef3cf25ff6ff84adda5cf3e0a9ded0275a22d87..aa65ac62abff1d43504a153e4a62722add5dbe15 100644 --- a/paddle/fluid/operators/ctc_align_op.cu +++ b/paddle/fluid/operators/ctc_align_op.cu @@ -129,7 +129,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { // merge elements and delete blank T* output_data = output->mutable_data({num_tokens, 1}, ctx.GetPlace()); - paddle::framework::MixVector mixv_input_lod(&input_lod[level]); + phi::MixVector mixv_input_lod(&input_lod[level]); MergeAndDelCudaKernel <<<1, 1, 0, stream>>>(num_tokens, tokens, diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index 400e025f8203079b037c83b183e70e0d811e5a5d..dd90f09b29d15f214edcff0ebdf1bc41befd5b16 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -166,7 +166,7 @@ class CVMGradCUDAKernel : public framework::OpKernel { lod[lod.size() - 1], platform::errors::PreconditionNotMet( "Output(X@GRAD)'s dim[0] must be equal to last element of lod")); - paddle::framework::MixVector mixv_lod(&lod); + phi::MixVector mixv_lod(&lod); CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, PADDLE_CUDA_NUM_THREADS, diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 79f3b18b2dfceebcc3722e74e5c986f1b5978379..4a1558de906b3c3f0a4829b934cb14ee855cba2e 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -59,7 +59,7 @@ class GPUBoxClipKernel : public framework::OpKernel { auto stream = dev_ctx.stream(); const size_t batch_size = lod.back().size() - 1; T *output_data = output->mutable_data(dev_ctx.GetPlace()); - paddle::framework::MixVector mix_vector(&abs_offset_lod[0]); + phi::MixVector mix_vector(&abs_offset_lod[0]); GPUBoxClip<<>>( input->data(), mix_vector.CUDAMutableData(dev_ctx.GetPlace()), diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index 29cf8da067f8492c98838b69aa37cd1ea527dd0e..3f9a55225ca342941a041fa9b9360825c7019b93 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -19,7 +19,6 @@ namespace cub = hipcub; #include -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/detection/bbox_util.h" @@ -28,6 +27,7 @@ namespace cub = hipcub; #include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/funcs/gather.cu.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 5d7a034c28a8f9227967922267dcc617f5434995..d24cbcb81d0196ebab4e0fab58f66fa379bd87f3 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -18,10 +18,10 @@ limitations under the License. 
*/ #include #include -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/bbox_util.cu.h" +#include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/funcs/gather.cu.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/target_assign_op.h b/paddle/fluid/operators/detection/target_assign_op.h index 26e5d90ab704ffbf9f0285a0e72b487d6522cd08..3319dffd226148913b99f60c1ba2ca6847e26edd 100644 --- a/paddle/fluid/operators/detection/target_assign_op.h +++ b/paddle/fluid/operators/detection/target_assign_op.h @@ -121,7 +121,7 @@ class TargetAssignKernel : public framework::OpKernel { auto x_lod = x->lod().back(); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - paddle::framework::MixVector mixv_x_lod(&x_lod); + phi::MixVector mixv_x_lod(&x_lod); size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace()); #else size_t* x_lod_data = x_lod.data(); @@ -155,7 +155,7 @@ class TargetAssignKernel : public framework::OpKernel { const int* neg_idx_data = neg_indices->data(); auto neg_lod = neg_indices->lod().back(); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - paddle::framework::MixVector mixv_neg_lod(&neg_lod); + phi::MixVector mixv_neg_lod(&neg_lod); size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace()); #else size_t* neg_lod_data = neg_lod.data(); diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu index c07a69177b83273dff41130df93e3509e922ce17..0b57cb1a77a5f1c335d114e81eac1eaa7f0450dd 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ b/paddle/fluid/operators/filter_by_instag_op.cu @@ -30,11 +30,11 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/mixed_vector.h" #if defined(PADDLE_WITH_CUDA) namespace cg = cooperative_groups; @@ -46,7 +46,7 @@ namespace operators { using SelectedRows = phi::SelectedRows; template -using Vector = framework::Vector; +using Vector = phi::Vector; #define WARP_SIZE 32 #define MAX_WARP_NUM 32 @@ -376,7 +376,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { } const size_t x2_lods_size = x2_lods.size() - 1; - paddle::framework::MixVector mixv_x2_lods(&x2_lods); + phi::MixVector mixv_x2_lods(&x2_lods); size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place); @@ -401,7 +401,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { } } - paddle::framework::MixVector mixv_x1_lods(&x1_lods); + phi::MixVector mixv_x1_lods(&x1_lods); size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place); auto* x1_data = x1->data(); @@ -433,12 +433,12 @@ class FilterByInstagGPUKernel : public framework::OpKernel { Vector out_lods(x2_lods_size + 1, 0); Vector map_lods(x2_lods_size + 1, 0); - paddle::framework::MixVector mixv_out_lods(&out_lods); - paddle::framework::MixVector mixv_map_lods(&map_lods); + phi::MixVector mixv_out_lods(&out_lods); + phi::MixVector mixv_map_lods(&map_lods); // thrust::device_vector out_idx(1); Vector out_idx(1, 0); - paddle::framework::MixVector mixv_out_idx(&out_idx); + phi::MixVector mixv_out_idx(&out_idx); size_t* out_idx_data = 
mixv_out_idx.CUDAMutableData(gpu_place); size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place); @@ -500,7 +500,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { } else { Vector map_lods(2, 0); - paddle::framework::MixVector mixv_map_lods(&map_lods); + phi::MixVector mixv_map_lods(&map_lods); thrust::device_ptr map_data_ptr(map_data); map_data_ptr[0] = 0; diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index 95e6611d9351f547a80f445d2749b0d5b8b4adfe..28d0208ba896405fba38a091010ef28c3da20bca 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -23,16 +23,16 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/phi/core/mixed_vector.h" namespace paddle { namespace operators { using SelectedRows = phi::SelectedRows; template -using Vector = framework::Vector; +using Vector = phi::Vector; template class FilterByInstagKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index 9fa62a37045471157835f425a0cc961692c35c33..bc8821ef92932b9cf514a052a1120b70747a05b3 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -256,7 +256,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { auto lod = ids->lod()[0]; int64_t out_width = d_output->dims()[1]; - framework::Vector *new_rows = d_table->mutable_rows(); + phi::Vector *new_rows = d_table->mutable_rows(); new_rows->resize(ids_num); std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t)); diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu index 98c2fc9896fee0b5569e834b1692d83b0b226bef..5c7d6fd07454c1e02e611322bc934780bb8a8ab0 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu @@ -14,16 +14,16 @@ #include -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" +#include "paddle/phi/core/mixed_vector.h" namespace paddle { namespace operators { template -using Vector = framework::Vector; +using Vector = phi::Vector; #define CUDA_KERNEL_LOOP(i, n) \ for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ @@ -441,7 +441,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel { int embedding_size = inputs[0]->numel() / inputs[0]->dims()[0]; int batch_size = -1; - std::vector *> mix_lods_v(slot_size); + std::vector *> mix_lods_v(slot_size); for (size_t i = 0; i < slot_size; ++i) { const auto *input = inputs[i]; @@ -480,7 +480,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel { } output_data[i] = reinterpret_cast( dev_ctx.Alloc(output, output->numel() * sizeof(T))); - mix_lods_v[i] = new paddle::framework::MixVector(&lods); + mix_lods_v[i] = new phi::MixVector(&lods); lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace()); seqpool_outputs[i].Resize({batch_size, embedding_size}); seqpool_output_data[i] = reinterpret_cast(dev_ctx.Alloc( @@ -527,7 +527,7 @@ 
class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel { int embedding_size = in_grads[0]->numel() / in_grads[0]->dims()[0]; int batch_size = -1; - std::vector *> mix_lods_v(slot_size); + std::vector *> mix_lods_v(slot_size); for (size_t i = 0; i < slot_size; ++i) { auto *in_grad = in_grads[i]; @@ -563,7 +563,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel { in_grads_data[i] = reinterpret_cast( dev_ctx.Alloc(in_grad, in_grad->numel() * sizeof(T))); - mix_lods_v[i] = new paddle::framework::MixVector(&lods); + mix_lods_v[i] = new phi::MixVector(&lods); lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace()); cvm_data[i] = reinterpret_cast(cvm->data()); } diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index 3ce6d18bde4c93d77924eb36c58d5bbc62c4a375..01b12af1a22c586c2a5b9cff1a623d9bc38c7a03 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -17,11 +17,11 @@ limitations under the License. */ #include #include "dnnl.hpp" // NOLINT -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fused/multi_gru_op.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/mixed_vector.h" namespace paddle { namespace operators { @@ -678,7 +678,7 @@ class MultiGRUHandler { const std::vector biases_; phi::DenseTensor* hidden_; std::vector attrs_; - const paddle::framework::Vector& x_lod_; + const phi::Vector& x_lod_; }; template diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index ed7dfa03494053839e11a56867c9fab0eb333cfd..2d58438dbf35ea0fcca7a3930f13f716752a257b 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -372,7 +372,7 @@ class GRUCPUKernel : public framework::OpKernel { const_cast(weight_data + 2 * frame_size * frame_size); phi::DenseTensor ordered_h0; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (h0) { // Since the batch computing for GRU reorders the input sequences diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index 53006c55f6b98d2cdb3a0f0585ff0847bc219272..0d3686bb495ef903a0926c1591faeac8cc394335 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel { const_cast(weight_data + 2 * frame_size * frame_size); phi::DenseTensor ordered_h0; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (h0) { // Since the batch computing for GRU reorders the input sequences diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 286bf9fe2732d42c9d047bde45f0d0058a40751c..760a33a161cabbcd7c9b1ce41915336f2a5087b4 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -28,7 +28,7 @@ namespace operators { template inline void ReorderInitState(const DeviceContext& ctx, const phi::DenseTensor& src, - framework::Vector index_lod, + phi::Vector index_lod, phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; @@ -79,7 +79,7 @@ class GRUGradKernel : public framework::OpKernel { phi::DenseTensor ordered_h0, ordered_h0_grad; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector 
order(batch_gate->lod()[2]); if (h0) { ReorderInitState( diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 1052e5117e434e0974da93801d0153359ea9e3da..32946d65785a97a863c6c40f8390910a23b2d3b0 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -169,12 +169,12 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { auto stream = dev_ctx.stream(); // copy GPU memory to CPU pinned memory - framework::Vector new_rows; + phi::Vector new_rows; new_rows.resize(ids_num); auto gpu_place = context.GetPlace(); // TODO(yuyang18): Strange code here. - paddle::framework::MixVector mixv_new_rows(&new_rows); + phi::MixVector mixv_new_rows(&new_rows); memory::Copy(gpu_place, mixv_new_rows.CUDAMutableData(context.GetPlace()), gpu_place, diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index a3d8c91d86265248a8d678bcf8a11b84af87f59d..11c35293ebe345556c5bbb2bc1302175583e23e6 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -159,11 +159,11 @@ struct LookupTableV2GradCUDAFunctor { dim3 threads(128, 8); dim3 grids(8, 1); auto stream = dev_ctx.stream(); - framework::Vector new_rows; + phi::Vector new_rows; new_rows.resize(ids_num); auto gpu_place = context_.GetPlace(); - paddle::framework::MixVector mixv_new_rows(&new_rows); + phi::MixVector mixv_new_rows(&new_rows); if (!std::is_same::value) { InputTypeConvert<<>>( ids_data, ids_num, mixv_new_rows.MutableData(gpu_place)); diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index d5ced3edd2addf37bf26bb3f8319d80232d4dfde..cba587815657a04a5f99e7b66f332f6a258052be 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -27,7 +27,7 @@ namespace operators { template inline void ReorderInitState(const DeviceContext& ctx, const phi::DenseTensor& src, - framework::Vector index_lod, + phi::Vector index_lod, phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; @@ -95,7 +95,7 @@ class LSTMKernel : public framework::OpKernel { lstm_value.prev_state_value = nullptr; phi::DenseTensor ordered_c0; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (cell_t0) { // Since the batch computing for LSTM reorders the input sequence @@ -236,7 +236,7 @@ class LSTMGradKernel : public framework::OpKernel { // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. 
phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (c0) { ReorderInitState( diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index c26a421966e7b6f0b72c041a5d92a92f1df60ce6..3272ca84d8a90bd7b33111136b5698e36cc454f1 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -70,7 +70,7 @@ class _ClipGradFunctor { template inline void ReorderInitState(const DeviceContext& ctx, const phi::DenseTensor& src, - framework::Vector index, + phi::Vector index, phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; @@ -158,7 +158,7 @@ class LSTMPKernel : public framework::OpKernel { phi::DenseTensor ordered_c0; phi::DenseTensor ordered_h0; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (cell_t0) { // Since the batch computing for LSTMP reorders the input sequence @@ -350,7 +350,7 @@ class LSTMPGradKernel : public framework::OpKernel { // initialization. phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - framework::Vector order(batch_gate->lod()[2]); + phi::Vector order(batch_gate->lod()[2]); if (c0) { ReorderInitState( diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 400f10558e15515cf02ede27da965654162e0e20..b9f5624db7317927075110f561ec47aa3c91398a 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -446,8 +446,8 @@ class BeamSearchFunctor { framework::LoD selected_lod(2); selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end()); selected_lod[1].resize(scores->dims()[0] + 1); - paddle::framework::MixVector mix_vector(&selected_lod[1]); - paddle::framework::MixVector mixv_abs(&abs_lod[level]); + phi::MixVector mix_vector(&selected_lod[1]); + phi::MixVector mixv_abs(&abs_lod[level]); size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace()); if (num_seqs == 1) { diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 6f53246abf9b8f7abcae57b130553cc96f97cdf5..1723966983682b063945f3a8ce35e5eb0ade4f99 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -28,7 +28,7 @@ namespace math { template void CopyValidData(phi::DenseTensor* dst_tensor, const phi::DenseTensor* src_tensor, - const framework::Vector& seq_offsets, + const phi::Vector& seq_offsets, int pad_seq_len, int step_width, bool norm_by_len, diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index f1f2785fe0a276f37d1a98e4edda2950bdbd57cd..c9810f2240218e55d2f1341cf6858794d8f26c7c 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor { T* pad_data = pad_tensor->data(); const T* pad_value_data = pad_value.data(); - paddle::framework::MixVector mix_vector_seq_offsets(&seq_offsets); + phi::MixVector mix_vector_seq_offsets(&seq_offsets); SequencePaddingKernel<<>>( pad_data, seq_data, @@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor { const T* pad_data = pad_tensor.data(); T* seq_data = seq_tensor->data(); - paddle::framework::MixVector mixv_seq_offsets(&seq_offsets); + phi::MixVector mixv_seq_offsets(&seq_offsets); SequencePaddingKernel<<>>( 
seq_data, pad_data, diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 9e7db9f03ed4929479d9493026d9cd3ad39d3543..ec59309f04f171d21468241db09502c13f3816da 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -29,7 +29,7 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth }; enum CopyType { kSeqToPad, kPadToSeq }; inline static size_t MaximumSequenceLength( - const framework::Vector& seq_offset) { + const phi::Vector& seq_offset) { size_t seq_num = seq_offset.size() - 1; size_t max_seq_len = 0; for (size_t i = 0; i < seq_num; ++i) { @@ -39,7 +39,7 @@ inline static size_t MaximumSequenceLength( } inline static size_t TotalSequenceLength( - const framework::Vector& seq_offset) { + const phi::Vector& seq_offset) { size_t seq_num = seq_offset.size() - 1; size_t total_seq_len = 0; for (size_t i = 0; i < seq_num; ++i) { @@ -50,7 +50,7 @@ inline static size_t TotalSequenceLength( inline static void CheckDims(const framework::DDim& seq_tensor_dims, const framework::DDim& pad_tensor_dims, - const framework::Vector& seq_offset, + const phi::Vector& seq_offset, int64_t padded_seq_len, int64_t step_width, const PadLayout& layout) { diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index 530b68bbfbb3c9aa36862f55444e9f7b9457bb80..e56f0025a0e664c1c093adcbed30184d4c32ea33 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -203,7 +203,7 @@ class SequencePoolFunctor { const size_t item_dim = output->numel() / output->dims()[0]; dim3 threads(1024, 1); dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1); - paddle::framework::MixVector mix_vector(&lod); + phi::MixVector mix_vector(&lod); if (pooltype == "MAX") { sequence_pool_kernel> <<>>( @@ -421,7 +421,7 @@ class SequencePoolGradFunctor { const size_t item_dim = in_grad->numel() / in_grad->dims()[0]; dim3 threads(1024, 1); dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1); - paddle::framework::MixVector mix_vector(&lod); + phi::MixVector mix_vector(&lod); if (pooltype == "MAX") { sequence_pool_grad_kernel> <<>>( diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h index 99e210ce51e96600a3c9b44e897e7e0ab843d458..6feabd46a2de6d8f5145c16b6a42f5401de5c0df 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.h +++ b/paddle/fluid/operators/optimizers/ftrl_op.h @@ -197,7 +197,7 @@ class FTRLOpKernel : public framework::OpKernel { ctx.template device_context(), *grad, merged_grad); auto* merged_rows = merged_grad->mutable_rows(); - paddle::framework::MixVector mixv_merged_rows(merged_rows); + phi::MixVector mixv_merged_rows(merged_rows); const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace()); auto row_numel = static_cast(merged_grad->value().dims()[1]); auto row_height = static_cast(merged_grad->rows().size()); diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index 79cce5abdb03fb9f7c0a8ea154c69a2619861c17..ee7b0f4c7e46ed8a85fbc17ce692ba051f0e3339 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -164,7 +164,7 @@ class SGDOpKernel : public framework::OpKernel { int thread_x = kThreadsPerBlock; int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount(); int max_blocks = std::max(max_threads / kThreadsPerBlock, 1); - 
paddle::framework::MixVector mixv_in_rows(&in_rows); + phi::MixVector mixv_in_rows(&in_rows); SparseSGDFunctorKernel<< : public framework::OpKernel { } else { batch_size = x->lod()[0].size() - 1; } - framework::Vector batch_indices(batch_size + 1); + phi::Vector batch_indices(batch_size + 1); int input_dim = 0; int timesteps = 0; if (is_tensor) { @@ -231,7 +231,7 @@ class RowConvGradKernel : public framework::OpKernel { } else { batch_size = x->lod()[0].size() - 1; } - framework::Vector batch_indices(batch_size + 1); + phi::Vector batch_indices(batch_size + 1); int timesteps = 0; int input_dim = 0; if (is_tensor) { diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu index 81f140b36fce4a6dcff48e790f211e65564a38a0..931e34d2f3ee6ed3cf66ab48edff0b5d269c340a 100644 --- a/paddle/fluid/operators/row_conv_op.cu +++ b/paddle/fluid/operators/row_conv_op.cu @@ -338,7 +338,7 @@ class RowConvKernel : public framework::OpKernel { batch_size = X->lod()[0].size() - 1; } int input_dim = 0; - framework::Vector batch_indices(batch_size + 1); + phi::Vector batch_indices(batch_size + 1); int timesteps = X->dims()[1]; if (is_tensor) { for (int i = 0; i < batch_size + 1; i++) { @@ -352,7 +352,7 @@ class RowConvKernel : public framework::OpKernel { int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - paddle::framework::MixVector mix_vector(&batch_indices); + phi::MixVector mix_vector(&batch_indices); size_t *idx = mix_vector.CUDAMutableData(context.GetPlace()); auto stream = context.cuda_device_context().stream(); @@ -397,7 +397,7 @@ class RowConvGradKernel : public framework::OpKernel { } int input_dim = 0; - framework::Vector batch_indices(batch_size + 1); + phi::Vector batch_indices(batch_size + 1); int timesteps = X->dims()[1]; if (is_tensor) { for (int i = 0; i < batch_size + 1; i++) { @@ -411,7 +411,7 @@ class RowConvGradKernel : public framework::OpKernel { // int input_dim = X->dims()[1]; int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - paddle::framework::MixVector mixv_batch_indices(&batch_indices); + phi::MixVector mixv_batch_indices(&batch_indices); size_t *idx = mixv_batch_indices.CUDAMutableData(context.GetPlace()); auto &device_ctx = context.cuda_device_context(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu index ee69333f924fee9e81fee1c07fe4b3803feab04a..fc78007338c734da7e0f843a2d84f465b178d0b7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu @@ -76,7 +76,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel { out->Resize({in_dims[0], win_size}); auto out_data = out->mutable_data(context.GetPlace()); // Copy LoD to GPU - paddle::framework::MixVector mixv_lod0(&lod0); + phi::MixVector mixv_lod0(&lod0); const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace()); // Calc output tensor CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index b573df956df1c1280dfce56b6b70981400715d76..5a07c7f3924f65d21c8e382abf10c0ecf2e94b0b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -97,7 +97,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { // Copy LoD to GPU 
auto last_lod = lod[lod.size() - 1]; auto lod_len = last_lod.size(); - paddle::framework::MixVector mixv_last_lod(&last_lod); + phi::MixVector mixv_last_lod(&last_lod); const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace()); // Calc output LoD thrust::device_vector dev_out_lod(lod_len); diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu index d5fecace6d7678432c46c69043263c84a9791438..cd01e37fdcf94c106fc7570d917566bdba206e85 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu @@ -65,11 +65,10 @@ static __global__ void sequence_expand_as_grad_kernel( template struct SequenceExpandAsFunctor { - void operator()( - const phi::GPUContext &context, - const phi::DenseTensor &x, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out) { + void operator()(const phi::GPUContext &context, + const phi::DenseTensor &x, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *out) { int height = x.dims()[0]; int width = phi::product(x.dims()) / height; @@ -84,7 +83,7 @@ struct SequenceExpandAsFunctor { dim3 block_size(thread_x); dim3 grid_size(block_x); - paddle::framework::MixVector mixv_ref_lod(&ref_lod); + phi::MixVector mixv_ref_lod(&ref_lod); sequence_expand_as_kernel<<>>( x.data(), mixv_ref_lod.CUDAData(context.GetPlace()), @@ -98,7 +97,7 @@ template struct SequenceExpandAsGradFunctor { void operator()(const phi::GPUContext &context, const phi::DenseTensor &dout, - const framework::Vector &ref_lod, /*expand based lod*/ + const phi::Vector &ref_lod, /*expand based lod*/ phi::DenseTensor *dx) { int height = dx->dims()[0]; int width = phi::product(dx->dims()) / height; @@ -114,7 +113,7 @@ struct SequenceExpandAsGradFunctor { dim3 block_size(thread_x); dim3 grid_size(block_x); - paddle::framework::MixVector mixv_ref_lod(&ref_lod); + phi::MixVector mixv_ref_lod(&ref_lod); sequence_expand_as_grad_kernel<< struct SequenceExpandAsFunctor { - void operator()( - const DeviceContext &ctx, - const phi::DenseTensor &x, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out); + void operator()(const DeviceContext &ctx, + const phi::DenseTensor &x, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *out); }; template struct SequenceExpandAsGradFunctor { - void operator()( - const DeviceContext &ctx, - const phi::DenseTensor &dout, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *dx); + void operator()(const DeviceContext &ctx, + const phi::DenseTensor &dout, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *dx); }; template struct SequenceExpandAsFunctor { - void operator()( - const phi::CPUContext &context, - const phi::DenseTensor &x, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out) { + void operator()(const phi::CPUContext &context, + const phi::DenseTensor &x, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *out) { int64_t height = x.dims()[0]; int64_t width = phi::product(x.dims()) / height; @@ -122,11 +119,10 @@ class SequenceExpandAsKernel : public framework::OpKernel { * */ template struct SequenceExpandAsGradFunctor { - void operator()( - const phi::CPUContext &context, - const phi::DenseTensor &dout, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *dx) { + void 
operator()(const phi::CPUContext &context, + const phi::DenseTensor &dout, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *dx) { int64_t height = dx->dims()[0]; int64_t width = phi::product(dx->dims()) / height; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu index e4ebd47878cb2b018a2f4227545fc2c33b41edd1..0272637b84b776e50334d95f192b21c68ceab3b2 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu @@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data, } } -void GetOutputOffset(const framework::Vector& x_lod, - const framework::Vector& ref_lod, - framework::Vector* out_offset) { +void GetOutputOffset(const phi::Vector& x_lod, + const phi::Vector& ref_lod, + phi::Vector* out_offset) { size_t offset = 0; int lod_size = static_cast(x_lod.size()); for (int i = 0; i < static_cast(x_lod.size()); ++i) { @@ -99,8 +99,8 @@ template static int ExpandByMemoryCopy(const phi::GPUContext& context, const LoDTensor& x, LoDTensor* out, - const framework::Vector& x_lod, - const framework::Vector& ref_lod, + const phi::Vector& x_lod, + const phi::Vector& ref_lod, bool do_copy) { auto out_data = out->data(); auto x_data = x.data(); @@ -143,12 +143,11 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context, template struct SequenceExpandFunctor { - void operator()( - const phi::GPUContext& context, - const LoDTensor& x, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand referenced lod*/ - LoDTensor* out) { + void operator()(const phi::GPUContext& context, + const LoDTensor& x, + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand referenced lod*/ + LoDTensor* out) { int num_copys = ExpandByMemoryCopy(context, x, out, x_lod, ref_lod, false); // Sometimes direct copies will be faster, this maybe need deeply analysis. 
@@ -157,7 +156,7 @@ struct SequenceExpandFunctor { } else { int x_item_length = x.numel() / x.dims()[0]; size_t x_lod_size = x_lod.size(); - framework::Vector out_offset(x_lod_size * 2 + ref_lod.size()); + phi::Vector out_offset(x_lod_size * 2 + ref_lod.size()); GetOutputOffset(x_lod, ref_lod, &out_offset); for (size_t i = 0; i < x_lod_size; ++i) { @@ -167,7 +166,7 @@ struct SequenceExpandFunctor { out_offset[2 * x_lod_size + i] = ref_lod[i]; } - paddle::framework::MixVector mixv_out_offset(&out_offset); + phi::MixVector mixv_out_offset(&out_offset); const size_t* out_offset_data = mixv_out_offset.CUDAData(context.GetPlace()); const size_t* x_lod_data = out_offset_data + x_lod_size; @@ -197,11 +196,11 @@ template struct SequenceExpandGradFunctor { void operator()(const phi::GPUContext& context, const LoDTensor& dout, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand based lod*/ + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand based lod*/ LoDTensor* dx) { int x_item_length = phi::product(dx->dims()) / dx->dims()[0]; - framework::Vector out_offset(x_lod.size()); + phi::Vector out_offset(x_lod.size()); GetOutputOffset(x_lod, ref_lod, &out_offset); int thread_x = std::min(32, std::max(static_cast(ref_lod.size()), 16)); @@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor { int block_x = static_cast(ref_lod.size()); dim3 block_size(thread_x, thread_y, thread_z); dim3 grid_size(block_x, 1); - paddle::framework::MixVector mixv_ref_lod(&ref_lod); - paddle::framework::MixVector mixv_x_lod(&x_lod); - paddle::framework::MixVector mixv_out_offset(&out_offset); + phi::MixVector mixv_ref_lod(&ref_lod); + phi::MixVector mixv_x_lod(&x_lod); + phi::MixVector mixv_out_offset(&out_offset); sequence_expand_grad_kernel<<>>( dout.data(), mixv_ref_lod.CUDAData(context.GetPlace()), diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 1366fe87ab30813b0b56fc4a3631af96729e2a39..7a7a6f7b3e7148fca4ed50d8f017158147de4815 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -29,32 +29,29 @@ using EigenMatrix = phi::EigenMatrix; template struct SequenceExpandFunctor { - void operator()( - const DeviceContext& ctx, - const phi::DenseTensor& x, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand referenced lod*/ - phi::DenseTensor* out); + void operator()(const DeviceContext& ctx, + const phi::DenseTensor& x, + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand referenced lod*/ + phi::DenseTensor* out); }; template struct SequenceExpandGradFunctor { - void operator()( - const DeviceContext& ctx, - const phi::DenseTensor& dout, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand referenced lod*/ - phi::DenseTensor* dx); + void operator()(const DeviceContext& ctx, + const phi::DenseTensor& dout, + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand referenced lod*/ + phi::DenseTensor* dx); }; template struct SequenceExpandFunctor { - void operator()( - const phi::CPUContext& context, - const phi::DenseTensor& x, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand referenced lod*/ - phi::DenseTensor* out) { + void operator()(const phi::CPUContext& context, + 
const phi::DenseTensor& x, + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand referenced lod*/ + phi::DenseTensor* out) { int out_offset = 0; int x_item_length = x.numel() / x.dims()[0]; auto out_data = out->data(); @@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel { } // x lod level is at most 1. - framework::Vector out_lod; + phi::Vector out_lod; if (x_lod.size() == 1) { out_lod.push_back(0); int out_offset = 0; @@ -130,7 +127,7 @@ class SequenceExpandKernel : public framework::OpKernel { auto& ref_lod = *out->mutable_lod(); ref_lod[0] = out_lod; } - framework::Vector ref_x_lod; + phi::Vector ref_x_lod; if (x->lod().size() == 1) { ref_x_lod = x->lod()[0]; } else { @@ -161,12 +158,11 @@ class SequenceExpandKernel : public framework::OpKernel { * */ template struct SequenceExpandGradFunctor { - void operator()( - const phi::CPUContext& context, - const phi::DenseTensor& dout, - const framework::Vector& x_lod, /*expand source lod*/ - const framework::Vector& ref_lod, /*expand referenced lod*/ - phi::DenseTensor* dx) { + void operator()(const phi::CPUContext& context, + const phi::DenseTensor& dout, + const phi::Vector& x_lod, /*expand source lod*/ + const phi::Vector& ref_lod, /*expand referenced lod*/ + phi::DenseTensor* dx) { int dout_offset = 0; for (size_t i = 1; i < ref_lod.size(); ++i) { int repeat_num = ref_lod[i] - ref_lod[i - 1]; @@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel { return; } - framework::Vector ref_x_lod; - framework::Vector ref_lod = y_lod[ref_level]; + phi::Vector ref_x_lod; + phi::Vector ref_lod = y_lod[ref_level]; if (x->lod().size() == 1) { ref_x_lod = x->lod()[0]; } else { diff --git a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h index bc95daa14227c68455c4827f6a3284c53de7c14f..24e0ee2d11a0fe7ae7b34929c396fcdc70eb103d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h @@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (platform::is_gpu_place(ctx.GetPlace())) { auto xlod = x.lod()[0]; - paddle::framework::MixVector mixv_xlod(&xlod); + phi::MixVector mixv_xlod(&xlod); lod = mixv_xlod.CUDAData(ctx.GetPlace()); } else { #endif diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index e58cff60aea4850dfe1f256ba5e36f2cc3273764..3ff7793a1956a2a44b7381277c87066bd8824b5d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -124,7 +124,7 @@ template struct SequenceSoftmaxFunctor { void operator()(const phi::GPUContext &context, const LoDTensor &x, - const framework::Vector &ref_lod, /*referenced lod*/ + const phi::Vector &ref_lod, /*referenced lod*/ LoDTensor *out) { int height = ref_lod.size() - 1; @@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor { dim3 block_size(thread_x); dim3 grid_size(max_blocks); - paddle::framework::MixVector mixv_ref_lod(&ref_lod); + phi::MixVector mixv_ref_lod(&ref_lod); sequence_softmax_kernel <<>>( x.data(), @@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor { void operator()(const phi::GPUContext &context, const LoDTensor &dout, const LoDTensor &out, - const framework::Vector &ref_lod, /*referenced lod*/ + const phi::Vector &ref_lod, 
/*referenced lod*/ LoDTensor *dx) { size_t height = ref_lod.size() - 1; @@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor { dim3 block_size(thread_x); dim3 grid_size(max_blocks); - paddle::framework::MixVector mixv_ref_lod(&ref_lod); + phi::MixVector mixv_ref_lod(&ref_lod); sequence_softmax_grad_kernel <<>>( dout.data(), diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index 03036a0babf3b6d33d68a3ce614047bef983e42a..dfa5919dc0b1a17fc77105a4dcba1565abdb560c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -21,11 +21,10 @@ namespace operators { template struct SequenceSoftmaxFunctor { - void operator()( - const DeviceContext &ctx, - const phi::DenseTensor &x, - const framework::Vector &ref_lod, /*expand referenced lod*/ - phi::DenseTensor *out); + void operator()(const DeviceContext &ctx, + const phi::DenseTensor &x, + const phi::Vector &ref_lod, /*expand referenced lod*/ + phi::DenseTensor *out); }; template @@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor { void operator()(const DeviceContext &ctx, const phi::DenseTensor &dout, const phi::DenseTensor &out, - const framework::Vector &ref_lod, /*referenced lod*/ + const phi::Vector &ref_lod, /*referenced lod*/ phi::DenseTensor *dx); }; @@ -41,7 +40,7 @@ template struct SequenceSoftmaxFunctor { void operator()(const phi::CPUContext &ctx, const phi::DenseTensor &x, - const framework::Vector &ref_lod, /*referenced lod*/ + const phi::Vector &ref_lod, /*referenced lod*/ phi::DenseTensor *out) { size_t height = ref_lod.size() - 1; const T *in_data = x.data(); @@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor { void operator()(const phi::CPUContext &ctx, const phi::DenseTensor &dout, const phi::DenseTensor &out, - const framework::Vector &ref_lod, /*referenced lod*/ + const phi::Vector &ref_lod, /*referenced lod*/ phi::DenseTensor *dx) { size_t height = ref_lod.size() - 1; diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h index 5de8c56f3c6eccc46772c28c3e7ce6cb4b18125d..bb7cd5c1b0b77cdfd65cfcf4bb9a03ce9be6cc2a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h @@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel { auto pos_data = pos->mutable_data(context.GetPlace()); int offset = 0; - framework::Vector vec_out_lod; + phi::Vector vec_out_lod; vec_out_lod.reserve(batch_size + 1); for (int i = 0; i <= batch_size; ++i) { offset = row_lod[i]; diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h index 4bc1289bf468c46001b15b5b410b70f00e9b1e6d..fbf961ab225a796970abf5db61c2d961d28b5551 100644 --- a/paddle/fluid/operators/shuffle_batch_op.h +++ b/paddle/fluid/operators/shuffle_batch_op.h @@ -25,16 +25,16 @@ #include "glog/logging.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/timer.h" +#include "paddle/phi/core/mixed_vector.h" namespace paddle { namespace operators { template -using Vector = framework::Vector; +using Vector = phi::Vector; template class ShuffleBatchKernel : public 
diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h
index 4bc1289bf468c46001b15b5b410b70f00e9b1e6d..fbf961ab225a796970abf5db61c2d961d28b5551 100644
--- a/paddle/fluid/operators/shuffle_batch_op.h
+++ b/paddle/fluid/operators/shuffle_batch_op.h
@@ -25,16 +25,16 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/timer.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 template <typename T>
 class ShuffleBatchKernel : public framework::OpKernel<T> {
diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h
index b41453b849bc4802474823f3fcbe6e943c604700..0064567887eb0f7f5135019e9dc29f2c9fc8705d 100644
--- a/paddle/fluid/operators/tdm_child_op.h
+++ b/paddle/fluid/operators/tdm_child_op.h
@@ -22,8 +22,8 @@
 #include <vector>
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h
index 1ba0e2c66be8d5ad284c260d3a14c1ecdedc96ea..1b0e92b4d9b068a9d0cdc2841f199a577858b2fc 100644
--- a/paddle/fluid/operators/tdm_sampler_op.h
+++ b/paddle/fluid/operators/tdm_sampler_op.h
@@ -22,9 +22,9 @@
 #include <vector>
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/sampler.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc
index 570920022e8a9bc4e53bbfc9d2a8f2e75852fad7..aa42a9323573ce163bb17a23106e304c24e718ea 100644
--- a/paddle/fluid/pybind/tensor.cc
+++ b/paddle/fluid/pybind/tensor.cc
@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) {  // NOLINT
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
            self.set_rows(rows);
 #else
-           Vector<int64_t> new_rows(rows);
+           std::vector<int64_t> new_rows(rows);
            self.set_rows(new_rows);
 #endif
          })
diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt
index e47e3a731c41465d569f3ab8b74e92fa172dc2a9..85df40cc47c5c2fe8e090477c3cba8b9220405ec 100644
--- a/paddle/phi/core/CMakeLists.txt
+++ b/paddle/phi/core/CMakeLists.txt
@@ -114,6 +114,11 @@ cc_library(
   SRCS custom_kernel.cc
   DEPS kernel_factory)
 
+cc_library(
+  mixed_vector
+  SRCS mixed_vector.cc
+  DEPS device_context place memory)
+
 # Will remove once we implemented MKLDNN_Tensor
 if(WITH_MKLDNN)
   add_dependencies(dense_tensor mkldnn)
diff --git a/paddle/fluid/framework/mixed_vector.cc b/paddle/phi/core/mixed_vector.cc
similarity index 79%
rename from paddle/fluid/framework/mixed_vector.cc
rename to paddle/phi/core/mixed_vector.cc
index c3c3581a6a785b248ec393d6d9a1c380bea78745..eb1e34e7b235fdb986597b2527f58b511b70a72b 100644
--- a/paddle/fluid/framework/mixed_vector.cc
+++ b/paddle/phi/core/mixed_vector.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #include <algorithm>
 #include <initializer_list>
@@ -22,28 +22,26 @@ limitations under the License. */
 #include <vector>
 
 #include "glog/logging.h"
-#include "paddle/fluid/framework/details/cow_ptr.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <typename T>
 void CopyToCPUHelper(std::vector<T> *cpu_,
-                     paddle::memory::AllocationPtr *gpu_,
+                     phi::Allocator::AllocationPtr *gpu_,
                      size_t *gpu_memory_size_) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // COPY GPU Data To CPU
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
+      phi::DeviceContextPool::Instance().Get((*gpu_)->place()));
   auto stream = dev_ctx->stream();
   void *src = (*gpu_)->ptr();
   void *dst = cpu_->data();
-  paddle::memory::Copy(platform::CPUPlace(),
+  paddle::memory::Copy(phi::CPUPlace(),
                        dst,
                        OptionalCUDAPlace(*gpu_).get(),
                        src,
@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_,
 
 template <typename T>
 void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
-                             paddle::memory::AllocationPtr *gpu_,
+                             phi::Allocator::AllocationPtr *gpu_,
                              size_t *gpu_memory_size_,
-                             const platform::Place &place) {
+                             const phi::Place &place) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   void *src = cpu_->data();
   *gpu_memory_size_ = cpu_->size() * sizeof(T);  // sizeof(T)
-  (*gpu_) = memory::Alloc(place, *gpu_memory_size_);
+  (*gpu_) = paddle::memory::Alloc(place, *gpu_memory_size_);
   void *dst = (*gpu_)->ptr();
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   auto stream = dev_ctx->stream();
   paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(),
                        dst,
-                       platform::CPUPlace(),
+                       phi::CPUPlace(),
                        src,
                        *gpu_memory_size_,
                        stream);
@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
                                                                             \
   template <>                                                               \
   void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                  \
-      const platform::Place &place) const {                                 \
+      const phi::Place &place) const {                                      \
     CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
   }
 
@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t)
 INSTANTIATE_VECTOR_FOR_TYPE(int)
 INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi
diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/phi/core/mixed_vector.h
similarity index 87%
rename from paddle/fluid/framework/mixed_vector.h
rename to paddle/phi/core/mixed_vector.h
index f94bff41472ffc1296fbcaa7de1d7e0ff978821c..d25a646608d3d20091d19adfce8a0c5eda6cde0f 100644
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/phi/core/mixed_vector.h
@@ -22,20 +22,22 @@ limitations under the License. */
 #include <vector>
 
 #include "glog/logging.h"
-#include "paddle/fluid/memory/allocation/allocator.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/allocator.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <typename T>
 using Vector = std::vector<T>;
 
-inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
-    const paddle::memory::allocation::AllocationPtr &gpu_) {
+inline paddle::optional<phi::GPUPlace> OptionalCUDAPlace(
+    const phi::Allocator::AllocationPtr &gpu_) {
   return gpu_ == nullptr ? paddle::none
-                         : paddle::optional<platform::CUDAPlace>(gpu_->place());
+                         : paddle::optional<phi::GPUPlace>(gpu_->place());
 }
 
 // Vector<T> implements the std::vector interface, and can get Data or
@@ -146,18 +148,18 @@ class MixVector {
     }
 
     // get cuda ptr. immutable
-    const T *CUDAData(platform::Place place) const {
+    const T *CUDAData(phi::Place place) const {
       PADDLE_ENFORCE_EQ(
-          platform::is_gpu_place(place),
+          place.GetType() == phi::AllocationType::GPU,
           true,
-          platform::errors::Unavailable(
+          phi::errors::Unavailable(
               "Place mismatch, CUDA Data must be on CUDA place."));
       ImmutableCUDA(place);
       return reinterpret_cast<T *>(gpu_->ptr());
     }
 
     // get cuda ptr. mutable
-    T *CUDAMutableData(platform::Place place) {
+    T *CUDAMutableData(phi::Place place) {
       const T *ptr = CUDAData(place);
       flag_ = kDirty | kDataInCUDA;
       return const_cast<T *>(ptr);
@@ -178,7 +180,7 @@ class MixVector {
 
     std::mutex &Mutex() const { return mtx_; }
 
-    paddle::optional<platform::CUDAPlace> CUDAPlace() const {
+    paddle::optional<phi::GPUPlace> CUDAPlace() const {
       return OptionalCUDAPlace(gpu_);
     }
 
@@ -199,7 +201,7 @@ class MixVector {
 
     void CopyToCPU() const;
 
-    void ImmutableCUDA(platform::Place place) const {
+    void ImmutableCUDA(phi::Place place) const {
       if (IsDirty()) {
         if (IsInCPU()) {
           CopyCPUDataToCUDA(place);
@@ -207,7 +209,7 @@ class MixVector {
           SetFlag(kDataInCUDA);
         } else if (IsInCUDA() && !(place == gpu_->place())) {
           PADDLE_THROW(
-              platform::errors::Unavailable("Unexpected data place mismatch."));
+              phi::errors::Unavailable("Unexpected data place mismatch."));
           // Still dirty
         } else {
           // Dirty && DataInCUDA && Device is same
@@ -220,7 +222,7 @@ class MixVector {
           SetFlag(kDataInCUDA);
         } else if (!(place == gpu_->place())) {
           PADDLE_THROW(
-              platform::errors::Unavailable("Unexpected data place mismatch."));
+              phi::errors::Unavailable("Unexpected data place mismatch."));
         } else {
           // Not Dirty && DataInCUDA && Device is same
           // Do nothing.
@@ -228,7 +230,7 @@ class MixVector {
       }
     }
 
-    void CopyCPUDataToCUDA(const platform::Place &place) const;
+    void CopyCPUDataToCUDA(const phi::Place &place) const;
 
     void ImmutableCPU() const {
       if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
@@ -249,7 +251,7 @@ class MixVector {
     bool IsInCPU() const { return flag_ & kDataInCPU; }
 
     std::vector<T> *cpu_;
-    mutable paddle::memory::allocation::AllocationPtr gpu_;
+    mutable phi::Allocator::AllocationPtr gpu_;
     mutable size_t gpu_memory_size_{0};
     mutable int flag_;
@@ -332,9 +334,9 @@ class MixVector {
   }
 
   // get cuda ptr. immutable
-  const T *CUDAData(platform::Place place) const {
+  const T *CUDAData(phi::Place place) const {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -348,9 +350,9 @@ class MixVector {
   }
 
   // get cuda ptr. mutable
-  T *CUDAMutableData(platform::Place place) {
+  T *CUDAMutableData(phi::Place place) {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -372,8 +374,8 @@ class MixVector {
   void reserve(size_t size) { m_->reserve(size); }
 
   // the unify method to access CPU or CUDA data. immutable.
-  const T *Data(platform::Place place) const {
-    if (platform::is_gpu_place(place)) {
+  const T *Data(phi::Place place) const {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAData(place);
     } else {
       return data();
@@ -381,8 +383,8 @@ class MixVector {
   }
 
   // the unify method to access CPU or CUDA data. mutable.
-  T *MutableData(platform::Place place) {
-    if (platform::is_gpu_place(place)) {
+  T *MutableData(phi::Place place) {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAMutableData(place);
     } else {
       return data();
@@ -397,5 +399,4 @@ class MixVector {
   mutable std::unique_ptr<VectorData> m_;
 };
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi
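// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above): how the MixVector state
// machine behaves from a caller's point of view. The host std::vector stays
// the source of truth; MutableData()/CUDAMutableData() mark a copy dirty,
// and the next access from the other side triggers a transfer. A hedged
// example assuming a CUDA build; the assertions describe intended semantics.
#include <cassert>
#include <cstdint>
#include <vector>
#include "paddle/phi/core/mixed_vector.h"

void MixVectorRoundTrip(const phi::GPUContext& ctx) {
  std::vector<int64_t> rows = {0, 2, 5};
  phi::MixVector<int64_t> mixv(&rows);

  // 1. Host write through MutableData: data lives in CPU, flagged dirty.
  int64_t* host = mixv.MutableData(phi::CPUPlace());
  host[0] = 1;

  // 2. First device access copies CPU -> GPU and returns a device pointer.
  const int64_t* dev = mixv.CUDAData(ctx.GetPlace());
  (void)dev;

  // 3. Reading back through Data(phi::CPUPlace()) returns the host pointer;
  //    if the GPU copy had been mutated, MixVector copies it back first.
  const int64_t* host_again = mixv.Data(phi::CPUPlace());
  assert(host_again[0] == 1);
}
// ---------------------------------------------------------------------------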
diff --git a/paddle/phi/kernels/cpu/edit_distance_kernel.cc b/paddle/phi/kernels/cpu/edit_distance_kernel.cc
index 735086ba0edcdc63e65166e0bad74eaa7cdaa42d..190bc3fa55266a739c0c019bf9cd5eca07df0c3d 100644
--- a/paddle/phi/kernels/cpu/edit_distance_kernel.cc
+++ b/paddle/phi/kernels/cpu/edit_distance_kernel.cc
@@ -14,10 +14,10 @@
 
 #include "paddle/phi/kernels/edit_distance_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 
 namespace phi {
@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx,
   int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum);
   auto batch_size = hyps.dims()[0];
 
-  paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
+  phi::Vector<size_t> hyp_lod(batch_size + 1);
+  phi::Vector<size_t> ref_lod(batch_size + 1);
 
   bool use_length = hypslength.get_ptr() != nullptr;
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc
index fb087660612ec5e03397aaad2a7aaa360b6b6b33..e2a3513a428dc41165e7150ebae75778094fc28a 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cc
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/phi/backends/onednn/axpy_handler.h"
@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
     auto* in2_value = input2->mutable_value();
 
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
 
     auto in1_place = input1.place();
@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
     std::vector<int64_t> in2_rows;
     in2_rows.reserve(in2_rows.size() + size);
     for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
-      const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows();
+      const phi::Vector<int64_t>& in_rows = (*iter)->rows();
       in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
     }
     input2->set_rows(in2_rows);
@@ -646,7 +646,7 @@ struct MergeAdd<phi::CPUContext, T> {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cu b/paddle/phi/kernels/funcs/selected_rows_functor.cu
index 8f409466e19b192ad7f8d8624beeed701b9bf881..91f186415fec8672ae3cba6759cd0eea286998ca 100644
--- a/paddle/phi/kernels/funcs/selected_rows_functor.cu
+++ b/paddle/phi/kernels/funcs/selected_rows_functor.cu
@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
                       input2.height()));
     output->set_height(in1_height);
 
-    paddle::framework::Vector<int64_t> in1_rows(input1.rows());
+    phi::Vector<int64_t> in1_rows(input1.rows());
     auto& in2_rows = input2.rows();
     std::vector<int64_t> out_rows;
     out_rows.reserve(in1_rows.size() + in2_rows.size());
@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data,
@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
     auto* in2_value = input2->mutable_value();
 
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     if (in1_rows.size()) {
       mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
     }
@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddToTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data,
@@ -378,7 +378,7 @@ struct MergeAddImpl {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
@@ -386,7 +386,7 @@ struct MergeAddImpl {
     phi::SelectedRows& out = *output;
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
 
     auto input_width = input.value().dims()[1];
@@ -407,8 +407,8 @@ struct MergeAddImpl {
     dim3 threads(block_size, 1);
     dim3 grid1(input_rows.size(), 1);
 
-    paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-    paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+    phi::MixVector<int64_t> mix_vector_input(&input_rows);
+    phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
     MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
         input_data,
         mix_vector_input.CUDAData(context.GetPlace()),
@@ -459,7 +459,7 @@ struct MergeAddImpl {
     }
     std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
                                         merged_row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
     out.set_rows(merge_rows);
 
     out.set_height(input_height);
@@ -485,8 +485,8 @@ struct MergeAddImpl {
       auto& input_rows = input->rows();
       dim3 grid1(input_rows.size(), 1);
 
-      paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-      paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+      phi::MixVector<int64_t> mix_vector_input(&input_rows);
+      phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
       MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
           input_data,
           mix_vector_input.CUDAData(context.GetPlace()),
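// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above): the row-merging idea
// behind MergeAdd, reduced to plain host code. Duplicate row ids are
// deduplicated through a std::set, and each input row is scatter-added into
// the slot of its merged row. A hedged, CPU-only sketch with hypothetical
// names; the real functor performs the accumulation in a CUDA kernel.
#include <cstddef>
#include <cstdint>
#include <set>
#include <unordered_map>
#include <vector>

void MergeRows(const std::vector<int64_t>& input_rows,
               const std::vector<float>& input_values,  // row-major, width cols
               int64_t width,
               std::vector<int64_t>* merge_rows,
               std::vector<float>* merged_values) {
  std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
  merge_rows->assign(row_set.begin(), row_set.end());
  merged_values->assign(merge_rows->size() * width, 0.0f);

  // Map merged row id -> its index in the output.
  std::unordered_map<int64_t, size_t> row_to_idx;
  for (size_t i = 0; i < merge_rows->size(); ++i) {
    row_to_idx[(*merge_rows)[i]] = i;
  }
  // Scatter-add every input row into its merged slot.
  for (size_t i = 0; i < input_rows.size(); ++i) {
    size_t out = row_to_idx[input_rows[i]];
    for (int64_t j = 0; j < width; ++j) {
      (*merged_values)[out * width + j] += input_values[i * width + j];
    }
  }
}
// ---------------------------------------------------------------------------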
diff --git a/paddle/phi/kernels/funcs/sequence2batch.cc b/paddle/phi/kernels/funcs/sequence2batch.cc
index 302dd6ec6ac62ace8ad7319cb0cde0940c1e1c87..11a687cdeafac8e73141a0c666c4eda35060e12b 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.cc
+++ b/paddle/phi/kernels/funcs/sequence2batch.cc
@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> {
  public:
   void operator()(const phi::CPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     size_t* index = index_lod.data();
diff --git a/paddle/phi/kernels/funcs/sequence2batch.cu b/paddle/phi/kernels/funcs/sequence2batch.cu
index 6c8ec9bca017ad03d12d177325af6dce8b62a92e..f743f1b33ddc0075b55eef72fa7aa46801628986 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.cu
+++ b/paddle/phi/kernels/funcs/sequence2batch.cu
@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
  public:
   void operator()(const phi::GPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     auto src_dims = src.dims();
@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
     dim3 threads(128, 8);
     dim3 grid(8, 1);
     auto stream = context.stream();
-    paddle::framework::MixVector<size_t> mix_index_lod(&index_lod);
+    phi::MixVector<size_t> mix_index_lod(&index_lod);
     CopyMatrixRowsKernel<T, 128, 8><<<grid, threads, 0, stream>>>(
         src_data,
         dst_data,
diff --git a/paddle/phi/kernels/funcs/sequence2batch.h b/paddle/phi/kernels/funcs/sequence2batch.h
index 4fba1d0709f0d71609b33e1f2a60a90b77a96f90..31fbad6d2f32e46363511d77a330c3cd3647a1b0 100644
--- a/paddle/phi/kernels/funcs/sequence2batch.h
+++ b/paddle/phi/kernels/funcs/sequence2batch.h
@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor {
   // The indexed rows are based on the input index.
   void operator()(const DeviceContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index);
 };
diff --git a/paddle/phi/kernels/funcs/sequence_scale.cu b/paddle/phi/kernels/funcs/sequence_scale.cu
index 06d8db04be6d4d370366ee0226960b255da75284..8feea62a3d7b9221b31ff41ec1dcf050814ce026 100644
--- a/paddle/phi/kernels/funcs/sequence_scale.cu
+++ b/paddle/phi/kernels/funcs/sequence_scale.cu
@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> {
     const size_t seq_width = seq->numel() / seq->dims()[0];
     auto abs_offset_lod = paddle::framework::ToAbsOffset(lod);
     T* seq_data = context.template Alloc<T>(seq);
-    paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
+    phi::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
 
 #ifdef PADDLE_WITH_HIP
     hipLaunchKernelGGL(
diff --git a/paddle/phi/kernels/gpu/adagrad_kernel.cu b/paddle/phi/kernels/gpu/adagrad_kernel.cu
index 53169c8bcfb60d41c550dae6f54ddcb593fc97b6..8f3d7a3c5580a76acd96c541765380347ef1401f 100644
--- a/paddle/phi/kernels/gpu/adagrad_kernel.cu
+++ b/paddle/phi/kernels/gpu/adagrad_kernel.cu
@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
     auto grad_merge = merge_func(context, grad);
     auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
-    paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
+    phi::Vector<int64_t> merge_rows(grad_merge.rows());
     // 2. m += g_m * g_m
     auto grad_square = SquareSelectedRows(context, grad_merge);
@@ -104,7 +104,7 @@
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid2(1, merge_rows.size());
-    paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
+    phi::MixVector<int64_t> mixv_merge_rows(&merge_rows);
     SparseAdagradFunctorKernel<T, 256><<<grid2, threads, 0, context.stream()>>>(
diff --git a/paddle/phi/kernels/gpu/edit_distance_kernel.cu b/paddle/phi/kernels/gpu/edit_distance_kernel.cu
--- a/paddle/phi/kernels/gpu/edit_distance_kernel.cu
+++ b/paddle/phi/kernels/gpu/edit_distance_kernel.cu
   auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
-  paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
+  phi::Vector<size_t> hyp_lod(batch_size + 1);
+  phi::Vector<size_t> ref_lod(batch_size + 1);
 
   bool use_length = hypslength.get_ptr() != nullptr;
diff --git a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
index 8bb00f075929c04710b5c5b24ab1d15388d38bb7..bad7019a60837971bb328f808b8ed618e1d22d66 100644
--- a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/kernels/embedding_grad_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/embedding_util.h"
 
@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor {
     dim3 threads(128, 8);
     dim3 grids(8, 1);
     auto stream = dev_ctx_.stream();
-    paddle::framework::Vector<int64_t> new_rows;
+    phi::Vector<int64_t> new_rows;
     new_rows.resize(ids_num);
     auto gpu_place = dev_ctx_.GetPlace();
 
-    paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+    phi::MixVector<int64_t> mixv_new_rows(&new_rows);
     if (!std::is_same<IdT, int64_t>::value) {
       InputTypeConvert<<<grids, threads, 0, stream>>>(
           ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
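// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above): the write path used by
// EmbeddingSparseGradCUDAFunctor. MutableData() hands out a device pointer
// that a kernel may fill, and marks the wrapped vector dirty so a later host
// read triggers a device-to-host copy. Hedged pseudo-usage for a CUDA build;
// `fill_rows` is a hypothetical kernel, not a Paddle symbol.
#include <cstdint>
#include <vector>
#include "paddle/phi/core/mixed_vector.h"

__global__ void fill_rows(int64_t* rows, int n);

void BuildRowsOnDevice(const phi::GPUContext& ctx, int n,
                       std::vector<int64_t>* rows) {
  rows->resize(n);
  phi::MixVector<int64_t> mixv(rows);
  // Device-side fill; MixVector now considers the GPU copy authoritative.
  fill_rows<<<1, 128, 0, ctx.stream()>>>(mixv.MutableData(ctx.GetPlace()), n);
  // Sync the result back into `rows` before using it on the host.
  mixv.CopyToCPU();
}
// ---------------------------------------------------------------------------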
diff --git a/paddle/phi/kernels/gpu/sgd_kernel.cu b/paddle/phi/kernels/gpu/sgd_kernel.cu
index b7cf9e5badce0c86470e84570e5bd1f5bb34c439..73115a58fa9e0e657268a84ad05fc1dfdb33221c 100644
--- a/paddle/phi/kernels/gpu/sgd_kernel.cu
+++ b/paddle/phi/kernels/gpu/sgd_kernel.cu
@@ -14,12 +14,12 @@
 
 #include "paddle/phi/kernels/sgd_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_helper.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace phi {
 
@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel(
   int thread_x = kThreadsPerBlock;
   int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
   int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-  paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+  phi::MixVector<int64_t> mixv_in_rows(&in_rows);
   SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
       in_data,
       mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
diff --git a/paddle/phi/kernels/impl/momentum_kernel_impl.h b/paddle/phi/kernels/impl/momentum_kernel_impl.h
index f3e22e5d944651eec57507146b643632adece22c..332787740af45b6620f08347b052f1dad1103206 100644
--- a/paddle/phi/kernels/impl/momentum_kernel_impl.h
+++ b/paddle/phi/kernels/impl/momentum_kernel_impl.h
@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx,
   merge_func(ctx, grad, merged_grad);
 
   auto* grad_merge_rows = merged_grad->mutable_rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
   int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size();
   funcs::ForRange<Context> for_range(ctx, param.numel());
diff --git a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
index f2a56ff6b8e08d715da76c85bc4349633933427d..a0cb0a887b6c39ddd71c2ddc990ce8199511590d 100644
--- a/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+++ b/paddle/phi/kernels/impl/rmsprop_kernel_impl.h
@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx,
     funcs::ForRange<Context> for_range(ctx, limit);
 
     auto &grad_merge_rows = merged_grad->rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
     const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
 
     auto &merged_tensor = merged_grad->value();
diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
index d2fc934c71b5220a1add37a203b62cce58ecdce8..baabf8465b7f0958bf15656971feb9d067e47a1c 100644
--- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h
+++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx,
                    DenseTensor* loss,
                    DenseTensor* warpctcgrad) {
   size_t num_sequences, sequence_width, max_sequence_length;
-  paddle::framework::Vector<size_t> logits_lod;
-  paddle::framework::Vector<size_t> label_lod;
+  phi::Vector<size_t> logits_lod;
+  phi::Vector<size_t> label_lod;
   if (logits_length.is_initialized() && labels_length.is_initialized()) {
     num_sequences = logits.dims()[1];
     sequence_width = logits.dims()[2];
@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx,
             paddle::operators::math::TotalSequenceLength(label_lod)),
         1});
    dev_ctx.template HostAlloc<int>(&warpctc_label);
-    std::vector<paddle::framework::Vector<size_t>> lod;
+    std::vector<phi::Vector<size_t>> lod;
     lod.push_back(label_lod);
     warpctc_label.set_lod(lod);
diff --git a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
index b58bcd02580531ec3de6c1700adee846047d6fce..b9bc98425c9bf5326c27fcbdeba8f00800e53ba2 100644
--- a/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel(
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
index a4b3f14306de392f816b2a51ee2da7c3fbc90a65..81e3a33c359d1a73817ba0e3e88cf6b227a2fffb 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel(
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
index 90c95492eee67d82299ba68158e3ea04f60bf17f..f1e8497004520f74f045cf1aa4ea4590f33656b2 100644
--- a/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+++ b/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel(
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
 
diff --git a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
index 2512304944e85c76ba437a57a503add3a87aa656..9a4fd216cd52851b8d62b09df10c853ccd159fa1 100644
--- a/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
+++ b/paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
@@ -14,9 +14,9 @@
 
 #include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
 
 namespace phi {
@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
   PADDLE_ENFORCE_NOT_NULL(
       path.get_ptr(),
       errors::NotFound("Custom tree must be set for sparse mode!"));
-  paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
+  phi::Vector<int64_t> real_rows = PathToRows(*path);
   w_grad->set_rows(real_rows);
   // Build a map of id -> row_index to speed up finding the index of one id
   w_grad->set_height(w.dims()[0]);
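// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff above): why the optimizer kernels
// above all call mixv.Data(place) instead of choosing between data() and
// CUDAData() themselves. Data() dispatches on the place's allocation type,
// which lets a single kernel template serve both CPU and GPU builds. A hedged
// sketch; `ApplySparseUpdate` is a hypothetical function, not a Paddle symbol.
#include <cstdint>
#include <vector>
#include "paddle/phi/core/mixed_vector.h"

template <typename Context>
void ApplySparseUpdate(const Context& dev_ctx, std::vector<int64_t>* rows) {
  phi::MixVector<int64_t> mixv_rows(rows);
  // On a CPUPlace this is rows->data(); on a GPUPlace it is a device pointer
  // backed by an on-demand host-to-device copy.
  const int64_t* row_ptr = mixv_rows.Data(dev_ctx.GetPlace());
  (void)row_ptr;  // ... index into the merged gradient rows here ...
}
// ---------------------------------------------------------------------------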
diff --git a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
index cac9ef797215f44f468ffe566c4d6befa8845e84..0e948589f8bdf665ccd222891e2f55946d7fdf40 100644
--- a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+++ b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx,
   auto& grad_tensor = grad_merge.value();
   const T* grad_data = grad_tensor.template data<T>();
   auto* grad_merge_rows = &grad_merge.rows();
-  paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+  phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
   const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
   auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
   if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt
index 3dc2920537568be749924644bcfab90cf622446e..26c7e3b4389a8b5456b6c305ab5f93cf3998ad7c 100644
--- a/paddle/phi/tests/core/CMakeLists.txt
+++ b/paddle/phi/tests/core/CMakeLists.txt
@@ -70,3 +70,20 @@ cc_test(
   test_tensor_array
   SRCS test_tensor_array.cc
   DEPS tensor_array)
+
+if(WITH_GPU)
+  nv_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+elseif(WITH_ROCM)
+  hip_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+else()
+  cc_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc
+    DEPS mixed_vector place memory device_context tensor)
+endif()
diff --git a/paddle/fluid/framework/mixed_vector_test.cc b/paddle/phi/tests/core/test_mixed_vector.cc
similarity index 88%
rename from paddle/fluid/framework/mixed_vector_test.cc
rename to paddle/phi/tests/core/test_mixed_vector.cc
index 6b39d80a43af5d69361a4c38a5a1581a9b24a738..14b8c542225b786cc1a5b385532e2dc6020bb011 100644
--- a/paddle/fluid/framework/mixed_vector_test.cc
+++ b/paddle/phi/tests/core/test_mixed_vector.cc
@@ -12,7 +12,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #include "glog/logging.h"
 #include "gtest/gtest-message.h"
@@ -21,7 +21,7 @@
 #include "gtest/gtest_pred_impl.h"
 
 template <typename T>
-using vec = paddle::framework::Vector<T>;
+using vec = phi::Vector<T>;
 
 TEST(mixed_vector, CPU_VECTOR) {
   vec<int> tmp;
@@ -44,7 +44,7 @@
 }
 
 TEST(mixed_vector, InitWithCount) {
-  paddle::framework::Vector<int> vec(10, 10);
+  phi::Vector<int> vec(10, 10);
   for (int i = 0; i < 10; ++i) {
     ASSERT_EQ(vec[i], 10);
   }
@@ -58,7 +58,7 @@
 }
 
 TEST(mixed_vector, Reserve) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.reserve(1);
   vec.push_back(0);
   vec.push_back(0);
@@ -66,7 +66,7 @@
 }
 
 TEST(mixed_vector, Resize) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.resize(1);
   vec.push_back(0);
   vec.push_back(0);
diff --git a/paddle/fluid/framework/mixed_vector_test.cu b/paddle/phi/tests/core/test_mixed_vector.cu
similarity index 80%
rename from paddle/fluid/framework/mixed_vector_test.cu
rename to paddle/phi/tests/core/test_mixed_vector.cu
index 61d256ffb2283bf46d7c2a6487229623e95bd54f..92467d440137e06a19af4ec9701123ed9e48e4b4 100644
--- a/paddle/fluid/framework/mixed_vector_test.cu
+++ b/paddle/phi/tests/core/test_mixed_vector.cu
@@ -23,13 +23,14 @@
 
 #include "glog/logging.h"
 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/mixed_vector.h"
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 template <typename T>
-using vec = paddle::framework::MixVector<T>;
-using gpuStream_t = paddle::gpuStream_t;
+using vec = phi::MixVector<T>;
+using gpuStream_t = phi::gpuStream_t;
 
 static __global__ void multiply_10(int* ptr) {
   for (int i = 0; i < 10; ++i) {
@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) {
   }
 }
 
-gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) {
+gpuStream_t GetCUDAStream(phi::GPUPlace place) {
   return reinterpret_cast<const phi::GPUContext*>(
-             paddle::platform::DeviceContextPool::Instance().Get(place))
+             phi::DeviceContextPool::Instance().Get(place))
       ->stream();
 }
 
@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu(0);
+  phi::GPUPlace gpu(0);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) {
 }
 
 TEST(mixed_vector, MultiGPU) {
-  if (paddle::platform::GetGPUDeviceCount() < 2) {
+  if (phi::backends::gpu::GetGPUDeviceCount() < 2) {
     LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple "
                     "GPUs in your machine.";
     return;
@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu0(0);
-  paddle::platform::SetDeviceId(0);
+  phi::GPUPlace gpu0(0);
+  phi::backends::gpu::SetDeviceId(0);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) {
 #else
   multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
 #endif
-  paddle::platform::CUDAPlace gpu1(1);
+  phi::GPUPlace gpu1(1);
   auto* gpu1_ptr = tmp.MutableData(gpu1);
-  paddle::platform::SetDeviceId(1);
+  phi::backends::gpu::SetDeviceId(1);
 
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(
diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py
index f5c57a312d84583bcf367ccf85666ce80363a2ec..8d22fd6c2496e1c0748cdb33b1e4bd45e91ccf6e 100755
--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_mix_precision_all_reduce_fuse',
     'test_spp_op',
     'test_op_converter',
-    'mixed_vector_test',
+    'test_mixed_vector',
     'test_roi_align_op',
     'test_pad_constant_like',
     'test_mul_op',
@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [
    'device_context_test',
    'test_reference_count_pass_last_lived_ops',
    'copy_same_tensor_test',
-    'mixed_vector_test',
+    'test_mixed_vector',
    'op_registry_test',
    'test_prepare_op',
    'data_device_transform_test',