Unverified commit 35d7d1f0, authored by Huang Jiyi, committed by GitHub

move mixed_vector (#50282)

Parent: e92e3aab
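This commit moves mixed_vector from paddle/fluid/framework into paddle/phi/core, so call sites switch from the framework::Vector and paddle::framework::MixVector spellings to phi::Vector and phi::MixVector, and include "paddle/phi/core/mixed_vector.h" instead of the fluid header. A minimal sketch of the renaming pattern, assuming a Paddle translation unit; the BuildLodLevel helper below is hypothetical and only illustrates the renamed types, not code from this diff:

// Before: #include "paddle/fluid/framework/mixed_vector.h" with
//         paddle::framework::Vector / paddle::framework::MixVector.
// After this commit the same types live in the phi namespace:
#include "paddle/phi/core/mixed_vector.h"

// Hypothetical helper (not part of this diff). MixVector wraps a phi::Vector
// and exposes host/device pointers to the same storage.
void BuildLodLevel() {
  phi::Vector<size_t> lod_level(4, 0);     // was framework::Vector<size_t>
  phi::MixVector<size_t> mix(&lod_level);  // was paddle::framework::MixVector
  // Assumption: MutableData accepts a phi place, as platform::CPUPlace
  // aliases phi::CPUPlace at the call sites in this diff.
  size_t* cpu_ptr = mix.MutableData(phi::CPUPlace());
  cpu_ptr[0] = 0;
}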
@@ -1113,7 +1113,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
 const auto &rank_ = process_group_->GetRank();
 const auto &size_ = process_group_->GetSize();
-framework::Vector<int64_t> rows_num_vector(size_);
+phi::Vector<int64_t> rows_num_vector(size_);
 rows_num_vector[rank_] = static_cast<int64_t>(src_rows.size());
 Tensor rows_num_tensor = paddle::experimental::empty(
@@ -1183,7 +1183,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
 }
 process_group_->AllGather(in, out)->Synchronize();
-framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
 auto *dst_rows_dense_tensor =
 std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
 .get();
@@ -1262,7 +1262,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
 Tensor dst_rows_tensor =
 paddle::experimental::concat(rows_tensors, phi::Scalar(0));
-framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
 auto *dst_rows_dense_tensor =
 std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
 .get();
...
@@ -236,7 +236,7 @@ void DeserializeLodTensor(framework::Variable* var,
 framework::LoD lod;
 for (int i = 0; i < msg.lod_level(); ++i) {
-framework::Vector<size_t> v;
+phi::Vector<size_t> v;
 for (int j = 0; j < msg.lod(i).lod_data_size(); ++j) {
 v.push_back(msg.lod(i).lod_data(j));
 }
...
@@ -39,7 +39,7 @@ void CreateVarsOnScope(framework::Scope* scope,
 auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
 tensor1->Resize(phi::make_ddim({512, 8, 4, 2}));
 framework::LoD lod1;
-lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
+lod1.push_back(phi::Vector<size_t>({1, 3, 8}));
 tensor1->set_lod(lod1);
 tensor1->mutable_data<float>(*place);
 phi::funcs::set_constant(ctx, tensor1, 31.9);
@@ -49,7 +49,7 @@ void CreateVarsOnScope(framework::Scope* scope,
 auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
 tensor2->Resize(phi::make_ddim({1000, 64}));
 framework::LoD lod2;
-lod2.push_back(framework::Vector<size_t>({1, 1}));
+lod2.push_back(phi::Vector<size_t>({1, 1}));
 tensor2->set_lod(lod2);
 tensor2->mutable_data<int>(*place);
 phi::funcs::set_constant(ctx, tensor2, 100);
@@ -98,7 +98,7 @@ void RunMultiVarMsg(platform::Place place) {
 framework::Variable* var1 = scope_recv.FindVar("x1");
 auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
 EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2}));
-// EXPECT_EQ(tensor1->lod(), framework::Vector<size_t>({1, 3, 8}));
+// EXPECT_EQ(tensor1->lod(), phi::Vector<size_t>({1, 3, 8}));
 auto* tensor_data1 = const_cast<float*>(tensor1->data<float>());
 int tensor_numel1 = 512 * 8 * 4 * 2;
 for (int i = 0; i < tensor_numel1; ++i)
@@ -108,7 +108,7 @@ void RunMultiVarMsg(platform::Place place) {
 framework::Variable* var2 = scope_recv.FindVar("x2");
 auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
 EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64}));
-// EXPECT_EQ(tensor2->lod(), framework::Vector<size_t>({1, 1}));
+// EXPECT_EQ(tensor2->lod(), phi::Vector<size_t>({1, 1}));
 auto* tensor_data2 = const_cast<int*>(tensor2->data<int>());
 int tensor_numel2 = 1000 * 64;
 for (int i = 0; i < tensor_numel2; ++i) EXPECT_EQ(tensor_data2[i], 100);
...
@@ -162,27 +162,7 @@ cc_test(
 eigen_test
 SRCS eigen_test.cc
 DEPS tensor)
-cc_library(
-mixed_vector
-SRCS mixed_vector.cc
-DEPS device_context place memory)
-if(WITH_GPU)
-nv_test(
-mixed_vector_test
-SRCS mixed_vector_test.cc mixed_vector_test.cu
-DEPS mixed_vector place memory device_context tensor)
-elseif(WITH_ROCM)
-hip_test(
-mixed_vector_test
-SRCS mixed_vector_test.cc mixed_vector_test.cu
-DEPS mixed_vector place memory device_context tensor)
-else()
-cc_test(
-mixed_vector_test
-SRCS mixed_vector_test.cc
-DEPS mixed_vector place memory device_context tensor)
-endif()
 cc_library(
 lod_tensor
 SRCS lod_tensor.cc
...
@@ -2815,7 +2815,7 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
 LoD& lod = (*feed->mutable_lod());
 lod.resize(1);
 lod[0].resize(offset_cols_size);
-paddle::framework::MixVector<size_t> mixv_lod(&lod[0]);
+phi::MixVector<size_t> mixv_lod(&lod[0]);
 memcpy(mixv_lod.MutableData(platform::CPUPlace()),
 off_start_ptr,
 offset_cols_size * sizeof(size_t));
...
@@ -18,6 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/place.h"
 TEST(DataType, float16) {
 using paddle::platform::CPUPlace;
...
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
...
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
...
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/framework/eigen.h"
 #include <gtest/gtest.h>
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 namespace paddle {
...
@@ -158,7 +158,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
 LoD lod;
 for (int i = 0; i < req_var.lod_level(); ++i) {
-framework::Vector<size_t> v;
+phi::Vector<size_t> v;
 for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
 v.push_back(req_var.lod(i).lod_data(j));
 }
@@ -203,7 +203,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
 LoD lod;
 for (int i = 0; i < req_var.lod_level(); ++i) {
-framework::Vector<size_t> v;
+phi::Vector<size_t> v;
 for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
 v.push_back(req_var.lod(i).lod_data(j));
 }
...
@@ -21,12 +21,12 @@ limitations under the License. */
 #include <utility>
 #include <vector>
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 namespace paddle {
 namespace framework {
@@ -54,7 +54,7 @@ void MergeLoDTensor(phi::DenseTensor* target,
 * 0 2 4 7
 * 0 2 5 7 10 12 15 20
 */
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 std::string LoDToString(const LoD& lod);
...
@@ -31,7 +31,7 @@ TEST(LoD, data) {
 lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
 auto& v = lod[0];
-paddle::framework::MixVector<size_t> mix_vector_v(&v);
+phi::MixVector<size_t> mix_vector_v(&v);
 paddle::platform::CUDAPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
 hipLaunchKernelGGL(test,
@@ -69,7 +69,7 @@ TEST(DenseTensor, LoDInGPU) {
 EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
 auto lod = lod_tensor.lod();
-paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));
+phi::MixVector<size_t> mix_vector(&(lod[0]));
 #ifdef PADDLE_WITH_HIP
 hipLaunchKernelGGL(test,
...
@@ -15,15 +15,15 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
 namespace paddle {
 namespace framework {
-using LoD = std::vector<paddle::framework::Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 } // namespace framework
 } // namespace paddle
@@ -104,10 +104,10 @@ static void AllReduce(const phi::SelectedRows &src,
 // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
 // but we can use other ways to implement is in the future
 const auto &src_rows = src.rows();
-framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
+phi::Vector<int64_t> rows_num_vector(strategy.nranks_);
 rows_num_vector[strategy.local_rank_] = static_cast<int64_t>(src_rows.size());
 // CUDAMutableData use CalStream
-paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
+phi::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
 auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
 VLOG(4) << "start dev_ctx->wait";
 if (!use_calc_stream) {
@@ -138,9 +138,9 @@ static void AllReduce(const phi::SelectedRows &src,
 auto *dst_rows = dst->mutable_rows();
 dst_rows->resize(rows_num);
-paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
 auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
-paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+phi::MixVector<int64_t> mixv_src_rows(&src_rows);
 const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);
 auto *dst_tensor = dst->mutable_value();
...
@@ -158,9 +158,9 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
 << ", height: " << src.height();
 auto *dst_rows = dst->mutable_rows();
 dst_rows->resize(rows_num);
-paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
 auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
-paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+phi::MixVector<int64_t> mixv_src_rows(&src_rows);
 const int64_t *src_rows_ptr = mixv_src_rows.Data(place);
 auto *dst_tensor = dst->mutable_value();
...
@@ -98,7 +98,7 @@ TEST(AssignOp, AssignSelectedRows) {
 assign_functor(input);
 auto& out_selected_row = output.Get<phi::SelectedRows>();
-const paddle::framework::Vector<int64_t>& out_rows = out_selected_row.rows();
+const phi::Vector<int64_t>& out_rows = out_selected_row.rows();
 EXPECT_EQ(rows.size(), out_rows.size());
 for (size_t i = 0; i < rows.size(); ++i) {
 EXPECT_EQ(rows[i], out_rows[i]);
...
@@ -129,7 +129,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
 // merge elements and delete blank
 T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());
-paddle::framework::MixVector<size_t> mixv_input_lod(&input_lod[level]);
+phi::MixVector<size_t> mixv_input_lod(&input_lod[level]);
 MergeAndDelCudaKernel<T>
 <<<1, 1, 0, stream>>>(num_tokens,
 tokens,
...
@@ -166,7 +166,7 @@ class CVMGradCUDAKernel : public framework::OpKernel<T> {
 lod[lod.size() - 1],
 platform::errors::PreconditionNotMet(
 "Output(X@GRAD)'s dim[0] must be equal to last element of lod"));
-paddle::framework::MixVector<size_t> mixv_lod(&lod);
+phi::MixVector<size_t> mixv_lod(&lod);
 CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) /
 PADDLE_CUDA_NUM_THREADS,
 PADDLE_CUDA_NUM_THREADS,
...
@@ -59,7 +59,7 @@ class GPUBoxClipKernel : public framework::OpKernel<T> {
 auto stream = dev_ctx.stream();
 const size_t batch_size = lod.back().size() - 1;
 T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
-paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
+phi::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
 GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
 input->data<T>(),
 mix_vector.CUDAMutableData(dev_ctx.GetPlace()),
...
@@ -19,7 +19,6 @@ namespace cub = hipcub;
 #include <paddle/fluid/memory/allocation/allocator.h>
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
@@ -28,6 +27,7 @@ namespace cub = hipcub;
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 namespace paddle {
...
@@ -18,10 +18,10 @@ limitations under the License. */
 #include <string>
 #include <vector>
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/detection/bbox_util.cu.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -121,7 +121,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
 auto x_lod = x->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
+phi::MixVector<size_t> mixv_x_lod(&x_lod);
 size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace());
 #else
 size_t* x_lod_data = x_lod.data();
@@ -155,7 +155,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
 const int* neg_idx_data = neg_indices->data<int>();
 auto neg_lod = neg_indices->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-paddle::framework::MixVector<size_t> mixv_neg_lod(&neg_lod);
+phi::MixVector<size_t> mixv_neg_lod(&neg_lod);
 size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace());
 #else
 size_t* neg_lod_data = neg_lod.data();
...
@@ -30,11 +30,11 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/mixed_vector.h"
 #if defined(PADDLE_WITH_CUDA)
 namespace cg = cooperative_groups;
@@ -46,7 +46,7 @@ namespace operators {
 using SelectedRows = phi::SelectedRows;
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 #define WARP_SIZE 32
 #define MAX_WARP_NUM 32
@@ -376,7 +376,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
 }
 const size_t x2_lods_size = x2_lods.size() - 1;
-paddle::framework::MixVector<size_t> mixv_x2_lods(&x2_lods);
+phi::MixVector<size_t> mixv_x2_lods(&x2_lods);
 size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place);
@@ -401,7 +401,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
 }
 }
-paddle::framework::MixVector<size_t> mixv_x1_lods(&x1_lods);
+phi::MixVector<size_t> mixv_x1_lods(&x1_lods);
 size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place);
 auto* x1_data = x1->data<T>();
@@ -433,12 +433,12 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
 Vector<size_t> out_lods(x2_lods_size + 1, 0);
 Vector<size_t> map_lods(x2_lods_size + 1, 0);
-paddle::framework::MixVector<size_t> mixv_out_lods(&out_lods);
-paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+phi::MixVector<size_t> mixv_out_lods(&out_lods);
+phi::MixVector<size_t> mixv_map_lods(&map_lods);
 // thrust::device_vector<size_t> out_idx(1);
 Vector<size_t> out_idx(1, 0);
-paddle::framework::MixVector<size_t> mixv_out_idx(&out_idx);
+phi::MixVector<size_t> mixv_out_idx(&out_idx);
 size_t* out_idx_data = mixv_out_idx.CUDAMutableData(gpu_place);
 size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place);
@@ -500,7 +500,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
 } else {
 Vector<size_t> map_lods(2, 0);
-paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+phi::MixVector<size_t> mixv_map_lods(&map_lods);
 thrust::device_ptr<int64_t> map_data_ptr(map_data);
 map_data_ptr[0] = 0;
...
@@ -23,16 +23,16 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/core/mixed_vector.h"
 namespace paddle {
 namespace operators {
 using SelectedRows = phi::SelectedRows;
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 template <typename T>
 class FilterByInstagKernel : public framework::OpKernel<T> {
...
@@ -256,7 +256,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
 auto lod = ids->lod()[0];
 int64_t out_width = d_output->dims()[1];
-framework::Vector<int64_t> *new_rows = d_table->mutable_rows();
+phi::Vector<int64_t> *new_rows = d_table->mutable_rows();
 new_rows->resize(ids_num);
 std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t));
...
@@ -14,16 +14,16 @@
 #include <string>
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+#include "paddle/phi/core/mixed_vector.h"
 namespace paddle {
 namespace operators {
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 #define CUDA_KERNEL_LOOP(i, n) \
 for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
@@ -441,7 +441,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
 int embedding_size = inputs[0]->numel() / inputs[0]->dims()[0];
 int batch_size = -1;
-std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
+std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
 for (size_t i = 0; i < slot_size; ++i) {
 const auto *input = inputs[i];
@@ -480,7 +480,7 @@ }
 }
 output_data[i] = reinterpret_cast<T *>(
 dev_ctx.Alloc<T>(output, output->numel() * sizeof(T)));
-mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
 lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
 seqpool_outputs[i].Resize({batch_size, embedding_size});
 seqpool_output_data[i] = reinterpret_cast<T *>(dev_ctx.Alloc<T>(
@@ -527,7 +527,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
 int embedding_size = in_grads[0]->numel() / in_grads[0]->dims()[0];
 int batch_size = -1;
-std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
+std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
 for (size_t i = 0; i < slot_size; ++i) {
 auto *in_grad = in_grads[i];
@@ -563,7 +563,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
 in_grads_data[i] = reinterpret_cast<T *>(
 dev_ctx.Alloc<T>(in_grad, in_grad->numel() * sizeof(T)));
-mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
 lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
 cvm_data[i] = reinterpret_cast<const T *>(cvm->data<T>());
 }
...
@@ -17,11 +17,11 @@ limitations under the License. */
 #include <memory>
 #include "dnnl.hpp" // NOLINT
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/fused/multi_gru_op.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/mixed_vector.h"
 namespace paddle {
 namespace operators {
@@ -678,7 +678,7 @@ class MultiGRUHandler {
 const std::vector<const phi::DenseTensor*> biases_;
 phi::DenseTensor* hidden_;
 std::vector<dnnl::primitive_attr> attrs_;
-const paddle::framework::Vector<size_t>& x_lod_;
+const phi::Vector<size_t>& x_lod_;
 };
 template <typename T>
...
@@ -372,7 +372,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
 const_cast<T*>(weight_data + 2 * frame_size * frame_size);
 phi::DenseTensor ordered_h0;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (h0) {
 // Since the batch computing for GRU reorders the input sequences
...
@@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel<T> {
 const_cast<T*>(weight_data + 2 * frame_size * frame_size);
 phi::DenseTensor ordered_h0;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (h0) {
 // Since the batch computing for GRU reorders the input sequences
...
@@ -28,7 +28,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
 const phi::DenseTensor& src,
-framework::Vector<size_t> index_lod,
+phi::Vector<size_t> index_lod,
 phi::DenseTensor* dst,
 bool indexed_src) {
 phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -79,7 +79,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
 phi::DenseTensor ordered_h0, ordered_h0_grad;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (h0) {
 ReorderInitState<DeviceContext, T>(
...
@@ -169,12 +169,12 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
 auto stream = dev_ctx.stream();
 // copy GPU memory to CPU pinned memory
-framework::Vector<int64_t> new_rows;
+phi::Vector<int64_t> new_rows;
 new_rows.resize(ids_num);
 auto gpu_place = context.GetPlace();
 // TODO(yuyang18): Strange code here.
-paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+phi::MixVector<int64_t> mixv_new_rows(&new_rows);
 memory::Copy(gpu_place,
 mixv_new_rows.CUDAMutableData(context.GetPlace()),
 gpu_place,
...
@@ -159,11 +159,11 @@ struct LookupTableV2GradCUDAFunctor {
 dim3 threads(128, 8);
 dim3 grids(8, 1);
 auto stream = dev_ctx.stream();
-framework::Vector<int64_t> new_rows;
+phi::Vector<int64_t> new_rows;
 new_rows.resize(ids_num);
 auto gpu_place = context_.GetPlace();
-paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+phi::MixVector<int64_t> mixv_new_rows(&new_rows);
 if (!std::is_same<IdT, int64_t>::value) {
 InputTypeConvert<<<grids, threads, 0, stream>>>(
 ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
...
@@ -27,7 +27,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
 const phi::DenseTensor& src,
-framework::Vector<size_t> index_lod,
+phi::Vector<size_t> index_lod,
 phi::DenseTensor* dst,
 bool indexed_src) {
 phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -95,7 +95,7 @@ class LSTMKernel : public framework::OpKernel<T> {
 lstm_value.prev_state_value = nullptr;
 phi::DenseTensor ordered_c0;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (cell_t0) {
 // Since the batch computing for LSTM reorders the input sequence
@@ -236,7 +236,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
 // ordered_h0_g/c0_g is the reordered gradient of hidden/cell
 // initialization.
 phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (c0) {
 ReorderInitState<DeviceContext, T>(
...
@@ -70,7 +70,7 @@ class _ClipGradFunctor {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
 const phi::DenseTensor& src,
-framework::Vector<size_t> index,
+phi::Vector<size_t> index,
 phi::DenseTensor* dst,
 bool indexed_src) {
 phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -158,7 +158,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
 phi::DenseTensor ordered_c0;
 phi::DenseTensor ordered_h0;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (cell_t0) {
 // Since the batch computing for LSTMP reorders the input sequence
@@ -350,7 +350,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
 // initialization.
 phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-framework::Vector<size_t> order(batch_gate->lod()[2]);
+phi::Vector<size_t> order(batch_gate->lod()[2]);
 if (c0) {
 ReorderInitState<DeviceContext, T>(
...
@@ -446,8 +446,8 @@ class BeamSearchFunctor<phi::GPUContext, T> {
 framework::LoD selected_lod(2);
 selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end());
 selected_lod[1].resize(scores->dims()[0] + 1);
-paddle::framework::MixVector<size_t> mix_vector(&selected_lod[1]);
-paddle::framework::MixVector<size_t> mixv_abs(&abs_lod[level]);
+phi::MixVector<size_t> mix_vector(&selected_lod[1]);
+phi::MixVector<size_t> mixv_abs(&abs_lod[level]);
 size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace());
 if (num_seqs == 1) {
...
@@ -28,7 +28,7 @@ namespace math {
 template <typename T>
 void CopyValidData(phi::DenseTensor* dst_tensor,
 const phi::DenseTensor* src_tensor,
-const framework::Vector<size_t>& seq_offsets,
+const phi::Vector<size_t>& seq_offsets,
 int pad_seq_len,
 int step_width,
 bool norm_by_len,
...
@@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor<phi::GPUContext, T> {
 T* pad_data = pad_tensor->data<T>();
 const T* pad_value_data = pad_value.data<T>();
-paddle::framework::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
+phi::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
 SequencePaddingKernel<T, kSeqToPad><<<grid, threads, 0, context.stream()>>>(
 pad_data,
 seq_data,
@@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor<phi::GPUContext, T> {
 const T* pad_data = pad_tensor.data<T>();
 T* seq_data = seq_tensor->data<T>();
-paddle::framework::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
+phi::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
 SequencePaddingKernel<T, kPadToSeq><<<grid, threads, 0, context.stream()>>>(
 seq_data,
 pad_data,
...
@@ -29,7 +29,7 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
 enum CopyType { kSeqToPad, kPadToSeq };
 inline static size_t MaximumSequenceLength(
-const framework::Vector<size_t>& seq_offset) {
+const phi::Vector<size_t>& seq_offset) {
 size_t seq_num = seq_offset.size() - 1;
 size_t max_seq_len = 0;
 for (size_t i = 0; i < seq_num; ++i) {
@@ -39,7 +39,7 @@ inline static size_t MaximumSequenceLength(
 }
 inline static size_t TotalSequenceLength(
-const framework::Vector<size_t>& seq_offset) {
+const phi::Vector<size_t>& seq_offset) {
 size_t seq_num = seq_offset.size() - 1;
 size_t total_seq_len = 0;
 for (size_t i = 0; i < seq_num; ++i) {
@@ -50,7 +50,7 @@ inline static size_t TotalSequenceLength(
 inline static void CheckDims(const framework::DDim& seq_tensor_dims,
 const framework::DDim& pad_tensor_dims,
-const framework::Vector<size_t>& seq_offset,
+const phi::Vector<size_t>& seq_offset,
 int64_t padded_seq_len,
 int64_t step_width,
 const PadLayout& layout) {
...
@@ -203,7 +203,7 @@ class SequencePoolFunctor<phi::GPUContext, T> {
 const size_t item_dim = output->numel() / output->dims()[0];
 dim3 threads(1024, 1);
 dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
-paddle::framework::MixVector<size_t> mix_vector(&lod);
+phi::MixVector<size_t> mix_vector(&lod);
 if (pooltype == "MAX") {
 sequence_pool_kernel<T, MaxPoolFunctor<T>>
 <<<grid, threads, 0, context.stream()>>>(
@@ -421,7 +421,7 @@ class SequencePoolGradFunctor<phi::GPUContext, T> {
 const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
 dim3 threads(1024, 1);
 dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
-paddle::framework::MixVector<size_t> mix_vector(&lod);
+phi::MixVector<size_t> mix_vector(&lod);
 if (pooltype == "MAX") {
 sequence_pool_grad_kernel<T, MaxPoolGradFunctor<T>>
 <<<grid, threads, 0, context.stream()>>>(
...
@@ -197,7 +197,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
 ctx.template device_context<DeviceContext>(), *grad, merged_grad);
 auto* merged_rows = merged_grad->mutable_rows();
-paddle::framework::MixVector<int64_t> mixv_merged_rows(merged_rows);
+phi::MixVector<int64_t> mixv_merged_rows(merged_rows);
 const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace());
 auto row_numel = static_cast<int64_t>(merged_grad->value().dims()[1]);
 auto row_height = static_cast<int64_t>(merged_grad->rows().size());
...
@@ -164,7 +164,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
 int thread_x = kThreadsPerBlock;
 int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount();
 int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+phi::MixVector<int64_t> mixv_in_rows(&in_rows);
 SparseSGDFunctorKernel<<<max_blocks,
 thread_x,
 0,
...
@@ -153,7 +153,7 @@ class RowConvKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
 } else {
 batch_size = x->lod()[0].size() - 1;
 }
-framework::Vector<size_t> batch_indices(batch_size + 1);
+phi::Vector<size_t> batch_indices(batch_size + 1);
 int input_dim = 0;
 int timesteps = 0;
 if (is_tensor) {
@@ -231,7 +231,7 @@ class RowConvGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
 } else {
 batch_size = x->lod()[0].size() - 1;
 }
-framework::Vector<size_t> batch_indices(batch_size + 1);
+phi::Vector<size_t> batch_indices(batch_size + 1);
 int timesteps = 0;
 int input_dim = 0;
 if (is_tensor) {
...
@@ -338,7 +338,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
 batch_size = X->lod()[0].size() - 1;
 }
 int input_dim = 0;
-framework::Vector<size_t> batch_indices(batch_size + 1);
+phi::Vector<size_t> batch_indices(batch_size + 1);
 int timesteps = X->dims()[1];
 if (is_tensor) {
 for (int i = 0; i < batch_size + 1; i++) {
@@ -352,7 +352,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
 int num_sequence = batch_indices.size() - 1;
 int future_context = Filter->dims()[0];
-paddle::framework::MixVector<size_t> mix_vector(&batch_indices);
+phi::MixVector<size_t> mix_vector(&batch_indices);
 size_t *idx = mix_vector.CUDAMutableData(context.GetPlace());
 auto stream = context.cuda_device_context().stream();
@@ -397,7 +397,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
 }
 int input_dim = 0;
-framework::Vector<size_t> batch_indices(batch_size + 1);
+phi::Vector<size_t> batch_indices(batch_size + 1);
 int timesteps = X->dims()[1];
 if (is_tensor) {
 for (int i = 0; i < batch_size + 1; i++) {
@@ -411,7 +411,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
 // int input_dim = X->dims()[1];
 int num_sequence = batch_indices.size() - 1;
 int future_context = Filter->dims()[0];
-paddle::framework::MixVector<size_t> mixv_batch_indices(&batch_indices);
+phi::MixVector<size_t> mixv_batch_indices(&batch_indices);
 size_t *idx = mixv_batch_indices.CUDAMutableData(context.GetPlace());
 auto &device_ctx = context.cuda_device_context();
...
@@ -76,7 +76,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
 out->Resize({in_dims[0], win_size});
 auto out_data = out->mutable_data<T>(context.GetPlace());
 // Copy LoD to GPU
-paddle::framework::MixVector<size_t> mixv_lod0(&lod0);
+phi::MixVector<size_t> mixv_lod0(&lod0);
 const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace());
 // Calc output tensor
 CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
...
@@ -97,7 +97,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
 // Copy LoD to GPU
 auto last_lod = lod[lod.size() - 1];
 auto lod_len = last_lod.size();
-paddle::framework::MixVector<size_t> mixv_last_lod(&last_lod);
+phi::MixVector<size_t> mixv_last_lod(&last_lod);
 const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace());
 // Calc output LoD
 thrust::device_vector<size_t> dev_out_lod(lod_len);
...
@@ -65,10 +65,9 @@ static __global__ void sequence_expand_as_grad_kernel(
 template <typename T>
 struct SequenceExpandAsFunctor<phi::GPUContext, T> {
-void operator()(
-const phi::GPUContext &context,
+void operator()(const phi::GPUContext &context,
 const phi::DenseTensor &x,
-const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
+const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
 phi::DenseTensor *out) {
 int height = x.dims()[0];
 int width = phi::product(x.dims()) / height;
@@ -84,7 +83,7 @@ struct SequenceExpandAsFunctor<phi::GPUContext, T> {
 dim3 block_size(thread_x);
 dim3 grid_size(block_x);
-paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
 sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
 x.data<T>(),
 mixv_ref_lod.CUDAData(context.GetPlace()),
@@ -98,7 +97,7 @@ template <typename T>
 struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
 void operator()(const phi::GPUContext &context,
 const phi::DenseTensor &dout,
-const framework::Vector<size_t> &ref_lod, /*expand based lod*/
+const phi::Vector<size_t> &ref_lod, /*expand based lod*/
 phi::DenseTensor *dx) {
 int height = dx->dims()[0];
 int width = phi::product(dx->dims()) / height;
@@ -114,7 +113,7 @@ struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
 dim3 block_size(thread_x);
 dim3 grid_size(block_x);
-paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
 sequence_expand_as_grad_kernel<<<grid_size,
 block_size,
 0,
...
...@@ -26,28 +26,25 @@ namespace operators { ...@@ -26,28 +26,25 @@ namespace operators {
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandAsFunctor { struct SequenceExpandAsFunctor {
void operator()( void operator()(const DeviceContext &ctx,
const DeviceContext &ctx,
const phi::DenseTensor &x, const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/ const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out); phi::DenseTensor *out);
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandAsGradFunctor { struct SequenceExpandAsGradFunctor {
void operator()( void operator()(const DeviceContext &ctx,
const DeviceContext &ctx,
const phi::DenseTensor &dout, const phi::DenseTensor &dout,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/ const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *dx); phi::DenseTensor *dx);
}; };
template <typename T> template <typename T>
struct SequenceExpandAsFunctor<phi::CPUContext, T> { struct SequenceExpandAsFunctor<phi::CPUContext, T> {
void operator()( void operator()(const phi::CPUContext &context,
const phi::CPUContext &context,
const phi::DenseTensor &x, const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/ const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out) { phi::DenseTensor *out) {
int64_t height = x.dims()[0]; int64_t height = x.dims()[0];
int64_t width = phi::product(x.dims()) / height; int64_t width = phi::product(x.dims()) / height;
...@@ -122,10 +119,9 @@ class SequenceExpandAsKernel : public framework::OpKernel<T> { ...@@ -122,10 +119,9 @@ class SequenceExpandAsKernel : public framework::OpKernel<T> {
* */ * */
template <typename T> template <typename T>
struct SequenceExpandAsGradFunctor<phi::CPUContext, T> { struct SequenceExpandAsGradFunctor<phi::CPUContext, T> {
void operator()( void operator()(const phi::CPUContext &context,
const phi::CPUContext &context,
const phi::DenseTensor &dout, const phi::DenseTensor &dout,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/ const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *dx) { phi::DenseTensor *dx) {
int64_t height = dx->dims()[0]; int64_t height = dx->dims()[0];
int64_t width = phi::product(dx->dims()) / height; int64_t width = phi::product(dx->dims()) / height;
......
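Independent of the Vector move, the computation these CPU/GPU functors implement is: row i of x is replicated ref_lod[i + 1] - ref_lod[i] times into the output, and the grad functor sums the corresponding output rows back. A plain reference of the forward expansion, with hypothetical names (not part of the patch):

#include <cstddef>
#include <vector>

// Reference expansion: x is row-major with `width` elements per row; row i is
// repeated according to the span encoded in ref_lod.
std::vector<float> ExpandAsReference(const std::vector<float> &x, size_t width,
                                     const std::vector<size_t> &ref_lod) {
  std::vector<float> out;
  for (size_t i = 0; i + 1 < ref_lod.size(); ++i) {
    size_t span = ref_lod[i + 1] - ref_lod[i];  // how many times row i repeats
    for (size_t k = 0; k < span; ++k) {
      out.insert(out.end(), x.begin() + i * width, x.begin() + (i + 1) * width);
    }
  }
  return out;
}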
...@@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data, ...@@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data,
} }
} }
void GetOutputOffset(const framework::Vector<size_t>& x_lod, void GetOutputOffset(const phi::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod, const phi::Vector<size_t>& ref_lod,
framework::Vector<size_t>* out_offset) { phi::Vector<size_t>* out_offset) {
size_t offset = 0; size_t offset = 0;
int lod_size = static_cast<int>(x_lod.size()); int lod_size = static_cast<int>(x_lod.size());
for (int i = 0; i < static_cast<int>(x_lod.size()); ++i) { for (int i = 0; i < static_cast<int>(x_lod.size()); ++i) {
...@@ -99,8 +99,8 @@ template <typename T> ...@@ -99,8 +99,8 @@ template <typename T>
static int ExpandByMemoryCopy(const phi::GPUContext& context, static int ExpandByMemoryCopy(const phi::GPUContext& context,
const LoDTensor& x, const LoDTensor& x,
LoDTensor* out, LoDTensor* out,
const framework::Vector<size_t>& x_lod, const phi::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod, const phi::Vector<size_t>& ref_lod,
bool do_copy) { bool do_copy) {
auto out_data = out->data<T>(); auto out_data = out->data<T>();
auto x_data = x.data<T>(); auto x_data = x.data<T>();
...@@ -143,11 +143,10 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context, ...@@ -143,11 +143,10 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context,
template <typename T> template <typename T>
struct SequenceExpandFunctor<phi::GPUContext, T> { struct SequenceExpandFunctor<phi::GPUContext, T> {
void operator()( void operator()(const phi::GPUContext& context,
const phi::GPUContext& context,
const LoDTensor& x, const LoDTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out) { LoDTensor* out) {
int num_copys = int num_copys =
ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false); ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
...@@ -157,7 +156,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> { ...@@ -157,7 +156,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
} else { } else {
int x_item_length = x.numel() / x.dims()[0]; int x_item_length = x.numel() / x.dims()[0];
size_t x_lod_size = x_lod.size(); size_t x_lod_size = x_lod.size();
framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size()); phi::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset); GetOutputOffset(x_lod, ref_lod, &out_offset);
for (size_t i = 0; i < x_lod_size; ++i) { for (size_t i = 0; i < x_lod_size; ++i) {
...@@ -167,7 +166,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> { ...@@ -167,7 +166,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
out_offset[2 * x_lod_size + i] = ref_lod[i]; out_offset[2 * x_lod_size + i] = ref_lod[i];
} }
paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset); phi::MixVector<size_t> mixv_out_offset(&out_offset);
const size_t* out_offset_data = const size_t* out_offset_data =
mixv_out_offset.CUDAData(context.GetPlace()); mixv_out_offset.CUDAData(context.GetPlace());
const size_t* x_lod_data = out_offset_data + x_lod_size; const size_t* x_lod_data = out_offset_data + x_lod_size;
...@@ -197,11 +196,11 @@ template <typename T> ...@@ -197,11 +196,11 @@ template <typename T>
struct SequenceExpandGradFunctor<phi::GPUContext, T> { struct SequenceExpandGradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& context, void operator()(const phi::GPUContext& context,
const LoDTensor& dout, const LoDTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand based lod*/ const phi::Vector<size_t>& ref_lod, /*expand based lod*/
LoDTensor* dx) { LoDTensor* dx) {
int x_item_length = phi::product(dx->dims()) / dx->dims()[0]; int x_item_length = phi::product(dx->dims()) / dx->dims()[0];
framework::Vector<size_t> out_offset(x_lod.size()); phi::Vector<size_t> out_offset(x_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset); GetOutputOffset(x_lod, ref_lod, &out_offset);
int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16)); int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
...@@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor<phi::GPUContext, T> { ...@@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor<phi::GPUContext, T> {
int block_x = static_cast<int>(ref_lod.size()); int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z); dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1); dim3 grid_size(block_x, 1);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod); phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod); phi::MixVector<size_t> mixv_x_lod(&x_lod);
paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset); phi::MixVector<size_t> mixv_out_offset(&out_offset);
sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>( sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(), dout.data<T>(),
mixv_ref_lod.CUDAData(context.GetPlace()), mixv_ref_lod.CUDAData(context.GetPlace()),
......
...@@ -29,31 +29,28 @@ using EigenMatrix = phi::EigenMatrix<T, MajorType, IndexType>; ...@@ -29,31 +29,28 @@ using EigenMatrix = phi::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandFunctor { struct SequenceExpandFunctor {
void operator()( void operator()(const DeviceContext& ctx,
const DeviceContext& ctx,
const phi::DenseTensor& x, const phi::DenseTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* out); phi::DenseTensor* out);
}; };
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceExpandGradFunctor { struct SequenceExpandGradFunctor {
void operator()( void operator()(const DeviceContext& ctx,
const DeviceContext& ctx,
const phi::DenseTensor& dout, const phi::DenseTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* dx); phi::DenseTensor* dx);
}; };
template <typename T> template <typename T>
struct SequenceExpandFunctor<phi::CPUContext, T> { struct SequenceExpandFunctor<phi::CPUContext, T> {
void operator()( void operator()(const phi::CPUContext& context,
const phi::CPUContext& context,
const phi::DenseTensor& x, const phi::DenseTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* out) { phi::DenseTensor* out) {
int out_offset = 0; int out_offset = 0;
int x_item_length = x.numel() / x.dims()[0]; int x_item_length = x.numel() / x.dims()[0];
...@@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
} }
// x lod level is at most 1. // x lod level is at most 1.
framework::Vector<size_t> out_lod; phi::Vector<size_t> out_lod;
if (x_lod.size() == 1) { if (x_lod.size() == 1) {
out_lod.push_back(0); out_lod.push_back(0);
int out_offset = 0; int out_offset = 0;
...@@ -130,7 +127,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -130,7 +127,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
auto& ref_lod = *out->mutable_lod(); auto& ref_lod = *out->mutable_lod();
ref_lod[0] = out_lod; ref_lod[0] = out_lod;
} }
framework::Vector<size_t> ref_x_lod; phi::Vector<size_t> ref_x_lod;
if (x->lod().size() == 1) { if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0]; ref_x_lod = x->lod()[0];
} else { } else {
...@@ -161,11 +158,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> { ...@@ -161,11 +158,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
* */ * */
template <typename T> template <typename T>
struct SequenceExpandGradFunctor<phi::CPUContext, T> { struct SequenceExpandGradFunctor<phi::CPUContext, T> {
void operator()( void operator()(const phi::CPUContext& context,
const phi::CPUContext& context,
const phi::DenseTensor& dout, const phi::DenseTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const phi::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* dx) { phi::DenseTensor* dx) {
int dout_offset = 0; int dout_offset = 0;
for (size_t i = 1; i < ref_lod.size(); ++i) { for (size_t i = 1; i < ref_lod.size(); ++i) {
...@@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> { ...@@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
return; return;
} }
framework::Vector<size_t> ref_x_lod; phi::Vector<size_t> ref_x_lod;
framework::Vector<size_t> ref_lod = y_lod[ref_level]; phi::Vector<size_t> ref_lod = y_lod[ref_level];
if (x->lod().size() == 1) { if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0]; ref_x_lod = x->lod()[0];
} else { } else {
......
...@@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> { ...@@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
auto xlod = x.lod()[0]; auto xlod = x.lod()[0];
paddle::framework::MixVector<size_t> mixv_xlod(&xlod); phi::MixVector<size_t> mixv_xlod(&xlod);
lod = mixv_xlod.CUDAData(ctx.GetPlace()); lod = mixv_xlod.CUDAData(ctx.GetPlace());
} else { } else {
#endif #endif
......
...@@ -124,7 +124,7 @@ template <typename T> ...@@ -124,7 +124,7 @@ template <typename T>
struct SequenceSoftmaxFunctor<phi::GPUContext, T> { struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext &context, void operator()(const phi::GPUContext &context,
const LoDTensor &x, const LoDTensor &x,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/ const phi::Vector<size_t> &ref_lod, /*referenced lod*/
LoDTensor *out) { LoDTensor *out) {
int height = ref_lod.size() - 1; int height = ref_lod.size() - 1;
...@@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor<phi::GPUContext, T> { ...@@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(max_blocks); dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod); phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_kernel<T, kThreadsPerBlock> sequence_softmax_kernel<T, kThreadsPerBlock>
<<<grid_size, block_size, 0, context.stream()>>>( <<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), x.data<T>(),
...@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> { ...@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext &context, void operator()(const phi::GPUContext &context,
const LoDTensor &dout, const LoDTensor &dout,
const LoDTensor &out, const LoDTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/ const phi::Vector<size_t> &ref_lod, /*referenced lod*/
LoDTensor *dx) { LoDTensor *dx) {
size_t height = ref_lod.size() - 1; size_t height = ref_lod.size() - 1;
...@@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> { ...@@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x); dim3 block_size(thread_x);
dim3 grid_size(max_blocks); dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod); phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_grad_kernel<T, kThreadsPerBlock> sequence_softmax_grad_kernel<T, kThreadsPerBlock>
<<<grid_size, block_size, 0, context.stream()>>>( <<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(), dout.data<T>(),
......
...@@ -21,10 +21,9 @@ namespace operators { ...@@ -21,10 +21,9 @@ namespace operators {
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct SequenceSoftmaxFunctor { struct SequenceSoftmaxFunctor {
void operator()( void operator()(const DeviceContext &ctx,
const DeviceContext &ctx,
const phi::DenseTensor &x, const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/ const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out); phi::DenseTensor *out);
}; };
...@@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor { ...@@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor {
void operator()(const DeviceContext &ctx, void operator()(const DeviceContext &ctx,
const phi::DenseTensor &dout, const phi::DenseTensor &dout,
const phi::DenseTensor &out, const phi::DenseTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/ const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *dx); phi::DenseTensor *dx);
}; };
...@@ -41,7 +40,7 @@ template <typename T> ...@@ -41,7 +40,7 @@ template <typename T>
struct SequenceSoftmaxFunctor<phi::CPUContext, T> { struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx, void operator()(const phi::CPUContext &ctx,
const phi::DenseTensor &x, const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/ const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *out) { phi::DenseTensor *out) {
size_t height = ref_lod.size() - 1; size_t height = ref_lod.size() - 1;
const T *in_data = x.data<T>(); const T *in_data = x.data<T>();
...@@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> { ...@@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx, void operator()(const phi::CPUContext &ctx,
const phi::DenseTensor &dout, const phi::DenseTensor &dout,
const phi::DenseTensor &out, const phi::DenseTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/ const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *dx) { phi::DenseTensor *dx) {
size_t height = ref_lod.size() - 1; size_t height = ref_lod.size() - 1;
......
...@@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> { ...@@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
auto pos_data = pos->mutable_data<int>(context.GetPlace()); auto pos_data = pos->mutable_data<int>(context.GetPlace());
int offset = 0; int offset = 0;
framework::Vector<size_t> vec_out_lod; phi::Vector<size_t> vec_out_lod;
vec_out_lod.reserve(batch_size + 1); vec_out_lod.reserve(batch_size + 1);
for (int i = 0; i <= batch_size; ++i) { for (int i = 0; i <= batch_size; ++i) {
offset = row_lod[i]; offset = row_lod[i];
......
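The hunk above builds a phi::Vector<size_t> of LoD offsets for the pooled output. LoD vectors in these ops are cumulative offsets; as a standalone illustration of that convention, a hypothetical helper (not part of the patch) that turns per-sequence lengths into such a vector:

#include <cstddef>
#include <vector>

// Lengths {3, 5, 2} become offsets {0, 3, 8, 10}; sequence i then spans
// [offsets[i], offsets[i + 1]) in the flattened tensor.
std::vector<size_t> LengthsToLodOffsets(const std::vector<size_t> &lengths) {
  std::vector<size_t> lod;
  lod.reserve(lengths.size() + 1);
  lod.push_back(0);
  for (size_t len : lengths) {
    lod.push_back(lod.back() + len);
  }
  return lod;
}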
...@@ -25,16 +25,16 @@ ...@@ -25,16 +25,16 @@
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/timer.h" #include "paddle/fluid/platform/timer.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T>
using Vector = framework::Vector<T>; using Vector = phi::Vector<T>;
template <typename T> template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> { class ShuffleBatchKernel : public framework::OpKernel<T> {
......
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/sampler.h" #include "paddle/fluid/operators/math/sampler.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) { // NOLINT ...@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
self.set_rows(rows); self.set_rows(rows);
#else #else
Vector<int64_t> new_rows(rows); std::vector<int64_t> new_rows(rows);
self.set_rows(new_rows); self.set_rows(new_rows);
#endif #endif
}) })
......
...@@ -114,6 +114,11 @@ cc_library( ...@@ -114,6 +114,11 @@ cc_library(
SRCS custom_kernel.cc SRCS custom_kernel.cc
DEPS kernel_factory) DEPS kernel_factory)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS device_context place memory)
# Will remove once we implemented MKLDNN_Tensor # Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN) if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn) add_dependencies(dense_tensor mkldnn)
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/phi/core/mixed_vector.h"
#include <algorithm> #include <algorithm>
#include <initializer_list> #include <initializer_list>
...@@ -22,28 +22,26 @@ limitations under the License. */ ...@@ -22,28 +22,26 @@ limitations under the License. */
#include <vector> #include <vector>
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/all_context.h"
#include "paddle/utils/none.h" #include "paddle/utils/none.h"
#include "paddle/utils/optional.h" #include "paddle/utils/optional.h"
namespace paddle { namespace phi {
namespace framework {
template <typename T> template <typename T>
void CopyToCPUHelper(std::vector<T> *cpu_, void CopyToCPUHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_, phi::Allocator::AllocationPtr *gpu_,
size_t *gpu_memory_size_) { size_t *gpu_memory_size_) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// COPY GPU Data To CPU // COPY GPU Data To CPU
auto *dev_ctx = static_cast<phi::GPUContext *>( auto *dev_ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get((*gpu_)->place())); phi::DeviceContextPool::Instance().Get((*gpu_)->place()));
auto stream = dev_ctx->stream(); auto stream = dev_ctx->stream();
void *src = (*gpu_)->ptr(); void *src = (*gpu_)->ptr();
void *dst = cpu_->data(); void *dst = cpu_->data();
paddle::memory::Copy(platform::CPUPlace(), paddle::memory::Copy(phi::CPUPlace(),
dst, dst,
OptionalCUDAPlace(*gpu_).get(), OptionalCUDAPlace(*gpu_).get(),
src, src,
...@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_, ...@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_,
template <typename T> template <typename T>
void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_, void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_, phi::Allocator::AllocationPtr *gpu_,
size_t *gpu_memory_size_, size_t *gpu_memory_size_,
const platform::Place &place) { const phi::Place &place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void *src = cpu_->data(); void *src = cpu_->data();
*gpu_memory_size_ = cpu_->size() * sizeof(T); // sizeof(T) *gpu_memory_size_ = cpu_->size() * sizeof(T); // sizeof(T)
(*gpu_) = memory::Alloc(place, *gpu_memory_size_); (*gpu_) = paddle::memory::Alloc(place, *gpu_memory_size_);
void *dst = (*gpu_)->ptr(); void *dst = (*gpu_)->ptr();
auto *dev_ctx = static_cast<phi::GPUContext *>( auto *dev_ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(place)); phi::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream(); auto stream = dev_ctx->stream();
paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(), paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(),
dst, dst,
platform::CPUPlace(), phi::CPUPlace(),
src, src,
*gpu_memory_size_, *gpu_memory_size_,
stream); stream);
...@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_, ...@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
\ \
template <> \ template <> \
void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA( \ void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA( \
const platform::Place &place) const { \ const phi::Place &place) const { \
CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \ CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
} }
...@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t) ...@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t)
INSTANTIATE_VECTOR_FOR_TYPE(int) INSTANTIATE_VECTOR_FOR_TYPE(int)
INSTANTIATE_VECTOR_FOR_TYPE(int64_t) INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
}; // namespace framework }; // namespace phi
} // namespace paddle
...@@ -22,20 +22,22 @@ limitations under the License. */ ...@@ -22,20 +22,22 @@ limitations under the License. */
#include <vector> #include <vector>
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/utils/none.h" #include "paddle/utils/none.h"
#include "paddle/utils/optional.h" #include "paddle/utils/optional.h"
namespace paddle { namespace phi {
namespace framework {
template <class T> template <class T>
using Vector = std::vector<T>; using Vector = std::vector<T>;
inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace( inline paddle::optional<phi::GPUPlace> OptionalCUDAPlace(
const paddle::memory::allocation::AllocationPtr &gpu_) { const phi::Allocator::AllocationPtr &gpu_) {
return gpu_ == nullptr ? paddle::none return gpu_ == nullptr ? paddle::none
: paddle::optional<platform::CUDAPlace>(gpu_->place()); : paddle::optional<phi::GPUPlace>(gpu_->place());
} }
// Vector<T> implements the std::vector interface, and can get Data or // Vector<T> implements the std::vector interface, and can get Data or
...@@ -146,18 +148,18 @@ class MixVector { ...@@ -146,18 +148,18 @@ class MixVector {
} }
// get cuda ptr. immutable // get cuda ptr. immutable
const T *CUDAData(platform::Place place) const { const T *CUDAData(phi::Place place) const {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
platform::is_gpu_place(place), place.GetType() == phi::AllocationType::GPU,
true, true,
platform::errors::Unavailable( phi::errors::Unavailable(
"Place mismatch, CUDA Data must be on CUDA place.")); "Place mismatch, CUDA Data must be on CUDA place."));
ImmutableCUDA(place); ImmutableCUDA(place);
return reinterpret_cast<T *>(gpu_->ptr()); return reinterpret_cast<T *>(gpu_->ptr());
} }
// get cuda ptr. mutable // get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) { T *CUDAMutableData(phi::Place place) {
const T *ptr = CUDAData(place); const T *ptr = CUDAData(place);
flag_ = kDirty | kDataInCUDA; flag_ = kDirty | kDataInCUDA;
return const_cast<T *>(ptr); return const_cast<T *>(ptr);
...@@ -178,7 +180,7 @@ class MixVector { ...@@ -178,7 +180,7 @@ class MixVector {
std::mutex &Mutex() const { return mtx_; } std::mutex &Mutex() const { return mtx_; }
paddle::optional<platform::CUDAPlace> CUDAPlace() const { paddle::optional<phi::GPUPlace> CUDAPlace() const {
return OptionalCUDAPlace(gpu_); return OptionalCUDAPlace(gpu_);
} }
...@@ -199,7 +201,7 @@ class MixVector { ...@@ -199,7 +201,7 @@ class MixVector {
void CopyToCPU() const; void CopyToCPU() const;
void ImmutableCUDA(platform::Place place) const { void ImmutableCUDA(phi::Place place) const {
if (IsDirty()) { if (IsDirty()) {
if (IsInCPU()) { if (IsInCPU()) {
CopyCPUDataToCUDA(place); CopyCPUDataToCUDA(place);
...@@ -207,7 +209,7 @@ class MixVector { ...@@ -207,7 +209,7 @@ class MixVector {
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (IsInCUDA() && !(place == gpu_->place())) { } else if (IsInCUDA() && !(place == gpu_->place())) {
PADDLE_THROW( PADDLE_THROW(
platform::errors::Unavailable("Unexpected data place mismatch.")); phi::errors::Unavailable("Unexpected data place mismatch."));
// Still dirty // Still dirty
} else { } else {
// Dirty && DataInCUDA && Device is same // Dirty && DataInCUDA && Device is same
...@@ -220,7 +222,7 @@ class MixVector { ...@@ -220,7 +222,7 @@ class MixVector {
SetFlag(kDataInCUDA); SetFlag(kDataInCUDA);
} else if (!(place == gpu_->place())) { } else if (!(place == gpu_->place())) {
PADDLE_THROW( PADDLE_THROW(
platform::errors::Unavailable("Unexpected data place mismatch.")); phi::errors::Unavailable("Unexpected data place mismatch."));
} else { } else {
// Not Dirty && DataInCUDA && Device is same // Not Dirty && DataInCUDA && Device is same
// Do nothing. // Do nothing.
...@@ -228,7 +230,7 @@ class MixVector { ...@@ -228,7 +230,7 @@ class MixVector {
} }
} }
void CopyCPUDataToCUDA(const platform::Place &place) const; void CopyCPUDataToCUDA(const phi::Place &place) const;
void ImmutableCPU() const { void ImmutableCPU() const {
if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or
...@@ -249,7 +251,7 @@ class MixVector { ...@@ -249,7 +251,7 @@ class MixVector {
bool IsInCPU() const { return flag_ & kDataInCPU; } bool IsInCPU() const { return flag_ & kDataInCPU; }
std::vector<T> *cpu_; std::vector<T> *cpu_;
mutable paddle::memory::allocation::AllocationPtr gpu_; mutable phi::Allocator::AllocationPtr gpu_;
mutable size_t gpu_memory_size_{0}; mutable size_t gpu_memory_size_{0};
mutable int flag_; mutable int flag_;
...@@ -332,9 +334,9 @@ class MixVector { ...@@ -332,9 +334,9 @@ class MixVector {
} }
// get cuda ptr. immutable // get cuda ptr. immutable
const T *CUDAData(platform::Place place) const { const T *CUDAData(phi::Place place) const {
{ {
platform::CUDAPlace p(place.GetDeviceId()); phi::GPUPlace p(place.GetDeviceId());
auto &mtx = m_->Mutex(); auto &mtx = m_->Mutex();
std::lock_guard<std::mutex> guard(mtx); std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_->CUDAPlace(); auto cuda_place = m_->CUDAPlace();
...@@ -348,9 +350,9 @@ class MixVector { ...@@ -348,9 +350,9 @@ class MixVector {
} }
// get cuda ptr. mutable // get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) { T *CUDAMutableData(phi::Place place) {
{ {
platform::CUDAPlace p(place.GetDeviceId()); phi::GPUPlace p(place.GetDeviceId());
auto &mtx = m_->Mutex(); auto &mtx = m_->Mutex();
std::lock_guard<std::mutex> guard(mtx); std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_->CUDAPlace(); auto cuda_place = m_->CUDAPlace();
...@@ -372,8 +374,8 @@ class MixVector { ...@@ -372,8 +374,8 @@ class MixVector {
void reserve(size_t size) { m_->reserve(size); } void reserve(size_t size) { m_->reserve(size); }
// the unify method to access CPU or CUDA data. immutable. // the unify method to access CPU or CUDA data. immutable.
const T *Data(platform::Place place) const { const T *Data(phi::Place place) const {
if (platform::is_gpu_place(place)) { if (place.GetType() == phi::AllocationType::GPU) {
return CUDAData(place); return CUDAData(place);
} else { } else {
return data(); return data();
...@@ -381,8 +383,8 @@ class MixVector { ...@@ -381,8 +383,8 @@ class MixVector {
} }
// the unify method to access CPU or CUDA data. mutable. // the unify method to access CPU or CUDA data. mutable.
T *MutableData(platform::Place place) { T *MutableData(phi::Place place) {
if (platform::is_gpu_place(place)) { if (place.GetType() == phi::AllocationType::GPU) {
return CUDAMutableData(place); return CUDAMutableData(place);
} else { } else {
return data(); return data();
...@@ -397,5 +399,4 @@ class MixVector { ...@@ -397,5 +399,4 @@ class MixVector {
mutable std::unique_ptr<VectorData> m_; mutable std::unique_ptr<VectorData> m_;
}; };
}; // namespace framework }; // namespace phi
} // namespace paddle
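Taken together, the two hunks above move MixVector itself out of paddle::framework into phi: paddle::memory::allocation::AllocationPtr becomes phi::Allocator::AllocationPtr, platform::CUDAPlace/CPUPlace become phi::GPUPlace/phi::CPUPlace, platform::DeviceContextPool becomes phi::DeviceContextPool, and platform::errors becomes phi::errors, while the copy-on-access behaviour is unchanged. A minimal caller-side sketch of the unified accessors shown above (illustrative only, not part of the patch):

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/mixed_vector.h"

// Data()/MutableData() return the plain CPU pointer on CPU places and dispatch
// to CUDAData()/CUDAMutableData() on GPU places, copying the host data to the
// device the first time it is requested there.
const size_t *LodData(phi::MixVector<size_t> *mixv, const phi::Place &place) {
  return mixv->Data(place);
}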
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
#include "paddle/phi/kernels/edit_distance_kernel.h" #include "paddle/phi/kernels/edit_distance_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/complex.h" #include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi { namespace phi {
...@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx, ...@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx,
int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum); int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum);
auto batch_size = hyps.dims()[0]; auto batch_size = hyps.dims()[0];
paddle::framework::Vector<size_t> hyp_lod(batch_size + 1); phi::Vector<size_t> hyp_lod(batch_size + 1);
paddle::framework::Vector<size_t> ref_lod(batch_size + 1); phi::Vector<size_t> ref_lod(batch_size + 1);
bool use_length = hypslength.get_ptr() != nullptr; bool use_length = hypslength.get_ptr() != nullptr;
......
...@@ -14,8 +14,8 @@ limitations under the License. */ ...@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/selected_rows_functor.h" #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/phi/core/mixed_vector.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "paddle/phi/backends/onednn/axpy_handler.h" #include "paddle/phi/backends/onednn/axpy_handler.h"
...@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> { ...@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
auto* in2_value = input2->mutable_value(); auto* in2_value = input2->mutable_value();
// concat rows // concat rows
paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows); phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end()); mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
auto in1_place = input1.place(); auto in1_place = input1.place();
...@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> { ...@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
std::vector<int64_t> in2_rows; std::vector<int64_t> in2_rows;
in2_rows.reserve(in2_rows.size() + size); in2_rows.reserve(in2_rows.size() + size);
for (auto iter = input1.begin(); iter != input1.end(); ++iter) { for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows(); const phi::Vector<int64_t>& in_rows = (*iter)->rows();
in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end()); in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
} }
input2->set_rows(in2_rows); input2->set_rows(in2_rows);
...@@ -646,7 +646,7 @@ struct MergeAdd<phi::XPUContext, T> { ...@@ -646,7 +646,7 @@ struct MergeAdd<phi::XPUContext, T> {
const phi::SelectedRows& input, const phi::SelectedRows& input,
phi::SelectedRows* output, phi::SelectedRows* output,
const bool sorted_result = false) { const bool sorted_result = false) {
paddle::framework::Vector<int64_t> input_rows(input.rows()); phi::Vector<int64_t> input_rows(input.rows());
if (input_rows.size() == 0) { if (input_rows.size() == 0) {
return; return;
} }
......
...@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> { ...@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
input2.height())); input2.height()));
output->set_height(in1_height); output->set_height(in1_height);
paddle::framework::Vector<int64_t> in1_rows(input1.rows()); phi::Vector<int64_t> in1_rows(input1.rows());
auto& in2_rows = input2.rows(); auto& in2_rows = input2.rows();
std::vector<int64_t> out_rows; std::vector<int64_t> out_rows;
out_rows.reserve(in1_rows.size() + in2_rows.size()); out_rows.reserve(in1_rows.size() + in2_rows.size());
...@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> { ...@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
const int block_size = 256; const int block_size = 256;
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1); dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows); phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddTensorKernel<T, block_size> SelectedRowsAddTensorKernel<T, block_size>
<<<grid, threads, 0, context.stream()>>>( <<<grid, threads, 0, context.stream()>>>(
in1_data, in1_data,
...@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> { ...@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
auto* in2_value = input2->mutable_value(); auto* in2_value = input2->mutable_value();
// concat rows // concat rows
paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows); phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
if (in1_rows.size()) { if (in1_rows.size()) {
mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end()); mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
} }
...@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> { ...@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
const int block_size = 256; const int block_size = 256;
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1); dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows); phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddToTensorKernel<T, block_size> SelectedRowsAddToTensorKernel<T, block_size>
<<<grid, threads, 0, context.stream()>>>( <<<grid, threads, 0, context.stream()>>>(
in1_data, in1_data,
...@@ -378,7 +378,7 @@ struct MergeAddImpl { ...@@ -378,7 +378,7 @@ struct MergeAddImpl {
const phi::SelectedRows& input, const phi::SelectedRows& input,
phi::SelectedRows* output, phi::SelectedRows* output,
const bool sorted_result = false) { const bool sorted_result = false) {
paddle::framework::Vector<int64_t> input_rows(input.rows()); phi::Vector<int64_t> input_rows(input.rows());
if (input_rows.size() == 0) { if (input_rows.size() == 0) {
return; return;
} }
...@@ -386,7 +386,7 @@ struct MergeAddImpl { ...@@ -386,7 +386,7 @@ struct MergeAddImpl {
phi::SelectedRows& out = *output; phi::SelectedRows& out = *output;
std::set<int64_t> row_set(input_rows.begin(), input_rows.end()); std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end()); std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu); phi::Vector<int64_t> merge_rows(merge_rows_cpu);
auto input_width = input.value().dims()[1]; auto input_width = input.value().dims()[1];
...@@ -407,8 +407,8 @@ struct MergeAddImpl { ...@@ -407,8 +407,8 @@ struct MergeAddImpl {
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid1(input_rows.size(), 1); dim3 grid1(input_rows.size(), 1);
paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows); phi::MixVector<int64_t> mix_vector_input(&input_rows);
paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows()); phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>( MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
input_data, input_data,
mix_vector_input.CUDAData(context.GetPlace()), mix_vector_input.CUDAData(context.GetPlace()),
...@@ -459,7 +459,7 @@ struct MergeAddImpl { ...@@ -459,7 +459,7 @@ struct MergeAddImpl {
} }
std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(), std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
merged_row_set.end()); merged_row_set.end());
paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu); phi::Vector<int64_t> merge_rows(merge_rows_cpu);
out.set_rows(merge_rows); out.set_rows(merge_rows);
out.set_height(input_height); out.set_height(input_height);
...@@ -485,8 +485,8 @@ struct MergeAddImpl { ...@@ -485,8 +485,8 @@ struct MergeAddImpl {
auto& input_rows = input->rows(); auto& input_rows = input->rows();
dim3 grid1(input_rows.size(), 1); dim3 grid1(input_rows.size(), 1);
paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows); phi::MixVector<int64_t> mix_vector_input(&input_rows);
paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows()); phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>( MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
input_data, input_data,
mix_vector_input.CUDAData(context.GetPlace()), mix_vector_input.CUDAData(context.GetPlace()),
......
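The MergeAdd hunks above first de-duplicate the selected rows on the host and only then wrap the result in a phi::Vector / phi::MixVector for the GPU kernel. That host-side step, extracted as a standalone sketch (hypothetical helper name, not part of the patch):

#include <cstdint>
#include <set>
#include <vector>

// Collapses duplicate row indices into a unique, sorted list; this is the
// content that ends up in phi::Vector<int64_t> merge_rows above.
std::vector<int64_t> UniqueRows(const std::vector<int64_t> &input_rows) {
  std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
  return std::vector<int64_t>(row_set.begin(), row_set.end());
}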
...@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> { ...@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> {
public: public:
void operator()(const phi::CPUContext& context, void operator()(const phi::CPUContext& context,
const phi::DenseTensor& src, const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod, phi::Vector<size_t> index_lod,
phi::DenseTensor* dst, phi::DenseTensor* dst,
bool is_src_index) { bool is_src_index) {
size_t* index = index_lod.data(); size_t* index = index_lod.data();
......
...@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> { ...@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
public: public:
void operator()(const phi::GPUContext& context, void operator()(const phi::GPUContext& context,
const phi::DenseTensor& src, const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod, phi::Vector<size_t> index_lod,
phi::DenseTensor* dst, phi::DenseTensor* dst,
bool is_src_index) { bool is_src_index) {
auto src_dims = src.dims(); auto src_dims = src.dims();
...@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> { ...@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
dim3 threads(128, 8); dim3 threads(128, 8);
dim3 grid(8, 1); dim3 grid(8, 1);
auto stream = context.stream(); auto stream = context.stream();
paddle::framework::MixVector<size_t> mix_index_lod(&index_lod); phi::MixVector<size_t> mix_index_lod(&index_lod);
CopyMatrixRowsKernel<T, 128, 8, 8><<<grid, threads, 0, stream>>>( CopyMatrixRowsKernel<T, 128, 8, 8><<<grid, threads, 0, stream>>>(
src_data, src_data,
dst_data, dst_data,
......
...@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor { ...@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor {
// The indexed rows are based on the input index. // The indexed rows are based on the input index.
void operator()(const DeviceContext& context, void operator()(const DeviceContext& context,
const phi::DenseTensor& src, const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod, phi::Vector<size_t> index_lod,
phi::DenseTensor* dst, phi::DenseTensor* dst,
bool is_src_index); bool is_src_index);
}; };
......
...@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> { ...@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> {
const size_t seq_width = seq->numel() / seq->dims()[0]; const size_t seq_width = seq->numel() / seq->dims()[0];
auto abs_offset_lod = paddle::framework::ToAbsOffset(lod); auto abs_offset_lod = paddle::framework::ToAbsOffset(lod);
T* seq_data = context.template Alloc<T>(seq); T* seq_data = context.template Alloc<T>(seq);
paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level])); phi::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL( hipLaunchKernelGGL(
......
...@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> { ...@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func; phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
auto grad_merge = merge_func(context, grad); auto grad_merge = merge_func(context, grad);
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>(); auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows()); phi::Vector<int64_t> merge_rows(grad_merge.rows());
// 2. m += g_m * g_m // 2. m += g_m * g_m
auto grad_square = auto grad_square =
SquareSelectedRows<phi::GPUContext, T>(context, grad_merge); SquareSelectedRows<phi::GPUContext, T>(context, grad_merge);
...@@ -104,7 +104,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> { ...@@ -104,7 +104,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
const int block_size = 256; const int block_size = 256;
dim3 threads(block_size, 1); dim3 threads(block_size, 1);
dim3 grid2(1, merge_rows.size()); dim3 grid2(1, merge_rows.size());
paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows); phi::MixVector<int64_t> mixv_merge_rows(&merge_rows);
SparseAdagradFunctorKernel<T, 256> SparseAdagradFunctorKernel<T, 256>
<<<grid2, <<<grid2,
threads, threads,
......
...@@ -87,8 +87,8 @@ void EditDistanceKernel(const Context& ctx, ...@@ -87,8 +87,8 @@ void EditDistanceKernel(const Context& ctx,
auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream(); auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
paddle::framework::Vector<size_t> hyp_lod(batch_size + 1); phi::Vector<size_t> hyp_lod(batch_size + 1);
paddle::framework::Vector<size_t> ref_lod(batch_size + 1); phi::Vector<size_t> ref_lod(batch_size + 1);
bool use_length = hypslength.get_ptr() != nullptr; bool use_length = hypslength.get_ptr() != nullptr;
......
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
#include "paddle/phi/kernels/embedding_grad_kernel.h" #include "paddle/phi/kernels/embedding_grad_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/embedding_util.h" #include "paddle/phi/kernels/funcs/embedding_util.h"
...@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor { ...@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor {
dim3 threads(128, 8); dim3 threads(128, 8);
dim3 grids(8, 1); dim3 grids(8, 1);
auto stream = dev_ctx_.stream(); auto stream = dev_ctx_.stream();
paddle::framework::Vector<int64_t> new_rows; phi::Vector<int64_t> new_rows;
new_rows.resize(ids_num); new_rows.resize(ids_num);
auto gpu_place = dev_ctx_.GetPlace(); auto gpu_place = dev_ctx_.GetPlace();
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows); phi::MixVector<int64_t> mixv_new_rows(&new_rows);
if (!std::is_same<IdT, int64_t>::value) { if (!std::is_same<IdT, int64_t>::value) {
InputTypeConvert<<<grids, threads, 0, stream>>>( InputTypeConvert<<<grids, threads, 0, stream>>>(
ids_data, ids_num, mixv_new_rows.MutableData(gpu_place)); ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
......
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
#include "paddle/phi/kernels/sgd_kernel.h" #include "paddle/phi/kernels/sgd_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_helper.h" #include "paddle/phi/backends/gpu/gpu_helper.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
namespace phi { namespace phi {
...@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel( ...@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel(
int thread_x = kThreadsPerBlock; int thread_x = kThreadsPerBlock;
int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
int max_blocks = std::max(max_threads / kThreadsPerBlock, 1); int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows); phi::MixVector<int64_t> mixv_in_rows(&in_rows);
SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>( SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
in_data, in_data,
mixv_in_rows.CUDAData(dev_ctx.GetPlace()), mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
......
...@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx, ...@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx,
merge_func(ctx, grad, merged_grad); merge_func(ctx, grad, merged_grad);
auto* grad_merge_rows = merged_grad->mutable_rows(); auto* grad_merge_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace()); const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size(); int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size();
funcs::ForRange<Context> for_range(ctx, param.numel()); funcs::ForRange<Context> for_range(ctx, param.numel());
......
...@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx, ...@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx,
funcs::ForRange<Context> for_range(ctx, limit); funcs::ForRange<Context> for_range(ctx, limit);
auto &grad_merge_rows = merged_grad->rows(); auto &grad_merge_rows = merged_grad->rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace()); const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto &merged_tensor = merged_grad->value(); auto &merged_tensor = merged_grad->value();
......
...@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx, ...@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx,
DenseTensor* loss, DenseTensor* loss,
DenseTensor* warpctcgrad) { DenseTensor* warpctcgrad) {
size_t num_sequences, sequence_width, max_sequence_length; size_t num_sequences, sequence_width, max_sequence_length;
paddle::framework::Vector<size_t> logits_lod; phi::Vector<size_t> logits_lod;
paddle::framework::Vector<size_t> label_lod; phi::Vector<size_t> label_lod;
if (logits_length.is_initialized() && labels_length.is_initialized()) { if (logits_length.is_initialized() && labels_length.is_initialized()) {
num_sequences = logits.dims()[1]; num_sequences = logits.dims()[1];
sequence_width = logits.dims()[2]; sequence_width = logits.dims()[2];
...@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx, ...@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx,
paddle::operators::math::TotalSequenceLength(label_lod)), paddle::operators::math::TotalSequenceLength(label_lod)),
1}); 1});
dev_ctx.template HostAlloc<int>(&warpctc_label); dev_ctx.template HostAlloc<int>(&warpctc_label);
std::vector<paddle::framework::Vector<size_t>> lod; std::vector<phi::Vector<size_t>> lod;
lod.push_back(label_lod); lod.push_back(label_lod);
warpctc_label.set_lod(lod); warpctc_label.set_lod(lod);
......
...@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel( ...@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows(); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace()); const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
...@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel( ...@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows(); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace()); const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
...@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel( ...@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows(); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace()); const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h" #include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h" #include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
namespace phi { namespace phi {
...@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx, ...@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
path.get_ptr(), path.get_ptr(),
errors::NotFound("Custom tree must be set for sparse mode!")); errors::NotFound("Custom tree must be set for sparse mode!"));
paddle::framework::Vector<int64_t> real_rows = PathToRows(*path); phi::Vector<int64_t> real_rows = PathToRows(*path);
w_grad->set_rows(real_rows); w_grad->set_rows(real_rows);
// Build a map of id -> row_index to speed up finding the index of one id // Build a map of id -> row_index to speed up finding the index of one id
w_grad->set_height(w.dims()[0]); w_grad->set_height(w.dims()[0]);
......
...@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx, ...@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx,
auto& grad_tensor = grad_merge.value(); auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>(); const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows(); auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows); phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace()); const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) && if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
......
...@@ -70,3 +70,20 @@ cc_test( ...@@ -70,3 +70,20 @@ cc_test(
test_tensor_array test_tensor_array
SRCS test_tensor_array.cc SRCS test_tensor_array.cc
DEPS tensor_array) DEPS tensor_array)
if(WITH_GPU)
nv_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
DEPS mixed_vector place memory device_context tensor)
elseif(WITH_ROCM)
hip_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
DEPS mixed_vector place memory device_context tensor)
else()
cc_test(
test_mixed_vector
SRCS test_mixed_vector.cc
DEPS mixed_vector place memory device_context tensor)
endif()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/phi/core/mixed_vector.h"
#include "glog/logging.h" #include "glog/logging.h"
#include "gtest/gtest-message.h" #include "gtest/gtest-message.h"
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "gtest/gtest_pred_impl.h" #include "gtest/gtest_pred_impl.h"
template <typename T> template <typename T>
using vec = paddle::framework::Vector<T>; using vec = phi::Vector<T>;
TEST(mixed_vector, CPU_VECTOR) { TEST(mixed_vector, CPU_VECTOR) {
vec<int> tmp; vec<int> tmp;
...@@ -44,7 +44,7 @@ TEST(mixed_vector, CPU_VECTOR) { ...@@ -44,7 +44,7 @@ TEST(mixed_vector, CPU_VECTOR) {
} }
TEST(mixed_vector, InitWithCount) { TEST(mixed_vector, InitWithCount) {
paddle::framework::Vector<int> vec(10, 10); phi::Vector<int> vec(10, 10);
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
ASSERT_EQ(vec[i], 10); ASSERT_EQ(vec[i], 10);
} }
...@@ -58,7 +58,7 @@ TEST(mixed_vector, ForEach) { ...@@ -58,7 +58,7 @@ TEST(mixed_vector, ForEach) {
} }
TEST(mixed_vector, Reserve) { TEST(mixed_vector, Reserve) {
paddle::framework::Vector<int> vec; phi::Vector<int> vec;
vec.reserve(1); vec.reserve(1);
vec.push_back(0); vec.push_back(0);
vec.push_back(0); vec.push_back(0);
...@@ -66,7 +66,7 @@ TEST(mixed_vector, Reserve) { ...@@ -66,7 +66,7 @@ TEST(mixed_vector, Reserve) {
} }
TEST(mixed_vector, Resize) { TEST(mixed_vector, Resize) {
paddle::framework::Vector<int> vec; phi::Vector<int> vec;
vec.resize(1); vec.resize(1);
vec.push_back(0); vec.push_back(0);
vec.push_back(0); vec.push_back(0);
......
...@@ -23,13 +23,14 @@ ...@@ -23,13 +23,14 @@
#include "glog/logging.h" #include "glog/logging.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/phi/backends/all_context.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/mixed_vector.h"
template <typename T> template <typename T>
using vec = paddle::framework::MixVector<T>; using vec = phi::MixVector<T>;
using gpuStream_t = paddle::gpuStream_t; using gpuStream_t = phi::gpuStream_t;
static __global__ void multiply_10(int* ptr) { static __global__ void multiply_10(int* ptr) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
...@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) { ...@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) {
} }
} }
gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) { gpuStream_t GetCUDAStream(phi::GPUPlace place) {
return reinterpret_cast<const phi::GPUContext*>( return reinterpret_cast<const phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place)) phi::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
} }
...@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) { ...@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) {
} }
vec<int> tmp(&x); vec<int> tmp(&x);
ASSERT_EQ(tmp.size(), 10UL); ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu(0); phi::GPUPlace gpu(0);
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(multiply_10, hipLaunchKernelGGL(multiply_10,
...@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) { ...@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) {
} }
TEST(mixed_vector, MultiGPU) { TEST(mixed_vector, MultiGPU) {
if (paddle::platform::GetGPUDeviceCount() < 2) { if (phi::backends::gpu::GetGPUDeviceCount() < 2) {
LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple " LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple "
"GPUs in your machine."; "GPUs in your machine.";
return; return;
...@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) { ...@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) {
} }
vec<int> tmp(&x); vec<int> tmp(&x);
ASSERT_EQ(tmp.size(), 10UL); ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu0(0); phi::GPUPlace gpu0(0);
paddle::platform::SetDeviceId(0); phi::backends::gpu::SetDeviceId(0);
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(multiply_10, hipLaunchKernelGGL(multiply_10,
...@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) { ...@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) {
#else #else
multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0)); multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
#endif #endif
paddle::platform::CUDAPlace gpu1(1); phi::GPUPlace gpu1(1);
auto* gpu1_ptr = tmp.MutableData(gpu1); auto* gpu1_ptr = tmp.MutableData(gpu1);
paddle::platform::SetDeviceId(1); phi::backends::gpu::SetDeviceId(1);
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL( hipLaunchKernelGGL(
......
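A trimmed host-side sketch of the flow the ported GPU test follows, assuming a CUDA build and the headers added in this diff (RunOnGpu0 is an illustrative name, and the kernel launch is left as a comment since multiply_10 lives in the test file):

#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/mixed_vector.h"

// Mirrors the ported test: fill on the CPU, expose a device pointer for gpu:0,
// then hand that pointer to a kernel launched on the place's stream.
void RunOnGpu0() {
  phi::Vector<int> x;
  for (int i = 0; i < 10; ++i) x.push_back(i);

  phi::MixVector<int> tmp(&x);
  phi::GPUPlace gpu(0);
  int* device_ptr = tmp.MutableData(gpu);  // allocates/syncs the gpu:0 copy
  auto* ctx = reinterpret_cast<const phi::GPUContext*>(
      phi::DeviceContextPool::Instance().Get(gpu));
  // multiply_10<<<1, 1, 0, ctx->stream()>>>(device_ptr);  // as in the test above
  (void)device_ptr;
  (void)ctx;
}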
...@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ ...@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_mix_precision_all_reduce_fuse', 'test_mix_precision_all_reduce_fuse',
'test_spp_op', 'test_spp_op',
'test_op_converter', 'test_op_converter',
'mixed_vector_test', 'test_mixed_vector',
'test_roi_align_op', 'test_roi_align_op',
'test_pad_constant_like', 'test_pad_constant_like',
'test_mul_op', 'test_mul_op',
...@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [ ...@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [
'device_context_test', 'device_context_test',
'test_reference_count_pass_last_lived_ops', 'test_reference_count_pass_last_lived_ops',
'copy_same_tensor_test', 'copy_same_tensor_test',
'mixed_vector_test', 'test_mixed_vector',
'op_registry_test', 'op_registry_test',
'test_prepare_op', 'test_prepare_op',
'data_device_transform_test', 'data_device_transform_test',
......