Unverified commit 35d7d1f0 authored by Huang Jiyi, committed by GitHub

move mixed_vector (#50282)

Parent e92e3aab
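
In short, this change moves the mixed_vector utility out of paddle/fluid/framework and into the phi core library: call sites switch from paddle::framework::Vector / paddle::framework::MixVector to phi::Vector / phi::MixVector, and include paddle/phi/core/mixed_vector.h instead of paddle/fluid/framework/mixed_vector.h. A minimal before/after sketch of the rename, not part of the commit (the Example function is hypothetical; the types and the Data(place) accessor are taken from the headers changed below):

// Before (fluid):
//   #include "paddle/fluid/framework/mixed_vector.h"
//   paddle::framework::Vector<size_t> lod0({0, 2, 5});
//   paddle::framework::MixVector<size_t> mix(&lod0);
// After (phi):
#include "paddle/phi/core/mixed_vector.h"

void Example(const phi::Place &place) {  // hypothetical helper, not in this commit
  phi::Vector<size_t> lod0({0, 2, 5});   // phi::Vector<T> is an alias of std::vector<T>
  phi::MixVector<size_t> mix(&lod0);     // wraps the host vector for device access
  const size_t *ptr = mix.Data(place);   // CPU or CUDA pointer, depending on place
  (void)ptr;
}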
......@@ -1113,7 +1113,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
const auto &rank_ = process_group_->GetRank();
const auto &size_ = process_group_->GetSize();
framework::Vector<int64_t> rows_num_vector(size_);
phi::Vector<int64_t> rows_num_vector(size_);
rows_num_vector[rank_] = static_cast<int64_t>(src_rows.size());
Tensor rows_num_tensor = paddle::experimental::empty(
......@@ -1183,7 +1183,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
}
process_group_->AllGather(in, out)->Synchronize();
framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
auto *dst_rows_dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
.get();
......@@ -1262,7 +1262,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
Tensor dst_rows_tensor =
paddle::experimental::concat(rows_tensors, phi::Scalar(0));
framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
auto *dst_rows_dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
.get();
......
......@@ -236,7 +236,7 @@ void DeserializeLodTensor(framework::Variable* var,
framework::LoD lod;
for (int i = 0; i < msg.lod_level(); ++i) {
framework::Vector<size_t> v;
phi::Vector<size_t> v;
for (int j = 0; j < msg.lod(i).lod_data_size(); ++j) {
v.push_back(msg.lod(i).lod_data(j));
}
......
......@@ -39,7 +39,7 @@ void CreateVarsOnScope(framework::Scope* scope,
auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
tensor1->Resize(phi::make_ddim({512, 8, 4, 2}));
framework::LoD lod1;
lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
lod1.push_back(phi::Vector<size_t>({1, 3, 8}));
tensor1->set_lod(lod1);
tensor1->mutable_data<float>(*place);
phi::funcs::set_constant(ctx, tensor1, 31.9);
......@@ -49,7 +49,7 @@ void CreateVarsOnScope(framework::Scope* scope,
auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
tensor2->Resize(phi::make_ddim({1000, 64}));
framework::LoD lod2;
lod2.push_back(framework::Vector<size_t>({1, 1}));
lod2.push_back(phi::Vector<size_t>({1, 1}));
tensor2->set_lod(lod2);
tensor2->mutable_data<int>(*place);
phi::funcs::set_constant(ctx, tensor2, 100);
......@@ -98,7 +98,7 @@ void RunMultiVarMsg(platform::Place place) {
framework::Variable* var1 = scope_recv.FindVar("x1");
auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2}));
// EXPECT_EQ(tensor1->lod(), framework::Vector<size_t>({1, 3, 8}));
// EXPECT_EQ(tensor1->lod(), phi::Vector<size_t>({1, 3, 8}));
auto* tensor_data1 = const_cast<float*>(tensor1->data<float>());
int tensor_numel1 = 512 * 8 * 4 * 2;
for (int i = 0; i < tensor_numel1; ++i)
......@@ -108,7 +108,7 @@ void RunMultiVarMsg(platform::Place place) {
framework::Variable* var2 = scope_recv.FindVar("x2");
auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64}));
// EXPECT_EQ(tensor2->lod(), framework::Vector<size_t>({1, 1}));
// EXPECT_EQ(tensor2->lod(), phi::Vector<size_t>({1, 1}));
auto* tensor_data2 = const_cast<int*>(tensor2->data<int>());
int tensor_numel2 = 1000 * 64;
for (int i = 0; i < tensor_numel2; ++i) EXPECT_EQ(tensor_data2[i], 100);
......
......@@ -162,27 +162,7 @@ cc_test(
eigen_test
SRCS eigen_test.cc
DEPS tensor)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS device_context place memory)
if(WITH_GPU)
nv_test(
mixed_vector_test
SRCS mixed_vector_test.cc mixed_vector_test.cu
DEPS mixed_vector place memory device_context tensor)
elseif(WITH_ROCM)
hip_test(
mixed_vector_test
SRCS mixed_vector_test.cc mixed_vector_test.cu
DEPS mixed_vector place memory device_context tensor)
else()
cc_test(
mixed_vector_test
SRCS mixed_vector_test.cc
DEPS mixed_vector place memory device_context tensor)
endif()
cc_library(
lod_tensor
SRCS lod_tensor.cc
......
......@@ -2815,7 +2815,7 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
LoD& lod = (*feed->mutable_lod());
lod.resize(1);
lod[0].resize(offset_cols_size);
paddle::framework::MixVector<size_t> mixv_lod(&lod[0]);
phi::MixVector<size_t> mixv_lod(&lod[0]);
memcpy(mixv_lod.MutableData(platform::CPUPlace()),
off_start_ptr,
offset_cols_size * sizeof(size_t));
......
......@@ -18,6 +18,7 @@
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h"
TEST(DataType, float16) {
using paddle::platform::CPUPlace;
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
......
......@@ -18,6 +18,7 @@
#include <gtest/gtest.h>
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
......
......@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/eigen.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/ddim.h"
namespace paddle {
......
......@@ -158,7 +158,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
LoD lod;
for (int i = 0; i < req_var.lod_level(); ++i) {
framework::Vector<size_t> v;
phi::Vector<size_t> v;
for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
v.push_back(req_var.lod(i).lod_data(j));
}
......@@ -203,7 +203,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
LoD lod;
for (int i = 0; i < req_var.lod_level(); ++i) {
framework::Vector<size_t> v;
phi::Vector<size_t> v;
for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
v.push_back(req_var.lod(i).lod_data(j));
}
......
......@@ -21,12 +21,12 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace framework {
......@@ -54,7 +54,7 @@ void MergeLoDTensor(phi::DenseTensor* target,
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<Vector<size_t>>;
using LoD = std::vector<phi::Vector<size_t>>;
std::string LoDToString(const LoD& lod);
......
......@@ -31,7 +31,7 @@ TEST(LoD, data) {
lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
auto& v = lod[0];
paddle::framework::MixVector<size_t> mix_vector_v(&v);
phi::MixVector<size_t> mix_vector_v(&v);
paddle::platform::CUDAPlace gpu(0);
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(test,
......@@ -69,7 +69,7 @@ TEST(DenseTensor, LoDInGPU) {
EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
auto lod = lod_tensor.lod();
paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));
phi::MixVector<size_t> mix_vector(&(lod[0]));
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(test,
......
......@@ -15,15 +15,15 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
namespace paddle {
namespace framework {
using LoD = std::vector<paddle::framework::Vector<size_t>>;
using LoD = std::vector<phi::Vector<size_t>>;
} // namespace framework
} // namespace paddle
......@@ -104,10 +104,10 @@ static void AllReduce(const phi::SelectedRows &src,
// 1. Gather rows number from all workers. Here use ncclAllGather to do this,
but we can use other ways to implement it in the future
const auto &src_rows = src.rows();
framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
phi::Vector<int64_t> rows_num_vector(strategy.nranks_);
rows_num_vector[strategy.local_rank_] = static_cast<int64_t>(src_rows.size());
// CUDAMutableData use CalStream
paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
phi::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
VLOG(4) << "start dev_ctx->wait";
if (!use_calc_stream) {
......@@ -138,9 +138,9 @@ static void AllReduce(const phi::SelectedRows &src,
auto *dst_rows = dst->mutable_rows();
dst_rows->resize(rows_num);
paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
phi::MixVector<int64_t> mixv_src_rows(&src_rows);
const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);
auto *dst_tensor = dst->mutable_value();
......
......@@ -158,9 +158,9 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
<< ", height: " << src.height();
auto *dst_rows = dst->mutable_rows();
dst_rows->resize(rows_num);
paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
phi::MixVector<int64_t> mixv_src_rows(&src_rows);
const int64_t *src_rows_ptr = mixv_src_rows.Data(place);
auto *dst_tensor = dst->mutable_value();
......
......@@ -98,7 +98,7 @@ TEST(AssignOp, AssignSelectedRows) {
assign_functor(input);
auto& out_selected_row = output.Get<phi::SelectedRows>();
const paddle::framework::Vector<int64_t>& out_rows = out_selected_row.rows();
const phi::Vector<int64_t>& out_rows = out_selected_row.rows();
EXPECT_EQ(rows.size(), out_rows.size());
for (size_t i = 0; i < rows.size(); ++i) {
EXPECT_EQ(rows[i], out_rows[i]);
......
......@@ -129,7 +129,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
// merge elements and delete blank
T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());
paddle::framework::MixVector<size_t> mixv_input_lod(&input_lod[level]);
phi::MixVector<size_t> mixv_input_lod(&input_lod[level]);
MergeAndDelCudaKernel<T>
<<<1, 1, 0, stream>>>(num_tokens,
tokens,
......
......@@ -166,7 +166,7 @@ class CVMGradCUDAKernel : public framework::OpKernel<T> {
lod[lod.size() - 1],
platform::errors::PreconditionNotMet(
"Output(X@GRAD)'s dim[0] must be equal to last element of lod"));
paddle::framework::MixVector<size_t> mixv_lod(&lod);
phi::MixVector<size_t> mixv_lod(&lod);
CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) /
PADDLE_CUDA_NUM_THREADS,
PADDLE_CUDA_NUM_THREADS,
......
......@@ -59,7 +59,7 @@ class GPUBoxClipKernel : public framework::OpKernel<T> {
auto stream = dev_ctx.stream();
const size_t batch_size = lod.back().size() - 1;
T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
phi::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
input->data<T>(),
mix_vector.CUDAMutableData(dev_ctx.GetPlace()),
......
......@@ -19,7 +19,6 @@ namespace cub = hipcub;
#include <paddle/fluid/memory/allocation/allocator.h>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
......@@ -28,6 +27,7 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/gather.cu.h"
namespace paddle {
......
......@@ -18,10 +18,10 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/detection/bbox_util.cu.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/gather.cu.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......
......@@ -121,7 +121,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
auto x_lod = x->lod().back();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
phi::MixVector<size_t> mixv_x_lod(&x_lod);
size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace());
#else
size_t* x_lod_data = x_lod.data();
......@@ -155,7 +155,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
const int* neg_idx_data = neg_indices->data<int>();
auto neg_lod = neg_indices->lod().back();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle::framework::MixVector<size_t> mixv_neg_lod(&neg_lod);
phi::MixVector<size_t> mixv_neg_lod(&neg_lod);
size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace());
#else
size_t* neg_lod_data = neg_lod.data();
......
......@@ -30,11 +30,11 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/core/mixed_vector.h"
#if defined(PADDLE_WITH_CUDA)
namespace cg = cooperative_groups;
......@@ -46,7 +46,7 @@ namespace operators {
using SelectedRows = phi::SelectedRows;
template <typename T>
using Vector = framework::Vector<T>;
using Vector = phi::Vector<T>;
#define WARP_SIZE 32
#define MAX_WARP_NUM 32
......@@ -376,7 +376,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
}
const size_t x2_lods_size = x2_lods.size() - 1;
paddle::framework::MixVector<size_t> mixv_x2_lods(&x2_lods);
phi::MixVector<size_t> mixv_x2_lods(&x2_lods);
size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place);
......@@ -401,7 +401,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
}
}
paddle::framework::MixVector<size_t> mixv_x1_lods(&x1_lods);
phi::MixVector<size_t> mixv_x1_lods(&x1_lods);
size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place);
auto* x1_data = x1->data<T>();
......@@ -433,12 +433,12 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
Vector<size_t> out_lods(x2_lods_size + 1, 0);
Vector<size_t> map_lods(x2_lods_size + 1, 0);
paddle::framework::MixVector<size_t> mixv_out_lods(&out_lods);
paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
phi::MixVector<size_t> mixv_out_lods(&out_lods);
phi::MixVector<size_t> mixv_map_lods(&map_lods);
// thrust::device_vector<size_t> out_idx(1);
Vector<size_t> out_idx(1, 0);
paddle::framework::MixVector<size_t> mixv_out_idx(&out_idx);
phi::MixVector<size_t> mixv_out_idx(&out_idx);
size_t* out_idx_data = mixv_out_idx.CUDAMutableData(gpu_place);
size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place);
......@@ -500,7 +500,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
} else {
Vector<size_t> map_lods(2, 0);
paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
phi::MixVector<size_t> mixv_map_lods(&map_lods);
thrust::device_ptr<int64_t> map_data_ptr(map_data);
map_data_ptr[0] = 0;
......
......@@ -23,16 +23,16 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
using SelectedRows = phi::SelectedRows;
template <typename T>
using Vector = framework::Vector<T>;
using Vector = phi::Vector<T>;
template <typename T>
class FilterByInstagKernel : public framework::OpKernel<T> {
......
......@@ -256,7 +256,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
auto lod = ids->lod()[0];
int64_t out_width = d_output->dims()[1];
framework::Vector<int64_t> *new_rows = d_table->mutable_rows();
phi::Vector<int64_t> *new_rows = d_table->mutable_rows();
new_rows->resize(ids_num);
std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t));
......
......@@ -14,16 +14,16 @@
#include <string>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
template <typename T>
using Vector = framework::Vector<T>;
using Vector = phi::Vector<T>;
#define CUDA_KERNEL_LOOP(i, n) \
for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
......@@ -441,7 +441,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
int embedding_size = inputs[0]->numel() / inputs[0]->dims()[0];
int batch_size = -1;
std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
for (size_t i = 0; i < slot_size; ++i) {
const auto *input = inputs[i];
......@@ -480,7 +480,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
}
output_data[i] = reinterpret_cast<T *>(
dev_ctx.Alloc<T>(output, output->numel() * sizeof(T)));
mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
seqpool_outputs[i].Resize({batch_size, embedding_size});
seqpool_output_data[i] = reinterpret_cast<T *>(dev_ctx.Alloc<T>(
......@@ -527,7 +527,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
int embedding_size = in_grads[0]->numel() / in_grads[0]->dims()[0];
int batch_size = -1;
std::vector<paddle::framework::MixVector<size_t> *> mix_lods_v(slot_size);
std::vector<phi::MixVector<size_t> *> mix_lods_v(slot_size);
for (size_t i = 0; i < slot_size; ++i) {
auto *in_grad = in_grads[i];
......@@ -563,7 +563,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
in_grads_data[i] = reinterpret_cast<T *>(
dev_ctx.Alloc<T>(in_grad, in_grad->numel() * sizeof(T)));
mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
cvm_data[i] = reinterpret_cast<const T *>(cvm->data<T>());
}
......
......@@ -17,11 +17,11 @@ limitations under the License. */
#include <memory>
#include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/fused/multi_gru_op.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
......@@ -678,7 +678,7 @@ class MultiGRUHandler {
const std::vector<const phi::DenseTensor*> biases_;
phi::DenseTensor* hidden_;
std::vector<dnnl::primitive_attr> attrs_;
const paddle::framework::Vector<size_t>& x_lod_;
const phi::Vector<size_t>& x_lod_;
};
template <typename T>
......
......@@ -372,7 +372,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
phi::DenseTensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
......
......@@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel<T> {
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
phi::DenseTensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
......
......@@ -28,7 +28,7 @@ namespace operators {
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
const phi::DenseTensor& src,
framework::Vector<size_t> index_lod,
phi::Vector<size_t> index_lod,
phi::DenseTensor* dst,
bool indexed_src) {
phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
......@@ -79,7 +79,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
phi::DenseTensor ordered_h0, ordered_h0_grad;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
ReorderInitState<DeviceContext, T>(
......
......@@ -169,12 +169,12 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto stream = dev_ctx.stream();
// copy GPU memory to CPU pinned memory
framework::Vector<int64_t> new_rows;
phi::Vector<int64_t> new_rows;
new_rows.resize(ids_num);
auto gpu_place = context.GetPlace();
// TODO(yuyang18): Strange code here.
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
phi::MixVector<int64_t> mixv_new_rows(&new_rows);
memory::Copy(gpu_place,
mixv_new_rows.CUDAMutableData(context.GetPlace()),
gpu_place,
......
......@@ -159,11 +159,11 @@ struct LookupTableV2GradCUDAFunctor {
dim3 threads(128, 8);
dim3 grids(8, 1);
auto stream = dev_ctx.stream();
framework::Vector<int64_t> new_rows;
phi::Vector<int64_t> new_rows;
new_rows.resize(ids_num);
auto gpu_place = context_.GetPlace();
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
phi::MixVector<int64_t> mixv_new_rows(&new_rows);
if (!std::is_same<IdT, int64_t>::value) {
InputTypeConvert<<<grids, threads, 0, stream>>>(
ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
......
......@@ -27,7 +27,7 @@ namespace operators {
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
const phi::DenseTensor& src,
framework::Vector<size_t> index_lod,
phi::Vector<size_t> index_lod,
phi::DenseTensor* dst,
bool indexed_src) {
phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
......@@ -95,7 +95,7 @@ class LSTMKernel : public framework::OpKernel<T> {
lstm_value.prev_state_value = nullptr;
phi::DenseTensor ordered_c0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (cell_t0) {
// Since the batch computing for LSTM reorders the input sequence
......@@ -236,7 +236,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
// ordered_h0_g/c0_g is the reordered gradient of hidden/cell
// initialization.
phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (c0) {
ReorderInitState<DeviceContext, T>(
......
......@@ -70,7 +70,7 @@ class _ClipGradFunctor {
template <typename DeviceContext, typename T>
inline void ReorderInitState(const DeviceContext& ctx,
const phi::DenseTensor& src,
framework::Vector<size_t> index,
phi::Vector<size_t> index,
phi::DenseTensor* dst,
bool indexed_src) {
phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
......@@ -158,7 +158,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
phi::DenseTensor ordered_c0;
phi::DenseTensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (cell_t0) {
// Since the batch computing for LSTMP reorders the input sequence
......@@ -350,7 +350,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
// initialization.
phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
framework::Vector<size_t> order(batch_gate->lod()[2]);
phi::Vector<size_t> order(batch_gate->lod()[2]);
if (c0) {
ReorderInitState<DeviceContext, T>(
......
......@@ -446,8 +446,8 @@ class BeamSearchFunctor<phi::GPUContext, T> {
framework::LoD selected_lod(2);
selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end());
selected_lod[1].resize(scores->dims()[0] + 1);
paddle::framework::MixVector<size_t> mix_vector(&selected_lod[1]);
paddle::framework::MixVector<size_t> mixv_abs(&abs_lod[level]);
phi::MixVector<size_t> mix_vector(&selected_lod[1]);
phi::MixVector<size_t> mixv_abs(&abs_lod[level]);
size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace());
if (num_seqs == 1) {
......
......@@ -28,7 +28,7 @@ namespace math {
template <typename T>
void CopyValidData(phi::DenseTensor* dst_tensor,
const phi::DenseTensor* src_tensor,
const framework::Vector<size_t>& seq_offsets,
const phi::Vector<size_t>& seq_offsets,
int pad_seq_len,
int step_width,
bool norm_by_len,
......
......@@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor<phi::GPUContext, T> {
T* pad_data = pad_tensor->data<T>();
const T* pad_value_data = pad_value.data<T>();
paddle::framework::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
phi::MixVector<size_t> mix_vector_seq_offsets(&seq_offsets);
SequencePaddingKernel<T, kSeqToPad><<<grid, threads, 0, context.stream()>>>(
pad_data,
seq_data,
......@@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor<phi::GPUContext, T> {
const T* pad_data = pad_tensor.data<T>();
T* seq_data = seq_tensor->data<T>();
paddle::framework::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
phi::MixVector<size_t> mixv_seq_offsets(&seq_offsets);
SequencePaddingKernel<T, kPadToSeq><<<grid, threads, 0, context.stream()>>>(
seq_data,
pad_data,
......
......@@ -29,7 +29,7 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
enum CopyType { kSeqToPad, kPadToSeq };
inline static size_t MaximumSequenceLength(
const framework::Vector<size_t>& seq_offset) {
const phi::Vector<size_t>& seq_offset) {
size_t seq_num = seq_offset.size() - 1;
size_t max_seq_len = 0;
for (size_t i = 0; i < seq_num; ++i) {
......@@ -39,7 +39,7 @@ inline static size_t MaximumSequenceLength(
}
inline static size_t TotalSequenceLength(
const framework::Vector<size_t>& seq_offset) {
const phi::Vector<size_t>& seq_offset) {
size_t seq_num = seq_offset.size() - 1;
size_t total_seq_len = 0;
for (size_t i = 0; i < seq_num; ++i) {
......@@ -50,7 +50,7 @@ inline static size_t TotalSequenceLength(
inline static void CheckDims(const framework::DDim& seq_tensor_dims,
const framework::DDim& pad_tensor_dims,
const framework::Vector<size_t>& seq_offset,
const phi::Vector<size_t>& seq_offset,
int64_t padded_seq_len,
int64_t step_width,
const PadLayout& layout) {
......
......@@ -203,7 +203,7 @@ class SequencePoolFunctor<phi::GPUContext, T> {
const size_t item_dim = output->numel() / output->dims()[0];
dim3 threads(1024, 1);
dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
paddle::framework::MixVector<size_t> mix_vector(&lod);
phi::MixVector<size_t> mix_vector(&lod);
if (pooltype == "MAX") {
sequence_pool_kernel<T, MaxPoolFunctor<T>>
<<<grid, threads, 0, context.stream()>>>(
......@@ -421,7 +421,7 @@ class SequencePoolGradFunctor<phi::GPUContext, T> {
const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
dim3 threads(1024, 1);
dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
paddle::framework::MixVector<size_t> mix_vector(&lod);
phi::MixVector<size_t> mix_vector(&lod);
if (pooltype == "MAX") {
sequence_pool_grad_kernel<T, MaxPoolGradFunctor<T>>
<<<grid, threads, 0, context.stream()>>>(
......
......@@ -197,7 +197,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
ctx.template device_context<DeviceContext>(), *grad, merged_grad);
auto* merged_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_merged_rows(merged_rows);
phi::MixVector<int64_t> mixv_merged_rows(merged_rows);
const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace());
auto row_numel = static_cast<int64_t>(merged_grad->value().dims()[1]);
auto row_height = static_cast<int64_t>(merged_grad->rows().size());
......
......@@ -164,7 +164,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
int thread_x = kThreadsPerBlock;
int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount();
int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
phi::MixVector<int64_t> mixv_in_rows(&in_rows);
SparseSGDFunctorKernel<<<max_blocks,
thread_x,
0,
......
......@@ -153,7 +153,7 @@ class RowConvKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
} else {
batch_size = x->lod()[0].size() - 1;
}
framework::Vector<size_t> batch_indices(batch_size + 1);
phi::Vector<size_t> batch_indices(batch_size + 1);
int input_dim = 0;
int timesteps = 0;
if (is_tensor) {
......@@ -231,7 +231,7 @@ class RowConvGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
} else {
batch_size = x->lod()[0].size() - 1;
}
framework::Vector<size_t> batch_indices(batch_size + 1);
phi::Vector<size_t> batch_indices(batch_size + 1);
int timesteps = 0;
int input_dim = 0;
if (is_tensor) {
......
......@@ -338,7 +338,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
batch_size = X->lod()[0].size() - 1;
}
int input_dim = 0;
framework::Vector<size_t> batch_indices(batch_size + 1);
phi::Vector<size_t> batch_indices(batch_size + 1);
int timesteps = X->dims()[1];
if (is_tensor) {
for (int i = 0; i < batch_size + 1; i++) {
......@@ -352,7 +352,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
int num_sequence = batch_indices.size() - 1;
int future_context = Filter->dims()[0];
paddle::framework::MixVector<size_t> mix_vector(&batch_indices);
phi::MixVector<size_t> mix_vector(&batch_indices);
size_t *idx = mix_vector.CUDAMutableData(context.GetPlace());
auto stream = context.cuda_device_context().stream();
......@@ -397,7 +397,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
}
int input_dim = 0;
framework::Vector<size_t> batch_indices(batch_size + 1);
phi::Vector<size_t> batch_indices(batch_size + 1);
int timesteps = X->dims()[1];
if (is_tensor) {
for (int i = 0; i < batch_size + 1; i++) {
......@@ -411,7 +411,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
// int input_dim = X->dims()[1];
int num_sequence = batch_indices.size() - 1;
int future_context = Filter->dims()[0];
paddle::framework::MixVector<size_t> mixv_batch_indices(&batch_indices);
phi::MixVector<size_t> mixv_batch_indices(&batch_indices);
size_t *idx = mixv_batch_indices.CUDAMutableData(context.GetPlace());
auto &device_ctx = context.cuda_device_context();
......
......@@ -76,7 +76,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace());
// Copy LoD to GPU
paddle::framework::MixVector<size_t> mixv_lod0(&lod0);
phi::MixVector<size_t> mixv_lod0(&lod0);
const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace());
// Calc output tensor
CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
......
......@@ -97,7 +97,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
// Copy LoD to GPU
auto last_lod = lod[lod.size() - 1];
auto lod_len = last_lod.size();
paddle::framework::MixVector<size_t> mixv_last_lod(&last_lod);
phi::MixVector<size_t> mixv_last_lod(&last_lod);
const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace());
// Calc output LoD
thrust::device_vector<size_t> dev_out_lod(lod_len);
......
......@@ -65,10 +65,9 @@ static __global__ void sequence_expand_as_grad_kernel(
template <typename T>
struct SequenceExpandAsFunctor<phi::GPUContext, T> {
void operator()(
const phi::GPUContext &context,
void operator()(const phi::GPUContext &context,
const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out) {
int height = x.dims()[0];
int width = phi::product(x.dims()) / height;
......@@ -84,7 +83,7 @@ struct SequenceExpandAsFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x);
dim3 grid_size(block_x);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(),
mixv_ref_lod.CUDAData(context.GetPlace()),
......@@ -98,7 +97,7 @@ template <typename T>
struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext &context,
const phi::DenseTensor &dout,
const framework::Vector<size_t> &ref_lod, /*expand based lod*/
const phi::Vector<size_t> &ref_lod, /*expand based lod*/
phi::DenseTensor *dx) {
int height = dx->dims()[0];
int width = phi::product(dx->dims()) / height;
......@@ -114,7 +113,7 @@ struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x);
dim3 grid_size(block_x);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_expand_as_grad_kernel<<<grid_size,
block_size,
0,
......
......@@ -26,28 +26,25 @@ namespace operators {
template <typename DeviceContext, typename T>
struct SequenceExpandAsFunctor {
void operator()(
const DeviceContext &ctx,
void operator()(const DeviceContext &ctx,
const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out);
};
template <typename DeviceContext, typename T>
struct SequenceExpandAsGradFunctor {
void operator()(
const DeviceContext &ctx,
void operator()(const DeviceContext &ctx,
const phi::DenseTensor &dout,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *dx);
};
template <typename T>
struct SequenceExpandAsFunctor<phi::CPUContext, T> {
void operator()(
const phi::CPUContext &context,
void operator()(const phi::CPUContext &context,
const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out) {
int64_t height = x.dims()[0];
int64_t width = phi::product(x.dims()) / height;
......@@ -122,10 +119,9 @@ class SequenceExpandAsKernel : public framework::OpKernel<T> {
* */
template <typename T>
struct SequenceExpandAsGradFunctor<phi::CPUContext, T> {
void operator()(
const phi::CPUContext &context,
void operator()(const phi::CPUContext &context,
const phi::DenseTensor &dout,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *dx) {
int64_t height = dx->dims()[0];
int64_t width = phi::product(dx->dims()) / height;
......
......@@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data,
}
}
void GetOutputOffset(const framework::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod,
framework::Vector<size_t>* out_offset) {
void GetOutputOffset(const phi::Vector<size_t>& x_lod,
const phi::Vector<size_t>& ref_lod,
phi::Vector<size_t>* out_offset) {
size_t offset = 0;
int lod_size = static_cast<int>(x_lod.size());
for (int i = 0; i < static_cast<int>(x_lod.size()); ++i) {
......@@ -99,8 +99,8 @@ template <typename T>
static int ExpandByMemoryCopy(const phi::GPUContext& context,
const LoDTensor& x,
LoDTensor* out,
const framework::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod,
const phi::Vector<size_t>& x_lod,
const phi::Vector<size_t>& ref_lod,
bool do_copy) {
auto out_data = out->data<T>();
auto x_data = x.data<T>();
......@@ -143,11 +143,10 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context,
template <typename T>
struct SequenceExpandFunctor<phi::GPUContext, T> {
void operator()(
const phi::GPUContext& context,
void operator()(const phi::GPUContext& context,
const LoDTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out) {
int num_copys =
ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
......@@ -157,7 +156,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
} else {
int x_item_length = x.numel() / x.dims()[0];
size_t x_lod_size = x_lod.size();
framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
phi::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset);
for (size_t i = 0; i < x_lod_size; ++i) {
......@@ -167,7 +166,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
out_offset[2 * x_lod_size + i] = ref_lod[i];
}
paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
phi::MixVector<size_t> mixv_out_offset(&out_offset);
const size_t* out_offset_data =
mixv_out_offset.CUDAData(context.GetPlace());
const size_t* x_lod_data = out_offset_data + x_lod_size;
......@@ -197,11 +196,11 @@ template <typename T>
struct SequenceExpandGradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& context,
const LoDTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand based lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand based lod*/
LoDTensor* dx) {
int x_item_length = phi::product(dx->dims()) / dx->dims()[0];
framework::Vector<size_t> out_offset(x_lod.size());
phi::Vector<size_t> out_offset(x_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset);
int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
......@@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor<phi::GPUContext, T> {
int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
phi::MixVector<size_t> mixv_x_lod(&x_lod);
phi::MixVector<size_t> mixv_out_offset(&out_offset);
sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(),
mixv_ref_lod.CUDAData(context.GetPlace()),
......
......@@ -29,31 +29,28 @@ using EigenMatrix = phi::EigenMatrix<T, MajorType, IndexType>;
template <typename DeviceContext, typename T>
struct SequenceExpandFunctor {
void operator()(
const DeviceContext& ctx,
void operator()(const DeviceContext& ctx,
const phi::DenseTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* out);
};
template <typename DeviceContext, typename T>
struct SequenceExpandGradFunctor {
void operator()(
const DeviceContext& ctx,
void operator()(const DeviceContext& ctx,
const phi::DenseTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* dx);
};
template <typename T>
struct SequenceExpandFunctor<phi::CPUContext, T> {
void operator()(
const phi::CPUContext& context,
void operator()(const phi::CPUContext& context,
const phi::DenseTensor& x,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* out) {
int out_offset = 0;
int x_item_length = x.numel() / x.dims()[0];
......@@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
}
// x lod level is at most 1.
framework::Vector<size_t> out_lod;
phi::Vector<size_t> out_lod;
if (x_lod.size() == 1) {
out_lod.push_back(0);
int out_offset = 0;
......@@ -130,7 +127,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
auto& ref_lod = *out->mutable_lod();
ref_lod[0] = out_lod;
}
framework::Vector<size_t> ref_x_lod;
phi::Vector<size_t> ref_x_lod;
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
......@@ -161,11 +158,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
* */
template <typename T>
struct SequenceExpandGradFunctor<phi::CPUContext, T> {
void operator()(
const phi::CPUContext& context,
void operator()(const phi::CPUContext& context,
const phi::DenseTensor& dout,
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
const phi::Vector<size_t>& x_lod, /*expand source lod*/
const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
phi::DenseTensor* dx) {
int dout_offset = 0;
for (size_t i = 1; i < ref_lod.size(); ++i) {
......@@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
return;
}
framework::Vector<size_t> ref_x_lod;
framework::Vector<size_t> ref_lod = y_lod[ref_level];
phi::Vector<size_t> ref_x_lod;
phi::Vector<size_t> ref_lod = y_lod[ref_level];
if (x->lod().size() == 1) {
ref_x_lod = x->lod()[0];
} else {
......
......@@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto xlod = x.lod()[0];
paddle::framework::MixVector<size_t> mixv_xlod(&xlod);
phi::MixVector<size_t> mixv_xlod(&xlod);
lod = mixv_xlod.CUDAData(ctx.GetPlace());
} else {
#endif
......
......@@ -124,7 +124,7 @@ template <typename T>
struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext &context,
const LoDTensor &x,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/
const phi::Vector<size_t> &ref_lod, /*referenced lod*/
LoDTensor *out) {
int height = ref_lod.size() - 1;
......@@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x);
dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_kernel<T, kThreadsPerBlock>
<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(),
......@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext &context,
const LoDTensor &dout,
const LoDTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/
const phi::Vector<size_t> &ref_lod, /*referenced lod*/
LoDTensor *dx) {
size_t height = ref_lod.size() - 1;
......@@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
dim3 block_size(thread_x);
dim3 grid_size(max_blocks);
paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
sequence_softmax_grad_kernel<T, kThreadsPerBlock>
<<<grid_size, block_size, 0, context.stream()>>>(
dout.data<T>(),
......
......@@ -21,10 +21,9 @@ namespace operators {
template <typename DeviceContext, typename T>
struct SequenceSoftmaxFunctor {
void operator()(
const DeviceContext &ctx,
void operator()(const DeviceContext &ctx,
const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
const phi::Vector<size_t> &ref_lod, /*expand referenced lod*/
phi::DenseTensor *out);
};
......@@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor {
void operator()(const DeviceContext &ctx,
const phi::DenseTensor &dout,
const phi::DenseTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/
const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *dx);
};
......@@ -41,7 +40,7 @@ template <typename T>
struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const phi::DenseTensor &x,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/
const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *out) {
size_t height = ref_lod.size() - 1;
const T *in_data = x.data<T>();
......@@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const phi::DenseTensor &dout,
const phi::DenseTensor &out,
const framework::Vector<size_t> &ref_lod, /*referenced lod*/
const phi::Vector<size_t> &ref_lod, /*referenced lod*/
phi::DenseTensor *dx) {
size_t height = ref_lod.size() - 1;
......
......@@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
auto pos_data = pos->mutable_data<int>(context.GetPlace());
int offset = 0;
framework::Vector<size_t> vec_out_lod;
phi::Vector<size_t> vec_out_lod;
vec_out_lod.reserve(batch_size + 1);
for (int i = 0; i <= batch_size; ++i) {
offset = row_lod[i];
......
......@@ -25,16 +25,16 @@
#include "glog/logging.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/timer.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
template <typename T>
using Vector = framework::Vector<T>;
using Vector = phi::Vector<T>;
template <typename T>
class ShuffleBatchKernel : public framework::OpKernel<T> {
......
......@@ -22,8 +22,8 @@
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
......
......@@ -22,9 +22,9 @@
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/phi/core/mixed_vector.h"
namespace paddle {
namespace operators {
......
......@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
#if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
self.set_rows(rows);
#else
Vector<int64_t> new_rows(rows);
std::vector<int64_t> new_rows(rows);
self.set_rows(new_rows);
#endif
})
......
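
Aside for readers of the binding change above: in the new header, phi::Vector<T> is declared as a plain alias of std::vector<T> (see the using declaration in paddle/phi/core/mixed_vector.h later in this diff), which is why Vector<int64_t> can simply become std::vector<int64_t> here. A hedged compile-time check of that assumption:

#include <type_traits>
#include <vector>
#include "paddle/phi/core/mixed_vector.h"

// Sketch only, not part of the commit: confirms the alias introduced by this move.
static_assert(std::is_same<phi::Vector<int64_t>, std::vector<int64_t>>::value,
              "phi::Vector<T> is an alias of std::vector<T>");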
......@@ -114,6 +114,11 @@ cc_library(
SRCS custom_kernel.cc
DEPS kernel_factory)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS device_context place memory)
# Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/core/mixed_vector.h"
#include <algorithm>
#include <initializer_list>
......@@ -22,28 +22,26 @@ limitations under the License. */
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace framework {
namespace phi {
template <typename T>
void CopyToCPUHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_,
phi::Allocator::AllocationPtr *gpu_,
size_t *gpu_memory_size_) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// COPY GPU Data To CPU
auto *dev_ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
phi::DeviceContextPool::Instance().Get((*gpu_)->place()));
auto stream = dev_ctx->stream();
void *src = (*gpu_)->ptr();
void *dst = cpu_->data();
paddle::memory::Copy(platform::CPUPlace(),
paddle::memory::Copy(phi::CPUPlace(),
dst,
OptionalCUDAPlace(*gpu_).get(),
src,
......@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_,
template <typename T>
void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
paddle::memory::AllocationPtr *gpu_,
phi::Allocator::AllocationPtr *gpu_,
size_t *gpu_memory_size_,
const platform::Place &place) {
const phi::Place &place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void *src = cpu_->data();
*gpu_memory_size_ = cpu_->size() * sizeof(T); // sizeof(T)
(*gpu_) = memory::Alloc(place, *gpu_memory_size_);
(*gpu_) = paddle::memory::Alloc(place, *gpu_memory_size_);
void *dst = (*gpu_)->ptr();
auto *dev_ctx = static_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(place));
phi::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(),
dst,
platform::CPUPlace(),
phi::CPUPlace(),
src,
*gpu_memory_size_,
stream);
......@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
\
template <> \
void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA( \
const platform::Place &place) const { \
const phi::Place &place) const { \
CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
}
......@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t)
INSTANTIATE_VECTOR_FOR_TYPE(int)
INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
}; // namespace framework
} // namespace paddle
}; // namespace phi
......@@ -22,20 +22,22 @@ limitations under the License. */
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/utils/none.h"
#include "paddle/utils/optional.h"
namespace paddle {
namespace framework {
namespace phi {
template <class T>
using Vector = std::vector<T>;
inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
const paddle::memory::allocation::AllocationPtr &gpu_) {
inline paddle::optional<phi::GPUPlace> OptionalCUDAPlace(
const phi::Allocator::AllocationPtr &gpu_) {
return gpu_ == nullptr ? paddle::none
: paddle::optional<platform::CUDAPlace>(gpu_->place());
: paddle::optional<phi::GPUPlace>(gpu_->place());
}
// Vector<T> implements the std::vector interface, and can get Data or
......@@ -146,18 +148,18 @@ class MixVector {
}
// get cuda ptr. immutable
const T *CUDAData(platform::Place place) const {
const T *CUDAData(phi::Place place) const {
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(place),
place.GetType() == phi::AllocationType::GPU,
true,
platform::errors::Unavailable(
phi::errors::Unavailable(
"Place mismatch, CUDA Data must be on CUDA place."));
ImmutableCUDA(place);
return reinterpret_cast<T *>(gpu_->ptr());
}
// get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) {
T *CUDAMutableData(phi::Place place) {
const T *ptr = CUDAData(place);
flag_ = kDirty | kDataInCUDA;
return const_cast<T *>(ptr);
......@@ -178,7 +180,7 @@ class MixVector {
std::mutex &Mutex() const { return mtx_; }
paddle::optional<platform::CUDAPlace> CUDAPlace() const {
paddle::optional<phi::GPUPlace> CUDAPlace() const {
return OptionalCUDAPlace(gpu_);
}
......@@ -199,7 +201,7 @@ class MixVector {
void CopyToCPU() const;
void ImmutableCUDA(platform::Place place) const {
void ImmutableCUDA(phi::Place place) const {
if (IsDirty()) {
if (IsInCPU()) {
CopyCPUDataToCUDA(place);
......@@ -207,7 +209,7 @@ class MixVector {
SetFlag(kDataInCUDA);
} else if (IsInCUDA() && !(place == gpu_->place())) {
PADDLE_THROW(
platform::errors::Unavailable("Unexpected data place mismatch."));
phi::errors::Unavailable("Unexpected data place mismatch."));
// Still dirty
} else {
// Dirty && DataInCUDA && Device is same
......@@ -220,7 +222,7 @@ class MixVector {
SetFlag(kDataInCUDA);
} else if (!(place == gpu_->place())) {
PADDLE_THROW(
platform::errors::Unavailable("Unexpected data place mismatch."));
phi::errors::Unavailable("Unexpected data place mismatch."));
} else {
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
......@@ -228,7 +230,7 @@ class MixVector {
}
}
void CopyCPUDataToCUDA(const platform::Place &place) const;
void CopyCPUDataToCUDA(const phi::Place &place) const;
void ImmutableCPU() const {
if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or
......@@ -249,7 +251,7 @@ class MixVector {
bool IsInCPU() const { return flag_ & kDataInCPU; }
std::vector<T> *cpu_;
mutable paddle::memory::allocation::AllocationPtr gpu_;
mutable phi::Allocator::AllocationPtr gpu_;
mutable size_t gpu_memory_size_{0};
mutable int flag_;
......@@ -332,9 +334,9 @@ class MixVector {
}
// get cuda ptr. immutable
const T *CUDAData(platform::Place place) const {
const T *CUDAData(phi::Place place) const {
{
platform::CUDAPlace p(place.GetDeviceId());
phi::GPUPlace p(place.GetDeviceId());
auto &mtx = m_->Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_->CUDAPlace();
......@@ -348,9 +350,9 @@ class MixVector {
}
// get cuda ptr. mutable
T *CUDAMutableData(platform::Place place) {
T *CUDAMutableData(phi::Place place) {
{
platform::CUDAPlace p(place.GetDeviceId());
phi::GPUPlace p(place.GetDeviceId());
auto &mtx = m_->Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_->CUDAPlace();
......@@ -372,8 +374,8 @@ class MixVector {
void reserve(size_t size) { m_->reserve(size); }
// the unify method to access CPU or CUDA data. immutable.
const T *Data(platform::Place place) const {
if (platform::is_gpu_place(place)) {
const T *Data(phi::Place place) const {
if (place.GetType() == phi::AllocationType::GPU) {
return CUDAData(place);
} else {
return data();
......@@ -381,8 +383,8 @@ class MixVector {
}
// the unify method to access CPU or CUDA data. mutable.
T *MutableData(platform::Place place) {
if (platform::is_gpu_place(place)) {
T *MutableData(phi::Place place) {
if (place.GetType() == phi::AllocationType::GPU) {
return CUDAMutableData(place);
} else {
return data();
......@@ -397,5 +399,4 @@ class MixVector {
mutable std::unique_ptr<VectorData> m_;
};
}; // namespace framework
} // namespace paddle
}; // namespace phi
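
To summarize the class above for readers of the GPU kernels elsewhere in this diff: MixVector wraps a host-side phi::Vector (a std::vector) and lazily mirrors it into a single GPU allocation, using the dirty/kDataInCPU/kDataInCUDA flags to keep the two views consistent. A rough usage sketch under those assumptions, for a CUDA build (the LaunchWithLoD helper is hypothetical; CUDAData, CUDAMutableData, GetPlace and stream are the calls used in the hunks above):

#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/mixed_vector.h"

// Hypothetical helper illustrating the pattern used by the CUDA kernels in this diff.
void LaunchWithLoD(const phi::GPUContext &ctx, phi::Vector<size_t> *lod) {
  phi::MixVector<size_t> mix(lod);
  // First use copies the host data to the device and returns a device pointer.
  const size_t *dev_lod = mix.CUDAData(ctx.GetPlace());
  // ... launch a kernel on ctx.stream() that reads dev_lod ...
  // CUDAMutableData(place) would additionally mark the data dirty, so a later
  // CPU read triggers a copy back to the host vector.
  (void)dev_lod;
}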
......@@ -14,10 +14,10 @@
#include "paddle/phi/kernels/edit_distance_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi {
......@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx,
int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum);
auto batch_size = hyps.dims()[0];
paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
phi::Vector<size_t> hyp_lod(batch_size + 1);
phi::Vector<size_t> ref_lod(batch_size + 1);
bool use_length = hypslength.get_ptr() != nullptr;
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/phi/core/mixed_vector.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/phi/backends/onednn/axpy_handler.h"
......@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
auto* in2_value = input2->mutable_value();
// concat rows
paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
auto in1_place = input1.place();
......@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
std::vector<int64_t> in2_rows;
in2_rows.reserve(in2_rows.size() + size);
for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows();
const phi::Vector<int64_t>& in_rows = (*iter)->rows();
in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
}
input2->set_rows(in2_rows);
......@@ -646,7 +646,7 @@ struct MergeAdd<phi::XPUContext, T> {
const phi::SelectedRows& input,
phi::SelectedRows* output,
const bool sorted_result = false) {
paddle::framework::Vector<int64_t> input_rows(input.rows());
phi::Vector<int64_t> input_rows(input.rows());
if (input_rows.size() == 0) {
return;
}
......
......@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
input2.height()));
output->set_height(in1_height);
paddle::framework::Vector<int64_t> in1_rows(input1.rows());
phi::Vector<int64_t> in1_rows(input1.rows());
auto& in2_rows = input2.rows();
std::vector<int64_t> out_rows;
out_rows.reserve(in1_rows.size() + in2_rows.size());
......@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
const int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddTensorKernel<T, block_size>
<<<grid, threads, 0, context.stream()>>>(
in1_data,
......@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
auto* in2_value = input2->mutable_value();
// concat rows
paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
if (in1_rows.size()) {
mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
}
......@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
const int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid(in1_rows.size(), 1);
paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
SelectedRowsAddToTensorKernel<T, block_size>
<<<grid, threads, 0, context.stream()>>>(
in1_data,
......@@ -378,7 +378,7 @@ struct MergeAddImpl {
const phi::SelectedRows& input,
phi::SelectedRows* output,
const bool sorted_result = false) {
paddle::framework::Vector<int64_t> input_rows(input.rows());
phi::Vector<int64_t> input_rows(input.rows());
if (input_rows.size() == 0) {
return;
}
......@@ -386,7 +386,7 @@ struct MergeAddImpl {
phi::SelectedRows& out = *output;
std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
phi::Vector<int64_t> merge_rows(merge_rows_cpu);
auto input_width = input.value().dims()[1];
......@@ -407,8 +407,8 @@ struct MergeAddImpl {
dim3 threads(block_size, 1);
dim3 grid1(input_rows.size(), 1);
paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
phi::MixVector<int64_t> mix_vector_input(&input_rows);
phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
input_data,
mix_vector_input.CUDAData(context.GetPlace()),
......@@ -459,7 +459,7 @@ struct MergeAddImpl {
}
std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
merged_row_set.end());
paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
phi::Vector<int64_t> merge_rows(merge_rows_cpu);
out.set_rows(merge_rows);
out.set_height(input_height);
......@@ -485,8 +485,8 @@ struct MergeAddImpl {
auto& input_rows = input->rows();
dim3 grid1(input_rows.size(), 1);
paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
phi::MixVector<int64_t> mix_vector_input(&input_rows);
phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
input_data,
mix_vector_input.CUDAData(context.GetPlace()),
......
......@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> {
public:
void operator()(const phi::CPUContext& context,
const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod,
phi::Vector<size_t> index_lod,
phi::DenseTensor* dst,
bool is_src_index) {
size_t* index = index_lod.data();
......
......@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
public:
void operator()(const phi::GPUContext& context,
const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod,
phi::Vector<size_t> index_lod,
phi::DenseTensor* dst,
bool is_src_index) {
auto src_dims = src.dims();
......@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
dim3 threads(128, 8);
dim3 grid(8, 1);
auto stream = context.stream();
paddle::framework::MixVector<size_t> mix_index_lod(&index_lod);
phi::MixVector<size_t> mix_index_lod(&index_lod);
CopyMatrixRowsKernel<T, 128, 8, 8><<<grid, threads, 0, stream>>>(
src_data,
dst_data,
......
......@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor {
// The indexed rows are based on the input index.
void operator()(const DeviceContext& context,
const phi::DenseTensor& src,
paddle::framework::Vector<size_t> index_lod,
phi::Vector<size_t> index_lod,
phi::DenseTensor* dst,
bool is_src_index);
};
......
......@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> {
const size_t seq_width = seq->numel() / seq->dims()[0];
auto abs_offset_lod = paddle::framework::ToAbsOffset(lod);
T* seq_data = context.template Alloc<T>(seq);
paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
phi::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(
......
......@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
auto grad_merge = merge_func(context, grad);
auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
phi::Vector<int64_t> merge_rows(grad_merge.rows());
// 2. m += g_m * g_m
auto grad_square =
SquareSelectedRows<phi::GPUContext, T>(context, grad_merge);
......@@ -104,7 +104,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
const int block_size = 256;
dim3 threads(block_size, 1);
dim3 grid2(1, merge_rows.size());
paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
phi::MixVector<int64_t> mixv_merge_rows(&merge_rows);
SparseAdagradFunctorKernel<T, 256>
<<<grid2,
threads,
......
......@@ -87,8 +87,8 @@ void EditDistanceKernel(const Context& ctx,
auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
phi::Vector<size_t> hyp_lod(batch_size + 1);
phi::Vector<size_t> ref_lod(batch_size + 1);
bool use_length = hypslength.get_ptr() != nullptr;
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/kernels/embedding_grad_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/embedding_util.h"
......@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor {
dim3 threads(128, 8);
dim3 grids(8, 1);
auto stream = dev_ctx_.stream();
paddle::framework::Vector<int64_t> new_rows;
phi::Vector<int64_t> new_rows;
new_rows.resize(ids_num);
auto gpu_place = dev_ctx_.GetPlace();
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
phi::MixVector<int64_t> mixv_new_rows(&new_rows);
if (!std::is_same<IdT, int64_t>::value) {
InputTypeConvert<<<grids, threads, 0, stream>>>(
ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
......
......@@ -14,12 +14,12 @@
#include "paddle/phi/kernels/sgd_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_helper.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
namespace phi {
......@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel(
int thread_x = kThreadsPerBlock;
int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
phi::MixVector<int64_t> mixv_in_rows(&in_rows);
SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
in_data,
mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
......
......@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx,
merge_func(ctx, grad, merged_grad);
auto* grad_merge_rows = merged_grad->mutable_rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
int64_t row_numel = merged_grad->value().numel() / merged_grad->rows().size();
funcs::ForRange<Context> for_range(ctx, param.numel());
......
......@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx,
funcs::ForRange<Context> for_range(ctx, limit);
auto &grad_merge_rows = merged_grad->rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
auto &merged_tensor = merged_grad->value();
......
......@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx,
DenseTensor* loss,
DenseTensor* warpctcgrad) {
size_t num_sequences, sequence_width, max_sequence_length;
paddle::framework::Vector<size_t> logits_lod;
paddle::framework::Vector<size_t> label_lod;
phi::Vector<size_t> logits_lod;
phi::Vector<size_t> label_lod;
if (logits_length.is_initialized() && labels_length.is_initialized()) {
num_sequences = logits.dims()[1];
sequence_width = logits.dims()[2];
......@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx,
paddle::operators::math::TotalSequenceLength(label_lod)),
1});
dev_ctx.template HostAlloc<int>(&warpctc_label);
std::vector<paddle::framework::Vector<size_t>> lod;
std::vector<phi::Vector<size_t>> lod;
lod.push_back(label_lod);
warpctc_label.set_lod(lod);
......
......@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
......@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
......@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel(
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
......
......@@ -14,9 +14,9 @@
#include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
namespace phi {
......@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
PADDLE_ENFORCE_NOT_NULL(
path.get_ptr(),
errors::NotFound("Custom tree must be set for sparse mode!"));
paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
phi::Vector<int64_t> real_rows = PathToRows(*path);
w_grad->set_rows(real_rows);
// Build a map of id -> row_index to speed up finding the index of one id
w_grad->set_height(w.dims()[0]);
......
......@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx,
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
auto* grad_merge_rows = &grad_merge.rows();
paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
......
......@@ -70,3 +70,20 @@ cc_test(
test_tensor_array
SRCS test_tensor_array.cc
DEPS tensor_array)
if(WITH_GPU)
nv_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
DEPS mixed_vector place memory device_context tensor)
elseif(WITH_ROCM)
hip_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
DEPS mixed_vector place memory device_context tensor)
else()
cc_test(
test_mixed_vector
SRCS test_mixed_vector.cc
DEPS mixed_vector place memory device_context tensor)
endif()
......@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/phi/core/mixed_vector.h"
#include "glog/logging.h"
#include "gtest/gtest-message.h"
......@@ -21,7 +21,7 @@
#include "gtest/gtest_pred_impl.h"
template <typename T>
using vec = paddle::framework::Vector<T>;
using vec = phi::Vector<T>;
TEST(mixed_vector, CPU_VECTOR) {
vec<int> tmp;
......@@ -44,7 +44,7 @@ TEST(mixed_vector, CPU_VECTOR) {
}
TEST(mixed_vector, InitWithCount) {
paddle::framework::Vector<int> vec(10, 10);
phi::Vector<int> vec(10, 10);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(vec[i], 10);
}
......@@ -58,7 +58,7 @@ TEST(mixed_vector, ForEach) {
}
TEST(mixed_vector, Reserve) {
paddle::framework::Vector<int> vec;
phi::Vector<int> vec;
vec.reserve(1);
vec.push_back(0);
vec.push_back(0);
......@@ -66,7 +66,7 @@ TEST(mixed_vector, Reserve) {
}
TEST(mixed_vector, Resize) {
paddle::framework::Vector<int> vec;
phi::Vector<int> vec;
vec.resize(1);
vec.push_back(0);
vec.push_back(0);
......
......@@ -23,13 +23,14 @@
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/mixed_vector.h"
template <typename T>
using vec = paddle::framework::MixVector<T>;
using gpuStream_t = paddle::gpuStream_t;
using vec = phi::MixVector<T>;
using gpuStream_t = phi::gpuStream_t;
static __global__ void multiply_10(int* ptr) {
for (int i = 0; i < 10; ++i) {
......@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) {
}
}
gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) {
gpuStream_t GetCUDAStream(phi::GPUPlace place) {
return reinterpret_cast<const phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place))
phi::DeviceContextPool::Instance().Get(place))
->stream();
}
......@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) {
}
vec<int> tmp(&x);
ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu(0);
phi::GPUPlace gpu(0);
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(multiply_10,
......@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) {
}
TEST(mixed_vector, MultiGPU) {
if (paddle::platform::GetGPUDeviceCount() < 2) {
if (phi::backends::gpu::GetGPUDeviceCount() < 2) {
LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple "
"GPUs in your machine.";
return;
......@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) {
}
vec<int> tmp(&x);
ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu0(0);
paddle::platform::SetDeviceId(0);
phi::GPUPlace gpu0(0);
phi::backends::gpu::SetDeviceId(0);
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(multiply_10,
......@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) {
#else
multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
#endif
paddle::platform::CUDAPlace gpu1(1);
phi::GPUPlace gpu1(1);
auto* gpu1_ptr = tmp.MutableData(gpu1);
paddle::platform::SetDeviceId(1);
phi::backends::gpu::SetDeviceId(1);
#ifdef PADDLE_WITH_HIP
hipLaunchKernelGGL(
......
......@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
'test_mix_precision_all_reduce_fuse',
'test_spp_op',
'test_op_converter',
'mixed_vector_test',
'test_mixed_vector',
'test_roi_align_op',
'test_pad_constant_like',
'test_mul_op',
......@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [
'device_context_test',
'test_reference_count_pass_last_lived_ops',
'copy_same_tensor_test',
'mixed_vector_test',
'test_mixed_vector',
'op_registry_test',
'test_prepare_op',
'data_device_transform_test',
......