Unverified commit 4527d249, authored by 张春乔, committed by GitHub

Remove LoDTensor and Tensor in fluid except operators folder (#48416)

* Update communicator.cc

* Update communicator.cc

* remove LoDTensor

* remove LoDTensor and Tensor
Parent d80330fe
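
The change is mechanical throughout: each file drops its local alias of phi::DenseTensor (using LoDTensor = phi::DenseTensor; or using Tensor = phi::DenseTensor;) and spells the phi type out at every use site. A minimal sketch of the pattern, with a hypothetical variable var:

    // Before: the fluid-era alias hid the real type.
    using LoDTensor = phi::DenseTensor;
    LoDTensor *tensor = var->GetMutable<LoDTensor>();

    // After: the alias is gone and call sites name phi::DenseTensor directly.
    // Since LoDTensor was a pure alias, this is a rename with no behavior change.
    phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();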
@@ -28,7 +28,6 @@ limitations under the License. */
namespace paddle {
namespace distributed {
using LoDTensor = phi::DenseTensor;
using phi::SelectedRows;
const uint32_t MAX_FEASIGN_NUM = 1024 * 100 * 100;
@@ -97,11 +96,11 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
regions.reserve(varnames.size());
for (auto &t : varnames) {
Variable *var = scope->Var(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA
Variable *temp_var = xpu_temp_scope_->Var(t);
LoDTensor *temp_tensor = temp_var->GetMutable<LoDTensor>();
phi::DenseTensor *temp_tensor = temp_var->GetMutable<phi::DenseTensor>();
temp_tensor->Resize(tensor->dims());
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
paddle::distributed::Region reg(temp_data, tensor->numel());
@@ -122,7 +121,7 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
for (auto &t : varnames) {
Variable *var = scope->FindVar(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
VLOG(3) << "Communicator::RecvNoBarrier Var " << t << " On gpu? "
<< platform::is_gpu_place(tensor->place());
@@ -132,8 +131,8 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
<< " Temp_data[-1] " << temp_recv_data[tensor->numel() - 1];
if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA
LoDTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<LoDTensor>();
phi::DenseTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<phi::DenseTensor>();
framework::TensorCopy(*temp_tensor, tensor->place(), tensor);
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
VLOG(1) << "Communicator::RpcRecvDense Var " << t << " table_id "
@@ -157,11 +156,11 @@ void Communicator::RpcSendDenseParam(const std::vector<std::string> &varnames,
for (auto &t : varnames) {
Variable *var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor *tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA
Variable *temp_var = xpu_temp_scope_->Var(t);
LoDTensor *temp_tensor = temp_var->GetMutable<LoDTensor>();
phi::DenseTensor *temp_tensor = temp_var->GetMutable<phi::DenseTensor>();
temp_tensor->Resize(tensor->dims());
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
framework::TensorCopy(*tensor, platform::CPUPlace(), temp_tensor);
@@ -203,7 +202,8 @@ void Communicator::RpcSendDense(const CommContext &ctx,
float *data = dense_data->data();
uint32_t pos = 0;
for (size_t i = 0; i < var_names.size(); ++i) {
const LoDTensor tensor = scope.FindVar(var_names[i])->Get<LoDTensor>();
const phi::DenseTensor tensor =
scope.FindVar(var_names[i])->Get<phi::DenseTensor>();
size_t count = static_cast<size_t>(tensor.numel());
const float *g = tensor.data<float>();
CHECK(pos + count <= dense_data->size())
@@ -472,13 +472,13 @@ void AsyncCommunicator::RecvNoBarrier() {
auto var_names = iter.second;
for (auto &t : var_names) {
Variable *var = recv_scope_->FindVar(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
VLOG(3) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
<< platform::is_gpu_place(tensor->place());
if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA
LoDTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<LoDTensor>();
phi::DenseTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<phi::DenseTensor>();
framework::TensorCopy(*temp_tensor, tensor->place(), tensor);
#endif
}
@@ -591,8 +591,8 @@ void AsyncCommunicator::PullSparseToTensorSync(
uint64_t padding_id,
platform::Place place,
bool is_training,
std::vector<const LoDTensor *> *inputs,
std::vector<LoDTensor *> *outputs) {
std::vector<const phi::DenseTensor *> *inputs,
std::vector<phi::DenseTensor *> *outputs) {
std::vector<uint64_t> fea_keys;
std::vector<float *> pull_result_ptr;
fea_keys.reserve(MAX_FEASIGN_NUM / 100);
......
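
All of the communicator.cc hunks above follow one lookup pattern: find the Variable in a Scope, get its phi::DenseTensor payload, and hand the raw float buffer to the RPC layer as a Region. A minimal sketch of that pattern, assuming a hypothetical name whose variable already holds an initialized float tensor:

    // Look up the variable and its dense-tensor payload in the scope.
    Variable *var = scope->FindVar(name);
    phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
    // Wrap the CPU buffer so the parameter server can read or write it in place.
    float *data = tensor->mutable_data<float>(platform::CPUPlace());
    paddle::distributed::Region reg(data, tensor->numel());
    regions.emplace_back(std::move(reg));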
@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle {
namespace distributed {
using LoDTensor = phi::DenseTensor;
using framework::ProgramDesc;
using framework::VarDesc;
using framework::Variable;
@@ -232,7 +231,7 @@ std::future<int32_t> FleetWrapper::PullSparseVarsAsync(
if (var == nullptr) {
continue;
}
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
CHECK(tensor != nullptr) << "tensor of var " << name << " is null";
int64_t* ids = tensor->data<int64_t>();
size_t len = tensor->numel();
@@ -279,7 +278,7 @@ void FleetWrapper::PullSparseVarsSync(
if (var == nullptr) {
continue;
}
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
CHECK(tensor != nullptr) << "tensor of var " << name << " is null";
int64_t* ids = tensor->data<int64_t>();
size_t len = tensor->numel();
@@ -327,13 +326,14 @@ void FleetWrapper::PullSparseVarsSync(
// is_training is true means training, false means inference, the behavior is
// different on pserver
void FleetWrapper::PullSparseToTensorSync(const uint64_t table_id,
int fea_dim,
uint64_t padding_id,
platform::Place place,
bool is_training,
std::vector<const LoDTensor*>* inputs,
std::vector<LoDTensor*>* outputs) {
void FleetWrapper::PullSparseToTensorSync(
const uint64_t table_id,
int fea_dim,
uint64_t padding_id,
platform::Place place,
bool is_training,
std::vector<const phi::DenseTensor*>* inputs,
std::vector<phi::DenseTensor*>* outputs) {
std::vector<uint64_t> fea_keys;
std::vector<float*> pull_result_ptr;
fea_keys.reserve(MAX_FEASIGN_NUM / 100);
@@ -398,7 +398,7 @@ void FleetWrapper::PullDenseVarsAsync(
varname = var_names[i] + "pin";
}
Variable* var = scope.FindVar(varname);
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* w = tensor->data<float>();
paddle::distributed::Region reg(w, tensor->numel());
regions[i] = std::move(reg);
@@ -417,7 +417,7 @@ void FleetWrapper::PullDenseVarsSync(
regions.reserve(var_names.size());
for (auto& t : var_names) {
Variable* var = scope.FindVar(t);
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
if (!platform::is_gpu_place(tensor->place())) {
float* w = tensor->data<float>();
paddle::distributed::Region reg(w, tensor->numel());
@@ -437,7 +437,7 @@ void FleetWrapper::PushDenseParamSync(
for (auto& t : var_names) {
Variable* var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
if (!platform::is_gpu_place(tensor->place())) {
float* g = tensor->mutable_data<float>(place);
paddle::distributed::Region reg(g, tensor->numel());
@@ -468,7 +468,7 @@ void FleetWrapper::PushDenseVarsAsync(
for (auto& t : var_names) {
Variable* var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
int count = tensor->numel();
float* g = tensor->mutable_data<float>(place);
// TODO(zhaocaibei123): how to get batch_size in op?
@@ -544,8 +544,8 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync(
const std::string& click_name,
platform::Place place,
const std::vector<std::string>& input_names,
std::vector<const LoDTensor*>* inputs,
std::vector<const LoDTensor*>* outputs) {
std::vector<const phi::DenseTensor*>* inputs,
std::vector<const phi::DenseTensor*>* outputs) {
// not support
return;
}
@@ -555,11 +555,11 @@ void FleetWrapper::PushSparseFromTensorAsync(
int fea_dim,
uint64_t padding_id,
platform::Place place,
std::vector<const LoDTensor*>* inputs,
std::vector<const phi::DenseTensor*>* inputs,
std::vector<int>& slots,
const LoDTensor* shows,
const LoDTensor* clks,
std::vector<LoDTensor*>* outputs,
const phi::DenseTensor* shows,
const phi::DenseTensor* clks,
std::vector<phi::DenseTensor*>* outputs,
bool use_cvm_op) {
CHECK(slots.size() == inputs->size());
int batch_size = -1;
@@ -777,7 +777,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
Variable* var = scope->FindVar(name);
CHECK(var != nullptr) << "var[" << name << "] not found";
VLOG(3) << "prepare shrink dense batch_sum";
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* g = tensor->data<float>();
// show_batch_sum += N * log(decay)
@@ -787,7 +787,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
Variable* var_size = scope->FindVar(size_name);
CHECK(var_size != nullptr) << "var[" << size_name << "] not found";
VLOG(3) << "shrink dense batch_sum: " << name << ", " << size_name;
float* g_size = var_size->GetMutable<LoDTensor>()->data<float>();
float* g_size = var_size->GetMutable<phi::DenseTensor>()->data<float>();
for (int k = 0; k < tensor->numel(); k += emb_dim) {
g[k] = g[k] + g_size[k] * log(decay);
@@ -797,7 +797,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
} else {
Variable* var = scope->FindVar(name);
CHECK(var != nullptr) << "var[" << name << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* g = tensor->data<float>();
paddle::distributed::Region reg(g, tensor->numel());
regions.emplace_back(std::move(reg));
......
@@ -47,7 +47,6 @@ namespace distributed {
class PSCore;
using LoDTensor = phi::DenseTensor;
using framework::Scope;
using framework::Variable;
using phi::SelectedRows;
@@ -111,13 +110,14 @@ class FleetWrapper {
// is_training is true means training, false means inference, the behavior is
// different on pserver
void PullSparseToTensorSync(const uint64_t table_id,
int fea_dim,
uint64_t padding_id,
platform::Place place,
bool is_training,
std::vector<const LoDTensor*>* inputs, // NOLINT
std::vector<LoDTensor*>* outputs); // NOLINT
void PullSparseToTensorSync(
const uint64_t table_id,
int fea_dim,
uint64_t padding_id,
platform::Place place,
bool is_training,
std::vector<const phi::DenseTensor*>* inputs, // NOLINT
std::vector<phi::DenseTensor*>* outputs); // NOLINT
// pull dense variables from server in sync mod
// Param<in>: scope, table_id, var_names
@@ -188,18 +188,18 @@ class FleetWrapper {
const std::string& click_name,
platform::Place place,
const std::vector<std::string>& input_names,
std::vector<const LoDTensor*>* inputs, // NOLINT
std::vector<const LoDTensor*>* outputs); // NOLINT
std::vector<const phi::DenseTensor*>* inputs, // NOLINT
std::vector<const phi::DenseTensor*>* outputs); // NOLINT
void PushSparseFromTensorAsync(const uint64_t table_id,
int fea_dim,
uint64_t padding_id,
platform::Place place,
std::vector<const LoDTensor*>* inputs,
std::vector<const phi::DenseTensor*>* inputs,
std::vector<int>& slots, // NOLINT
const LoDTensor* shows,
const LoDTensor* clicks,
std::vector<LoDTensor*>* outputs,
const phi::DenseTensor* shows,
const phi::DenseTensor* clicks,
std::vector<phi::DenseTensor*>* outputs,
bool use_cvm_op = false);
// Push sparse variables to server in Async mode
// Param<In>: scope, table_id, fea_keys, sparse_grad_names
......
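
The reworked PullSparseToTensorSync declaration above now takes its inputs and outputs as vectors of phi::DenseTensor pointers. A hedged call sketch (fleet, table_id, fea_dim, ids_tensor, and emb_tensor are hypothetical):

    // Pull embeddings for one sparse-id tensor into one output tensor.
    std::vector<const phi::DenseTensor *> inputs = {&ids_tensor};
    std::vector<phi::DenseTensor *> outputs = {&emb_tensor};
    fleet->PullSparseToTensorSync(table_id,
                                  fea_dim,
                                  /*padding_id=*/0,
                                  platform::CPUPlace(),
                                  /*is_training=*/true,
                                  &inputs,
                                  &outputs);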
@@ -47,7 +47,6 @@ namespace distributed {
class PSCore;
using LoDTensor = phi::DenseTensor;
using framework::Scope;
using framework::Variable;
using phi::SelectedRows;
......
@@ -35,7 +35,6 @@
namespace paddle {
using LoDTensor = phi::DenseTensor;
using framework::Variable;
using framework::ir::Graph;
using platform::CPUPlace;
@@ -48,19 +47,19 @@ using EigenMatrixArray =
using ConstEigenMatrixArrayMap = Eigen::Map<const EigenMatrixArray>;
using string::PrettyLogH1;
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
static LoDTensor CreateScaleTensor(int64_t channels_num = 1);
static phi::DenseTensor CreateScaleTensor(int64_t channels_num = 1);
static void check_var(const Variable* var, const std::string& var_name) {
PADDLE_ENFORCE_NOT_NULL(
var,
platform::errors::PreconditionNotMet("%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ(
var->IsType<LoDTensor>(),
var->IsType<phi::DenseTensor>(),
true,
platform::errors::PreconditionNotMet("Only support lod tensor now."));
}
static void check_tensor(const LoDTensor& tensor) {
static void check_tensor(const phi::DenseTensor& tensor) {
PADDLE_ENFORCE_GT(
tensor.dims().size(),
0,
@@ -78,8 +77,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForRNNWeights(
auto* wh_var = predictor_.sub_scope_->FindVar(wh_name);
check_var(wx_var, wx_name);
check_var(wh_var, wh_name);
LoDTensor* wx_tensor = wx_var->GetMutable<LoDTensor>();
LoDTensor* wh_tensor = wh_var->GetMutable<LoDTensor>();
phi::DenseTensor* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
phi::DenseTensor* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
if (gru) {
scales_[wx_name] = GetMaxChGRUScalingFactor(*wx_tensor, *wh_tensor);
} else {
@@ -101,7 +100,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpInputs(
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
check_var(var, var_name);
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* var_tensor = var->GetMutable<phi::DenseTensor>();
// force unsigned type if already know it
bool is_unsigned = false;
CalculateSingleScale(
@@ -118,7 +117,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name);
check_var(var, var_name);
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();
phi::DenseTensor* var_tensor = var->GetMutable<phi::DenseTensor>();
// force unsigned type if already know it
bool is_unsigned = false;
bool compute_scale = true;
@@ -183,7 +182,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
PrettyLogH1("--- Calculating scales for quantization");
std::map<std::string, std::map<std::string, LoDTensor>> gathered_data;
std::map<std::string, std::map<std::string, phi::DenseTensor>> gathered_data;
for (const auto* op : predictor_.inference_program_->Block(0).AllOps()) {
if (platform::HasOpINT8DataType(op)) {
// handle inputs first to let is_unsigned be inferred for the outputs
@@ -198,20 +197,20 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
const std::string& op_type_name,
const std::string& conn_name,
const std::string& var_name,
const LoDTensor& var_tensor,
const phi::DenseTensor& var_tensor,
bool is_unsigned) {
auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE_GT(
var_tensor.numel(),
0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op "
"%s of connection %s should not be empty.",
var_name,
op_type_name,
conn_name));
PADDLE_ENFORCE_GT(var_tensor.numel(),
0,
platform::errors::InvalidArgument(
"MkldnnQuantizer: phi::DenseTensor of variable %s for "
"quantization of op "
"%s of connection %s should not be empty.",
var_name,
op_type_name,
conn_name));
switch (rule) {
case ScaleAlgo::MAX:
@@ -236,8 +235,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
}
}
static LoDTensor CreateScaleTensor(int64_t channels_num) {
LoDTensor scale_tensor;
static phi::DenseTensor CreateScaleTensor(int64_t channels_num) {
phi::DenseTensor scale_tensor;
scale_tensor.Resize({channels_num});
scale_tensor.mutable_data<double>(CPUPlace());
return scale_tensor;
@@ -272,9 +271,9 @@ std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
return expanded_quantized_bins;
}
std::pair<bool, LoDTensor>
std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned) const {
const phi::DenseTensor& var_tensor, bool is_unsigned) const {
ConstEigenVectorArrayMap eigen_tensor{
var_tensor.data<float>(), var_tensor.numel(), 1};
int precision_hist_num_bins = 2048;
@@ -381,15 +380,15 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
min_kl_index = starting_iter;
}
LoDTensor scale_tensor = CreateScaleTensor();
phi::DenseTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
return std::make_pair(is_unsigned, scale_tensor);
}
std::pair<bool, LoDTensor>
std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned) const {
const phi::DenseTensor& var_tensor, bool is_unsigned) const {
ConstEigenVectorArrayMap eigen_tensor{
var_tensor.data<float>(), var_tensor.numel(), 1};
float max_abs = eigen_tensor.abs().maxCoeff();
@@ -402,15 +401,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val));
LoDTensor scale_tensor = CreateScaleTensor();
phi::DenseTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;
return std::make_pair(is_unsigned, scale_tensor);
}
std::pair<bool, LoDTensor>
std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const {
const phi::DenseTensor& var_tensor,
bool is_unsigned,
bool is_transposed) const {
check_tensor(var_tensor);
ConstEigenVectorArrayMap eigen_tensor{
@@ -438,16 +439,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
}
int output_channel_axis = is_transposed;
int channels = dims[output_channel_axis];
LoDTensor scale_tensor = CreateScaleTensor(channels);
phi::DenseTensor scale_tensor = CreateScaleTensor(channels);
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scales.data(), scales.data() + scales.size(), scale_ptr);
return std::make_pair(is_unsigned, scale_tensor);
}
std::pair<bool, LoDTensor>
std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor(
const LoDTensor& wx_tensor, const LoDTensor& wh_tensor) const {
const phi::DenseTensor& wx_tensor,
const phi::DenseTensor& wh_tensor) const {
check_tensor(wx_tensor);
check_tensor(wh_tensor);
@@ -494,16 +496,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor(
transform(scale_ur.begin(), scale_ur.end(), scale_ur.begin(), [](float& c) {
return 1 / c;
});
LoDTensor scale_tensor = CreateScaleTensor(scale_ur.size());
phi::DenseTensor scale_tensor = CreateScaleTensor(scale_ur.size());
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scale_ur.begin(), scale_ur.end(), scale_ptr);
bool is_unsigned = false;
return std::make_pair(is_unsigned, scale_tensor);
}
std::pair<bool, LoDTensor>
std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor(
const LoDTensor& wx_tensor, const LoDTensor& wh_tensor) const {
const phi::DenseTensor& wx_tensor,
const phi::DenseTensor& wh_tensor) const {
check_tensor(wx_tensor);
check_tensor(wh_tensor);
@@ -530,7 +533,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor(
transform(scale.begin(), scale.end(), scale.begin(), [](float& c) {
return 1 / c;
});
LoDTensor scale_tensor = CreateScaleTensor(scale.size());
phi::DenseTensor scale_tensor = CreateScaleTensor(scale.size());
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scale.begin(), scale.end(), scale_ptr);
bool is_unsigned = false;
......
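
Every scaling-factor getter in the quantizer above ends the same way: the computed scales go into a double-typed phi::DenseTensor built by CreateScaleTensor, paired with the is_unsigned flag. A minimal sketch of that shared convention (channels and scales are hypothetical):

    // One double slot per output channel, allocated on CPU.
    phi::DenseTensor scale_tensor = CreateScaleTensor(channels);
    auto *scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
    std::copy(scales.begin(), scales.end(), scale_ptr);
    return std::make_pair(is_unsigned, scale_tensor);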
@@ -24,8 +24,6 @@ namespace paddle {
namespace inference {
namespace tensorrt {
using LoDTensor = phi::DenseTensor;
/*
* Convert Input from Fluid to TensorRT Engine.
* Convert Output from TensorRT Engine to Fluid.
@@ -38,13 +36,17 @@ class EngineIOConverter {
public:
EngineIOConverter() {}
virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {}
virtual void operator()(const void* in, LoDTensor* out, size_t max_size) {}
virtual void operator()(const phi::DenseTensor& in,
void* out,
size_t max_size) {}
virtual void operator()(const void* in,
phi::DenseTensor* out,
size_t max_size) {}
void SetStream(cudaStream_t* stream) { stream_ = stream; }
static void ConvertInput(const std::string& op_type,
const LoDTensor& in,
const phi::DenseTensor& in,
void* out,
size_t max_size,
cudaStream_t* stream) {
@@ -63,7 +65,7 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type,
const void* in,
LoDTensor* out,
phi::DenseTensor* out,
size_t max_size,
cudaStream_t* stream) {
PADDLE_ENFORCE_NOT_NULL(stream,
......
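
A hedged usage sketch for the converter API above (op_type, the tensors, the device buffer, and the stream are hypothetical; the data must fit within max_size):

    // Fluid -> TensorRT: copy the input tensor into the engine's device buffer.
    EngineIOConverter::ConvertInput(op_type, input_tensor, buffer, max_size, &stream);
    // TensorRT -> Fluid: copy the engine's output buffer back into a tensor.
    EngineIOConverter::ConvertOutput(op_type, buffer, &output_tensor, max_size, &stream);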
@@ -30,8 +30,6 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
using float16 = paddle::platform::float16;
using Tensor = phi::DenseTensor;
using LoDTensor = phi::DenseTensor;
using Scope = paddle::framework::Scope;
using OpDesc = paddle::framework::OpDesc;
using Graph = paddle::framework::ir::Graph;
......
@@ -28,7 +28,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
using NPUAttribute = framework::NPUAttribute;
using NPUAttributeMap = framework::NPUAttributeMap;
@@ -39,8 +38,8 @@ class NpuOpRunner {
NpuOpRunner();
explicit NpuOpRunner(const std::string &op_type);
NpuOpRunner(const std::string &op_type,
const std::vector<Tensor> &inputs = {},
const std::vector<Tensor> &outputs = {},
const std::vector<phi::DenseTensor> &inputs = {},
const std::vector<phi::DenseTensor> &outputs = {},
const NPUAttributeMap &attrs = {});
// NOTE(zhiqiu): why forbid copy and operator= ?
@@ -67,12 +66,12 @@ class NpuOpRunner {
NpuOpRunner &AddAttrs(const NPUAttributeMap &attrs);
NpuOpRunner &AddInput(const Tensor &tensor);
NpuOpRunner &AddInput(const phi::DenseTensor &tensor);
// NOTE(zhiqiu): CANN-5.0.2 support input tensors on host.
// Specifically, the tensor of shape, tensor of dims, etc, which are small
// vector/list.
NpuOpRunner &AddInput(const Tensor &tensor, aclMemType mem_type);
NpuOpRunner &AddInput(const phi::DenseTensor &tensor, aclMemType mem_type);
NpuOpRunner &AddInput(std::vector<int32_t> &&dims);
@@ -82,13 +81,13 @@ class NpuOpRunner {
NpuOpRunner &AddInput(std::vector<double> &&values);
NpuOpRunner &AddOutput(const Tensor &tensor);
NpuOpRunner &AddOutput(const phi::DenseTensor &tensor);
NpuOpRunner &AddInputs(const std::vector<Tensor> &tensors);
NpuOpRunner &AddInputs(const std::vector<phi::DenseTensor> &tensors);
NpuOpRunner &AddInputNames(const std::vector<std::string> &names);
NpuOpRunner &AddOutputs(const std::vector<Tensor> &tensors);
NpuOpRunner &AddOutputs(const std::vector<phi::DenseTensor> &tensors);
aclTensorDesc *GetInputDesc(size_t index);
@@ -105,21 +104,21 @@ class NpuOpRunner {
void Run(aclrtStream stream = nullptr) const;
static void TypeAdapter(
const std::vector<Tensor> &inputs,
const std::vector<Tensor> &outputs,
const std::vector<phi::DenseTensor> &inputs,
const std::vector<phi::DenseTensor> &outputs,
const NPUAttributeMap &attrs,
const platform::NPUDeviceContext &dev_ctx,
std::function<void(const std::vector<Tensor> &,
const std::vector<Tensor> &,
std::function<void(const std::vector<phi::DenseTensor> &,
const std::vector<phi::DenseTensor> &,
const NPUAttributeMap &,
const platform::NPUDeviceContext &)> op_runner,
const std::vector<framework::proto::VarType::Type> &input_type,
const std::vector<framework::proto::VarType::Type> &output_type);
private:
aclTensorDesc *CreateTensorDesc(Tensor tensor,
aclTensorDesc *CreateTensorDesc(phi::DenseTensor tensor,
aclMemType mem_type = ACL_MEMTYPE_DEVICE);
aclDataBuffer *CreateDataBuffer(Tensor tensor);
aclDataBuffer *CreateDataBuffer(phi::DenseTensor tensor);
private:
std::string op_type_;
@@ -127,7 +126,7 @@ class NpuOpRunner {
std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_;
std::vector<Tensor> host_tensors_;
std::vector<phi::DenseTensor> host_tensors_;
aclopAttr *attr_{nullptr};
};
@@ -136,7 +135,7 @@ aclDataType ConvertToNpuDtype(framework::proto::VarType::Type dtype);
aclrtStream GetCurrentNPUStream(int device_id = -1);
template <typename T>
void FillNpuTensorWithConstant(Tensor *tensor, T val) {
void FillNpuTensorWithConstant(phi::DenseTensor *tensor, T val) {
PADDLE_ENFORCE_EQ(
tensor->IsInitialized(),
true,
@@ -148,7 +147,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) {
int numel = tensor->numel();
if (numel == 1) {
Tensor npu_pinned_tensor(tensor->dtype());
phi::DenseTensor npu_pinned_tensor(tensor->dtype());
platform::NPUPinnedPlace npu_pinned_place;
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data<T>({1}, npu_pinned_place);
......
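
The declarations above form a small builder-style API over ACL. A hedged sketch of driving it (the op name "Add" and the tensors x, y, out, and bias are hypothetical; all are assumed to be allocated and initialized already):

    // Build and launch one NPU op on the current stream.
    NpuOpRunner runner("Add", {x, y}, {out});
    runner.Run(GetCurrentNPUStream());

    // Fill a small, already-initialized tensor with a constant via the helper above.
    FillNpuTensorWithConstant<float>(&bias, 1.0f);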