未验证 提交 4527d249 编写于 作者: 张春乔 提交者: GitHub

Remove LoDTensor and Tensor in fluid except operators folder (#48416)

* Update communicator.cc

* Update communicator.cc

* remove LoDTensor

* remove LoDTensor and Tensor
上级 d80330fe
...@@ -28,7 +28,6 @@ limitations under the License. */ ...@@ -28,7 +28,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
using LoDTensor = phi::DenseTensor;
using phi::SelectedRows; using phi::SelectedRows;
const uint32_t MAX_FEASIGN_NUM = 1024 * 100 * 100; const uint32_t MAX_FEASIGN_NUM = 1024 * 100 * 100;
...@@ -97,11 +96,11 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames, ...@@ -97,11 +96,11 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
regions.reserve(varnames.size()); regions.reserve(varnames.size());
for (auto &t : varnames) { for (auto &t : varnames) {
Variable *var = scope->Var(t); Variable *var = scope->Var(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
if (platform::is_gpu_place(tensor->place())) { if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
Variable *temp_var = xpu_temp_scope_->Var(t); Variable *temp_var = xpu_temp_scope_->Var(t);
LoDTensor *temp_tensor = temp_var->GetMutable<LoDTensor>(); phi::DenseTensor *temp_tensor = temp_var->GetMutable<phi::DenseTensor>();
temp_tensor->Resize(tensor->dims()); temp_tensor->Resize(tensor->dims());
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace()); float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
paddle::distributed::Region reg(temp_data, tensor->numel()); paddle::distributed::Region reg(temp_data, tensor->numel());
...@@ -122,7 +121,7 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames, ...@@ -122,7 +121,7 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
for (auto &t : varnames) { for (auto &t : varnames) {
Variable *var = scope->FindVar(t); Variable *var = scope->FindVar(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
VLOG(3) << "Communicator::RecvNoBarrier Var " << t << " On gpu? " VLOG(3) << "Communicator::RecvNoBarrier Var " << t << " On gpu? "
<< platform::is_gpu_place(tensor->place()); << platform::is_gpu_place(tensor->place());
...@@ -132,8 +131,8 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames, ...@@ -132,8 +131,8 @@ void Communicator::RpcRecvDense(const std::vector<std::string> &varnames,
<< " Temp_data[-1] " << temp_recv_data[tensor->numel() - 1]; << " Temp_data[-1] " << temp_recv_data[tensor->numel() - 1];
if (platform::is_gpu_place(tensor->place())) { if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
LoDTensor *temp_tensor = phi::DenseTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<LoDTensor>(); xpu_temp_scope_->FindVar(t)->GetMutable<phi::DenseTensor>();
framework::TensorCopy(*temp_tensor, tensor->place(), tensor); framework::TensorCopy(*temp_tensor, tensor->place(), tensor);
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace()); float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
VLOG(1) << "Communicator::RpcRecvDense Var " << t << " table_id " VLOG(1) << "Communicator::RpcRecvDense Var " << t << " table_id "
...@@ -157,11 +156,11 @@ void Communicator::RpcSendDenseParam(const std::vector<std::string> &varnames, ...@@ -157,11 +156,11 @@ void Communicator::RpcSendDenseParam(const std::vector<std::string> &varnames,
for (auto &t : varnames) { for (auto &t : varnames) {
Variable *var = scope.FindVar(t); Variable *var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found"; CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor *tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
if (platform::is_gpu_place(tensor->place())) { if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
Variable *temp_var = xpu_temp_scope_->Var(t); Variable *temp_var = xpu_temp_scope_->Var(t);
LoDTensor *temp_tensor = temp_var->GetMutable<LoDTensor>(); phi::DenseTensor *temp_tensor = temp_var->GetMutable<phi::DenseTensor>();
temp_tensor->Resize(tensor->dims()); temp_tensor->Resize(tensor->dims());
float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace()); float *temp_data = temp_tensor->mutable_data<float>(platform::CPUPlace());
framework::TensorCopy(*tensor, platform::CPUPlace(), temp_tensor); framework::TensorCopy(*tensor, platform::CPUPlace(), temp_tensor);
...@@ -203,7 +202,8 @@ void Communicator::RpcSendDense(const CommContext &ctx, ...@@ -203,7 +202,8 @@ void Communicator::RpcSendDense(const CommContext &ctx,
float *data = dense_data->data(); float *data = dense_data->data();
uint32_t pos = 0; uint32_t pos = 0;
for (size_t i = 0; i < var_names.size(); ++i) { for (size_t i = 0; i < var_names.size(); ++i) {
const LoDTensor tensor = scope.FindVar(var_names[i])->Get<LoDTensor>(); const phi::DenseTensor tensor =
scope.FindVar(var_names[i])->Get<phi::DenseTensor>();
size_t count = static_cast<size_t>(tensor.numel()); size_t count = static_cast<size_t>(tensor.numel());
const float *g = tensor.data<float>(); const float *g = tensor.data<float>();
CHECK(pos + count <= dense_data->size()) CHECK(pos + count <= dense_data->size())
...@@ -472,13 +472,13 @@ void AsyncCommunicator::RecvNoBarrier() { ...@@ -472,13 +472,13 @@ void AsyncCommunicator::RecvNoBarrier() {
auto var_names = iter.second; auto var_names = iter.second;
for (auto &t : var_names) { for (auto &t : var_names) {
Variable *var = recv_scope_->FindVar(t); Variable *var = recv_scope_->FindVar(t);
LoDTensor *tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor *tensor = var->GetMutable<phi::DenseTensor>();
VLOG(3) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? " VLOG(3) << "AsyncCommunicator::RecvNoBarrier Var " << t << " On gpu? "
<< platform::is_gpu_place(tensor->place()); << platform::is_gpu_place(tensor->place());
if (platform::is_gpu_place(tensor->place())) { if (platform::is_gpu_place(tensor->place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
LoDTensor *temp_tensor = phi::DenseTensor *temp_tensor =
xpu_temp_scope_->FindVar(t)->GetMutable<LoDTensor>(); xpu_temp_scope_->FindVar(t)->GetMutable<phi::DenseTensor>();
framework::TensorCopy(*temp_tensor, tensor->place(), tensor); framework::TensorCopy(*temp_tensor, tensor->place(), tensor);
#endif #endif
} }
...@@ -591,8 +591,8 @@ void AsyncCommunicator::PullSparseToTensorSync( ...@@ -591,8 +591,8 @@ void AsyncCommunicator::PullSparseToTensorSync(
uint64_t padding_id, uint64_t padding_id,
platform::Place place, platform::Place place,
bool is_training, bool is_training,
std::vector<const LoDTensor *> *inputs, std::vector<const phi::DenseTensor *> *inputs,
std::vector<LoDTensor *> *outputs) { std::vector<phi::DenseTensor *> *outputs) {
std::vector<uint64_t> fea_keys; std::vector<uint64_t> fea_keys;
std::vector<float *> pull_result_ptr; std::vector<float *> pull_result_ptr;
fea_keys.reserve(MAX_FEASIGN_NUM / 100); fea_keys.reserve(MAX_FEASIGN_NUM / 100);
......
...@@ -25,7 +25,6 @@ limitations under the License. */ ...@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace distributed { namespace distributed {
using LoDTensor = phi::DenseTensor;
using framework::ProgramDesc; using framework::ProgramDesc;
using framework::VarDesc; using framework::VarDesc;
using framework::Variable; using framework::Variable;
...@@ -232,7 +231,7 @@ std::future<int32_t> FleetWrapper::PullSparseVarsAsync( ...@@ -232,7 +231,7 @@ std::future<int32_t> FleetWrapper::PullSparseVarsAsync(
if (var == nullptr) { if (var == nullptr) {
continue; continue;
} }
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
CHECK(tensor != nullptr) << "tensor of var " << name << " is null"; CHECK(tensor != nullptr) << "tensor of var " << name << " is null";
int64_t* ids = tensor->data<int64_t>(); int64_t* ids = tensor->data<int64_t>();
size_t len = tensor->numel(); size_t len = tensor->numel();
...@@ -279,7 +278,7 @@ void FleetWrapper::PullSparseVarsSync( ...@@ -279,7 +278,7 @@ void FleetWrapper::PullSparseVarsSync(
if (var == nullptr) { if (var == nullptr) {
continue; continue;
} }
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
CHECK(tensor != nullptr) << "tensor of var " << name << " is null"; CHECK(tensor != nullptr) << "tensor of var " << name << " is null";
int64_t* ids = tensor->data<int64_t>(); int64_t* ids = tensor->data<int64_t>();
size_t len = tensor->numel(); size_t len = tensor->numel();
...@@ -327,13 +326,14 @@ void FleetWrapper::PullSparseVarsSync( ...@@ -327,13 +326,14 @@ void FleetWrapper::PullSparseVarsSync(
// is_training is true means training, false means inference, the behavior is // is_training is true means training, false means inference, the behavior is
// different on pserver // different on pserver
void FleetWrapper::PullSparseToTensorSync(const uint64_t table_id, void FleetWrapper::PullSparseToTensorSync(
int fea_dim, const uint64_t table_id,
uint64_t padding_id, int fea_dim,
platform::Place place, uint64_t padding_id,
bool is_training, platform::Place place,
std::vector<const LoDTensor*>* inputs, bool is_training,
std::vector<LoDTensor*>* outputs) { std::vector<const phi::DenseTensor*>* inputs,
std::vector<phi::DenseTensor*>* outputs) {
std::vector<uint64_t> fea_keys; std::vector<uint64_t> fea_keys;
std::vector<float*> pull_result_ptr; std::vector<float*> pull_result_ptr;
fea_keys.reserve(MAX_FEASIGN_NUM / 100); fea_keys.reserve(MAX_FEASIGN_NUM / 100);
...@@ -398,7 +398,7 @@ void FleetWrapper::PullDenseVarsAsync( ...@@ -398,7 +398,7 @@ void FleetWrapper::PullDenseVarsAsync(
varname = var_names[i] + "pin"; varname = var_names[i] + "pin";
} }
Variable* var = scope.FindVar(varname); Variable* var = scope.FindVar(varname);
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* w = tensor->data<float>(); float* w = tensor->data<float>();
paddle::distributed::Region reg(w, tensor->numel()); paddle::distributed::Region reg(w, tensor->numel());
regions[i] = std::move(reg); regions[i] = std::move(reg);
...@@ -417,7 +417,7 @@ void FleetWrapper::PullDenseVarsSync( ...@@ -417,7 +417,7 @@ void FleetWrapper::PullDenseVarsSync(
regions.reserve(var_names.size()); regions.reserve(var_names.size());
for (auto& t : var_names) { for (auto& t : var_names) {
Variable* var = scope.FindVar(t); Variable* var = scope.FindVar(t);
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
if (!platform::is_gpu_place(tensor->place())) { if (!platform::is_gpu_place(tensor->place())) {
float* w = tensor->data<float>(); float* w = tensor->data<float>();
paddle::distributed::Region reg(w, tensor->numel()); paddle::distributed::Region reg(w, tensor->numel());
...@@ -437,7 +437,7 @@ void FleetWrapper::PushDenseParamSync( ...@@ -437,7 +437,7 @@ void FleetWrapper::PushDenseParamSync(
for (auto& t : var_names) { for (auto& t : var_names) {
Variable* var = scope.FindVar(t); Variable* var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found"; CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
if (!platform::is_gpu_place(tensor->place())) { if (!platform::is_gpu_place(tensor->place())) {
float* g = tensor->mutable_data<float>(place); float* g = tensor->mutable_data<float>(place);
paddle::distributed::Region reg(g, tensor->numel()); paddle::distributed::Region reg(g, tensor->numel());
...@@ -468,7 +468,7 @@ void FleetWrapper::PushDenseVarsAsync( ...@@ -468,7 +468,7 @@ void FleetWrapper::PushDenseVarsAsync(
for (auto& t : var_names) { for (auto& t : var_names) {
Variable* var = scope.FindVar(t); Variable* var = scope.FindVar(t);
CHECK(var != nullptr) << "var[" << t << "] not found"; CHECK(var != nullptr) << "var[" << t << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
int count = tensor->numel(); int count = tensor->numel();
float* g = tensor->mutable_data<float>(place); float* g = tensor->mutable_data<float>(place);
// TODO(zhaocaibei123): how to get batch_size in op? // TODO(zhaocaibei123): how to get batch_size in op?
...@@ -544,8 +544,8 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync( ...@@ -544,8 +544,8 @@ void FleetWrapper::PushSparseFromTensorWithLabelAsync(
const std::string& click_name, const std::string& click_name,
platform::Place place, platform::Place place,
const std::vector<std::string>& input_names, const std::vector<std::string>& input_names,
std::vector<const LoDTensor*>* inputs, std::vector<const phi::DenseTensor*>* inputs,
std::vector<const LoDTensor*>* outputs) { std::vector<const phi::DenseTensor*>* outputs) {
// not support // not support
return; return;
} }
...@@ -555,11 +555,11 @@ void FleetWrapper::PushSparseFromTensorAsync( ...@@ -555,11 +555,11 @@ void FleetWrapper::PushSparseFromTensorAsync(
int fea_dim, int fea_dim,
uint64_t padding_id, uint64_t padding_id,
platform::Place place, platform::Place place,
std::vector<const LoDTensor*>* inputs, std::vector<const phi::DenseTensor*>* inputs,
std::vector<int>& slots, std::vector<int>& slots,
const LoDTensor* shows, const phi::DenseTensor* shows,
const LoDTensor* clks, const phi::DenseTensor* clks,
std::vector<LoDTensor*>* outputs, std::vector<phi::DenseTensor*>* outputs,
bool use_cvm_op) { bool use_cvm_op) {
CHECK(slots.size() == inputs->size()); CHECK(slots.size() == inputs->size());
int batch_size = -1; int batch_size = -1;
...@@ -777,7 +777,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id, ...@@ -777,7 +777,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
Variable* var = scope->FindVar(name); Variable* var = scope->FindVar(name);
CHECK(var != nullptr) << "var[" << name << "] not found"; CHECK(var != nullptr) << "var[" << name << "] not found";
VLOG(3) << "prepare shrink dense batch_sum"; VLOG(3) << "prepare shrink dense batch_sum";
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* g = tensor->data<float>(); float* g = tensor->data<float>();
// show_batch_sum += N * log(decay) // show_batch_sum += N * log(decay)
...@@ -787,7 +787,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id, ...@@ -787,7 +787,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
Variable* var_size = scope->FindVar(size_name); Variable* var_size = scope->FindVar(size_name);
CHECK(var_size != nullptr) << "var[" << size_name << "] not found"; CHECK(var_size != nullptr) << "var[" << size_name << "] not found";
VLOG(3) << "shrink dense batch_sum: " << name << ", " << size_name; VLOG(3) << "shrink dense batch_sum: " << name << ", " << size_name;
float* g_size = var_size->GetMutable<LoDTensor>()->data<float>(); float* g_size = var_size->GetMutable<phi::DenseTensor>()->data<float>();
for (int k = 0; k < tensor->numel(); k += emb_dim) { for (int k = 0; k < tensor->numel(); k += emb_dim) {
g[k] = g[k] + g_size[k] * log(decay); g[k] = g[k] + g_size[k] * log(decay);
...@@ -797,7 +797,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id, ...@@ -797,7 +797,7 @@ void FleetWrapper::ShrinkDenseTable(int table_id,
} else { } else {
Variable* var = scope->FindVar(name); Variable* var = scope->FindVar(name);
CHECK(var != nullptr) << "var[" << name << "] not found"; CHECK(var != nullptr) << "var[" << name << "] not found";
LoDTensor* tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* tensor = var->GetMutable<phi::DenseTensor>();
float* g = tensor->data<float>(); float* g = tensor->data<float>();
paddle::distributed::Region reg(g, tensor->numel()); paddle::distributed::Region reg(g, tensor->numel());
regions.emplace_back(std::move(reg)); regions.emplace_back(std::move(reg));
......
...@@ -47,7 +47,6 @@ namespace distributed { ...@@ -47,7 +47,6 @@ namespace distributed {
class PSCore; class PSCore;
using LoDTensor = phi::DenseTensor;
using framework::Scope; using framework::Scope;
using framework::Variable; using framework::Variable;
using phi::SelectedRows; using phi::SelectedRows;
...@@ -111,13 +110,14 @@ class FleetWrapper { ...@@ -111,13 +110,14 @@ class FleetWrapper {
// is_training is true means training, false means inference, the behavior is // is_training is true means training, false means inference, the behavior is
// different on pserver // different on pserver
void PullSparseToTensorSync(const uint64_t table_id, void PullSparseToTensorSync(
int fea_dim, const uint64_t table_id,
uint64_t padding_id, int fea_dim,
platform::Place place, uint64_t padding_id,
bool is_training, platform::Place place,
std::vector<const LoDTensor*>* inputs, // NOLINT bool is_training,
std::vector<LoDTensor*>* outputs); // NOLINT std::vector<const phi::DenseTensor*>* inputs, // NOLINT
std::vector<phi::DenseTensor*>* outputs); // NOLINT
// pull dense variables from server in sync mode // pull dense variables from server in sync mode
// Param<in>: scope, table_id, var_names // Param<in>: scope, table_id, var_names
...@@ -188,18 +188,18 @@ class FleetWrapper { ...@@ -188,18 +188,18 @@ class FleetWrapper {
const std::string& click_name, const std::string& click_name,
platform::Place place, platform::Place place,
const std::vector<std::string>& input_names, const std::vector<std::string>& input_names,
std::vector<const LoDTensor*>* inputs, // NOLINT std::vector<const phi::DenseTensor*>* inputs, // NOLINT
std::vector<const LoDTensor*>* outputs); // NOLINT std::vector<const phi::DenseTensor*>* outputs); // NOLINT
void PushSparseFromTensorAsync(const uint64_t table_id, void PushSparseFromTensorAsync(const uint64_t table_id,
int fea_dim, int fea_dim,
uint64_t padding_id, uint64_t padding_id,
platform::Place place, platform::Place place,
std::vector<const LoDTensor*>* inputs, std::vector<const phi::DenseTensor*>* inputs,
std::vector<int>& slots, // NOLINT std::vector<int>& slots, // NOLINT
const LoDTensor* shows, const phi::DenseTensor* shows,
const LoDTensor* clicks, const phi::DenseTensor* clicks,
std::vector<LoDTensor*>* outputs, std::vector<phi::DenseTensor*>* outputs,
bool use_cvm_op = false); bool use_cvm_op = false);
// Push sparse variables to server in Async mode // Push sparse variables to server in Async mode
// Param<In>: scope, table_id, fea_keys, sparse_grad_names // Param<In>: scope, table_id, fea_keys, sparse_grad_names
......
...@@ -47,7 +47,6 @@ namespace distributed { ...@@ -47,7 +47,6 @@ namespace distributed {
class PSCore; class PSCore;
using LoDTensor = phi::DenseTensor;
using framework::Scope; using framework::Scope;
using framework::Variable; using framework::Variable;
using phi::SelectedRows; using phi::SelectedRows;
......
...@@ -35,7 +35,6 @@ ...@@ -35,7 +35,6 @@
namespace paddle { namespace paddle {
using LoDTensor = phi::DenseTensor;
using framework::Variable; using framework::Variable;
using framework::ir::Graph; using framework::ir::Graph;
using platform::CPUPlace; using platform::CPUPlace;
...@@ -48,19 +47,19 @@ using EigenMatrixArray = ...@@ -48,19 +47,19 @@ using EigenMatrixArray =
using ConstEigenMatrixArrayMap = Eigen::Map<const EigenMatrixArray>; using ConstEigenMatrixArrayMap = Eigen::Map<const EigenMatrixArray>;
using string::PrettyLogH1; using string::PrettyLogH1;
using VariableNameMap = std::map<std::string, std::vector<std::string>>; using VariableNameMap = std::map<std::string, std::vector<std::string>>;
static LoDTensor CreateScaleTensor(int64_t channels_num = 1); static phi::DenseTensor CreateScaleTensor(int64_t channels_num = 1);
static void check_var(const Variable* var, const std::string& var_name) { static void check_var(const Variable* var, const std::string& var_name) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
var, var,
platform::errors::PreconditionNotMet("%s is not in the scope", var_name)); platform::errors::PreconditionNotMet("%s is not in the scope", var_name));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
var->IsType<LoDTensor>(), var->IsType<phi::DenseTensor>(),
true, true,
platform::errors::PreconditionNotMet("Only support lod tensor now.")); platform::errors::PreconditionNotMet("Only support lod tensor now."));
} }
static void check_tensor(const LoDTensor& tensor) { static void check_tensor(const phi::DenseTensor& tensor) {
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
tensor.dims().size(), tensor.dims().size(),
0, 0,
...@@ -78,8 +77,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForRNNWeights( ...@@ -78,8 +77,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForRNNWeights(
auto* wh_var = predictor_.sub_scope_->FindVar(wh_name); auto* wh_var = predictor_.sub_scope_->FindVar(wh_name);
check_var(wx_var, wx_name); check_var(wx_var, wx_name);
check_var(wh_var, wh_name); check_var(wh_var, wh_name);
LoDTensor* wx_tensor = wx_var->GetMutable<LoDTensor>(); phi::DenseTensor* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
LoDTensor* wh_tensor = wh_var->GetMutable<LoDTensor>(); phi::DenseTensor* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
if (gru) { if (gru) {
scales_[wx_name] = GetMaxChGRUScalingFactor(*wx_tensor, *wh_tensor); scales_[wx_name] = GetMaxChGRUScalingFactor(*wx_tensor, *wh_tensor);
} else { } else {
...@@ -101,7 +100,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpInputs( ...@@ -101,7 +100,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpInputs(
if (scales_.find(var_name) != scales_.end()) continue; if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name); auto* var = predictor_.sub_scope_->FindVar(var_name);
check_var(var, var_name); check_var(var, var_name);
LoDTensor* var_tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* var_tensor = var->GetMutable<phi::DenseTensor>();
// force unsigned type if already know it // force unsigned type if already know it
bool is_unsigned = false; bool is_unsigned = false;
CalculateSingleScale( CalculateSingleScale(
...@@ -118,7 +117,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs( ...@@ -118,7 +117,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
if (scales_.find(var_name) != scales_.end()) continue; if (scales_.find(var_name) != scales_.end()) continue;
auto* var = predictor_.sub_scope_->FindVar(var_name); auto* var = predictor_.sub_scope_->FindVar(var_name);
check_var(var, var_name); check_var(var, var_name);
LoDTensor* var_tensor = var->GetMutable<LoDTensor>(); phi::DenseTensor* var_tensor = var->GetMutable<phi::DenseTensor>();
// force unsigned type if already know it // force unsigned type if already know it
bool is_unsigned = false; bool is_unsigned = false;
bool compute_scale = true; bool compute_scale = true;
...@@ -183,7 +182,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs( ...@@ -183,7 +182,7 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() { bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
PrettyLogH1("--- Calculating scales for quantization"); PrettyLogH1("--- Calculating scales for quantization");
std::map<std::string, std::map<std::string, LoDTensor>> gathered_data; std::map<std::string, std::map<std::string, phi::DenseTensor>> gathered_data;
for (const auto* op : predictor_.inference_program_->Block(0).AllOps()) { for (const auto* op : predictor_.inference_program_->Block(0).AllOps()) {
if (platform::HasOpINT8DataType(op)) { if (platform::HasOpINT8DataType(op)) {
// handle inputs first to let is_unsigned be inferred for the outputs // handle inputs first to let is_unsigned be inferred for the outputs
...@@ -198,20 +197,20 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale( ...@@ -198,20 +197,20 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
const std::string& op_type_name, const std::string& op_type_name,
const std::string& conn_name, const std::string& conn_name,
const std::string& var_name, const std::string& var_name,
const LoDTensor& var_tensor, const phi::DenseTensor& var_tensor,
bool is_unsigned) { bool is_unsigned) {
auto rule = qconfig_->scale_algo(op_type_name, conn_name); auto rule = qconfig_->scale_algo(op_type_name, conn_name);
if (rule == ScaleAlgo::NONE) return; if (rule == ScaleAlgo::NONE) return;
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(var_tensor.numel(),
var_tensor.numel(), 0,
0, platform::errors::InvalidArgument(
platform::errors::InvalidArgument( "MkldnnQuantizer: phi::DenseTensor of variable %s for "
"MkldnnQuantizer: LoDTensor of variable %s for quantization of op " "quantization of op "
"%s of connection %s should not be empty.", "%s of connection %s should not be empty.",
var_name, var_name,
op_type_name, op_type_name,
conn_name)); conn_name));
switch (rule) { switch (rule) {
case ScaleAlgo::MAX: case ScaleAlgo::MAX:
...@@ -236,8 +235,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale( ...@@ -236,8 +235,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
} }
} }
static LoDTensor CreateScaleTensor(int64_t channels_num) { static phi::DenseTensor CreateScaleTensor(int64_t channels_num) {
LoDTensor scale_tensor; phi::DenseTensor scale_tensor;
scale_tensor.Resize({channels_num}); scale_tensor.Resize({channels_num});
scale_tensor.mutable_data<double>(CPUPlace()); scale_tensor.mutable_data<double>(CPUPlace());
return scale_tensor; return scale_tensor;
...@@ -272,9 +271,9 @@ std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins( ...@@ -272,9 +271,9 @@ std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
return expanded_quantized_bins; return expanded_quantized_bins;
} }
std::pair<bool, LoDTensor> std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned) const { const phi::DenseTensor& var_tensor, bool is_unsigned) const {
ConstEigenVectorArrayMap eigen_tensor{ ConstEigenVectorArrayMap eigen_tensor{
var_tensor.data<float>(), var_tensor.numel(), 1}; var_tensor.data<float>(), var_tensor.numel(), 1};
int precision_hist_num_bins = 2048; int precision_hist_num_bins = 2048;
...@@ -381,15 +380,15 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor( ...@@ -381,15 +380,15 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
min_kl_index = starting_iter; min_kl_index = starting_iter;
} }
LoDTensor scale_tensor = CreateScaleTensor(); phi::DenseTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width); scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
std::pair<bool, LoDTensor> std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned) const { const phi::DenseTensor& var_tensor, bool is_unsigned) const {
ConstEigenVectorArrayMap eigen_tensor{ ConstEigenVectorArrayMap eigen_tensor{
var_tensor.data<float>(), var_tensor.numel(), 1}; var_tensor.data<float>(), var_tensor.numel(), 1};
float max_abs = eigen_tensor.abs().maxCoeff(); float max_abs = eigen_tensor.abs().maxCoeff();
...@@ -402,15 +401,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor( ...@@ -402,15 +401,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0", "Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val)); min_val));
LoDTensor scale_tensor = CreateScaleTensor(); phi::DenseTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs; scale_tensor.data<double>()[0] = 1.0 / max_abs;
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
std::pair<bool, LoDTensor> std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
const LoDTensor& var_tensor, bool is_unsigned, bool is_transposed) const { const phi::DenseTensor& var_tensor,
bool is_unsigned,
bool is_transposed) const {
check_tensor(var_tensor); check_tensor(var_tensor);
ConstEigenVectorArrayMap eigen_tensor{ ConstEigenVectorArrayMap eigen_tensor{
...@@ -438,16 +439,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( ...@@ -438,16 +439,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
} }
int output_channel_axis = is_transposed; int output_channel_axis = is_transposed;
int channels = dims[output_channel_axis]; int channels = dims[output_channel_axis];
LoDTensor scale_tensor = CreateScaleTensor(channels); phi::DenseTensor scale_tensor = CreateScaleTensor(channels);
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace()); auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scales.data(), scales.data() + scales.size(), scale_ptr); std::copy(scales.data(), scales.data() + scales.size(), scale_ptr);
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
std::pair<bool, LoDTensor> std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor(
const LoDTensor& wx_tensor, const LoDTensor& wh_tensor) const { const phi::DenseTensor& wx_tensor,
const phi::DenseTensor& wh_tensor) const {
check_tensor(wx_tensor); check_tensor(wx_tensor);
check_tensor(wh_tensor); check_tensor(wh_tensor);
...@@ -494,16 +496,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor( ...@@ -494,16 +496,17 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChGRUScalingFactor(
transform(scale_ur.begin(), scale_ur.end(), scale_ur.begin(), [](float& c) { transform(scale_ur.begin(), scale_ur.end(), scale_ur.begin(), [](float& c) {
return 1 / c; return 1 / c;
}); });
LoDTensor scale_tensor = CreateScaleTensor(scale_ur.size()); phi::DenseTensor scale_tensor = CreateScaleTensor(scale_ur.size());
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace()); auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scale_ur.begin(), scale_ur.end(), scale_ptr); std::copy(scale_ur.begin(), scale_ur.end(), scale_ptr);
bool is_unsigned = false; bool is_unsigned = false;
return std::make_pair(is_unsigned, scale_tensor); return std::make_pair(is_unsigned, scale_tensor);
} }
std::pair<bool, LoDTensor> std::pair<bool, phi::DenseTensor>
AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor( AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor(
const LoDTensor& wx_tensor, const LoDTensor& wh_tensor) const { const phi::DenseTensor& wx_tensor,
const phi::DenseTensor& wh_tensor) const {
check_tensor(wx_tensor); check_tensor(wx_tensor);
check_tensor(wh_tensor); check_tensor(wh_tensor);
...@@ -530,7 +533,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor( ...@@ -530,7 +533,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChLSTMScalingFactor(
transform(scale.begin(), scale.end(), scale.begin(), [](float& c) { transform(scale.begin(), scale.end(), scale.begin(), [](float& c) {
return 1 / c; return 1 / c;
}); });
LoDTensor scale_tensor = CreateScaleTensor(scale.size()); phi::DenseTensor scale_tensor = CreateScaleTensor(scale.size());
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace()); auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
std::copy(scale.begin(), scale.end(), scale_ptr); std::copy(scale.begin(), scale.end(), scale_ptr);
bool is_unsigned = false; bool is_unsigned = false;
......
...@@ -24,8 +24,6 @@ namespace paddle { ...@@ -24,8 +24,6 @@ namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
using LoDTensor = phi::DenseTensor;
/* /*
* Convert Input from Fluid to TensorRT Engine. * Convert Input from Fluid to TensorRT Engine.
* Convert Output from TensorRT Engine to Fluid. * Convert Output from TensorRT Engine to Fluid.
...@@ -38,13 +36,17 @@ class EngineIOConverter { ...@@ -38,13 +36,17 @@ class EngineIOConverter {
public: public:
EngineIOConverter() {} EngineIOConverter() {}
virtual void operator()(const LoDTensor& in, void* out, size_t max_size) {} virtual void operator()(const phi::DenseTensor& in,
virtual void operator()(const void* in, LoDTensor* out, size_t max_size) {} void* out,
size_t max_size) {}
virtual void operator()(const void* in,
phi::DenseTensor* out,
size_t max_size) {}
void SetStream(cudaStream_t* stream) { stream_ = stream; } void SetStream(cudaStream_t* stream) { stream_ = stream; }
static void ConvertInput(const std::string& op_type, static void ConvertInput(const std::string& op_type,
const LoDTensor& in, const phi::DenseTensor& in,
void* out, void* out,
size_t max_size, size_t max_size,
cudaStream_t* stream) { cudaStream_t* stream) {
...@@ -63,7 +65,7 @@ class EngineIOConverter { ...@@ -63,7 +65,7 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, static void ConvertOutput(const std::string& op_type,
const void* in, const void* in,
LoDTensor* out, phi::DenseTensor* out,
size_t max_size, size_t max_size,
cudaStream_t* stream) { cudaStream_t* stream) {
PADDLE_ENFORCE_NOT_NULL(stream, PADDLE_ENFORCE_NOT_NULL(stream,
......
...@@ -30,8 +30,6 @@ limitations under the License. */ ...@@ -30,8 +30,6 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
using float16 = paddle::platform::float16; using float16 = paddle::platform::float16;
using Tensor = phi::DenseTensor;
using LoDTensor = phi::DenseTensor;
using Scope = paddle::framework::Scope; using Scope = paddle::framework::Scope;
using OpDesc = paddle::framework::OpDesc; using OpDesc = paddle::framework::OpDesc;
using Graph = paddle::framework::ir::Graph; using Graph = paddle::framework::ir::Graph;
......
...@@ -28,7 +28,6 @@ limitations under the License. */ ...@@ -28,7 +28,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout; using DataLayout = phi::DataLayout;
using NPUAttribute = framework::NPUAttribute; using NPUAttribute = framework::NPUAttribute;
using NPUAttributeMap = framework::NPUAttributeMap; using NPUAttributeMap = framework::NPUAttributeMap;
...@@ -39,8 +38,8 @@ class NpuOpRunner { ...@@ -39,8 +38,8 @@ class NpuOpRunner {
NpuOpRunner(); NpuOpRunner();
explicit NpuOpRunner(const std::string &op_type); explicit NpuOpRunner(const std::string &op_type);
NpuOpRunner(const std::string &op_type, NpuOpRunner(const std::string &op_type,
const std::vector<Tensor> &inputs = {}, const std::vector<phi::DenseTensor> &inputs = {},
const std::vector<Tensor> &outputs = {}, const std::vector<phi::DenseTensor> &outputs = {},
const NPUAttributeMap &attrs = {}); const NPUAttributeMap &attrs = {});
// NOTE(zhiqiu): why forbid copy and operator= ? // NOTE(zhiqiu): why forbid copy and operator= ?
...@@ -67,12 +66,12 @@ class NpuOpRunner { ...@@ -67,12 +66,12 @@ class NpuOpRunner {
NpuOpRunner &AddAttrs(const NPUAttributeMap &attrs); NpuOpRunner &AddAttrs(const NPUAttributeMap &attrs);
NpuOpRunner &AddInput(const Tensor &tensor); NpuOpRunner &AddInput(const phi::DenseTensor &tensor);
// NOTE(zhiqiu): CANN-5.0.2 support input tensors on host. // NOTE(zhiqiu): CANN-5.0.2 support input tensors on host.
// Specifically, the tensor of shape, tensor of dims, etc, which are small // Specifically, the tensor of shape, tensor of dims, etc, which are small
// vector/list. // vector/list.
NpuOpRunner &AddInput(const Tensor &tensor, aclMemType mem_type); NpuOpRunner &AddInput(const phi::DenseTensor &tensor, aclMemType mem_type);
NpuOpRunner &AddInput(std::vector<int32_t> &&dims); NpuOpRunner &AddInput(std::vector<int32_t> &&dims);
...@@ -82,13 +81,13 @@ class NpuOpRunner { ...@@ -82,13 +81,13 @@ class NpuOpRunner {
NpuOpRunner &AddInput(std::vector<double> &&values); NpuOpRunner &AddInput(std::vector<double> &&values);
NpuOpRunner &AddOutput(const Tensor &tensor); NpuOpRunner &AddOutput(const phi::DenseTensor &tensor);
NpuOpRunner &AddInputs(const std::vector<Tensor> &tensors); NpuOpRunner &AddInputs(const std::vector<phi::DenseTensor> &tensors);
NpuOpRunner &AddInputNames(const std::vector<std::string> &names); NpuOpRunner &AddInputNames(const std::vector<std::string> &names);
NpuOpRunner &AddOutputs(const std::vector<Tensor> &tensors); NpuOpRunner &AddOutputs(const std::vector<phi::DenseTensor> &tensors);
aclTensorDesc *GetInputDesc(size_t index); aclTensorDesc *GetInputDesc(size_t index);
...@@ -105,21 +104,21 @@ class NpuOpRunner { ...@@ -105,21 +104,21 @@ class NpuOpRunner {
void Run(aclrtStream stream = nullptr) const; void Run(aclrtStream stream = nullptr) const;
static void TypeAdapter( static void TypeAdapter(
const std::vector<Tensor> &inputs, const std::vector<phi::DenseTensor> &inputs,
const std::vector<Tensor> &outputs, const std::vector<phi::DenseTensor> &outputs,
const NPUAttributeMap &attrs, const NPUAttributeMap &attrs,
const platform::NPUDeviceContext &dev_ctx, const platform::NPUDeviceContext &dev_ctx,
std::function<void(const std::vector<Tensor> &, std::function<void(const std::vector<phi::DenseTensor> &,
const std::vector<Tensor> &, const std::vector<phi::DenseTensor> &,
const NPUAttributeMap &, const NPUAttributeMap &,
const platform::NPUDeviceContext &)> op_runner, const platform::NPUDeviceContext &)> op_runner,
const std::vector<framework::proto::VarType::Type> &input_type, const std::vector<framework::proto::VarType::Type> &input_type,
const std::vector<framework::proto::VarType::Type> &output_type); const std::vector<framework::proto::VarType::Type> &output_type);
private: private:
aclTensorDesc *CreateTensorDesc(Tensor tensor, aclTensorDesc *CreateTensorDesc(phi::DenseTensor tensor,
aclMemType mem_type = ACL_MEMTYPE_DEVICE); aclMemType mem_type = ACL_MEMTYPE_DEVICE);
aclDataBuffer *CreateDataBuffer(Tensor tensor); aclDataBuffer *CreateDataBuffer(phi::DenseTensor tensor);
private: private:
std::string op_type_; std::string op_type_;
...@@ -127,7 +126,7 @@ class NpuOpRunner { ...@@ -127,7 +126,7 @@ class NpuOpRunner {
std::vector<aclDataBuffer *> output_buffers_; std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_; std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_; std::vector<aclTensorDesc *> output_descs_;
std::vector<Tensor> host_tensors_; std::vector<phi::DenseTensor> host_tensors_;
aclopAttr *attr_{nullptr}; aclopAttr *attr_{nullptr};
}; };
...@@ -136,7 +135,7 @@ aclDataType ConvertToNpuDtype(framework::proto::VarType::Type dtype); ...@@ -136,7 +135,7 @@ aclDataType ConvertToNpuDtype(framework::proto::VarType::Type dtype);
aclrtStream GetCurrentNPUStream(int device_id = -1); aclrtStream GetCurrentNPUStream(int device_id = -1);
template <typename T> template <typename T>
void FillNpuTensorWithConstant(Tensor *tensor, T val) { void FillNpuTensorWithConstant(phi::DenseTensor *tensor, T val) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
tensor->IsInitialized(), tensor->IsInitialized(),
true, true,
...@@ -148,7 +147,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { ...@@ -148,7 +147,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) {
int numel = tensor->numel(); int numel = tensor->numel();
if (numel == 1) { if (numel == 1) {
Tensor npu_pinned_tensor(tensor->dtype()); phi::DenseTensor npu_pinned_tensor(tensor->dtype());
platform::NPUPinnedPlace npu_pinned_place; platform::NPUPinnedPlace npu_pinned_place;
auto npu_pinned_ptr = auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data<T>({1}, npu_pinned_place); npu_pinned_tensor.mutable_data<T>({1}, npu_pinned_place);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册