未验证 提交 8784ec65 编写于 作者: Z zyfncg 提交者: GitHub

【PTen】Remove code of converting Tensor to DenseTensor (#38926)

* remove MakePtenTensor in BuildKernelContext

* fix a bug caused by storage

* remove WriteBackOutput in dynamic and static mode

* fix compile error of std::max

* fix compile error of std::max

* fix data_type bug

* fix memory alloc bug

* add some debug info

* fix compile problem

* fix problem of data_type check

* comment out some unreached code
上级 90e9233a
...@@ -1192,9 +1192,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -1192,9 +1192,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
platform::EventRole::kInnerOp); platform::EventRole::kInnerOp);
if (run_pten_kernel_) { if (run_pten_kernel_) {
pten::KernelContext pt_kernel_context; pten::KernelContext pt_kernel_context;
// Do data transform before building KernelContext
PreparePtenData(exec_scope, *pt_kernel_, *pt_kernel_signature_,
runtime_ctx);
BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context); BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context);
(*pt_kernel_)(&pt_kernel_context); (*pt_kernel_)(&pt_kernel_context);
WriteBackToOutputs(runtime_ctx, &pt_kernel_context);
} else { } else {
(*kernel_func_)( (*kernel_func_)(
ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx)); ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx));
...@@ -1786,6 +1788,62 @@ KernelSignature OperatorWithKernel::GetExpectedPtenKernelArgs( ...@@ -1786,6 +1788,62 @@ KernelSignature OperatorWithKernel::GetExpectedPtenKernelArgs(
pten::TransToPtenKernelName(Type())); pten::TransToPtenKernelName(Type()));
} }
// Prepare (i.e. potentially device-transfer) the inputs of a pten kernel
// before the KernelContext is built: for every tensor input whose current
// place differs from the place implied by the kernel's input defs, the data
// would be copied into a child scope. The transfer itself is currently
// disabled (see TODO below), so this always returns nullptr for now.
Scope* OperatorWithKernel::PreparePtenData(
    const Scope& scope, const pten::Kernel& pt_kernel,
    const KernelSignature& pt_kernel_signature, RuntimeContext* ctx) const {
  auto& input_names = std::get<0>(pt_kernel_signature.args);
  auto input_defs = pt_kernel.args_def().input_defs();
  // Every kernel argument definition must correspond to one input name.
  PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
                    platform::errors::InvalidArgument(
                        "The size of inputs_args names (%d) must be equal to "
                        "the size of kernel input_defs (%d).",
                        input_names.size(), input_defs.size()));

  Scope* new_scope = nullptr;
  for (size_t i = 0; i < input_defs.size(); ++i) {
    auto& in_def = input_defs.at(i);
    auto& ins_vector = ctx->inputs.at(input_names[i]);
    for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
      // Only tensor can be transferred to another device.
      auto* var = ins_vector[offset];
      if (var == nullptr || !VarIsTensor(*var)) {
        continue;
      }

      auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
      if (!tensor_in->IsInitialized()) {
        continue;
      }

      // Nothing to do when the data already lives on the expected place.
      auto expected_place = pten::TransToFluidPlace(in_def.backend);
      if (platform::is_same_place(tensor_in->place(), expected_place)) {
        continue;
      }

      // TODO(zyfncg): Now there is no kernel which need to transform input
      // data, so we commented out following code temporarily,
      // and it will be used in the future.

      // VLOG(3) << "PTen Transform Variable " << input_names[i] << " from "
      //         << tensor_in->place() << " to " << expected_place;

      // if (!new_scope) {
      //   new_scope = &scope.NewScope();
      // }

      // // Create new var with the same name in transfer scopes
      // auto* trans_var = new_scope->Var(input_names[i]);
      // NOTE(review): when re-enabled, this should index by `offset`, not
      // `i` — `i` is the argument index, `offset` the position within this
      // argument's input vector.
      // ins_vector[i] = trans_var;

      // // Do transfer
      // Tensor out;
      // framework::TensorCopySync(*tensor_in, expected_place, &out);
      // SetTensorToVariable(*var, out, trans_var);
    }
  }

  return new_scope;
}
void OperatorWithKernel::BuildPtenKernelContext( void OperatorWithKernel::BuildPtenKernelContext(
const RuntimeContext& ctx, platform::DeviceContext* dev_ctx, const RuntimeContext& ctx, platform::DeviceContext* dev_ctx,
pten::KernelContext* pt_kernel_context) const { pten::KernelContext* pt_kernel_context) const {
...@@ -1818,7 +1876,6 @@ void OperatorWithKernel::BuildPtenKernelContext( ...@@ -1818,7 +1876,6 @@ void OperatorWithKernel::BuildPtenKernelContext(
attr_names.size(), attr_defs.size())); attr_names.size(), attr_defs.size()));
for (size_t i = 0; i < input_names.size(); ++i) { for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ctx.inputs.at(input_names[i]); auto& ins_vector = ctx.inputs.at(input_names[i]);
// calcute the start and end index of the input tensors // calcute the start and end index of the input tensors
...@@ -1827,14 +1884,22 @@ void OperatorWithKernel::BuildPtenKernelContext( ...@@ -1827,14 +1884,22 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + ins_vector.size(); size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) { for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
pt_kernel_context->EmplaceBackInputWithoutSetRange( const framework::Tensor* tensor_in = nullptr;
experimental::MakePtenTensorBaseFromVar(*ins_vector[offset], in_def)); auto* var = ins_vector[offset];
if (var->IsType<framework::LoDTensor>()) {
tensor_in = &(var->Get<framework::LoDTensor>());
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
} }
pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i); pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i);
} }
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
auto& out_def = output_defs.at(i);
auto& outs_vector = ctx.outputs.at(output_names[i]); auto& outs_vector = ctx.outputs.at(output_names[i]);
size_t start_idx = size_t start_idx =
...@@ -1842,9 +1907,21 @@ void OperatorWithKernel::BuildPtenKernelContext( ...@@ -1842,9 +1907,21 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + outs_vector.size(); size_t end_idx = start_idx + outs_vector.size();
for (size_t offset = 0; offset < outs_vector.size(); ++offset) { for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
pt_kernel_context->EmplaceBackOutputWithoutSetRange( framework::Tensor* tensor_out = nullptr;
experimental::MakePtenTensorBaseFromVar(outs_vector[offset], auto* var = outs_vector[offset];
out_def)); if (var->template IsType<framework::LoDTensor>()) {
tensor_out = var->template GetMutable<framework::LoDTensor>();
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
pt_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
} }
// Deal with the case that some outputs are NULL when run the kernel. // Deal with the case that some outputs are NULL when run the kernel.
......
...@@ -588,6 +588,14 @@ class OperatorWithKernel : public OperatorBase { ...@@ -588,6 +588,14 @@ class OperatorWithKernel : public OperatorBase {
/* member functions for adapting to pten lib */ /* member functions for adapting to pten lib */
void ChoosePtenKernel(const ExecutionContext& ctx) const; void ChoosePtenKernel(const ExecutionContext& ctx) const;
/**
* Transfer data place for pten kernel
* Is this really needed?
*/
Scope* PreparePtenData(const Scope& scope, const pten::Kernel& pt_kernel,
const KernelSignature& pt_kernel_signature,
RuntimeContext* ctx) const;
void BuildPtenKernelContext(const RuntimeContext& ctx, void BuildPtenKernelContext(const RuntimeContext& ctx,
platform::DeviceContext* dev_ctx, platform::DeviceContext* dev_ctx,
pten::KernelContext* pt_kernel_context) const; pten::KernelContext* pt_kernel_context) const;
......
...@@ -137,17 +137,17 @@ KernelArgsNameMakerByOpProto::GetInputArgsNames() { ...@@ -137,17 +137,17 @@ KernelArgsNameMakerByOpProto::GetInputArgsNames() {
auto& in = op_proto_->inputs()[i]; auto& in = op_proto_->inputs()[i];
auto& in_name = in.name(); auto& in_name = in.name();
if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) { if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) {
VLOG(3) << "Parse PtenKernel input: skip extra & quant input - " VLOG(6) << "Parse PtenKernel input: skip extra & quant input - "
<< in_name; << in_name;
continue; continue;
} }
// If contains dispensable input, we should override the // If contains dispensable input, we should override the
// GetExpectedPtenKernelArgs method self // GetExpectedPtenKernelArgs method self
if (in.has_dispensable() && in.dispensable()) { if (in.has_dispensable() && in.dispensable()) {
VLOG(3) << "Parse PtenKernel input: skip dispensable input - " << in_name; VLOG(6) << "Parse PtenKernel input: skip dispensable input - " << in_name;
continue; continue;
} }
VLOG(3) << "Parse PtenKernel input: " << in_name; VLOG(6) << "Parse PtenKernel input: " << in_name;
input_names_.emplace_back(in_name); input_names_.emplace_back(in_name);
} }
return input_names_; return input_names_;
...@@ -159,7 +159,7 @@ KernelArgsNameMakerByOpProto::GetOutputArgsNames() { ...@@ -159,7 +159,7 @@ KernelArgsNameMakerByOpProto::GetOutputArgsNames() {
auto& out = op_proto_->outputs()[i]; auto& out = op_proto_->outputs()[i];
auto& out_name = out.name(); auto& out_name = out.name();
// TODO(chenweihang): outputs also need skip some cases // TODO(chenweihang): outputs also need skip some cases
VLOG(3) << "Parse PtenKernel output: " << out_name; VLOG(6) << "Parse PtenKernel output: " << out_name;
output_names_.emplace_back(out_name); output_names_.emplace_back(out_name);
} }
return output_names_; return output_names_;
...@@ -173,17 +173,17 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() { ...@@ -173,17 +173,17 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
if (attr_name == "use_mkldnn" || attr_name == "op_role" || if (attr_name == "use_mkldnn" || attr_name == "op_role" ||
attr_name == "op_role_var" || attr_name == "op_namescope" || attr_name == "op_role_var" || attr_name == "op_namescope" ||
attr_name == "op_callstack" || attr_name == "op_device") { attr_name == "op_callstack" || attr_name == "op_device") {
VLOG(3) << "Parse PtenKernel attribute: skip needless attr - " VLOG(6) << "Parse PtenKernel attribute: skip needless attr - "
<< attr_name; << attr_name;
continue; continue;
} }
if ((attr.has_extra() && attr.extra()) || if ((attr.has_extra() && attr.extra()) ||
(attr.has_quant() && attr.quant())) { (attr.has_quant() && attr.quant())) {
VLOG(3) << "Parse PtenKernel attribute: skip extra & quant attr - " VLOG(6) << "Parse PtenKernel attribute: skip extra & quant attr - "
<< attr_name; << attr_name;
continue; continue;
} }
VLOG(3) << "Parse PtenKernel attribute: " << attr_name; VLOG(6) << "Parse PtenKernel attribute: " << attr_name;
attr_names_.emplace_back(attr_name); attr_names_.emplace_back(attr_name);
} }
...@@ -196,5 +196,23 @@ KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() { ...@@ -196,5 +196,23 @@ KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
GetOutputArgsNames()); GetOutputArgsNames());
} }
// Ensure `tensor` owns an allocation residing on `place` that matches its
// current dims/dtype. When the tensor is already initialized on `place`,
// the existing holder is kept; otherwise a fresh buffer is allocated on
// `place` and installed as the tensor's holder.
void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
                                  const platform::Place& place) {
  // Fast path: already backed by memory on the requested place.
  if (tensor->IsInitialized() && tensor->place() == place) {
    return;
  }

  // An UNDEFINED dtype contributes zero bytes per element.
  int dtype_size = tensor->dtype() == DataType::UNDEFINED
                       ? 0
                       : experimental::SizeOf(tensor->dtype());
  // A negative element count (presumably from placeholder dims — confirm)
  // is clamped to zero so the allocation size stays non-negative.
  int64_t numel = product(tensor->dims());
  if (numel < 0) {
    numel = 0;
  }

  // Move ownership of the new buffer into a shared_ptr that carries the
  // allocator's own deleter, then hand it to the tensor.
  auto raw_allocation = memory::Alloc(place, numel * dtype_size);
  auto& alloc_deleter = raw_allocation.get_deleter();
  std::shared_ptr<pten::Allocation> holder(raw_allocation.release(),
                                           alloc_deleter);
  tensor->ResetHolder(holder);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -72,5 +72,8 @@ class KernelArgsNameMaker { ...@@ -72,5 +72,8 @@ class KernelArgsNameMaker {
virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0; virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0;
}; };
void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
const platform::Place& place);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -258,6 +258,49 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<VariableWrapper>& ins, ...@@ -258,6 +258,49 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<VariableWrapper>& ins,
default_attrs); default_attrs);
} }
// Dygraph counterpart of OperatorWithKernel::PreparePtenData: for every
// initialized tensor input whose place differs from the place implied by the
// kernel's input defs, the data would be copied to the expected place.
// The transfer itself is currently disabled (see TODO below), so this is
// effectively a no-op for now.
template <typename VarType>
void PreparePtenData(const pten::Kernel& pt_kernel,
                     const framework::KernelSignature& pt_kernel_signature,
                     const NameVarMap<VarType>& ins) {
  auto& input_names = std::get<0>(pt_kernel_signature.args);
  auto& input_defs = pt_kernel.args_def().input_defs();
  // Every kernel argument definition must correspond to one input name.
  PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
                    platform::errors::InvalidArgument(
                        "the size of inputs_args names (%d) must be equal to "
                        "the size of kernel input_defs (%d).",
                        input_names.size(), input_defs.size()));

  for (size_t i = 0; i < input_names.size(); ++i) {
    auto& in_def = input_defs.at(i);
    auto& ins_vector = ins.at(input_names[i]);
    for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
      auto var_base = ins_vector[offset];
      const auto* tensor_in = GetTensorFromVar(var_base->Var());
      // Variables without an initialized tensor need no placement work.
      if (tensor_in && tensor_in->IsInitialized()) {
        auto expected_place = pten::TransToFluidPlace(in_def.backend);
        // Nothing to do when the data already lives on the expected place.
        if (platform::is_same_place(tensor_in->place(), expected_place)) {
          continue;
        }

        // TODO(zyfncg): Now there is no kernel which need to transform input
        // data, so we commented out following code temporarily,
        // and it will be used in the future.

        // VLOG(3) << "Pten Transform Variable " << var_base->Name() << " from "
        //         << tensor_in->place() << " to " << expected_place;

        // framework::Tensor tmp_tensor;
        // framework::TensorCopySync(*tensor_in, expected_place, &tmp_tensor);

        // SetTensorToVariable(var_base->Var(), tmp_tensor,
        //                     var_base->MutableVar());
      }
    }
  }
}
template <typename VarType> template <typename VarType>
static void BuildDygraphPtenKernelContext( static void BuildDygraphPtenKernelContext(
const framework::KernelSignature& pt_kernel_signature, const framework::KernelSignature& pt_kernel_signature,
...@@ -294,23 +337,19 @@ static void BuildDygraphPtenKernelContext( ...@@ -294,23 +337,19 @@ static void BuildDygraphPtenKernelContext(
attr_names.size(), attr_defs.size())); attr_names.size(), attr_defs.size()));
for (size_t i = 0; i < input_names.size(); ++i) { for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ins.at(input_names[i]); auto& ins_vector = ins.at(input_names[i]);
size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second); size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second);
size_t end_idx = start_idx + ins_vector.size(); size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) { for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
const auto& variable = ins_vector[offset]->Var(); const auto* tensor_in = GetTensorFromVar(ins_vector[offset]->Var());
kernel_ctx->EmplaceBackInputWithoutSetRange( kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
paddle::experimental::MakePtenTensorBaseFromVar(variable, in_def));
} }
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i); kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
} }
for (size_t i = 0; i < output_names.size(); ++i) { for (size_t i = 0; i < output_names.size(); ++i) {
auto& out_def = output_defs.at(i);
size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second); size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second);
auto iter = outs.find(output_names[i]); auto iter = outs.find(output_names[i]);
...@@ -325,9 +364,21 @@ static void BuildDygraphPtenKernelContext( ...@@ -325,9 +364,21 @@ static void BuildDygraphPtenKernelContext(
size_t end_idx = start_idx + outs_vector.size(); size_t end_idx = start_idx + outs_vector.size();
for (size_t offset = 0; offset < outs_vector.size(); ++offset) { for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
kernel_ctx->EmplaceBackOutputWithoutSetRange( auto* var = outs_vector[offset]->MutableVar();
paddle::experimental::MakePtenTensorBaseFromVar( framework::Tensor* tensor_out = nullptr;
outs_vector[offset]->MutableVar(), out_def)); if (var->template IsType<framework::LoDTensor>()) {
tensor_out = var->template GetMutable<framework::LoDTensor>();
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
framework::SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
kernel_ctx->EmplaceBackOutputWithoutSetRange(tensor_out);
} }
kernel_ctx->AssignOutputRange(std::make_pair(start_idx, end_idx), i); kernel_ctx->AssignOutputRange(std::make_pair(start_idx, end_idx), i);
} }
...@@ -430,29 +481,6 @@ static void BuildDygraphPtenKernelContext( ...@@ -430,29 +481,6 @@ static void BuildDygraphPtenKernelContext(
} }
} }
// Copy the pten kernel's output DenseTensors back into the corresponding
// fluid output Variables after the kernel has run (dygraph mode).
template <typename VarType>
static void WriteBackToOutputs(
    const framework::KernelSignature& pt_kernel_signature,
    const NameVarMap<VarType>& outs, pten::KernelContext* kernel_ctx) {
  auto& output_names = std::get<2>(pt_kernel_signature.args);

  for (size_t i = 0; i < output_names.size(); ++i) {
    auto iter = outs.find(output_names[i]);
    // Output names missing from `outs` are simply skipped.
    if (iter != outs.end()) {
      auto& outs_vector = iter->second;

      // [first, second) is the slot range this output argument occupies in
      // the kernel context.
      auto& range_pair = kernel_ctx->OutputRangeAt(i);
      auto pten_outs = kernel_ctx->MutableOutputBetween<pten::DenseTensor>(
          range_pair.first, range_pair.second);

      for (size_t j = 0; j < pten_outs.size(); ++j) {
        experimental::MakeVariableFromPtenTensor(pten_outs[j],
                                                 outs_vector[j]->MutableVar());
      }
    }
  }
}
template <typename VarType> template <typename VarType>
static void PreparedOpRunImpl( static void PreparedOpRunImpl(
const framework::OperatorBase& op, const framework::RuntimeContext& ctx, const framework::OperatorBase& op, const framework::RuntimeContext& ctx,
...@@ -514,6 +542,8 @@ static void PreparedOpRunPtImpl( ...@@ -514,6 +542,8 @@ static void PreparedOpRunPtImpl(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type); &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx); op.Info().infer_shape_(&infer_shape_ctx);
PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);
pten::KernelContext pt_kernel_context; pten::KernelContext pt_kernel_context;
BuildDygraphPtenKernelContext<VarType>(pt_kernel_signature, pt_kernel, ins, BuildDygraphPtenKernelContext<VarType>(pt_kernel_signature, pt_kernel, ins,
outs, attrs, default_attrs, dev_ctx, outs, attrs, default_attrs, dev_ctx,
...@@ -529,8 +559,6 @@ static void PreparedOpRunPtImpl( ...@@ -529,8 +559,6 @@ static void PreparedOpRunPtImpl(
#endif #endif
} }
WriteBackToOutputs<VarType>(pt_kernel_signature, outs, &pt_kernel_context);
// TODO(chenweihang): add debug flags later // TODO(chenweihang): add debug flags later
if (framework::IsComplexType(kernel_type.data_type_)) { if (framework::IsComplexType(kernel_type.data_type_)) {
HandleComplexGradToRealGrad<VarType>(outs); HandleComplexGradToRealGrad<VarType>(outs);
......
...@@ -54,7 +54,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { ...@@ -54,7 +54,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
// 3. Auto data transform // 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl()); auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x); kernel_context.EmplaceBackInput(dense_x.get());
kernel_context.EmplaceBackAttr(blocking); kernel_context.EmplaceBackAttr(blocking);
// 4. InferMeta // 4. InferMeta
...@@ -65,7 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { ...@@ -65,7 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(backend)), pten::TransToFluidPlace(backend)),
std::move(out_meta)); std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out); kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out; Tensor out;
out.set_impl(dense_out); out.set_impl(dense_out);
......
...@@ -355,98 +355,6 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, ...@@ -355,98 +355,6 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
dst); dst);
} }
// Rebuild `dst`'s meta from `src` (dims/layout/offset), taking the dtype
// from the kernel argument definition. The underlying allocation is shared
// with `src` only when `src` is initialized on the place the arg def
// expects; otherwise the holder is left as-is (see comment below).
void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src,
                                       const pten::TensorArgDef& arg_def,
                                       pten::DenseTensor* dst) {
  VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef.";
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  meta->dims = src.dims();
  // dtype comes from the kernel definition, not from `src`.
  meta->dtype = arg_def.dtype;
  meta->layout = src.layout();
  meta->offset = src.offset();

  if (src.IsInitialized() &&
      src.place() == pten::TransToFluidPlace(arg_def.backend)) {
    dst->ResetHolder(src.Holder());
  } else {
    // This does not affect the correctness, and will be modified immediately.
    // dst->mutable_data(pten::TransToFluidPlace(arg_def.backend));
  }
}
// Like ReMakePtenDenseTensorByArgDefBase, but additionally carries over the
// LoD information from `src` into `dst`'s meta.
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
                                   const pten::TensorArgDef& arg_def,
                                   pten::DenseTensor* dst) {
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  SetLoD(&meta->lod, src.lod());
  // `src` is already a framework::Tensor, so the previous
  // static_cast<const paddle::framework::Tensor&> was a no-op and is dropped.
  ReMakePtenDenseTensorByArgDefBase(src, arg_def, dst);
}
// Rebuild `dst` as a pten DenseTensor from the tensor held by `variable`
// (either a LoDTensor or the value of a SelectedRows). If the data does not
// live on the place implied by `arg_def.backend`, it is first copied there
// synchronously. Throws Unimplemented for any other variable type, and
// InvalidArgument when the variable's dtype does not match `arg_def`.
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
                                  const pten::TensorArgDef& arg_def,
                                  pten::DenseTensor* dst) {
  auto expected_place = pten::TransToFluidPlace(arg_def.backend);

  if (variable.IsType<framework::LoDTensor>()) {
    const auto& tensor = variable.Get<framework::LoDTensor>();
    // check input dtype before ReMakePtenDenseTensor
    // (error message typo fixed: "diffrent" -> "different")
    PADDLE_ENFORCE(
        (arg_def.dtype == pten::TransToPtenDataType(tensor.type())),
        paddle::platform::errors::InvalidArgument(
            "The type of input data is different from the type of the "
            "argument's definition in kernel."));
    if (!platform::is_same_place(tensor.place(), expected_place)) {
      framework::LoDTensor tmp_tensor;
      framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
      ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
    } else {
      ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst);
    }
  } else if (variable.IsType<framework::SelectedRows>()) {
    // TODO(chenweihang): now we don't deal with row and height
    // by xiaowei's advice
    const auto& tensor = variable.Get<framework::SelectedRows>();
    PADDLE_ENFORCE(
        (arg_def.dtype == pten::TransToPtenDataType(tensor.value().type())),
        paddle::platform::errors::InvalidArgument(
            "The type of input data is different from the type of the "
            "argument's definition in kernel."));
    if (!platform::is_same_place(tensor.value().place(), expected_place)) {
      framework::Tensor tmp_tensor;
      paddle::framework::TensorCopySync(
          tensor.value(), expected_place, &tmp_tensor);
      // TODO(chenweihang): adapt SelectedRows by xiaowei's design
      ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
    } else {
      ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst);
    }
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported shared input `%s` type now when call pt kernel.",
        framework::ToTypeName(variable.Type())));
  }
}
// Rebuild `dst` as a pten DenseTensor over the mutable tensor held by
// `variable` (LoDTensor or SelectedRows value). Throws Unimplemented for
// any other variable type.
void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
                                  const pten::TensorArgDef& arg_def,
                                  pten::DenseTensor* dst) {
  // Called before running the kernel, to avoid sharing the output from the
  // KernelContext with the original tensor.
  if (variable->template IsType<framework::LoDTensor>()) {
    auto* tensor = variable->template GetMutable<framework::LoDTensor>();
    ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst);
  } else if (variable->template IsType<framework::SelectedRows>()) {
    auto* tensor = variable->template GetMutable<framework::SelectedRows>();
    // TODO(chenweihang): adapt SelectedRows by xiaowei's design,
    // here the row and height will lost in output!
    ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst);
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported shared output `%s` type now when call pt kernel.",
        framework::ToTypeName(variable->Type())));
  }
}
static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a, static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a,
const std::shared_ptr<memory::Allocation>& b) { const std::shared_ptr<memory::Allocation>& b) {
return a->ptr() == b->ptr() && a->size() == b->size() && return a->ptr() == b->ptr() && a->size() == b->size() &&
...@@ -489,5 +397,13 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src, ...@@ -489,5 +397,13 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src,
} }
} }
// Reset only the dtype and layout of `dst`'s meta to match the kernel
// argument definition; dims, lod and the data holder are left untouched.
void ResetTensorByArgDef(pten::DenseTensor* dst,
                         const pten::TensorArgDef& arg_def) {
  VLOG(5) << "ResetTensor by TensorArgDef.";
  auto* tensor_meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  tensor_meta->layout = arg_def.layout;
  tensor_meta->dtype = arg_def.dtype;
}
} // namespace experimental } // namespace experimental
} // namespace paddle } // namespace paddle
...@@ -67,20 +67,11 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); ...@@ -67,20 +67,11 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst); pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void MakeVariableFromPtenTensor(pten::DenseTensor* src, void MakeVariableFromPtenTensor(pten::DenseTensor* src,
framework::Variable* variable); framework::Variable* variable);
void ResetTensorByArgDef(pten::DenseTensor* dst,
const pten::TensorArgDef& arg_def);
} // namespace experimental } // namespace experimental
} // namespace paddle } // namespace paddle
...@@ -73,6 +73,7 @@ inline size_t SizeOf(DataType data_type) { ...@@ -73,6 +73,7 @@ inline size_t SizeOf(DataType data_type) {
case DataType::COMPLEX128: case DataType::COMPLEX128:
return 16; return 16;
case DataType::UNDEFINED: case DataType::UNDEFINED:
return 0;
case DataType::NUM_DATA_TYPES: case DataType::NUM_DATA_TYPES:
PD_THROW("Data type `", PD_THROW("Data type `",
static_cast<int>(data_type), static_cast<int>(data_type),
......
...@@ -113,7 +113,8 @@ void* DenseTensor::mutable_data(size_t request_bytes) { ...@@ -113,7 +113,8 @@ void* DenseTensor::mutable_data(size_t request_bytes) {
bytes)); bytes));
bytes = request_bytes; bytes = request_bytes;
} }
if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) { if (!storage_->data() || storage_->size() < bytes + meta_.offset ||
storage_->size() == 0) {
VLOG(10) << "mutbale data realloc, original size: " << storage_->size() VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
<< ", new size: " << bytes; << ", new size: " << bytes;
storage_->Realloc(bytes); storage_->Realloc(bytes);
......
...@@ -16,20 +16,19 @@ ...@@ -16,20 +16,19 @@
namespace pten { namespace pten {
void KernelContext::EmplaceBackInput(std::shared_ptr<TensorBase> input) { void KernelContext::EmplaceBackInput(const TensorBase* input) {
int index = inputs_.size(); int index = inputs_.size();
inputs_.emplace_back(std::move(input)); inputs_.emplace_back(input);
// Record the start and end index of the input // Record the start and end index of the input
input_range_.emplace_back(std::pair<int, int>(index, index + 1)); input_range_.emplace_back(std::pair<int, int>(index, index + 1));
} }
void KernelContext::EmplaceBackInputWithoutSetRange( void KernelContext::EmplaceBackInputWithoutSetRange(const TensorBase* input) {
std::shared_ptr<TensorBase> input) { inputs_.emplace_back(input);
inputs_.emplace_back(std::move(input));
} }
void KernelContext::EmplaceBackInputs( void KernelContext::EmplaceBackInputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs) { paddle::SmallVector<const TensorBase*> inputs) {
int index = inputs_.size(); int index = inputs_.size();
// Record the start and end index of the input // Record the start and end index of the input
input_range_.emplace_back(std::pair<int, int>(index, index + inputs.size())); input_range_.emplace_back(std::pair<int, int>(index, index + inputs.size()));
...@@ -38,25 +37,23 @@ void KernelContext::EmplaceBackInputs( ...@@ -38,25 +37,23 @@ void KernelContext::EmplaceBackInputs(
std::make_move_iterator(inputs.end())); std::make_move_iterator(inputs.end()));
} }
void KernelContext::EmplaceBackOutput(std::shared_ptr<TensorBase> output) { void KernelContext::EmplaceBackOutput(TensorBase* output) {
int index = outputs_.size(); int index = outputs_.size();
outputs_.emplace_back(std::move(output)); outputs_.emplace_back(output);
// Record the start and end index of the input // Record the start and end index of the input
output_range_.emplace_back(std::pair<int, int>(index, index + 1)); output_range_.emplace_back(std::pair<int, int>(index, index + 1));
} }
void KernelContext::EmplaceBackOutputWithoutSetRange( void KernelContext::EmplaceBackOutputWithoutSetRange(TensorBase* output) {
std::shared_ptr<TensorBase> output) { outputs_.emplace_back(output);
outputs_.emplace_back(std::move(output));
} }
void KernelContext::SetOutputWithoutSetRange( void KernelContext::SetOutputWithoutSetRange(int index, TensorBase* output) {
int index, std::shared_ptr<TensorBase> output) { outputs_.at(index) = output;
outputs_.at(index) = std::move(output);
} }
void KernelContext::EmplaceBackOutputs( void KernelContext::EmplaceBackOutputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs) { paddle::SmallVector<TensorBase*> outputs) {
int index = outputs_.size(); int index = outputs_.size();
// Record the start and end index of the input // Record the start and end index of the input
output_range_.emplace_back( output_range_.emplace_back(
...@@ -116,19 +113,5 @@ std::pair<int, int>& KernelContext::MutableOutputRangeAt(size_t idx) { ...@@ -116,19 +113,5 @@ std::pair<int, int>& KernelContext::MutableOutputRangeAt(size_t idx) {
// Temporary method: For compatible with fluid Tensor and improve performance // Temporary method: For compatible with fluid Tensor and improve performance
// Only deal with DenseTensor now // Only deal with DenseTensor now
void KernelContext::ClearData() { void KernelContext::ClearData() { attrs_.clear(); }
for (auto& in : inputs_) {
if (in) {
CompatibleDenseTensorUtils::ClearStorage(
static_cast<DenseTensor*>(in.get()));
}
}
for (auto& out : outputs_) {
if (out) {
CompatibleDenseTensorUtils::ClearStorage(
static_cast<DenseTensor*>(out.get()));
}
}
attrs_.clear();
}
} // namespace pten } // namespace pten
...@@ -51,21 +51,19 @@ class KernelContext { ...@@ -51,21 +51,19 @@ class KernelContext {
return static_cast<const CtxType&>(*dev_ctx_); return static_cast<const CtxType&>(*dev_ctx_);
} }
void EmplaceBackInput(std::shared_ptr<TensorBase> input); void EmplaceBackInput(const TensorBase* input);
void EmplaceBackInputWithoutSetRange(std::shared_ptr<TensorBase> input); void EmplaceBackInputWithoutSetRange(const TensorBase* input);
void EmplaceBackInputs( void EmplaceBackInputs(paddle::SmallVector<const TensorBase*> inputs);
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs);
void EmplaceBackOutput(std::shared_ptr<TensorBase> output); void EmplaceBackOutput(TensorBase* output);
void EmplaceBackOutputWithoutSetRange(std::shared_ptr<TensorBase> output); void EmplaceBackOutputWithoutSetRange(TensorBase* output);
void SetOutputWithoutSetRange(int index, std::shared_ptr<TensorBase> output); void EmplaceBackOutputs(paddle::SmallVector<TensorBase*> outputs);
void EmplaceBackOutputs( void SetOutputWithoutSetRange(int index, TensorBase* output);
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs);
void EmplaceBackAttr(paddle::any attr); void EmplaceBackAttr(paddle::any attr);
...@@ -90,16 +88,12 @@ class KernelContext { ...@@ -90,16 +88,12 @@ class KernelContext {
: paddle::optional<const TensorType&>{paddle::none}; : paddle::optional<const TensorType&>{paddle::none};
} }
std::shared_ptr<TensorBase>& MutableInputPtrAt(size_t idx) {
return inputs_.at(idx);
}
template <typename TensorType> template <typename TensorType>
std::vector<TensorType> MoveInputsBetween(size_t start, size_t end) { std::vector<TensorType> MoveInputsBetween(size_t start, size_t end) {
std::vector<TensorType> v; std::vector<TensorType> v;
for (size_t i = start; i < end; ++i) { for (size_t i = start; i < end; ++i) {
auto t = std::dynamic_pointer_cast<TensorType>(inputs_.at(i)); auto t = std::dynamic_pointer_cast<TensorType>(inputs_.at(i));
v.emplace_back(std::move(*t.get())); v.emplace_back(*t);
inputs_.at(i) = nullptr; inputs_.at(i) = nullptr;
} }
return v; return v;
...@@ -109,21 +103,16 @@ class KernelContext { ...@@ -109,21 +103,16 @@ class KernelContext {
void AssignOutputRange(std::pair<int, int>&& range, size_t idx); void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
template <typename TensorType>
TensorType* MutableInputAt(size_t idx) {
return static_cast<TensorType*>(inputs_.at(idx).get());
}
template <typename TensorType> template <typename TensorType>
TensorType* MutableOutputAt(size_t idx) { TensorType* MutableOutputAt(size_t idx) {
return static_cast<TensorType*>(outputs_.at(idx).get()); return static_cast<TensorType*>(outputs_.at(idx));
} }
template <typename TensorType> template <typename TensorType>
std::vector<TensorType*> MutableOutputBetween(size_t start, size_t end) { std::vector<TensorType*> MutableOutputBetween(size_t start, size_t end) {
std::vector<TensorType*> v; std::vector<TensorType*> v;
for (size_t i = start; i < end; ++i) { for (size_t i = start; i < end; ++i) {
v.emplace_back(static_cast<TensorType*>(outputs_.at(i).get())); v.emplace_back(static_cast<TensorType*>(outputs_.at(i)));
} }
return v; return v;
...@@ -153,8 +142,8 @@ class KernelContext { ...@@ -153,8 +142,8 @@ class KernelContext {
// TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope` // TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope`
// Note: can't use API Tensor here, the inference don't use this API Tensor // Note: can't use API Tensor here, the inference don't use this API Tensor
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs_; paddle::SmallVector<const TensorBase*> inputs_;
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs_; paddle::SmallVector<TensorBase*> outputs_;
paddle::SmallVector<paddle::any> attrs_; paddle::SmallVector<paddle::any> attrs_;
// Only contains input like list[Tensor] need `range` // Only contains input like list[Tensor] need `range`
......
...@@ -31,7 +31,7 @@ void Copy(const Context& dev_ctx, ...@@ -31,7 +31,7 @@ void Copy(const Context& dev_ctx,
DenseTensor* dst) { DenseTensor* dst) {
auto* src_ptr = src.data(); auto* src_ptr = src.data();
const auto& src_place = src.place(); const auto& src_place = src.place();
const auto& dst_place = dst->place(); auto dst_place = dst->place();
if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) { if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) {
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(paddle::platform::errors::InvalidArgument(
...@@ -51,6 +51,7 @@ void Copy(const Context& dev_ctx, ...@@ -51,6 +51,7 @@ void Copy(const Context& dev_ctx,
return; return;
} }
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
CHECK(dst->layout() == src.layout()); CHECK(dst->layout() == src.layout());
auto size = src.numel() * auto size = src.numel() *
...@@ -208,6 +209,9 @@ void Copy(const Context& dev_ctx, ...@@ -208,6 +209,9 @@ void Copy(const Context& dev_ctx,
"Context place dose not match the source and destination place.")); "Context place dose not match the source and destination place."));
} }
} }
} else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Place type error. Please check the place of src and dst Tensor."));
} }
} }
......
...@@ -62,7 +62,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, ...@@ -62,7 +62,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
auto kernel_context = pten::KernelContext(dev_ctx); auto kernel_context = pten::KernelContext(dev_ctx);
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl()); auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x); kernel_context.EmplaceBackInput(dense_x.get());
kernel_context.EmplaceBackAttr(pten::Scalar(scale)); kernel_context.EmplaceBackAttr(pten::Scalar(scale));
kernel_context.EmplaceBackAttr(bias); kernel_context.EmplaceBackAttr(bias);
...@@ -73,7 +73,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, ...@@ -73,7 +73,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)), pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta)); std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out); kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out; Tensor out;
out.set_impl(dense_out); out.set_impl(dense_out);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册