From 8784ec65cc4ebf8fdfce9d78b294165341f348ad Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 20 Jan 2022 10:19:19 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90PTen=E3=80=91Remove=20code=20of=20conv?= =?UTF-8?q?erting=20Tensor=20to=20DensoeTensor=20(#38926)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove MakePtenTensor in BuildKernelContext * fix a bug caused by storage * remove WriteBackOutput in dynamic and static mode * fix complie error of std::max * fix complie error of std::max * fix date_type bug * fix memory alloc bug * add some debug info * fix compile problem * fix problem of data_type check * comment out some unreached code --- paddle/fluid/framework/operator.cc | 93 +++++++++++++++-- paddle/fluid/framework/operator.h | 8 ++ paddle/fluid/framework/pten_utils.cc | 32 ++++-- paddle/fluid/framework/pten_utils.h | 3 + paddle/fluid/imperative/prepared_operator.cc | 96 +++++++++++------- paddle/pten/api/lib/utils.cc | 4 +- paddle/pten/api/lib/utils/tensor_utils.cc | 100 ++----------------- paddle/pten/api/lib/utils/tensor_utils.h | 15 +-- paddle/pten/common/data_type.h | 1 + paddle/pten/core/dense_tensor.cc | 3 +- paddle/pten/core/kernel_context.cc | 43 +++----- paddle/pten/core/kernel_context.h | 35 +++---- paddle/pten/kernels/gpu/copy_kernel.cu | 6 +- paddle/pten/tests/api/scale_api.h | 4 +- 14 files changed, 231 insertions(+), 212 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index ea45ef857df..e69a6c2e88c 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1192,9 +1192,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope, platform::EventRole::kInnerOp); if (run_pten_kernel_) { pten::KernelContext pt_kernel_context; + // Do data transform before building KernelContext + PreparePtenData(exec_scope, *pt_kernel_, *pt_kernel_signature_, + runtime_ctx); BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context); (*pt_kernel_)(&pt_kernel_context); - WriteBackToOutputs(runtime_ctx, &pt_kernel_context); } else { (*kernel_func_)( ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx)); @@ -1786,6 +1788,62 @@ KernelSignature OperatorWithKernel::GetExpectedPtenKernelArgs( pten::TransToPtenKernelName(Type())); } +Scope* OperatorWithKernel::PreparePtenData( + const Scope& scope, const pten::Kernel& pt_kernel, + const KernelSignature& pt_kernel_signature, RuntimeContext* ctx) const { + auto& input_names = std::get<0>(pt_kernel_signature.args); + auto input_defs = pt_kernel.args_def().input_defs(); + PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(), + platform::errors::InvalidArgument( + "The size of inputs_args names (%d) must be equal to " + "the size of kernel input_defs (%d).", + input_names.size(), input_defs.size())); + Scope* new_scope = nullptr; + for (size_t i = 0; i < input_defs.size(); ++i) { + auto& in_def = input_defs.at(i); + auto& ins_vector = ctx->inputs.at(input_names[i]); + for (size_t offset = 0; offset < ins_vector.size(); ++offset) { + // Only tensor can be tranfer to another device. + auto* var = ins_vector[offset]; + if (var == nullptr || !VarIsTensor(*var)) { + continue; + } + + auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var); + if (!tensor_in->IsInitialized()) { + continue; + } + + auto expected_place = pten::TransToFluidPlace(in_def.backend); + if (platform::is_same_place(tensor_in->place(), expected_place)) { + continue; + } + + // TODO(zyfncg): Now there is no kernel which need to transform input + // data, so we commented out following code temporarily, + // and it will be used in the future. + + // VLOG(3) << "PTen Transform Variable " << input_names[i] << " from " + // << tensor_in->place() << " to " << expected_place; + + // if (!new_scope) { + // new_scope = &scope.NewScope(); + // } + + // // Create new var with the same name in transfer scopes + // auto* trans_var = new_scope->Var(input_names[i]); + // ins_vector[i] = trans_var; + + // // Do transfer + // Tensor out; + // framework::TensorCopySync(*tensor_in, expected_place, &out); + // SetTensorToVariable(*var, out, trans_var); + } + } + + return new_scope; +} + void OperatorWithKernel::BuildPtenKernelContext( const RuntimeContext& ctx, platform::DeviceContext* dev_ctx, pten::KernelContext* pt_kernel_context) const { @@ -1818,7 +1876,6 @@ void OperatorWithKernel::BuildPtenKernelContext( attr_names.size(), attr_defs.size())); for (size_t i = 0; i < input_names.size(); ++i) { - auto& in_def = input_defs.at(i); auto& ins_vector = ctx.inputs.at(input_names[i]); // calcute the start and end index of the input tensors @@ -1827,14 +1884,22 @@ void OperatorWithKernel::BuildPtenKernelContext( size_t end_idx = start_idx + ins_vector.size(); for (size_t offset = 0; offset < ins_vector.size(); ++offset) { - pt_kernel_context->EmplaceBackInputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar(*ins_vector[offset], in_def)); + const framework::Tensor* tensor_in = nullptr; + auto* var = ins_vector[offset]; + if (var->IsType()) { + tensor_in = &(var->Get()); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported input `%s` type when call pt kernel.", + framework::ToTypeName(var->Type()))); + } // TODO(zyfncg): Add support for SelectedRows + + pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in); } pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i); } for (size_t i = 0; i < output_names.size(); ++i) { - auto& out_def = output_defs.at(i); auto& outs_vector = ctx.outputs.at(output_names[i]); size_t start_idx = @@ -1842,9 +1907,21 @@ void OperatorWithKernel::BuildPtenKernelContext( size_t end_idx = start_idx + outs_vector.size(); for (size_t offset = 0; offset < outs_vector.size(); ++offset) { - pt_kernel_context->EmplaceBackOutputWithoutSetRange( - experimental::MakePtenTensorBaseFromVar(outs_vector[offset], - out_def)); + framework::Tensor* tensor_out = nullptr; + auto* var = outs_vector[offset]; + if (var->template IsType()) { + tensor_out = var->template GetMutable(); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported output `%s` type when call pt kernel.", + framework::ToTypeName(var->Type()))); + } // TODO(zyfncg): Add support for SelectedRows + + experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i)); + SetAllocationForOutputTenosr( + tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend)); + + pt_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out); } // Deal with the case that some outputs are NULL when run the kernel. diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 3aab9165eae..ad84dbc9be6 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -588,6 +588,14 @@ class OperatorWithKernel : public OperatorBase { /* member functions for adapting to pten lib */ void ChoosePtenKernel(const ExecutionContext& ctx) const; + /** + * Transfer data place for pten kernel + * Is this really needed? + */ + Scope* PreparePtenData(const Scope& scope, const pten::Kernel& pt_kernel, + const KernelSignature& pt_kernel_signature, + RuntimeContext* ctx) const; + void BuildPtenKernelContext(const RuntimeContext& ctx, platform::DeviceContext* dev_ctx, pten::KernelContext* pt_kernel_context) const; diff --git a/paddle/fluid/framework/pten_utils.cc b/paddle/fluid/framework/pten_utils.cc index dddcd914ed2..4e33e641cf1 100644 --- a/paddle/fluid/framework/pten_utils.cc +++ b/paddle/fluid/framework/pten_utils.cc @@ -137,17 +137,17 @@ KernelArgsNameMakerByOpProto::GetInputArgsNames() { auto& in = op_proto_->inputs()[i]; auto& in_name = in.name(); if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) { - VLOG(3) << "Parse PtenKernel input: skip extra & quant input - " + VLOG(6) << "Parse PtenKernel input: skip extra & quant input - " << in_name; continue; } // If contains dispensable input, we should override the // GetExpectedPtenKernelArgs method self if (in.has_dispensable() && in.dispensable()) { - VLOG(3) << "Parse PtenKernel input: skip dispensable input - " << in_name; + VLOG(6) << "Parse PtenKernel input: skip dispensable input - " << in_name; continue; } - VLOG(3) << "Parse PtenKernel input: " << in_name; + VLOG(6) << "Parse PtenKernel input: " << in_name; input_names_.emplace_back(in_name); } return input_names_; @@ -159,7 +159,7 @@ KernelArgsNameMakerByOpProto::GetOutputArgsNames() { auto& out = op_proto_->outputs()[i]; auto& out_name = out.name(); // TODO(chenweihang): outputs also need skip some cases - VLOG(3) << "Parse PtenKernel output: " << out_name; + VLOG(6) << "Parse PtenKernel output: " << out_name; output_names_.emplace_back(out_name); } return output_names_; @@ -173,17 +173,17 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() { if (attr_name == "use_mkldnn" || attr_name == "op_role" || attr_name == "op_role_var" || attr_name == "op_namescope" || attr_name == "op_callstack" || attr_name == "op_device") { - VLOG(3) << "Parse PtenKernel attribute: skip needless attr - " + VLOG(6) << "Parse PtenKernel attribute: skip needless attr - " << attr_name; continue; } if ((attr.has_extra() && attr.extra()) || (attr.has_quant() && attr.quant())) { - VLOG(3) << "Parse PtenKernel attribute: skip extra & quant attr - " + VLOG(6) << "Parse PtenKernel attribute: skip extra & quant attr - " << attr_name; continue; } - VLOG(3) << "Parse PtenKernel attribute: " << attr_name; + VLOG(6) << "Parse PtenKernel attribute: " << attr_name; attr_names_.emplace_back(attr_name); } @@ -196,5 +196,23 @@ KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() { GetOutputArgsNames()); } +void SetAllocationForOutputTenosr(pten::DenseTensor* tensor, + const platform::Place& place) { + if (!tensor->IsInitialized() || !(tensor->place() == place)) { + int dtype_size = tensor->dtype() == DataType::UNDEFINED + ? 0 + : experimental::SizeOf(tensor->dtype()); + int64_t numels = product(tensor->dims()); + numels = numels < 0 ? 0 : numels; + auto tmp_allocation_ptr = memory::Alloc(place, numels * dtype_size); + auto& deleter = tmp_allocation_ptr.get_deleter(); + auto* allocation_ptr = tmp_allocation_ptr.release(); + auto shared_allocation = + std::shared_ptr(allocation_ptr, deleter); + + tensor->ResetHolder(shared_allocation); + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/pten_utils.h b/paddle/fluid/framework/pten_utils.h index 09d96045949..8bbd4f7f3c9 100644 --- a/paddle/fluid/framework/pten_utils.h +++ b/paddle/fluid/framework/pten_utils.h @@ -72,5 +72,8 @@ class KernelArgsNameMaker { virtual const paddle::SmallVector& GetAttrsArgsNames() = 0; }; +void SetAllocationForOutputTenosr(pten::DenseTensor* tensor, + const platform::Place& place); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 6474f3c07fa..bb08191af98 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -258,6 +258,49 @@ PreparedOp PreparedOp::Prepare(const NameVarMap& ins, default_attrs); } +template +void PreparePtenData(const pten::Kernel& pt_kernel, + const framework::KernelSignature& pt_kernel_signature, + const NameVarMap& ins) { + auto& input_names = std::get<0>(pt_kernel_signature.args); + auto& input_defs = pt_kernel.args_def().input_defs(); + + PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(), + platform::errors::InvalidArgument( + "the size of inputs_args names (%d) must be equal to " + "the size of kernel input_defs (%d).", + input_names.size(), input_defs.size())); + + for (size_t i = 0; i < input_names.size(); ++i) { + auto& in_def = input_defs.at(i); + auto& ins_vector = ins.at(input_names[i]); + + for (size_t offset = 0; offset < ins_vector.size(); ++offset) { + auto var_base = ins_vector[offset]; + const auto* tensor_in = GetTensorFromVar(var_base->Var()); + if (tensor_in && tensor_in->IsInitialized()) { + auto expected_place = pten::TransToFluidPlace(in_def.backend); + if (platform::is_same_place(tensor_in->place(), expected_place)) { + continue; + } + + // TODO(zyfncg): Now there is no kernel which need to transform input + // data, so we commented out following code temporarily, + // and it will be used in the future. + + // VLOG(3) << "Pten Transform Variable " << var_base->Name() << " from " + // << tensor_in->place() << " to " << expected_place; + + // framework::Tensor tmp_tensor; + // framework::TensorCopySync(*tensor_in, expected_place, &tmp_tensor); + + // SetTensorToVariable(var_base->Var(), tmp_tensor, + // var_base->MutableVar()); + } + } + } +} + template static void BuildDygraphPtenKernelContext( const framework::KernelSignature& pt_kernel_signature, @@ -294,23 +337,19 @@ static void BuildDygraphPtenKernelContext( attr_names.size(), attr_defs.size())); for (size_t i = 0; i < input_names.size(); ++i) { - auto& in_def = input_defs.at(i); auto& ins_vector = ins.at(input_names[i]); size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second); size_t end_idx = start_idx + ins_vector.size(); for (size_t offset = 0; offset < ins_vector.size(); ++offset) { - const auto& variable = ins_vector[offset]->Var(); - kernel_ctx->EmplaceBackInputWithoutSetRange( - paddle::experimental::MakePtenTensorBaseFromVar(variable, in_def)); + const auto* tensor_in = GetTensorFromVar(ins_vector[offset]->Var()); + kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in); } kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i); } for (size_t i = 0; i < output_names.size(); ++i) { - auto& out_def = output_defs.at(i); - size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second); auto iter = outs.find(output_names[i]); @@ -325,9 +364,21 @@ static void BuildDygraphPtenKernelContext( size_t end_idx = start_idx + outs_vector.size(); for (size_t offset = 0; offset < outs_vector.size(); ++offset) { - kernel_ctx->EmplaceBackOutputWithoutSetRange( - paddle::experimental::MakePtenTensorBaseFromVar( - outs_vector[offset]->MutableVar(), out_def)); + auto* var = outs_vector[offset]->MutableVar(); + framework::Tensor* tensor_out = nullptr; + if (var->template IsType()) { + tensor_out = var->template GetMutable(); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported output `%s` type when call pt kernel.", + framework::ToTypeName(var->Type()))); + } // TODO(zyfncg): Add support for SelectedRows + + experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i)); + framework::SetAllocationForOutputTenosr( + tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend)); + + kernel_ctx->EmplaceBackOutputWithoutSetRange(tensor_out); } kernel_ctx->AssignOutputRange(std::make_pair(start_idx, end_idx), i); } @@ -430,29 +481,6 @@ static void BuildDygraphPtenKernelContext( } } -template -static void WriteBackToOutputs( - const framework::KernelSignature& pt_kernel_signature, - const NameVarMap& outs, pten::KernelContext* kernel_ctx) { - auto& output_names = std::get<2>(pt_kernel_signature.args); - - for (size_t i = 0; i < output_names.size(); ++i) { - auto iter = outs.find(output_names[i]); - if (iter != outs.end()) { - auto& outs_vector = iter->second; - - auto& range_pair = kernel_ctx->OutputRangeAt(i); - auto pten_outs = kernel_ctx->MutableOutputBetween( - range_pair.first, range_pair.second); - - for (size_t j = 0; j < pten_outs.size(); ++j) { - experimental::MakeVariableFromPtenTensor(pten_outs[j], - outs_vector[j]->MutableVar()); - } - } - } -} - template static void PreparedOpRunImpl( const framework::OperatorBase& op, const framework::RuntimeContext& ctx, @@ -514,6 +542,8 @@ static void PreparedOpRunPtImpl( &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type); op.Info().infer_shape_(&infer_shape_ctx); + PreparePtenData(pt_kernel, pt_kernel_signature, ins); + pten::KernelContext pt_kernel_context; BuildDygraphPtenKernelContext(pt_kernel_signature, pt_kernel, ins, outs, attrs, default_attrs, dev_ctx, @@ -529,8 +559,6 @@ static void PreparedOpRunPtImpl( #endif } - WriteBackToOutputs(pt_kernel_signature, outs, &pt_kernel_context); - // TODO(chenweihang): add debug flags later if (framework::IsComplexType(kernel_type.data_type_)) { HandleComplexGradToRealGrad(outs); diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc index 6eb1e5a3797..f42f3b37f0a 100644 --- a/paddle/pten/api/lib/utils.cc +++ b/paddle/pten/api/lib/utils.cc @@ -54,7 +54,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { // 3. Auto data transform auto dense_x = std::dynamic_pointer_cast(x.impl()); - kernel_context.EmplaceBackInput(dense_x); + kernel_context.EmplaceBackInput(dense_x.get()); kernel_context.EmplaceBackAttr(blocking); // 4. InferMeta @@ -65,7 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { pten::make_intrusive( pten::TransToFluidPlace(backend)), std::move(out_meta)); - kernel_context.EmplaceBackOutput(dense_out); + kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 93b1957fe14..1420810007d 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -355,98 +355,6 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, dst); } -void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { - VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef."; - auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); - meta->dims = src.dims(); - meta->dtype = arg_def.dtype; - meta->layout = src.layout(); - meta->offset = src.offset(); - - if (src.IsInitialized() && - src.place() == pten::TransToFluidPlace(arg_def.backend)) { - dst->ResetHolder(src.Holder()); - } else { - // This does not affect the correctness, and will be modified immediately. - // dst->mutable_data(pten::TransToFluidPlace(arg_def.backend)); - } -} - -void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { - auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); - SetLoD(&meta->lod, src.lod()); - ReMakePtenDenseTensorByArgDefBase( - static_cast(src), arg_def, dst); -} - -void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { - auto expected_place = pten::TransToFluidPlace(arg_def.backend); - if (variable.IsType()) { - const auto& tensor = variable.Get(); - // check input dtype before ReMakePtenDenseTensor - PADDLE_ENFORCE( - (arg_def.dtype == pten::TransToPtenDataType(tensor.type())), - paddle::platform::errors::InvalidArgument( - "The type of input data is diffrent from the type of the " - "argument's definition in kernel.")); - if (!platform::is_same_place(tensor.place(), expected_place)) { - framework::LoDTensor tmp_tensor; - framework::TensorCopySync(tensor, expected_place, &tmp_tensor); - ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst); - } else { - ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst); - } - } else if (variable.IsType()) { - // TODO(chenweihang): now we don't deal with row and height - // by xiaowei's advice - const auto& tensor = variable.Get(); - PADDLE_ENFORCE( - (arg_def.dtype == pten::TransToPtenDataType(tensor.value().type())), - paddle::platform::errors::InvalidArgument( - "The type of input data is diffrent from the type of the " - "argument's definition in kernel.")); - if (!platform::is_same_place(tensor.value().place(), expected_place)) { - framework::Tensor tmp_tensor; - paddle::framework::TensorCopySync( - tensor.value(), expected_place, &tmp_tensor); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design - ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst); - } else { - ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst); - } - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared input `%s` type now when call pt kernel.", - framework::ToTypeName(variable.Type()))); - } -} - -void ReMakePtenDenseTensorFromVar(framework::Variable* variable, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { - // mutable_data before run kernel, to avoid share output form - // KernelContext to original tensor - if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst); - } else if (variable->template IsType()) { - auto* tensor = variable->template GetMutable(); - // TODO(chenweihang): adapt SelectedRows by xiaowei's design, - // here the row and height will lost in output! - ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst); - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported shared output `%s` type now when call pt kernel.", - framework::ToTypeName(variable->Type()))); - } -} - static bool IsSameAllocation(const std::shared_ptr& a, const std::shared_ptr& b) { return a->ptr() == b->ptr() && a->size() == b->size() && @@ -489,5 +397,13 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src, } } +void ResetTensorByArgDef(pten::DenseTensor* dst, + const pten::TensorArgDef& arg_def) { + VLOG(5) << "ResetTensor by TensorArgDef."; + auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); + meta->dtype = arg_def.dtype; + meta->layout = arg_def.layout; +} + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/utils/tensor_utils.h b/paddle/pten/api/lib/utils/tensor_utils.h index 0ac4ac7a331..04c3f0e912b 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.h +++ b/paddle/pten/api/lib/utils/tensor_utils.h @@ -67,20 +67,11 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, pten::DenseTensor* dst); -void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst); - -void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst); - -void ReMakePtenDenseTensorFromVar(framework::Variable* variable, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst); - void MakeVariableFromPtenTensor(pten::DenseTensor* src, framework::Variable* variable); +void ResetTensorByArgDef(pten::DenseTensor* dst, + const pten::TensorArgDef& arg_def); + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/common/data_type.h b/paddle/pten/common/data_type.h index a00d68c5354..306507d2d2b 100644 --- a/paddle/pten/common/data_type.h +++ b/paddle/pten/common/data_type.h @@ -73,6 +73,7 @@ inline size_t SizeOf(DataType data_type) { case DataType::COMPLEX128: return 16; case DataType::UNDEFINED: + return 0; case DataType::NUM_DATA_TYPES: PD_THROW("Data type `", static_cast(data_type), diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 06531fe8bfd..cfe2cfa03ea 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -113,7 +113,8 @@ void* DenseTensor::mutable_data(size_t request_bytes) { bytes)); bytes = request_bytes; } - if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) { + if (!storage_->data() || storage_->size() < bytes + meta_.offset || + storage_->size() == 0) { VLOG(10) << "mutbale data realloc, original size: " << storage_->size() << ", new size: " << bytes; storage_->Realloc(bytes); diff --git a/paddle/pten/core/kernel_context.cc b/paddle/pten/core/kernel_context.cc index 74bd6d17f06..34e9fabbe67 100644 --- a/paddle/pten/core/kernel_context.cc +++ b/paddle/pten/core/kernel_context.cc @@ -16,20 +16,19 @@ namespace pten { -void KernelContext::EmplaceBackInput(std::shared_ptr input) { +void KernelContext::EmplaceBackInput(const TensorBase* input) { int index = inputs_.size(); - inputs_.emplace_back(std::move(input)); + inputs_.emplace_back(input); // Record the start and end index of the input input_range_.emplace_back(std::pair(index, index + 1)); } -void KernelContext::EmplaceBackInputWithoutSetRange( - std::shared_ptr input) { - inputs_.emplace_back(std::move(input)); +void KernelContext::EmplaceBackInputWithoutSetRange(const TensorBase* input) { + inputs_.emplace_back(input); } void KernelContext::EmplaceBackInputs( - paddle::SmallVector> inputs) { + paddle::SmallVector inputs) { int index = inputs_.size(); // Record the start and end index of the input input_range_.emplace_back(std::pair(index, index + inputs.size())); @@ -38,25 +37,23 @@ void KernelContext::EmplaceBackInputs( std::make_move_iterator(inputs.end())); } -void KernelContext::EmplaceBackOutput(std::shared_ptr output) { +void KernelContext::EmplaceBackOutput(TensorBase* output) { int index = outputs_.size(); - outputs_.emplace_back(std::move(output)); + outputs_.emplace_back(output); // Record the start and end index of the input output_range_.emplace_back(std::pair(index, index + 1)); } -void KernelContext::EmplaceBackOutputWithoutSetRange( - std::shared_ptr output) { - outputs_.emplace_back(std::move(output)); +void KernelContext::EmplaceBackOutputWithoutSetRange(TensorBase* output) { + outputs_.emplace_back(output); } -void KernelContext::SetOutputWithoutSetRange( - int index, std::shared_ptr output) { - outputs_.at(index) = std::move(output); +void KernelContext::SetOutputWithoutSetRange(int index, TensorBase* output) { + outputs_.at(index) = output; } void KernelContext::EmplaceBackOutputs( - paddle::SmallVector> outputs) { + paddle::SmallVector outputs) { int index = outputs_.size(); // Record the start and end index of the input output_range_.emplace_back( @@ -116,19 +113,5 @@ std::pair& KernelContext::MutableOutputRangeAt(size_t idx) { // Temporary method: For compatible with fluid Tensor and improve performance // Only deal with DenseTensor now -void KernelContext::ClearData() { - for (auto& in : inputs_) { - if (in) { - CompatibleDenseTensorUtils::ClearStorage( - static_cast(in.get())); - } - } - for (auto& out : outputs_) { - if (out) { - CompatibleDenseTensorUtils::ClearStorage( - static_cast(out.get())); - } - } - attrs_.clear(); -} +void KernelContext::ClearData() { attrs_.clear(); } } // namespace pten diff --git a/paddle/pten/core/kernel_context.h b/paddle/pten/core/kernel_context.h index b6cc15c084a..5559b348aa1 100644 --- a/paddle/pten/core/kernel_context.h +++ b/paddle/pten/core/kernel_context.h @@ -51,21 +51,19 @@ class KernelContext { return static_cast(*dev_ctx_); } - void EmplaceBackInput(std::shared_ptr input); + void EmplaceBackInput(const TensorBase* input); - void EmplaceBackInputWithoutSetRange(std::shared_ptr input); + void EmplaceBackInputWithoutSetRange(const TensorBase* input); - void EmplaceBackInputs( - paddle::SmallVector> inputs); + void EmplaceBackInputs(paddle::SmallVector inputs); - void EmplaceBackOutput(std::shared_ptr output); + void EmplaceBackOutput(TensorBase* output); - void EmplaceBackOutputWithoutSetRange(std::shared_ptr output); + void EmplaceBackOutputWithoutSetRange(TensorBase* output); - void SetOutputWithoutSetRange(int index, std::shared_ptr output); + void EmplaceBackOutputs(paddle::SmallVector outputs); - void EmplaceBackOutputs( - paddle::SmallVector> outputs); + void SetOutputWithoutSetRange(int index, TensorBase* output); void EmplaceBackAttr(paddle::any attr); @@ -90,16 +88,12 @@ class KernelContext { : paddle::optional{paddle::none}; } - std::shared_ptr& MutableInputPtrAt(size_t idx) { - return inputs_.at(idx); - } - template std::vector MoveInputsBetween(size_t start, size_t end) { std::vector v; for (size_t i = start; i < end; ++i) { auto t = std::dynamic_pointer_cast(inputs_.at(i)); - v.emplace_back(std::move(*t.get())); + v.emplace_back(*t); inputs_.at(i) = nullptr; } return v; @@ -109,21 +103,16 @@ class KernelContext { void AssignOutputRange(std::pair&& range, size_t idx); - template - TensorType* MutableInputAt(size_t idx) { - return static_cast(inputs_.at(idx).get()); - } - template TensorType* MutableOutputAt(size_t idx) { - return static_cast(outputs_.at(idx).get()); + return static_cast(outputs_.at(idx)); } template std::vector MutableOutputBetween(size_t start, size_t end) { std::vector v; for (size_t i = start; i < end; ++i) { - v.emplace_back(static_cast(outputs_.at(i).get())); + v.emplace_back(static_cast(outputs_.at(i))); } return v; @@ -153,8 +142,8 @@ class KernelContext { // TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope` // Note: can't use API Tensor here, the inference don't use this API Tensor - paddle::SmallVector> inputs_; - paddle::SmallVector> outputs_; + paddle::SmallVector inputs_; + paddle::SmallVector outputs_; paddle::SmallVector attrs_; // Only contains input like list[Tensor] need `range` diff --git a/paddle/pten/kernels/gpu/copy_kernel.cu b/paddle/pten/kernels/gpu/copy_kernel.cu index 10b2aa415d4..1f7a08e8254 100644 --- a/paddle/pten/kernels/gpu/copy_kernel.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -31,7 +31,7 @@ void Copy(const Context& dev_ctx, DenseTensor* dst) { auto* src_ptr = src.data(); const auto& src_place = src.place(); - const auto& dst_place = dst->place(); + auto dst_place = dst->place(); if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) { PADDLE_THROW(paddle::platform::errors::InvalidArgument( @@ -51,6 +51,7 @@ void Copy(const Context& dev_ctx, return; } VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; + CHECK(dst->layout() == src.layout()); auto size = src.numel() * @@ -208,6 +209,9 @@ void Copy(const Context& dev_ctx, "Context place dose not match the source and destination place.")); } } + } else { + PADDLE_THROW(paddle::platform::errors::InvalidArgument( + "Place type error. Please check the place of src and dst Tensor.")); } } diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h index 41143826c45..0ba1d6a0e3f 100644 --- a/paddle/pten/tests/api/scale_api.h +++ b/paddle/pten/tests/api/scale_api.h @@ -62,7 +62,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, auto kernel_context = pten::KernelContext(dev_ctx); auto dense_x = std::dynamic_pointer_cast(x.impl()); - kernel_context.EmplaceBackInput(dense_x); + kernel_context.EmplaceBackInput(dense_x.get()); kernel_context.EmplaceBackAttr(pten::Scalar(scale)); kernel_context.EmplaceBackAttr(bias); @@ -73,7 +73,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x, pten::make_intrusive( pten::TransToFluidPlace(kernel_backend)), std::move(out_meta)); - kernel_context.EmplaceBackOutput(dense_out); + kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); -- GitLab