Unverified commit 8784ec65 authored by zyfncg, committed by GitHub

[PTen] Remove code for converting Tensor to DenseTensor (#38926)

* remove MakePtenTensor in BuildKernelContext

* fix a bug caused by storage

* remove WriteBackOutput in dynamic and static mode

* fix compile error of std::max

* fix compile error of std::max

* fix data_type bug

* fix memory alloc bug

* add some debug info

* fix compile problem

* fix problem of data_type check

* comment out some unreached code
Parent 90e9233a
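At a high level, this commit stops converting framework Tensors into separately owned pten::DenseTensors when building a KernelContext: the context now borrows the tensors that already live in the scope's variables, so the write-back step after the kernel run can be removed in both static and dynamic mode. A minimal compilable sketch of the ownership shift, condensed from the KernelContext diffs below (types are placeholders, not the real classes):

#include <memory>
#include <vector>

struct TensorBase { virtual ~TensorBase() = default; };

// Before: the context held shared_ptrs to converted tensors, which later had
// to be written back to the original variables.
struct KernelContextBefore {
  std::vector<std::shared_ptr<TensorBase>> inputs;  // owning
};

// After: the context only borrows tensors that the framework variables own,
// so no conversion and no write-back are needed.
struct KernelContextAfter {
  std::vector<const TensorBase*> inputs;  // borrowing
};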
......@@ -1192,9 +1192,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
platform::EventRole::kInnerOp);
if (run_pten_kernel_) {
pten::KernelContext pt_kernel_context;
// Do data transform before building KernelContext
PreparePtenData(exec_scope, *pt_kernel_, *pt_kernel_signature_,
runtime_ctx);
BuildPtenKernelContext(*runtime_ctx, dev_ctx, &pt_kernel_context);
(*pt_kernel_)(&pt_kernel_context);
WriteBackToOutputs(runtime_ctx, &pt_kernel_context);
} else {
(*kernel_func_)(
ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx));
......@@ -1786,6 +1788,62 @@ KernelSignature OperatorWithKernel::GetExpectedPtenKernelArgs(
pten::TransToPtenKernelName(Type()));
}
Scope* OperatorWithKernel::PreparePtenData(
const Scope& scope, const pten::Kernel& pt_kernel,
const KernelSignature& pt_kernel_signature, RuntimeContext* ctx) const {
auto& input_names = std::get<0>(pt_kernel_signature.args);
auto input_defs = pt_kernel.args_def().input_defs();
PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
platform::errors::InvalidArgument(
"The size of inputs_args names (%d) must be equal to "
"the size of kernel input_defs (%d).",
input_names.size(), input_defs.size()));
Scope* new_scope = nullptr;
for (size_t i = 0; i < input_defs.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ctx->inputs.at(input_names[i]);
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
// Only tensors can be transferred to another device.
auto* var = ins_vector[offset];
if (var == nullptr || !VarIsTensor(*var)) {
continue;
}
auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
if (!tensor_in->IsInitialized()) {
continue;
}
auto expected_place = pten::TransToFluidPlace(in_def.backend);
if (platform::is_same_place(tensor_in->place(), expected_place)) {
continue;
}
// TODO(zyfncg): Currently no kernel needs to transform input
// data, so the following code is commented out temporarily;
// it will be used in the future.
// VLOG(3) << "PTen Transform Variable " << input_names[i] << " from "
// << tensor_in->place() << " to " << expected_place;
// if (!new_scope) {
// new_scope = &scope.NewScope();
// }
// // Create new var with the same name in transfer scopes
// auto* trans_var = new_scope->Var(input_names[i]);
// ins_vector[i] = trans_var;
// // Do transfer
// Tensor out;
// framework::TensorCopySync(*tensor_in, expected_place, &out);
// SetTensorToVariable(*var, out, trans_var);
}
}
return new_scope;
}
void OperatorWithKernel::BuildPtenKernelContext(
const RuntimeContext& ctx, platform::DeviceContext* dev_ctx,
pten::KernelContext* pt_kernel_context) const {
......@@ -1818,7 +1876,6 @@ void OperatorWithKernel::BuildPtenKernelContext(
attr_names.size(), attr_defs.size()));
for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ctx.inputs.at(input_names[i]);
// calculate the start and end index of the input tensors
......@@ -1827,14 +1884,22 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
pt_kernel_context->EmplaceBackInputWithoutSetRange(
experimental::MakePtenTensorBaseFromVar(*ins_vector[offset], in_def));
const framework::Tensor* tensor_in = nullptr;
auto* var = ins_vector[offset];
if (var->IsType<framework::LoDTensor>()) {
tensor_in = &(var->Get<framework::LoDTensor>());
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported input `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
}
pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i);
}
for (size_t i = 0; i < output_names.size(); ++i) {
auto& out_def = output_defs.at(i);
auto& outs_vector = ctx.outputs.at(output_names[i]);
size_t start_idx =
......@@ -1842,9 +1907,21 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + outs_vector.size();
for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
pt_kernel_context->EmplaceBackOutputWithoutSetRange(
experimental::MakePtenTensorBaseFromVar(outs_vector[offset],
out_def));
framework::Tensor* tensor_out = nullptr;
auto* var = outs_vector[offset];
if (var->template IsType<framework::LoDTensor>()) {
tensor_out = var->template GetMutable<framework::LoDTensor>();
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
pt_kernel_context->EmplaceBackOutputWithoutSetRange(tensor_out);
}
// Deal with the case that some outputs are NULL when running the kernel.
......
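For readers unfamiliar with the range bookkeeping above: all tensors of all inputs (or outputs) are packed into one flat list, and AssignInputRange records, per argument name, the half-open interval it occupies — start_idx is the previous argument's end, end_idx adds the current vector's size. A standalone restatement with hypothetical sizes (illustrative helper, not a Paddle API):

#include <cstddef>
#include <utility>
#include <vector>

// Illustrative restatement of the start_idx/end_idx bookkeeping used by
// BuildPtenKernelContext (the input sizes here are hypothetical).
std::vector<std::pair<size_t, size_t>> MakeRanges(
    const std::vector<size_t>& sizes) {
  std::vector<std::pair<size_t, size_t>> ranges;
  size_t start = 0;
  for (size_t n : sizes) {
    ranges.emplace_back(start, start + n);  // half-open [start, start + n)
    start += n;
  }
  return ranges;
}
// MakeRanges({3, 1}) yields {{0, 3}, {3, 4}}: argument 0 owns slots [0, 3)
// and argument 1 owns slot [3, 4) of the flat tensor list.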
......@@ -588,6 +588,14 @@ class OperatorWithKernel : public OperatorBase {
/* member functions for adapting to pten lib */
void ChoosePtenKernel(const ExecutionContext& ctx) const;
/**
* Transfer data place for pten kernel
* Is this really needed?
*/
Scope* PreparePtenData(const Scope& scope, const pten::Kernel& pt_kernel,
const KernelSignature& pt_kernel_signature,
RuntimeContext* ctx) const;
void BuildPtenKernelContext(const RuntimeContext& ctx,
platform::DeviceContext* dev_ctx,
pten::KernelContext* pt_kernel_context) const;
......
......@@ -137,17 +137,17 @@ KernelArgsNameMakerByOpProto::GetInputArgsNames() {
auto& in = op_proto_->inputs()[i];
auto& in_name = in.name();
if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) {
VLOG(3) << "Parse PtenKernel input: skip extra & quant input - "
VLOG(6) << "Parse PtenKernel input: skip extra & quant input - "
<< in_name;
continue;
}
// If it contains a dispensable input, we should override the
// GetExpectedPtenKernelArgs method ourselves
if (in.has_dispensable() && in.dispensable()) {
VLOG(3) << "Parse PtenKernel input: skip dispensable input - " << in_name;
VLOG(6) << "Parse PtenKernel input: skip dispensable input - " << in_name;
continue;
}
VLOG(3) << "Parse PtenKernel input: " << in_name;
VLOG(6) << "Parse PtenKernel input: " << in_name;
input_names_.emplace_back(in_name);
}
return input_names_;
......@@ -159,7 +159,7 @@ KernelArgsNameMakerByOpProto::GetOutputArgsNames() {
auto& out = op_proto_->outputs()[i];
auto& out_name = out.name();
// TODO(chenweihang): outputs also need to skip some cases
VLOG(3) << "Parse PtenKernel output: " << out_name;
VLOG(6) << "Parse PtenKernel output: " << out_name;
output_names_.emplace_back(out_name);
}
return output_names_;
......@@ -173,17 +173,17 @@ KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
if (attr_name == "use_mkldnn" || attr_name == "op_role" ||
attr_name == "op_role_var" || attr_name == "op_namescope" ||
attr_name == "op_callstack" || attr_name == "op_device") {
VLOG(3) << "Parse PtenKernel attribute: skip needless attr - "
VLOG(6) << "Parse PtenKernel attribute: skip needless attr - "
<< attr_name;
continue;
}
if ((attr.has_extra() && attr.extra()) ||
(attr.has_quant() && attr.quant())) {
VLOG(3) << "Parse PtenKernel attribute: skip extra & quant attr - "
VLOG(6) << "Parse PtenKernel attribute: skip extra & quant attr - "
<< attr_name;
continue;
}
VLOG(3) << "Parse PtenKernel attribute: " << attr_name;
VLOG(6) << "Parse PtenKernel attribute: " << attr_name;
attr_names_.emplace_back(attr_name);
}
......@@ -196,5 +196,23 @@ KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
GetOutputArgsNames());
}
void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
const platform::Place& place) {
if (!tensor->IsInitialized() || !(tensor->place() == place)) {
int dtype_size = tensor->dtype() == DataType::UNDEFINED
? 0
: experimental::SizeOf(tensor->dtype());
int64_t numels = product(tensor->dims());
numels = numels < 0 ? 0 : numels;
auto tmp_allocation_ptr = memory::Alloc(place, numels * dtype_size);
auto& deleter = tmp_allocation_ptr.get_deleter();
auto* allocation_ptr = tmp_allocation_ptr.release();
auto shared_allocation =
std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
tensor->ResetHolder(shared_allocation);
}
}
} // namespace framework
} // namespace paddle
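For reference, the allocation size requested above is the element count times the element size, where an UNDEFINED dtype contributes size 0 and a negative (not yet inferred) element count is clamped to 0, so incomplete metadata yields an empty allocation rather than an error. A standalone restatement of the sizing rule (illustrative helper, not a Paddle API):

#include <cstdint>
#include <vector>

// Mirrors the sizing logic of SetAllocationForOutputTenosr above:
// bytes = max(numel, 0) * dtype_size, with dtype_size == 0 for UNDEFINED.
int64_t AllocationBytes(const std::vector<int64_t>& dims, int64_t dtype_size) {
  int64_t numel = 1;
  for (int64_t d : dims) numel *= d;  // product(tensor->dims())
  if (numel < 0) numel = 0;           // dims may contain -1 before infer-shape
  return numel * dtype_size;
}
// e.g. AllocationBytes({2, 3}, 4) == 24 bytes for a float32 tensor of shape [2, 3].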
......@@ -72,5 +72,8 @@ class KernelArgsNameMaker {
virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0;
};
void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
const platform::Place& place);
} // namespace framework
} // namespace paddle
......@@ -258,6 +258,49 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<VariableWrapper>& ins,
default_attrs);
}
template <typename VarType>
void PreparePtenData(const pten::Kernel& pt_kernel,
const framework::KernelSignature& pt_kernel_signature,
const NameVarMap<VarType>& ins) {
auto& input_names = std::get<0>(pt_kernel_signature.args);
auto& input_defs = pt_kernel.args_def().input_defs();
PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
platform::errors::InvalidArgument(
"the size of inputs_args names (%d) must be equal to "
"the size of kernel input_defs (%d).",
input_names.size(), input_defs.size()));
for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ins.at(input_names[i]);
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
auto var_base = ins_vector[offset];
const auto* tensor_in = GetTensorFromVar(var_base->Var());
if (tensor_in && tensor_in->IsInitialized()) {
auto expected_place = pten::TransToFluidPlace(in_def.backend);
if (platform::is_same_place(tensor_in->place(), expected_place)) {
continue;
}
// TODO(zyfncg): Currently no kernel needs to transform input
// data, so the following code is commented out temporarily;
// it will be used in the future.
// VLOG(3) << "Pten Transform Variable " << var_base->Name() << " from "
// << tensor_in->place() << " to " << expected_place;
// framework::Tensor tmp_tensor;
// framework::TensorCopySync(*tensor_in, expected_place, &tmp_tensor);
// SetTensorToVariable(var_base->Var(), tmp_tensor,
// var_base->MutableVar());
}
}
}
}
template <typename VarType>
static void BuildDygraphPtenKernelContext(
const framework::KernelSignature& pt_kernel_signature,
......@@ -294,23 +337,19 @@ static void BuildDygraphPtenKernelContext(
attr_names.size(), attr_defs.size()));
for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
auto& ins_vector = ins.at(input_names[i]);
size_t start_idx = (i == 0 ? 0 : kernel_ctx->InputRangeAt(i - 1).second);
size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
const auto& variable = ins_vector[offset]->Var();
kernel_ctx->EmplaceBackInputWithoutSetRange(
paddle::experimental::MakePtenTensorBaseFromVar(variable, in_def));
const auto* tensor_in = GetTensorFromVar(ins_vector[offset]->Var());
kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
}
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
}
for (size_t i = 0; i < output_names.size(); ++i) {
auto& out_def = output_defs.at(i);
size_t start_idx = (i == 0 ? 0 : kernel_ctx->OutputRangeAt(i - 1).second);
auto iter = outs.find(output_names[i]);
......@@ -325,9 +364,21 @@ static void BuildDygraphPtenKernelContext(
size_t end_idx = start_idx + outs_vector.size();
for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
kernel_ctx->EmplaceBackOutputWithoutSetRange(
paddle::experimental::MakePtenTensorBaseFromVar(
outs_vector[offset]->MutableVar(), out_def));
auto* var = outs_vector[offset]->MutableVar();
framework::Tensor* tensor_out = nullptr;
if (var->template IsType<framework::LoDTensor>()) {
tensor_out = var->template GetMutable<framework::LoDTensor>();
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported output `%s` type when call pt kernel.",
framework::ToTypeName(var->Type())));
} // TODO(zyfncg): Add support for SelectedRows
experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
framework::SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
kernel_ctx->EmplaceBackOutputWithoutSetRange(tensor_out);
}
kernel_ctx->AssignOutputRange(std::make_pair(start_idx, end_idx), i);
}
......@@ -430,29 +481,6 @@ static void BuildDygraphPtenKernelContext(
}
}
template <typename VarType>
static void WriteBackToOutputs(
const framework::KernelSignature& pt_kernel_signature,
const NameVarMap<VarType>& outs, pten::KernelContext* kernel_ctx) {
auto& output_names = std::get<2>(pt_kernel_signature.args);
for (size_t i = 0; i < output_names.size(); ++i) {
auto iter = outs.find(output_names[i]);
if (iter != outs.end()) {
auto& outs_vector = iter->second;
auto& range_pair = kernel_ctx->OutputRangeAt(i);
auto pten_outs = kernel_ctx->MutableOutputBetween<pten::DenseTensor>(
range_pair.first, range_pair.second);
for (size_t j = 0; j < pten_outs.size(); ++j) {
experimental::MakeVariableFromPtenTensor(pten_outs[j],
outs_vector[j]->MutableVar());
}
}
}
}
template <typename VarType>
static void PreparedOpRunImpl(
const framework::OperatorBase& op, const framework::RuntimeContext& ctx,
......@@ -514,6 +542,8 @@ static void PreparedOpRunPtImpl(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx);
PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);
pten::KernelContext pt_kernel_context;
BuildDygraphPtenKernelContext<VarType>(pt_kernel_signature, pt_kernel, ins,
outs, attrs, default_attrs, dev_ctx,
......@@ -529,8 +559,6 @@ static void PreparedOpRunPtImpl(
#endif
}
WriteBackToOutputs<VarType>(pt_kernel_signature, outs, &pt_kernel_context);
// TODO(chenweihang): add debug flags later
if (framework::IsComplexType(kernel_type.data_type_)) {
HandleComplexGradToRealGrad<VarType>(outs);
......
......@@ -54,7 +54,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackInput(dense_x.get());
kernel_context.EmplaceBackAttr(blocking);
// 4. InferMeta
......@@ -65,7 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(backend)),
std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out);
kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out;
out.set_impl(dense_out);
......
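Since KernelContext now stores raw pointers, API-level callers such as copy_to keep the std::shared_ptr alive for the duration of the kernel invocation and hand the context a borrowed pointer via .get(). A minimal sketch of that contract (stub types, not the real API):

#include <memory>

struct DenseTensorStub {};  // stand-in for pten::DenseTensor

struct ContextStub {
  const DenseTensorStub* input = nullptr;  // borrowed, not owned
};

int main() {
  auto dense_x = std::make_shared<DenseTensorStub>();
  ContextStub ctx;
  ctx.input = dense_x.get();  // borrow: dense_x must outlive the kernel run
  // ... invoke the kernel with ctx while dense_x is still in scope ...
  return 0;
}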
......@@ -355,98 +355,6 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
dst);
}
void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
meta->dtype = arg_def.dtype;
meta->layout = src.layout();
meta->offset = src.offset();
if (src.IsInitialized() &&
src.place() == pten::TransToFluidPlace(arg_def.backend)) {
dst->ResetHolder(src.Holder());
} else {
// This does not affect the correctness, and will be modified immediately.
// dst->mutable_data(pten::TransToFluidPlace(arg_def.backend));
}
}
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
SetLoD(&meta->lod, src.lod());
ReMakePtenDenseTensorByArgDefBase(
static_cast<const paddle::framework::Tensor&>(src), arg_def, dst);
}
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
auto expected_place = pten::TransToFluidPlace(arg_def.backend);
if (variable.IsType<framework::LoDTensor>()) {
const auto& tensor = variable.Get<framework::LoDTensor>();
// check input dtype before ReMakePtenDenseTensor
PADDLE_ENFORCE(
(arg_def.dtype == pten::TransToPtenDataType(tensor.type())),
paddle::platform::errors::InvalidArgument(
"The type of input data is diffrent from the type of the "
"argument's definition in kernel."));
if (!platform::is_same_place(tensor.place(), expected_place)) {
framework::LoDTensor tmp_tensor;
framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else {
ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst);
}
} else if (variable.IsType<framework::SelectedRows>()) {
// TODO(chenweihang): now we don't deal with row and height
// by xiaowei's advice
const auto& tensor = variable.Get<framework::SelectedRows>();
PADDLE_ENFORCE(
(arg_def.dtype == pten::TransToPtenDataType(tensor.value().type())),
paddle::platform::errors::InvalidArgument(
"The type of input data is diffrent from the type of the "
"argument's definition in kernel."));
if (!platform::is_same_place(tensor.value().place(), expected_place)) {
framework::Tensor tmp_tensor;
paddle::framework::TensorCopySync(
tensor.value(), expected_place, &tmp_tensor);
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else {
ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst);
}
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared input `%s` type now when call pt kernel.",
framework::ToTypeName(variable.Type())));
}
}
void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
// mutable_data before running the kernel, to avoid sharing the output from
// KernelContext to the original tensor
if (variable->template IsType<framework::LoDTensor>()) {
auto* tensor = variable->template GetMutable<framework::LoDTensor>();
ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst);
} else if (variable->template IsType<framework::SelectedRows>()) {
auto* tensor = variable->template GetMutable<framework::SelectedRows>();
// TODO(chenweihang): adapt SelectedRows by xiaowei's design,
// here the row and height will be lost in the output!
ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared output `%s` type now when call pt kernel.",
framework::ToTypeName(variable->Type())));
}
}
static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a,
const std::shared_ptr<memory::Allocation>& b) {
return a->ptr() == b->ptr() && a->size() == b->size() &&
......@@ -489,5 +397,13 @@ void MakeVariableFromPtenTensor(pten::DenseTensor* src,
}
}
void ResetTensorByArgDef(pten::DenseTensor* dst,
const pten::TensorArgDef& arg_def) {
VLOG(5) << "ResetTensor by TensorArgDef.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dtype = arg_def.dtype;
meta->layout = arg_def.layout;
}
} // namespace experimental
} // namespace paddle
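Taken together with SetAllocationForOutputTenosr, this gives each pten kernel output a two-step reset, condensed here from the output loops of both BuildPtenKernelContext variants above:

// 1. Align the output's meta (dtype, layout) with the kernel's TensorArgDef.
experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
// 2. Ensure the backing allocation lives on the backend's expected place.
framework::SetAllocationForOutputTenosr(
    tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));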
......@@ -67,20 +67,11 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void MakeVariableFromPtenTensor(pten::DenseTensor* src,
framework::Variable* variable);
void ResetTensorByArgDef(pten::DenseTensor* dst,
const pten::TensorArgDef& arg_def);
} // namespace experimental
} // namespace paddle
......@@ -73,6 +73,7 @@ inline size_t SizeOf(DataType data_type) {
case DataType::COMPLEX128:
return 16;
case DataType::UNDEFINED:
return 0;
case DataType::NUM_DATA_TYPES:
PD_THROW("Data type `",
static_cast<int>(data_type),
......
......@@ -113,7 +113,8 @@ void* DenseTensor::mutable_data(size_t request_bytes) {
bytes));
bytes = request_bytes;
}
if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) {
if (!storage_->data() || storage_->size() < bytes + meta_.offset ||
storage_->size() == 0) {
VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
<< ", new size: " << bytes;
storage_->Realloc(bytes);
......
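A hedged reading of the widened condition: a storage can report a nonzero size while holding no buffer (data() is null, for example after its holder has been swapped out), and the new !storage_->data() check forces a reallocation in that case. The guard, restated as a standalone predicate:

#include <cstddef>

// Mirrors the updated realloc condition in DenseTensor::mutable_data:
// reallocate when the buffer is missing, too small, or empty.
bool NeedsRealloc(const void* data, size_t storage_size, size_t bytes,
                  size_t offset) {
  return data == nullptr || storage_size < bytes + offset || storage_size == 0;
}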
......@@ -16,20 +16,19 @@
namespace pten {
void KernelContext::EmplaceBackInput(std::shared_ptr<TensorBase> input) {
void KernelContext::EmplaceBackInput(const TensorBase* input) {
int index = inputs_.size();
inputs_.emplace_back(std::move(input));
inputs_.emplace_back(input);
// Record the start and end index of the input
input_range_.emplace_back(std::pair<int, int>(index, index + 1));
}
void KernelContext::EmplaceBackInputWithoutSetRange(
std::shared_ptr<TensorBase> input) {
inputs_.emplace_back(std::move(input));
void KernelContext::EmplaceBackInputWithoutSetRange(const TensorBase* input) {
inputs_.emplace_back(input);
}
void KernelContext::EmplaceBackInputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs) {
paddle::SmallVector<const TensorBase*> inputs) {
int index = inputs_.size();
// Record the start and end index of the input
input_range_.emplace_back(std::pair<int, int>(index, index + inputs.size()));
......@@ -38,25 +37,23 @@ void KernelContext::EmplaceBackInputs(
std::make_move_iterator(inputs.end()));
}
void KernelContext::EmplaceBackOutput(std::shared_ptr<TensorBase> output) {
void KernelContext::EmplaceBackOutput(TensorBase* output) {
int index = outputs_.size();
outputs_.emplace_back(std::move(output));
outputs_.emplace_back(output);
// Record the start and end index of the output
output_range_.emplace_back(std::pair<int, int>(index, index + 1));
}
void KernelContext::EmplaceBackOutputWithoutSetRange(
std::shared_ptr<TensorBase> output) {
outputs_.emplace_back(std::move(output));
void KernelContext::EmplaceBackOutputWithoutSetRange(TensorBase* output) {
outputs_.emplace_back(output);
}
void KernelContext::SetOutputWithoutSetRange(
int index, std::shared_ptr<TensorBase> output) {
outputs_.at(index) = std::move(output);
void KernelContext::SetOutputWithoutSetRange(int index, TensorBase* output) {
outputs_.at(index) = output;
}
void KernelContext::EmplaceBackOutputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs) {
paddle::SmallVector<TensorBase*> outputs) {
int index = outputs_.size();
// Record the start and end index of the output
output_range_.emplace_back(
......@@ -116,19 +113,5 @@ std::pair<int, int>& KernelContext::MutableOutputRangeAt(size_t idx) {
// Temporary method: for compatibility with fluid Tensor and to improve
// performance; only deals with DenseTensor now
void KernelContext::ClearData() {
for (auto& in : inputs_) {
if (in) {
CompatibleDenseTensorUtils::ClearStorage(
static_cast<DenseTensor*>(in.get()));
}
}
for (auto& out : outputs_) {
if (out) {
CompatibleDenseTensorUtils::ClearStorage(
static_cast<DenseTensor*>(out.get()));
}
}
attrs_.clear();
}
void KernelContext::ClearData() { attrs_.clear(); }
} // namespace pten
......@@ -51,21 +51,19 @@ class KernelContext {
return static_cast<const CtxType&>(*dev_ctx_);
}
void EmplaceBackInput(std::shared_ptr<TensorBase> input);
void EmplaceBackInput(const TensorBase* input);
void EmplaceBackInputWithoutSetRange(std::shared_ptr<TensorBase> input);
void EmplaceBackInputWithoutSetRange(const TensorBase* input);
void EmplaceBackInputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs);
void EmplaceBackInputs(paddle::SmallVector<const TensorBase*> inputs);
void EmplaceBackOutput(std::shared_ptr<TensorBase> output);
void EmplaceBackOutput(TensorBase* output);
void EmplaceBackOutputWithoutSetRange(std::shared_ptr<TensorBase> output);
void EmplaceBackOutputWithoutSetRange(TensorBase* output);
void SetOutputWithoutSetRange(int index, std::shared_ptr<TensorBase> output);
void EmplaceBackOutputs(paddle::SmallVector<TensorBase*> outputs);
void EmplaceBackOutputs(
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs);
void SetOutputWithoutSetRange(int index, TensorBase* output);
void EmplaceBackAttr(paddle::any attr);
......@@ -90,16 +88,12 @@ class KernelContext {
: paddle::optional<const TensorType&>{paddle::none};
}
std::shared_ptr<TensorBase>& MutableInputPtrAt(size_t idx) {
return inputs_.at(idx);
}
template <typename TensorType>
std::vector<TensorType> MoveInputsBetween(size_t start, size_t end) {
std::vector<TensorType> v;
for (size_t i = start; i < end; ++i) {
auto t = std::dynamic_pointer_cast<TensorType>(inputs_.at(i));
v.emplace_back(std::move(*t.get()));
auto t = static_cast<const TensorType*>(inputs_.at(i));
v.emplace_back(*t);
inputs_.at(i) = nullptr;
}
return v;
......@@ -109,21 +103,16 @@ class KernelContext {
void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
template <typename TensorType>
TensorType* MutableInputAt(size_t idx) {
return static_cast<TensorType*>(inputs_.at(idx).get());
}
template <typename TensorType>
TensorType* MutableOutputAt(size_t idx) {
return static_cast<TensorType*>(outputs_.at(idx).get());
return static_cast<TensorType*>(outputs_.at(idx));
}
template <typename TensorType>
std::vector<TensorType*> MutableOutputBetween(size_t start, size_t end) {
std::vector<TensorType*> v;
for (size_t i = start; i < end; ++i) {
v.emplace_back(static_cast<TensorType*>(outputs_.at(i).get()));
v.emplace_back(static_cast<TensorType*>(outputs_.at(i)));
}
return v;
......@@ -153,8 +142,8 @@ class KernelContext {
// TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope`
// Note: can't use API Tensor here, the inference don't use this API Tensor
paddle::SmallVector<std::shared_ptr<TensorBase>> inputs_;
paddle::SmallVector<std::shared_ptr<TensorBase>> outputs_;
paddle::SmallVector<const TensorBase*> inputs_;
paddle::SmallVector<TensorBase*> outputs_;
paddle::SmallVector<paddle::any> attrs_;
// Only contains input like list[Tensor] need `range`
......
......@@ -31,7 +31,7 @@ void Copy(const Context& dev_ctx,
DenseTensor* dst) {
auto* src_ptr = src.data();
const auto& src_place = src.place();
const auto& dst_place = dst->place();
auto dst_place = dst->place();
if (src_place == dst_place && paddle::platform::is_cpu_place(src_place)) {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
......@@ -51,6 +51,7 @@ void Copy(const Context& dev_ctx,
return;
}
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
CHECK(dst->layout() == src.layout());
auto size = src.numel() *
......@@ -208,6 +209,9 @@ void Copy(const Context& dev_ctx,
"Context place dose not match the source and destination place."));
}
}
} else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Place type error. Please check the place of src and dst Tensor."));
}
}
......
......@@ -62,7 +62,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
auto kernel_context = pten::KernelContext(dev_ctx);
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
kernel_context.EmplaceBackInput(dense_x.get());
kernel_context.EmplaceBackAttr(pten::Scalar(scale));
kernel_context.EmplaceBackAttr(bias);
......@@ -73,7 +73,7 @@ PADDLE_API Tensor scale_kernel_context(const Tensor& x,
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(kernel_backend)),
std::move(out_meta));
kernel_context.EmplaceBackOutput(dense_out);
kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out;
out.set_impl(dense_out);
......