未验证 提交 0d537003 编写于 作者: Z zyfncg 提交者: GitHub

[cherry-pick] Optimize performance of dygraph (#42093, #42103, #42137) (#42171)

* optimiaze performance of PreparePhiData (#42093)

* Dygraph performance optimization (v2) (#42103)

* optimiaze performance of PreparePhiData

* dygraph performance optimization

* optimize performance of dygraph (#42137)
上级 26167969
......@@ -402,21 +402,20 @@ std::vector<phi::MetaTensor*> CompatInferMetaContext::MutableOutputBetween(
CompatInferMetaContext BuildInferMetaContext(InferShapeContext* ctx,
const std::string& op_type) {
// 1. get kernel args
auto arg_map_fn = phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_type);
PADDLE_ENFORCE_NOT_NULL(
arg_map_fn, platform::errors::NotFound(
"The ArgumentMappingFn of %s op is not found.", op_type));
auto* arg_map_fn = phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_type);
InferShapeArgumentMappingContext arg_map_context(*ctx);
auto signature = arg_map_fn(arg_map_context);
KernelSignature signature =
arg_map_fn ? (*arg_map_fn)(arg_map_context)
: phi::DefaultKernelSignatureMap::Instance().Get(op_type);
VLOG(3) << "BuildInferMetaContext: op kernel signature - " << signature;
// 2. build infermeta context
CompatInferMetaContext infer_meta_context(
{ctx->IsRuntime(), ctx->IsRunMKLDNNKernel()});
auto& input_names = std::get<0>(signature.args);
auto& attr_names = std::get<1>(signature.args);
auto& output_names = std::get<2>(signature.args);
const auto& input_names = signature.input_names;
const auto& attr_names = signature.attr_names;
const auto& output_names = signature.output_names;
const auto& args_def =
phi::KernelFactory::Instance().GetFirstKernelArgsDef(signature.name);
......
......@@ -1200,8 +1200,10 @@ bool OperatorWithKernel::SupportsMKLDNN(
bool OperatorWithKernel::CanMKLDNNBeUsed(const framework::ExecutionContext& ctx,
proto::VarType::Type data_type) const {
bool use_mkldnn_ctx = ctx.HasAttr("use_mkldnn") &&
ctx.Attr<bool>("use_mkldnn") &&
const auto& attrs_map = ctx.Attrs();
auto iter = attrs_map.find("use_mkldnn");
bool use_mkldnn_ctx = iter != attrs_map.end() &&
BOOST_GET_CONST(bool, iter->second) &&
platform::is_cpu_place(ctx.GetPlace());
return use_mkldnn_ctx && this->SupportsMKLDNN(data_type);
}
......@@ -2117,8 +2119,16 @@ KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs(
const ExecutionContext& ctx) const {
ExecutionArgumentMappingContext arg_mapping_ctx(ctx);
if (arg_map_fn_ == nullptr) {
arg_map_fn_.reset(new phi::ArgumentMappingFn(
phi::OpUtilsMap::Instance().GetArgumentMappingFn(Type())));
auto* arg_map_fn = phi::OpUtilsMap::Instance().GetArgumentMappingFn(type_);
if (arg_map_fn) {
arg_map_fn_.reset(new phi::ArgumentMappingFn(*arg_map_fn));
} else {
auto func =
[this](const phi::ArgumentMappingContext& ctx) -> KernelSignature {
return phi::DefaultKernelSignatureMap::Instance().Get(type_);
};
arg_map_fn_.reset(new phi::ArgumentMappingFn(func));
}
}
return (*arg_map_fn_)(arg_mapping_ctx);
}
......@@ -2126,7 +2136,7 @@ KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs(
Scope* OperatorWithKernel::PreparePhiData(
const Scope& scope, const phi::Kernel& pt_kernel,
const KernelSignature& pt_kernel_signature, RuntimeContext* ctx) const {
auto& input_names = std::get<0>(pt_kernel_signature.args);
const auto& input_names = pt_kernel_signature.input_names;
auto input_defs = pt_kernel.args_def().input_defs();
PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
platform::errors::InvalidArgument(
......@@ -2178,11 +2188,15 @@ Scope* OperatorWithKernel::PreparePhiData(
if (in_def.backend == phi::Backend::ALL_BACKEND) {
continue;
}
auto expected_place = phi::TransToPhiPlace(in_def.backend);
if (platform::is_same_place(tensor_in->place(), expected_place)) {
auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
if (in_def.backend == tensor_backend ||
(in_def.backend == phi::Backend::GPUDNN &&
tensor_backend == phi::Backend::GPU)) {
continue;
}
auto expected_place = phi::TransToPhiPlace(in_def.backend);
VLOG(3) << "phi Transform Variable " << input_names[i] << " from "
<< tensor_in->place() << " to " << expected_place;
......@@ -2219,9 +2233,9 @@ void OperatorWithKernel::BuildPhiKernelContext(
phi::KernelContext* pt_kernel_context) const {
pt_kernel_context->SetDeviceContext(dev_ctx);
auto& input_names = std::get<0>(pt_kernel_signature_->args);
auto& attr_names = std::get<1>(pt_kernel_signature_->args);
auto& output_names = std::get<2>(pt_kernel_signature_->args);
auto& input_names = pt_kernel_signature_->input_names;
auto& attr_names = pt_kernel_signature_->attr_names;
auto& output_names = pt_kernel_signature_->output_names;
auto input_defs = pt_kernel_->args_def().input_defs();
auto attr_defs = pt_kernel_->args_def().attribute_defs();
......
......@@ -37,6 +37,8 @@ namespace paddle {
namespace imperative {
static const phi::Kernel empty_kernel;
static const framework::RuntimeContext empty_ctx({}, {});
static const framework::Scope empty_scope;
const std::shared_ptr<VariableWrapper>& GetVariableWrapper(
const std::shared_ptr<paddle::imperative::VarBase>& var) {
......@@ -138,8 +140,6 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
framework::RuntimeContext ctx({}, {});
#ifdef PADDLE_WITH_MKLDNN
// MKLDNN variant of code reads attributes in some of GetKernelTypeForVar and
// GetKernelType functions, so we need to copy the attributes there.
......@@ -158,7 +158,7 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
// 1. get expected kernel key
auto dygraph_exe_ctx = DygraphExecutionContext<VarType>(
op, framework::Scope(), *dev_ctx, ctx, ins, outs, attrs, default_attrs);
op, empty_scope, *dev_ctx, empty_ctx, ins, outs, attrs, default_attrs);
auto expected_kernel_key = op.GetExpectedKernelType(dygraph_exe_ctx);
framework::KernelSignature pt_kernel_signature;
......@@ -172,11 +172,26 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
paddle::platform::is_in_xpu_black_list(op.Type());
#endif
if (phi::KernelFactory::Instance().HasCompatiblePhiKernel(op.Type())) {
pt_kernel_signature =
std::move(op.GetExpectedPhiKernelArgs(dygraph_exe_ctx));
VLOG(6) << pt_kernel_signature;
bool has_phi_kernel = false;
const auto* arg_map_fn =
phi::OpUtilsMap::Instance().GetArgumentMappingFn(op.Type());
if (arg_map_fn) {
has_phi_kernel = true;
pt_kernel_signature = (*arg_map_fn)(
framework::ExecutionArgumentMappingContext(dygraph_exe_ctx));
} else {
const auto* kernel_sig =
phi::DefaultKernelSignatureMap::Instance().GetNullable(op.Type());
if (kernel_sig) {
has_phi_kernel = true;
pt_kernel_signature = *kernel_sig;
}
}
if (has_phi_kernel) {
VLOG(6) << pt_kernel_signature;
pt_kernel_name = pt_kernel_signature.name;
// NOTE(Liu-xiandong): The register kernel used KP have library_type[KP],
// But the default library_type is Plain, so we need to modify the
......@@ -231,7 +246,7 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
dev_ctx = pool.Get(expected_kernel_key.place_);
}
return PreparedOp(op, ctx, expected_kernel_key,
return PreparedOp(op, empty_ctx, expected_kernel_key,
std::move(pt_kernel_signature), pt_kernel, dev_ctx);
} else {
VLOG(6) << "Dynamic mode ChoosePhiKernel - kernel `" << pt_kernel_name
......@@ -280,7 +295,7 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
<< " | kernel key: " << pt_cpu_kernel_key
<< " | kernel: " << pt_cpu_kernel;
auto* cpu_ctx = pool.Get(paddle::platform::CPUPlace());
return PreparedOp(op, ctx, expected_kernel_key,
return PreparedOp(op, empty_ctx, expected_kernel_key,
std::move(pt_kernel_signature), pt_cpu_kernel,
cpu_ctx);
}
......@@ -373,7 +388,8 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
dev_ctx = pool.Get(expected_kernel_key.place_);
}
return PreparedOp(op, ctx, expected_kernel_key, kernel_iter->second, dev_ctx);
return PreparedOp(op, empty_ctx, expected_kernel_key, kernel_iter->second,
dev_ctx);
}
PreparedOp PreparedOp::Prepare(const NameVarMap<VarBase>& ins,
......
......@@ -233,9 +233,9 @@ void BuildDygraphPhiKernelContext(
platform::DeviceContext* dev_ctx, phi::KernelContext* kernel_ctx) {
kernel_ctx->SetDeviceContext(dev_ctx);
auto& input_names = std::get<0>(pt_kernel_signature.args);
auto& attr_names = std::get<1>(pt_kernel_signature.args);
auto& output_names = std::get<2>(pt_kernel_signature.args);
const auto& input_names = pt_kernel_signature.input_names;
const auto& attr_names = pt_kernel_signature.attr_names;
const auto& output_names = pt_kernel_signature.output_names;
auto& input_defs = pt_kernel.args_def().input_defs();
auto& output_defs = pt_kernel.args_def().output_defs();
......@@ -570,7 +570,7 @@ template <typename VarType>
void PreparePhiData(const phi::Kernel& pt_kernel,
const framework::KernelSignature& pt_kernel_signature,
const NameVarMap<VarType>& ins) {
auto& input_names = std::get<0>(pt_kernel_signature.args);
const auto& input_names = pt_kernel_signature.input_names;
auto& input_defs = pt_kernel.args_def().input_defs();
PADDLE_ENFORCE_EQ(input_names.size(), input_defs.size(),
......@@ -581,10 +581,11 @@ void PreparePhiData(const phi::Kernel& pt_kernel,
for (size_t i = 0; i < input_names.size(); ++i) {
auto& in_def = input_defs.at(i);
if (ins.find(input_names[i]) == ins.end()) {
auto iter = ins.find(input_names[i]);
if (iter == ins.end()) {
continue;
}
auto& ins_vector = ins.at(input_names[i]);
auto& ins_vector = iter->second;
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
auto& var = ins_vector[offset];
......@@ -593,11 +594,15 @@ void PreparePhiData(const phi::Kernel& pt_kernel,
if (in_def.backend == phi::Backend::ALL_BACKEND) {
continue;
}
auto expected_place = phi::TransToPhiPlace(in_def.backend);
if (platform::is_same_place(tensor_in->place(), expected_place)) {
auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
if (in_def.backend == tensor_backend ||
(in_def.backend == phi::Backend::GPUDNN &&
tensor_backend == phi::Backend::GPU)) {
continue;
}
auto expected_place = phi::TransToPhiPlace(in_def.backend);
VLOG(3) << "Phi Transform Variable " << input_names[i] << " from "
<< tensor_in->place() << " to " << expected_place;
......
......@@ -2050,9 +2050,9 @@ void BindImperative(py::module *m_ptr) {
};
auto ret = self.GetExpectedKernelSignature(type, ins_map,
outs_map, attrs);
auto kernelsig_ins = input_to_vector(std::get<0>(ret.args));
auto kernelsig_attrs = attr_to_vector(std::get<1>(ret.args));
auto kernelsig_outs = output_to_vector(std::get<2>(ret.args));
auto kernelsig_ins = input_to_vector(ret.input_names);
auto kernelsig_attrs = attr_to_vector(ret.attr_names);
auto kernelsig_outs = output_to_vector(ret.output_names);
return std::make_tuple(kernelsig_ins, kernelsig_attrs,
kernelsig_outs);
}
......
......@@ -58,10 +58,10 @@ int main(int argc, char **argv) {
if (kernel_signature_map.Has(op_name)) {
kernel_signature_map_str =
kernel_signature_map_str + "\"" + op_kernel_pair.first + "\":{";
auto &args = kernel_signature_map.Get(op_name).args;
const auto &args = kernel_signature_map.Get(op_name);
kernel_signature_map_str += "\"inputs\":[";
auto inputs_ = std::get<0>(args);
auto inputs_ = args.input_names;
for (size_t i = 0; i < inputs_.size(); i++) {
kernel_signature_map_str =
kernel_signature_map_str + "\"" + inputs_[i] + "\",";
......@@ -69,14 +69,14 @@ int main(int argc, char **argv) {
if (inputs_.size()) kernel_signature_map_str.pop_back();
kernel_signature_map_str += "],\"attrs\":[";
auto attrs_ = std::get<1>(args);
auto attrs_ = args.attr_names;
for (size_t i = 0; i < attrs_.size(); i++) {
kernel_signature_map_str =
kernel_signature_map_str + "\"" + attrs_[i] + "\",";
}
if (attrs_.size()) kernel_signature_map_str.pop_back();
kernel_signature_map_str += "],\"outputs\":[";
auto outputs_ = std::get<2>(args);
auto outputs_ = args.output_names;
for (size_t i = 0; i < outputs_.size(); i++) {
kernel_signature_map_str =
kernel_signature_map_str + "\"" + outputs_[i] + "\",";
......
......@@ -108,14 +108,14 @@ void PhiOpConvertPass::convertStage() {
op->replaceAllUsesWith(kernel_op.getResults());
} else {
::phi::KernelSignature kernel_sign =
::phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_name)(
(*::phi::OpUtilsMap::Instance().GetArgumentMappingFn(op_name))(
infrt::ProtoArgumentMappingContext(op));
VLOG(3) << "IncompatiblePhiKernel: op(" << op_name << "), kernel("
<< kernel_sign.name << ")";
// resort input&output according to kernel_sign
::llvm::SmallVector<mlir::Value, 4> inputs, ori_output;
::llvm::SmallVector<mlir::Type, 4> output_types;
for (const std::string &str : std::get<0>(kernel_sign.args)) {
for (const std::string &str : kernel_sign.input_names) {
if (pd_dialect_inputs_info_map_.at(op_name).count(str) == 0) {
LOG(ERROR) << "No input info for Op " << op_name << " and argument "
<< str;
......@@ -125,7 +125,7 @@ void PhiOpConvertPass::convertStage() {
inputs.push_back(op->getOperands()[index]);
}
for (const std::string &str : std::get<2>(kernel_sign.args)) {
for (const std::string &str : kernel_sign.output_names) {
if (pd_dialect_outputs_info_map_.at(op_name).count(str) == 0) {
LOG(ERROR) << "No output info for Op " << op_name << " and argument "
<< str;
......
......@@ -20,11 +20,11 @@ limitations under the License. */
namespace phi {
std::ostream& operator<<(std::ostream& os, KernelSignature signature) {
os << "Kernel Signature - name: " << signature.name << "; inputs: "
<< paddle::string::join_strings(std::get<0>(signature.args), ", ")
<< paddle::string::join_strings(signature.input_names, ", ")
<< "; attributes: "
<< paddle::string::join_strings(std::get<1>(signature.args), ", ")
<< paddle::string::join_strings(signature.attr_names, ", ")
<< "; outputs: "
<< paddle::string::join_strings(std::get<2>(signature.args), ", ");
<< paddle::string::join_strings(signature.output_names, ", ");
return os;
}
......
......@@ -33,7 +33,9 @@ using KernelArgsTuple = std::tuple<paddle::SmallVector<const char*>,
struct KernelSignature {
const char* name;
KernelArgsTuple args;
paddle::SmallVector<const char*> input_names;
paddle::SmallVector<const char*> attr_names;
paddle::SmallVector<const char*> output_names;
KernelSignature() = default;
......@@ -41,18 +43,26 @@ struct KernelSignature {
paddle::SmallVector<const char*>&& inputs,
paddle::SmallVector<const char*>&& attrs,
paddle::SmallVector<const char*>&& outputs)
: name(kernel_name), args(std::make_tuple(inputs, attrs, outputs)) {}
: name(kernel_name),
input_names(std::move(inputs)),
attr_names(std::move(attrs)),
output_names(std::move(outputs)) {}
KernelSignature(const char* kernel_name,
const paddle::SmallVector<const char*>& inputs,
const paddle::SmallVector<const char*>& attrs,
const paddle::SmallVector<const char*>& outputs)
: name(kernel_name), args(std::make_tuple(inputs, attrs, outputs)) {}
: name(kernel_name),
input_names(inputs),
attr_names(attrs),
output_names(outputs) {}
// TODO(chenweihang): add assign constructor to solve windows compile
// problem, remove it later
KernelSignature& operator=(const KernelSignature& other) {
name = other.name;
args = other.args;
input_names = other.input_names;
attr_names = other.attr_names;
output_names = other.output_names;
return *this;
}
};
......
......@@ -86,6 +86,14 @@ class DefaultKernelSignatureMap {
return it->second;
}
const KernelSignature* GetNullable(const std::string& op_type) const {
auto it = map_.find(op_type);
if (it != map_.end()) {
return &it->second;
}
return nullptr;
}
void Insert(std::string op_type, KernelSignature signature) {
PADDLE_ENFORCE_NE(
Has(op_type),
......@@ -148,16 +156,13 @@ class OpUtilsMap {
}
}
ArgumentMappingFn GetArgumentMappingFn(const std::string& op_type) const {
const ArgumentMappingFn* GetArgumentMappingFn(
const std::string& op_type) const {
auto it = arg_mapping_fn_map_.find(op_type);
if (it == arg_mapping_fn_map_.end()) {
auto func =
[&op_type](const ArgumentMappingContext& ctx) -> KernelSignature {
return DefaultKernelSignatureMap::Instance().Get(op_type);
};
return func;
return nullptr;
} else {
return it->second;
return &it->second;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册