From 69a92b7b7f270408ba6163848a8d4c041cf1bc00 Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Thu, 28 Apr 2022 10:14:59 +0800
Subject: [PATCH] [cherry-pick] Optimize performance of dygraph (#42231, #42253) (#42309)

* Optimize the performance of sum api (#42231)

* optimize the performance of sum api

* optimize IsDenseTensorInput

* remove debug log

* Add move construct for KernelSignature (#42253)

* add move construct for KernelSignature

* add noexcept

* fix cherry-pick problem
---
 paddle/fluid/framework/infershape_utils.cc    | 12 ++++++----
 .../new_executor/new_executor_defs.cc         |  5 ++++
 .../new_executor/new_executor_defs.h          |  2 ++
 paddle/fluid/framework/op_desc.cc             |  4 ++++
 paddle/fluid/framework/operator.cc            |  4 ++++
 paddle/fluid/framework/operator.h             | 11 +++++----
 paddle/fluid/framework/shape_inference.h      |  2 ++
 paddle/fluid/imperative/infer_shape_context.h |  9 +++++++
 .../operators/reduce_ops/reduce_sum_op.cc     |  6 +++++
 paddle/fluid/pybind/eager_utils.cc            |  6 +----
 .../dialect/phi/pass/proto_arg_map_context.cc |  6 +++++
 .../dialect/phi/pass/proto_arg_map_context.h  |  1 +
 paddle/phi/core/compat/arg_map_context.h      | 21 ++++++++++++++++
 paddle/phi/infermeta/unary.cc                 |  3 +--
 paddle/phi/kernels/cpu/reduce_sum_kernel.cc   |  3 +++
 paddle/phi/kernels/gpu/reduce_sum_kernel.cu   |  3 +++
 paddle/phi/ops/compat/sum_sig.cc              |  2 +-
 paddle/phi/tests/ops/test_op_signature.h      |  4 ++++
 python/paddle/tensor/math.py                  | 24 +++++++------------
 19 files changed, 95 insertions(+), 33 deletions(-)

diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc
index 91dea654ee6..f5a3265af4f 100644
--- a/paddle/fluid/framework/infershape_utils.cc
+++ b/paddle/fluid/framework/infershape_utils.cc
@@ -70,6 +70,11 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {
   }
 
   bool IsDenseTensorInput(const std::string& name) const override {
+    auto var_type = ctx_.GetInputVarType(name);
+    return var_type == proto::VarType::LOD_TENSOR;
+  }
+
+  bool IsDenseTensorInputs(const std::string& name) const override {
     auto var_types = ctx_.GetInputsVarType(name);
     return std::all_of(var_types.begin(), var_types.end(),
                        [](const proto::VarType::Type& type) {
@@ -78,11 +83,8 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {
   }
 
   bool IsSelectedRowsInput(const std::string& name) const override {
-    auto var_types = ctx_.GetInputsVarType(name);
-    return std::all_of(var_types.begin(), var_types.end(),
-                       [](const proto::VarType::Type& type) {
-                         return type == proto::VarType::SELECTED_ROWS;
-                       });
+    auto var_type = ctx_.GetInputVarType(name);
+    return var_type == proto::VarType::SELECTED_ROWS;
   }
 
   bool IsDenseTensorVectorInput(const std::string& name) const override {
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc
index 0164c453076..535b7e5baa1 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.cc
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc
@@ -365,6 +365,11 @@ std::vector<DDim> InterpretercoreInferShapeContext::GetInputsDim(
   return GetDims(vars);
 }
 
+proto::VarType::Type InterpretercoreInferShapeContext::GetInputVarType(
+    const std::string& name) const {
+  return GetVarType(InputVars(name).at(0));
+}
+
 std::vector<proto::VarType::Type>
 InterpretercoreInferShapeContext::GetInputsVarType(
     const std::string& name) const {
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index 83eaf9514a1..b7b7d5eef41 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -100,6 +100,8 @@ class InterpretercoreInferShapeContext : public InferShapeContext {
 
   std::vector<DDim> GetInputsDim(const std::string& name) const override;
 
+  proto::VarType::Type GetInputVarType(const std::string& name) const override;
+
   std::vector<proto::VarType::Type> GetInputsVarType(
       const std::string& name) const override;
 
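
The files above split what used to be a single all-inputs scan into two queries: IsDenseTensorInput now asks for the type of exactly one variable through the new GetInputVarType hook, while the new IsDenseTensorInputs keeps the std::all_of scan for genuinely multi-input arguments such as sum's "X". A minimal standalone sketch of the two predicates follows; Variable and VarType here are illustrative stand-ins, not Paddle's real types:

    #include <algorithm>
    #include <vector>

    enum class VarType { kDenseTensor, kSelectedRows };

    struct Variable {  // illustrative stand-in for framework::Variable
      VarType type;
    };

    // Single-input fast path: one lookup, no scan.
    bool IsDenseTensorInput(const std::vector<const Variable*>& vars) {
      return vars.front()->type == VarType::kDenseTensor;
    }

    // Multi-input check: every element must be a dense tensor.
    bool IsDenseTensorInputs(const std::vector<const Variable*>& vars) {
      return std::all_of(vars.begin(), vars.end(), [](const Variable* v) {
        return v->type == VarType::kDenseTensor;
      });
    }

    int main() {
      Variable a{VarType::kDenseTensor}, b{VarType::kSelectedRows};
      std::vector<const Variable*> single{&a}, mixed{&a, &b};
      return (IsDenseTensorInput(single) && !IsDenseTensorInputs(mixed)) ? 0 : 1;
    }

For single-input ops the hot dygraph path now performs one type lookup instead of materializing a vector of types and scanning it.
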
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 4ef1d3a83a2..acd45462489 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -245,6 +245,10 @@ class CompileTimeInferShapeContext : public InferShapeContext {
 
   bool IsRunMKLDNNKernel() const override;
 
+  proto::VarType::Type GetInputVarType(const std::string &name) const override {
+    return GetVarType(Inputs(name).at(0));
+  }
+
   std::vector<proto::VarType::Type> GetInputsVarType(
       const std::string &name) const override {
     return GetVarTypes(Inputs(name));
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 7468aaedece..2960b024ce1 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -981,6 +981,10 @@ class RuntimeInferShapeContext : public InferShapeContext {
     return GetDims(vars);
   }
 
+  proto::VarType::Type GetInputVarType(const std::string& name) const override {
+    return GetVarType(InputVars(name).at(0));
+  }
+
   std::vector<proto::VarType::Type> GetInputsVarType(
       const std::string& name) const override {
     return GetVarTypes(InputVars(name));
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 70e9f5c1b14..d8a4ac87292 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -479,6 +479,11 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {
   }
 
   bool IsDenseTensorInput(const std::string& name) const override {
+    const auto* var = ctx_.InputVar(name);
+    return var->IsType<framework::LoDTensor>();
+  }
+
+  bool IsDenseTensorInputs(const std::string& name) const override {
     auto vars = ctx_.MultiInputVar(name);
     return std::all_of(vars.begin(), vars.end(), [](const Variable* var) {
       return var->IsType<framework::LoDTensor>();
@@ -486,10 +491,8 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {
   }
 
   bool IsSelectedRowsInput(const std::string& name) const override {
-    auto vars = ctx_.MultiInputVar(name);
-    return std::all_of(vars.begin(), vars.end(), [](const Variable* var) {
-      return var->IsType<phi::SelectedRows>();
-    });
+    const auto* var = ctx_.InputVar(name);
+    return var->IsType<phi::SelectedRows>();
   }
 
   bool IsDenseTensorVectorInput(const std::string& name) const override {
diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h
index 4600213596e..850a1093317 100644
--- a/paddle/fluid/framework/shape_inference.h
+++ b/paddle/fluid/framework/shape_inference.h
@@ -65,6 +65,8 @@ class InferShapeContext {
   virtual bool HasOutput(const std::string &name) const = 0;
   virtual bool HasAttr(const std::string &name) const = 0;
 
+  virtual proto::VarType::Type GetInputVarType(
+      const std::string &name) const = 0;
   virtual std::vector<proto::VarType::Type> GetInputsVarType(
      const std::string &name) const = 0;
   virtual std::vector<proto::VarType::Type> GetOutputsVarType(
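
Every InferShapeContext implementation touched here (compile-time, runtime, interpreter-core, dygraph) answers the new GetInputVarType the same way: return the type of the first variable bound to the argument name. A hypothetical context showing that contract next to the older vector-returning query; the names are illustrative, not Paddle's:

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    enum class VarType { kLoDTensor, kSelectedRows };

    // Hypothetical context: maps an argument name to the types of all
    // variables bound to it, mirroring the InferShapeContext additions.
    class ToyInferShapeContext {
     public:
      explicit ToyInferShapeContext(
          std::map<std::string, std::vector<VarType>> vars)
          : vars_(std::move(vars)) {}

      // New query: only the first bound variable is consulted.
      VarType GetInputVarType(const std::string& name) const {
        return vars_.at(name).at(0);
      }

      // Old query: materializes a full vector even for a single input.
      std::vector<VarType> GetInputsVarType(const std::string& name) const {
        return vars_.at(name);
      }

     private:
      std::map<std::string, std::vector<VarType>> vars_;
    };

    int main() {
      ToyInferShapeContext ctx({{"X", {VarType::kLoDTensor}}});
      assert(ctx.GetInputVarType("X") == VarType::kLoDTensor);
      assert(ctx.GetInputsVarType("X").size() == 1);  // extra allocation
      return 0;
    }
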
diff --git a/paddle/fluid/imperative/infer_shape_context.h b/paddle/fluid/imperative/infer_shape_context.h
index 8a5d942e059..a1486638c13 100644
--- a/paddle/fluid/imperative/infer_shape_context.h
+++ b/paddle/fluid/imperative/infer_shape_context.h
@@ -300,6 +300,15 @@ class DygraphInferShapeContext : public framework::InferShapeContext {
     return vec_res;
   }
 
+  framework::proto::VarType::Type GetInputVarType(
+      const std::string& name) const override {
+    auto it = var_map_in_->find(name);
+    PADDLE_ENFORCE_NE(
+        it, var_map_in_->end(),
+        platform::errors::NotFound("can not find [%s] in input", name));
+    return framework::ToVarType(it->second[0]->Var().Type());
+  }
+
   std::vector<framework::proto::VarType::Type> GetInputsVarType(
       const std::string& name) const override {
     std::vector<framework::proto::VarType::Type> vec_res;
diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
index 2a78774f370..6b8e6b8f805 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
+++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
@@ -89,6 +89,12 @@ class ReduceSumVarTypeInference : public paddle::framework::VarTypeInference {
         BOOST_GET_CONST(int, ctx->GetAttr("out_dtype")));
     if (data_type >= 0) {
       ctx->SetOutputDataType("Out", data_type);
+    } else {
+      auto x_type = ctx->GetInputDataType("X");
+      if (x_type == framework::proto::VarType::BOOL ||
+          x_type == framework::proto::VarType::INT32) {
+        ctx->SetOutputDataType("Out", framework::proto::VarType::INT64);
+      }
     }
   }
 };
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 452aa0ce2d5..124e5883324 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -1204,11 +1204,7 @@ paddle::experimental::DataType CastPyArg2DataType(PyObject* obj,
                                                   const std::string& op_type,
                                                   ssize_t arg_pos) {
   if (obj == Py_None) {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "%s(): argument (position %d) must be "
-        "data_type, but got %s",
-        op_type, arg_pos + 1,
-        ((PyTypeObject*)obj->ob_type)->tp_name));  // NOLINT
+    return paddle::experimental::DataType::UNDEFINED;
   }
 
   framework::proto::VarType::Type type = CastPyArg2ProtoType(obj, arg_pos);
diff --git a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc
index 070867853ad..49fe069217e 100644
--- a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc
+++ b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc
@@ -63,6 +63,12 @@ bool ProtoArgumentMappingContext::IsDenseTensorInput(
     const std::string& name) const {
   return true;
 }
+
+bool ProtoArgumentMappingContext::IsDenseTensorInputs(
+    const std::string& name) const {
+  return true;
+}
+
 bool ProtoArgumentMappingContext::IsSelectedRowsInput(
     const std::string& name) const {
   return false;
diff --git a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h
index 5cf2ef97907..7cb2651ccf6 100644
--- a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h
+++ b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h
@@ -41,6 +41,7 @@ class ProtoArgumentMappingContext : public ::phi::ArgumentMappingContext {
   size_t OutputSize(const std::string& name) const override;
 
   bool IsDenseTensorInput(const std::string& name) const override;
+  bool IsDenseTensorInputs(const std::string& name) const override;
   bool IsSelectedRowsInput(const std::string& name) const override;
   bool IsDenseTensorVectorInput(const std::string& name) const override;
 
diff --git a/paddle/phi/core/compat/arg_map_context.h b/paddle/phi/core/compat/arg_map_context.h
index 102dca48b99..cd7eb419f13 100644
--- a/paddle/phi/core/compat/arg_map_context.h
+++ b/paddle/phi/core/compat/arg_map_context.h
@@ -58,6 +58,18 @@ struct KernelSignature {
 
   // TODO(chenweihang): add assign constructor to solve windows compile
   // problem, remove it later
+  KernelSignature(const KernelSignature& other)
+      : name(other.name),
+        input_names(other.input_names),
+        attr_names(other.attr_names),
+        output_names(other.output_names) {}
+
+  KernelSignature(KernelSignature&& other) noexcept
+      : name(other.name),
+        input_names(std::move(other.input_names)),
+        attr_names(std::move(other.attr_names)),
+        output_names(std::move(other.output_names)) {}
+
   KernelSignature& operator=(const KernelSignature& other) {
     name = other.name;
     input_names = other.input_names;
@@ -65,6 +77,14 @@ struct KernelSignature {
     output_names = other.output_names;
     return *this;
   }
+
+  KernelSignature& operator=(KernelSignature&& other) noexcept {
+    name = other.name;
+    input_names.swap(other.input_names);
+    attr_names.swap(other.attr_names);
+    output_names.swap(other.output_names);
+    return *this;
+  }
 };
 
 std::ostream& operator<<(std::ostream& os, KernelSignature signature);
@@ -86,6 +106,7 @@ class ArgumentMappingContext {
   virtual size_t OutputSize(const std::string& name) const = 0;
 
   virtual bool IsDenseTensorInput(const std::string& name) const = 0;
+  virtual bool IsDenseTensorInputs(const std::string& name) const = 0;
   virtual bool IsSelectedRowsInput(const std::string& name) const = 0;
   // For compatibility with LoDTensorArray
   virtual bool IsDenseTensorVectorInput(const std::string& name) const = 0;
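
The KernelSignature changes add a move constructor and a noexcept move assignment so signatures returned by argument-mapping functions can transfer their name vectors instead of copying them on every dygraph op launch. A self-contained sketch of the same pattern, using std::vector where Paddle uses a small-vector type:

    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-in for phi::KernelSignature.
    struct ToySignature {
      const char* name{nullptr};
      std::vector<std::string> input_names;
      std::vector<std::string> attr_names;
      std::vector<std::string> output_names;

      ToySignature() = default;
      ToySignature(const char* n, std::vector<std::string> in,
                   std::vector<std::string> attrs, std::vector<std::string> out)
          : name(n),
            input_names(std::move(in)),
            attr_names(std::move(attrs)),
            output_names(std::move(out)) {}

      // Move construct: steal the vectors' buffers; `name` is a raw
      // pointer, so copying it is already cheap.
      ToySignature(ToySignature&& other) noexcept
          : name(other.name),
            input_names(std::move(other.input_names)),
            attr_names(std::move(other.attr_names)),
            output_names(std::move(other.output_names)) {}

      // Move assign via swap: the moved-from object keeps the old
      // buffers, which is fine for a value about to be destroyed.
      ToySignature& operator=(ToySignature&& other) noexcept {
        name = other.name;
        input_names.swap(other.input_names);
        attr_names.swap(other.attr_names);
        output_names.swap(other.output_names);
        return *this;
      }
    };

    int main() {
      ToySignature a("add_n", {"X"}, {}, {"Out"});
      ToySignature b(std::move(a));  // no string copies on this path
      return b.input_names.empty() ? 1 : 0;
    }

Marking the move operations noexcept also lets standard containers move KernelSignature objects during reallocation rather than falling back to copies.
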
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 7514f19ef48..144da3cc82f 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -2254,8 +2254,7 @@ void SumRawInferMeta(const MetaTensor& x,
   if (dtype != DataType::UNDEFINED) {
     out_dtype = dtype;
   } else {
-    if (x.dtype() == DataType::BOOL || x.dtype() == DataType::INT32 ||
-        x.dtype() == DataType::INT64) {
+    if (x.dtype() == DataType::BOOL || x.dtype() == DataType::INT32) {
       out_dtype = DataType::INT64;
     } else {
       out_dtype = x.dtype();
diff --git a/paddle/phi/kernels/cpu/reduce_sum_kernel.cc b/paddle/phi/kernels/cpu/reduce_sum_kernel.cc
index 32b12ea6845..0b4c4b9f470 100644
--- a/paddle/phi/kernels/cpu/reduce_sum_kernel.cc
+++ b/paddle/phi/kernels/cpu/reduce_sum_kernel.cc
@@ -29,6 +29,9 @@ void SumRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DataType out_dtype,
                   DenseTensor* out) {
+  if (out_dtype == DataType::UNDEFINED && out->dtype() != x.dtype()) {
+    out_dtype = out->dtype();
+  }
   phi::Reduce<CPUContext, T, phi::funcs::SumFunctor>(
       dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
 }
diff --git a/paddle/phi/kernels/gpu/reduce_sum_kernel.cu b/paddle/phi/kernels/gpu/reduce_sum_kernel.cu
index 28bdbd009bd..918d9b0b65e 100644
--- a/paddle/phi/kernels/gpu/reduce_sum_kernel.cu
+++ b/paddle/phi/kernels/gpu/reduce_sum_kernel.cu
@@ -27,6 +27,9 @@ void SumRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DataType out_dtype,
                   DenseTensor* out) {
+  if (out_dtype == DataType::UNDEFINED && out->dtype() != x.dtype()) {
+    out_dtype = out->dtype();
+  }
   phi::Reduce<T, kps::AddFunctor, kps::IdentityFunctor>(
       dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
 }
diff --git a/paddle/phi/ops/compat/sum_sig.cc b/paddle/phi/ops/compat/sum_sig.cc
index 4364047b0e6..d71111408f8 100644
--- a/paddle/phi/ops/compat/sum_sig.cc
+++ b/paddle/phi/ops/compat/sum_sig.cc
@@ -18,7 +18,7 @@ namespace phi {
 
 KernelSignature SumOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  if (ctx.IsDenseTensorInput("X")) {
+  if (ctx.IsDenseTensorInputs("X")) {
     return KernelSignature("add_n", {"X"}, {}, {"Out"});
   }
   return KernelSignature("unregistered", {}, {}, {});
 }
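
Taken together, SumRawInferMeta, ReduceSumVarTypeInference, and the two kernels implement one promotion rule: an explicit dtype wins, otherwise bool and int32 sums promote to int64. The explicit int64 branch was dropped from SumRawInferMeta because int64 maps to itself either way. A compact restatement of the rule, with DataType as a stand-in enum:

    #include <cassert>

    enum class DataType { UNDEFINED, BOOL, INT32, INT64, FLOAT32 };

    // Output dtype rule for sum: an explicit dtype wins; otherwise bool
    // and int32 inputs promote to int64 to avoid overflow, and everything
    // else keeps the input dtype.
    DataType SumOutDtype(DataType x, DataType requested) {
      if (requested != DataType::UNDEFINED) return requested;
      if (x == DataType::BOOL || x == DataType::INT32) return DataType::INT64;
      return x;
    }

    int main() {
      assert(SumOutDtype(DataType::INT32, DataType::UNDEFINED) ==
             DataType::INT64);
      assert(SumOutDtype(DataType::INT64, DataType::UNDEFINED) ==
             DataType::INT64);  // identity, no branch needed
      assert(SumOutDtype(DataType::FLOAT32, DataType::UNDEFINED) ==
             DataType::FLOAT32);
      assert(SumOutDtype(DataType::INT32, DataType::FLOAT32) ==
             DataType::FLOAT32);
      return 0;
    }
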
diff --git a/paddle/phi/tests/ops/test_op_signature.h b/paddle/phi/tests/ops/test_op_signature.h
index 4a84793527e..1535f40b700 100644
--- a/paddle/phi/tests/ops/test_op_signature.h
+++ b/paddle/phi/tests/ops/test_op_signature.h
@@ -68,6 +68,10 @@ class TestArgumentMappingContext : public phi::ArgumentMappingContext {
     return dense_tensor_inputs.count(name) > 0;
   }
 
+  bool IsDenseTensorInputs(const std::string& name) const override {
+    return dense_tensor_inputs.count(name) > 0;
+  }
+
   bool IsSelectedRowsInput(const std::string& name) const override {
     return selected_rows_inputs.count(name) > 0;
   }
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 217b8258a7e..ede3bcad2f3 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -899,15 +899,10 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
     else:
         reduce_all_flag = False
 
-    def get_dtype(x, dtype):
-        if dtype is not None:
-            return (True, dtype)
-        src_type = convert_dtype(x.dtype)
-        if src_type in ['bool','int32', 'int64']:
-            return (True, 'int64')
-        return (False, src_type)
-
-    dtype_flag, dtype = get_dtype(x, dtype)
+    dtype_flag = False
+    if dtype is not None:
+        dtype_flag = True
+        dtype = convert_np_dtype_to_dtype_(dtype)
 
     if in_dygraph_mode():
         if reduce_all_flag:
@@ -915,17 +910,14 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
         else:
             axis = axis if axis != None and axis != [] else [0]
 
-        out_dtype = convert_np_dtype_to_dtype_(dtype)
-        out = _C_ops.final_state_sum(x, axis, out_dtype, keepdim)
-        return out
+        return _C_ops.final_state_sum(x, axis, dtype, keepdim)
 
     if _in_legacy_dygraph():
         axis = axis if axis != None and axis != [] else [0]
         if dtype_flag:
             return _C_ops.reduce_sum(x, 'dim', axis, 'keep_dim', keepdim,
                                      'reduce_all', reduce_all_flag, 'in_dtype',
-                                     x.dtype, 'out_dtype',
-                                     convert_np_dtype_to_dtype_(dtype))
+                                     x.dtype, 'out_dtype', dtype)
         else:
             return _C_ops.reduce_sum(x, 'dim', axis, 'keep_dim', keepdim,
                                      'reduce_all', reduce_all_flag)
@@ -939,7 +931,7 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
     if dtype_flag:
         attrs.update({
             'in_dtype': x.dtype,
-            'out_dtype': convert_np_dtype_to_dtype_(dtype)
+            'out_dtype': dtype
         })
 
     check_variable_and_dtype(
@@ -953,7 +945,7 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None):
     helper = LayerHelper('sum', **locals())
     if dtype_flag:
         out = helper.create_variable_for_type_inference(
-            dtype=convert_np_dtype_to_dtype_(dtype))
+            dtype=dtype)
     else:
         out = helper.create_variable_for_type_inference(dtype=x.dtype)
     helper.append_op(
--
GitLab
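
With the get_dtype helper removed, python/paddle/tensor/math.py no longer computes the promoted dtype in Python: when the caller passes no dtype, CastPyArg2DataType (patched above) turns Py_None into DataType::UNDEFINED, SumRawInferMeta sets the promoted output dtype, and the guard added to SumRawKernel picks that dtype up. A sketch of the kernel-side fallback under those assumptions, with toy types standing in for DenseTensor:

    #include <cassert>

    enum class DataType { UNDEFINED, INT32, INT64 };

    struct ToyTensor {  // stand-in for phi::DenseTensor
      DataType dtype;
    };

    // Mirrors the guard added to SumRawKernel: when no explicit out_dtype
    // reaches the kernel but infermeta already promoted the output (e.g.
    // int32 -> int64), reduce in the promoted type instead of x's type.
    DataType ResolveReduceDtype(const ToyTensor& x, const ToyTensor& out,
                                DataType out_dtype) {
      if (out_dtype == DataType::UNDEFINED && out.dtype != x.dtype) {
        out_dtype = out.dtype;
      }
      return out_dtype;
    }

    int main() {
      ToyTensor x{DataType::INT32}, out{DataType::INT64};
      assert(ResolveReduceDtype(x, out, DataType::UNDEFINED) ==
             DataType::INT64);
      return 0;
    }
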