From b76c2f940a5e2da44a3ace1df649907fd8e14517 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Fri, 28 Jul 2023 10:52:43 +0800
Subject: [PATCH] New ir support fluid op (#55693)

* new ir support save combine
* update
* polish code
* update
* new ir support fluid op
* remove duplicate op
* fix ir exe test compile error
* fix compile bug
* update
* code format
* update
* update
* polish code
---
 .../interpreter/interpreter_util.cc           |  42 +++++--
 .../new_executor/new_executor_defs.h          |   3 +
 .../new_executor/new_ir_interpreter.cc        |  13 +-
 paddle/fluid/ir/dialect/pd_op.yaml            |  21 ++++
 .../ir/phi_kernel_adaptor/CMakeLists.txt      |   2 +-
 .../ir/phi_kernel_adaptor/phi_kernel_util.cc  | 115 ++++++++++++++++++
 .../ir/phi_kernel_adaptor/phi_kernel_util.h   |  14 +++
 .../fused_softmax_mask_upper_triangle_op.cc   |   2 +-
 .../fused_softmax_mask_upper_triangle_op.cu   |   2 +-
 paddle/phi/api/yaml/legacy_backward.yaml      |  10 ++
 paddle/phi/api/yaml/legacy_ops.yaml           |   9 ++
 test/cpp/ir/core/CMakeLists.txt               |   1 +
 test/cpp/ir/kernel_dialect/CMakeLists.txt     |   1 +
 test/ir/new_ir/test_standalone_new_ir.py      |   7 +-
 14 files changed, 226 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
index 48473f9376e..8ebaea58806 100644
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -1004,17 +1004,37 @@ void BuildOpFuncList(
           true,
           "not found kernel for [%s]",
           kernel_name);
-      ::ir::BuildPhiContext<phi::KernelContext,
-                            const phi::TensorBase*,
-                            phi::TensorBase*,
-                            paddle::small_vector<const phi::TensorBase*>,
-                            paddle::small_vector<phi::TensorBase*>,
-                            true>((*it),
-                                  value_2_name_map,
-                                  scope,
-                                  local_scope,
-                                  op_yaml_info_parser,
-                                  &(op_func_node.kernel_context_));
+
+      if (kernel_name == "fused_softmax_mask_upper_triangle" ||
+          kernel_name == "fused_softmax_mask_upper_triangle_grad") {
+        // build the legacy operator
+        op_func_node.operator_base_ =
+            ir::BuildOperatorBase((*it), value_2_name_map, op_yaml_info_parser);
+        paddle::framework::VariableValueMap in_map;
+        paddle::framework::VariableValueMap out_map;
+        op_func_node.runtime_ctx_ =
+            std::make_shared<paddle::framework::RuntimeContext>(
+                paddle::framework::RuntimeContext(in_map, out_map));
+        ir::BuildRuntimeContext((*it),
+                                value_2_name_map,
+                                scope,
+                                local_scope,
+                                op_yaml_info_parser,
+                                op_func_node.runtime_ctx_.get());
+        op_func_node.fluid_op = true;
+      } else {
+        ::ir::BuildPhiContext<phi::KernelContext,
+                              const phi::TensorBase*,
+                              phi::TensorBase*,
+                              paddle::small_vector<const phi::TensorBase*>,
+                              paddle::small_vector<phi::TensorBase*>,
+                              true>((*it),
+                                    value_2_name_map,
+                                    scope,
+                                    local_scope,
+                                    op_yaml_info_parser,
+                                    &(op_func_node.kernel_context_));
+      }
 
       VLOG(6) << "finish process kernel context";
       op_func_node.kernel_context_.SetDeviceContext(
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index 73d99eb63d9..032c3ffc231 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -178,6 +178,9 @@ struct OpFuncNode {
   phi::InferMetaContext infer_meta_context_;
   std::string phi_op_name_;
   paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{nullptr};
+
+  bool fluid_op{false};
+  std::shared_ptr<RuntimeContext> runtime_ctx_{nullptr};
 };
 
 class Instruction {
diff --git a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
index 27e57e4760b..3cdc815a562 100644
--- a/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
+++ b/paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -1043,7 +1043,18 @@ void NewIRInterpreter::RunInstruction(const Instruction& instr_node) {
           &(op_func_node->infer_meta_context_));
     }
     VLOG(5) << "after run infer meta";
-    (*(op_func_node->phi_kernel_))(&(op_func_node->kernel_context_));
+
+    if (op_func_node->fluid_op) {
+      // run the op through the legacy fluid path
+      ExecutionContext exe_ctx(*(op_func_node->operator_base_.get()),
+                               *scope_,
+                               *(op_func_node->dev_ctx_),
+                               *(op_func_node->runtime_ctx_.get()));
+      (*(op_func_node->phi_kernel_))(&exe_ctx);
+
+    } else {
+      (*(op_func_node->phi_kernel_))(&(op_func_node->kernel_context_));
+    }
     VLOG(5) << "after run kernel";
   } else if (!instr_node.IsArtificial()) {
     RunOperator(instr_node);
diff --git a/paddle/fluid/ir/dialect/pd_op.yaml b/paddle/fluid/ir/dialect/pd_op.yaml
index eaff2c738a2..9b113c02cf9 100644
--- a/paddle/fluid/ir/dialect/pd_op.yaml
+++ b/paddle/fluid/ir/dialect/pd_op.yaml
@@ -65,6 +65,27 @@
   inplace: null
   backward: null
 
+- name: load_combine
+  inputs: []
+  attrs:
+  - {typename: str, name: file_path}
+  - {typename: bool, name: load_as_fp16}
+  - {typename: bool, name: model_from_memory}
+  outputs:
+  - {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
+  no_need_buffer: null
+  data_transform: null
+  kernel:
+    func: [load_combine]
+    param: [file_path, load_as_fp16, model_from_memory]
+    backend: null
+    layout: null
+    data_type: null
+    dispatch: {load_combine: null}
+    force_backend: null
+  inplace: null
+  backward: null
+
 - name: share_buffer_
   inputs:
   - typename: Tensor[]
diff --git a/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt b/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt
index 4b4175f99de..dd831ce7840 100644
--- a/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt
+++ b/paddle/fluid/ir/phi_kernel_adaptor/CMakeLists.txt
@@ -4,4 +4,4 @@ file(GLOB PHI_KERNEL_ADAPTOR_SRCS "*.cc")
 cc_library(
   phi_kernel_adaptor
   SRCS ${PHI_KERNEL_ADAPTOR_SRCS}
-  DEPS ir phi_utils)
+  DEPS program_translator ir phi_utils)
diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
index b54ef67754d..0388ee9791a 100644
--- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
+++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -37,9 +37,12 @@
 #include "paddle/fluid/ir/dialect/kernel_type.h"
 #include "paddle/fluid/ir/dialect/pd_attribute.h"
 #include "paddle/fluid/ir/interface/op_yaml_info_parser.h"
+#include "paddle/fluid/ir_adaptor/translator/op_compat_info.h"
 #include "paddle/phi/core/enforce.h"
 
 #include "glog/logging.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/operator.h"
 
 namespace ir {
 
@@ -451,4 +454,116 @@ void BuildScope(const ir::Block& block,
                 const_cast<paddle::framework::Scope*>(inner_scope->root()));
 }
 
+void BuildRuntimeContext(
+    ir::Operation* op,
+    const std::unordered_map<ir::Value, std::string>& name_map,
+    paddle::framework::Scope* scope,
+    paddle::framework::Scope* local_scope,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+    paddle::framework::RuntimeContext* runtime_ctx) {
+  paddle::framework::Scope* inner_scope =
+      local_scope != nullptr ? local_scope : scope;
+  VLOG(6) << "BuildRuntimeContext in scope[" << scope << "] inner_scope["
+          << inner_scope << "]";
+
+  auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
+
+  auto& name2id = op_yaml_info.InputName2Id();
+
+  auto pd_op_name =
+      op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().AsString();
+  auto fluid_op_name = pd_op_name.substr(3);  // pd_op_name starts with "pd."
+
+  auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
+
+  for (auto& name : vec_kernel_fn_tensor_params) {
+    PADDLE_ENFORCE_EQ(
+        name2id.count(name),
+        true,
+        phi::errors::NotFound("param [%s] MUST in name2id map", name));
+    auto index = op_yaml_info.InputName2Id().at(name);
+    ir::Value ptr = op->operand(index);
+
+    auto in_var_name = name_map.at(ptr);
+    VLOG(6) << "ctx->EmplaceBackInput: " << name << "\t" << in_var_name;
+
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", in_var_name));
+    auto var = inner_scope->FindVar(in_var_name);
+    std::vector<paddle::framework::Variable*> vec_tmp = {var};
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    runtime_ctx->inputs[legacy_arg_name].push_back(var);
+  }
+
+  auto& output_name_list = op_yaml_info.OutputNames();
+  for (size_t i = 0; i < output_name_list.size(); ++i) {
+    auto name = output_name_list[i];
+    ir::Value ptr = op->result(i);
+
+    auto out_var_name = name_map.at(ptr);
+    VLOG(6) << "ctx->EmplaceBackOutput: " << name << "\t" << out_var_name;
+
+    PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(out_var_name),
+                            phi::errors::PreconditionNotMet(
+                                "can not find var[%s] in scope", out_var_name));
+    auto var = inner_scope->FindVar(out_var_name);
+    std::vector<paddle::framework::Variable*> vec_tmp = {var};
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
+  }
+}
+
+std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
+    ir::Operation* op,
+    const std::unordered_map<ir::Value, std::string>& name_map,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
+  paddle::framework::VariableNameMap in_name_map;
+  paddle::framework::VariableNameMap out_name_map;
+  paddle::framework::AttributeMap attr_map;
+
+  auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
+
+  auto& name2id = op_yaml_info.InputName2Id();
+
+  auto pd_op_name =
+      op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().AsString();
+  auto fluid_op_name = pd_op_name.substr(3);  // pd_op_name starts with "pd."
+
+  auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
+
+  for (auto& name : vec_kernel_fn_tensor_params) {
+    PADDLE_ENFORCE_EQ(
+        name2id.count(name),
+        true,
+        phi::errors::NotFound("param [%s] MUST in name2id map", name));
+    auto index = op_yaml_info.InputName2Id().at(name);
+    ir::Value ptr = op->operand(index);
+
+    auto in_var_name = name_map.at(ptr);
+
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    in_name_map[legacy_arg_name].push_back(in_var_name);
+  }
+
+  // build attribute
+
+  auto& output_name_list = op_yaml_info.OutputNames();
+  for (size_t i = 0; i < output_name_list.size(); ++i) {
+    auto name = output_name_list[i];
+    ir::Value ptr = op->result(i);
+
+    auto out_var_name = name_map.at(ptr);
+    auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
+    out_name_map[legacy_arg_name].push_back(out_var_name);
+  }
+
+  auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
+  auto ptr =
+      op_info.Creator()(fluid_op_name, in_name_map, out_name_map, attr_map);
+
+  std::shared_ptr<paddle::framework::OperatorBase> res(ptr);
+  return res;
+}
+
 }  // namespace ir
diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
index f053b045038..f3021ad4765 100644
--- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
+++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
@@ -32,6 +32,7 @@
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/phi/core/kernel_context.h"
 
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/ir/dialect/kernel_attribute.h"
 #include "paddle/fluid/ir/dialect/kernel_type.h"
 #include "paddle/fluid/ir/dialect/pd_attribute.h"
@@ -50,6 +51,19 @@ void BuildScope(const ir::Block& block,
                 std::map<std::string, int>* var_name_2_id,
                 std::vector<paddle::framework::Variable*>* variable_list);
 
+void BuildRuntimeContext(
+    ir::Operation* op,
+    const std::unordered_map<ir::Value, std::string>& name_map,
+    paddle::framework::Scope* scope,
+    paddle::framework::Scope* local_scope,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info,
+    paddle::framework::RuntimeContext* runtime_ctx);
+
+std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
+    ir::Operation* op,
+    const std::unordered_map<ir::Value, std::string>& name_map,
+    const paddle::dialect::OpYamlInfoParser& op_yaml_info);
+
 template <typename Context,
           typename InType,
           typename OutType,
diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
--- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
+++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
@@ ... @@
   void Apply(GradOpPtr<T> op) const override {
     op->SetType("fused_softmax_mask_upper_triangle_grad");
-    op->SetInput("Softmax", this->Output("Out"));
+    op->SetInput("Out", this->Output("Out"));
     op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
     op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
   }
diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
index 32e7cffa498..779ee234071 100644
--- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
+++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
@@ -479,7 +479,7 @@ class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel<T> {
         context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     auto* grad_y =
         context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
-    auto* softmax_rst = context.Input<phi::DenseTensor>("Softmax");
+    auto* softmax_rst = context.Input<phi::DenseTensor>("Out");
 
     auto* grad_x_data = grad_x->mutable_data<T>(context.GetPlace());
     auto* grad_y_data = grad_y->data<T>();
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 246c14fc2d8..5ad37780f15 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -242,6 +242,16 @@
     kernel :
       func : frobenius_norm_grad
 
+- backward_op : fused_softmax_mask_upper_triangle_grad
+  forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out)
+  args: (Tensor Out, Tensor Out_grad)
+  output : Tensor(X_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [Out_grad]
+  kernel:
+    func : fused_softmax_mask_upper_triangle_grad
+
 - backward_op : hardswish_grad
   forward : hardswish (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index c5feff0f22e..ea33c8461c8 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -396,6 +396,15 @@
   optional : skip_update, master_params
   inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
 
+- op : fused_softmax_mask_upper_triangle
+  args : (Tensor X)
+  output : Tensor(Out)
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel:
+    func : fused_softmax_mask_upper_triangle
+  backward: fused_softmax_mask_upper_triangle_grad
+
 - op : gaussian
   args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={})
   output: Tensor(out)
diff --git a/test/cpp/ir/core/CMakeLists.txt b/test/cpp/ir/core/CMakeLists.txt
index 4c3f1b7fb24..bac2ed3d252 100644
--- a/test/cpp/ir/core/CMakeLists.txt
+++ b/test/cpp/ir/core/CMakeLists.txt
@@ -30,6 +30,7 @@ cc_test_old(
   ir_exe_test.cc
   DEPS
   pd_op_to_kernel_pass
+  program_translator
   pd_dialect
   phi_kernel_adaptor
   ir
diff --git a/test/cpp/ir/kernel_dialect/CMakeLists.txt b/test/cpp/ir/kernel_dialect/CMakeLists.txt
index 1cc167f783c..0a9712c9bbd 100644
--- a/test/cpp/ir/kernel_dialect/CMakeLists.txt
+++ b/test/cpp/ir/kernel_dialect/CMakeLists.txt
@@ -4,6 +4,7 @@ cc_test_old(
   ir_kernel_dialect_pass_test.cc
   DEPS
   pd_op_to_kernel_pass
+  program_translator
   pd_dialect
   phi_kernel_adaptor
   pd_trait
diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py
index 1adb8788be7..d804f8a67db 100644
--- a/test/ir/new_ir/test_standalone_new_ir.py
+++ b/test/ir/new_ir/test_standalone_new_ir.py
@@ -53,6 +53,7 @@ class TestCombineOp(unittest.TestCase):
             if paddle.is_compiled_with_cuda()
             else paddle.CPUPlace()
         )
+
         exe = paddle.static.Executor(place)
 
         main_program = paddle.static.Program()
@@ -104,8 +105,8 @@ class TestFeedOp(unittest.TestCase):
 
 class TestSelectedRows(unittest.TestCase):
     def test_with_new_ir(self):
-        paddle.enable_static()
         # TODO(phlrain): support selected rows in GPU
+        paddle.enable_static()
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)
 
@@ -143,9 +144,11 @@ class TestAddGradOp(unittest.TestCase):
         x = paddle.static.data("x", [2, 2], dtype="float32")
         y = paddle.static.data("y", [2, 2], dtype="float32")
         x.stop_gradient = False
+
         z = x * y
 
         paddle.static.gradients(z, x)
+
         np_a = np.random.rand(2, 2).astype("float32")
         np_b = np.random.rand(2, 2).astype("float32")
         out = exe.run(
@@ -220,6 +223,7 @@ class TestSplitOp(unittest.TestCase):
             if paddle.is_compiled_with_cuda()
             else paddle.CPUPlace()
         )
+
         exe = paddle.static.Executor(place)
 
         main_program = paddle.static.Program()
@@ -245,6 +249,7 @@ class TestJitSaveOp(unittest.TestCase):
         linear = paddle.nn.Linear(10, 10)
 
         path = "example_model/linear"
+
        paddle.jit.save(
             linear,
             path,
-- 
GitLab
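
Note on the mechanism: this patch adds a per-op escape hatch to the new-IR executor. For ops whose kernels still expect the legacy fluid ExecutionContext (here fused_softmax_mask_upper_triangle and its grad), BuildOpFuncList stores an OperatorBase plus a prebuilt RuntimeContext on the OpFuncNode and sets fluid_op = true; RunInstruction then constructs an ExecutionContext per run instead of passing the prebuilt phi KernelContext. The standalone sketch below shows only that dispatch shape; every type in it is a simplified stand-in invented for illustration, not Paddle's real KernelContext / ExecutionContext / OpFuncNode.

// dispatch_sketch.cc - illustrative only; stand-in types, not Paddle code.
#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct KernelContext {  // stand-in for phi::KernelContext
  std::string note = "inputs resolved up front by BuildPhiContext";
};

struct RuntimeContext {  // stand-in for paddle::framework::RuntimeContext
  std::string note = "variable lists filled by BuildRuntimeContext";
};

struct ExecutionContext {  // stand-in for paddle::framework::ExecutionContext
  explicit ExecutionContext(const RuntimeContext& rc) : runtime_ctx(rc) {}
  const RuntimeContext& runtime_ctx;
};

struct OpFuncNode {  // mirrors the two fields the patch adds
  bool fluid_op{false};
  KernelContext kernel_context;
  std::shared_ptr<RuntimeContext> runtime_ctx;
  std::function<void(const void*)> kernel;  // type-erased kernel entry
};

// Mirrors NewIRInterpreter::RunInstruction: the flag picks which
// context object the kernel receives.
void RunInstruction(const OpFuncNode& node) {
  if (node.fluid_op) {
    ExecutionContext exe_ctx(*node.runtime_ctx);  // built fresh per run
    node.kernel(&exe_ctx);
  } else {
    node.kernel(&node.kernel_context);  // prebuilt once at program build time
  }
}

int main() {
  OpFuncNode fluid_node;
  fluid_node.fluid_op = true;
  fluid_node.runtime_ctx = std::make_shared<RuntimeContext>();
  fluid_node.kernel = [](const void* ctx) {
    auto* e = static_cast<const ExecutionContext*>(ctx);
    std::cout << "fluid path: " << e->runtime_ctx.note << "\n";
  };
  RunInstruction(fluid_node);

  OpFuncNode phi_node;
  phi_node.kernel = [](const void* ctx) {
    auto* k = static_cast<const KernelContext*>(ctx);
    std::cout << "phi path: " << k->note << "\n";
  };
  RunInstruction(phi_node);
  return 0;
}

The design keeps the phi fast path untouched: the fluid branch pays the cost of building an ExecutionContext on every run, which is acceptable here because it only covers the two ops that have not yet been migrated to phi kernel signatures.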