Unverified commit b76c2f94, authored by H hong, committed by GitHub

New ir support fluid op (#55693)

* new ir support save combine

* update

* polish code

* update

* new ir support fluid op

* remove duplicate op

* fix ir exe test compile error

* fix compile bug

* update

* code format

* update

* update

* polish code
Parent 0b04b939
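
For context on the diff below: this change lets the new IR interpreter fall back to the legacy (fluid) operator path for ops that are still implemented as fluid kernels, such as fused_softmax_mask_upper_triangle and its grad, by building an OperatorBase plus RuntimeContext and marking the node with a fluid_op flag. A minimal, self-contained C++ sketch of that dispatch pattern follows; all types here are simplified stand-ins, not the real Paddle classes.

#include <iostream>
#include <memory>
#include <string>

// Simplified stand-ins for phi::KernelContext and framework::ExecutionContext.
struct PhiKernelContext { std::string desc; };
struct LegacyExecutionContext { std::string desc; };

// Mirrors the new OpFuncNode::fluid_op flag introduced in this diff.
struct OpFuncNode {
  std::string op_name;
  bool fluid_op = false;
  PhiKernelContext kernel_context;
  std::shared_ptr<LegacyExecutionContext> legacy_ctx;
};

void RunNode(const OpFuncNode& node) {
  if (node.fluid_op) {
    // Legacy path: an ExecutionContext built from OperatorBase + RuntimeContext
    // is handed to the fluid kernel.
    std::cout << node.op_name << " -> fluid kernel (" << node.legacy_ctx->desc << ")\n";
  } else {
    // Default path: the PHI kernel receives a phi::KernelContext.
    std::cout << node.op_name << " -> phi kernel (" << node.kernel_context.desc << ")\n";
  }
}

int main() {
  OpFuncNode phi_op;
  phi_op.op_name = "pd.matmul";
  phi_op.kernel_context.desc = "phi::KernelContext";

  OpFuncNode fluid_op;
  fluid_op.op_name = "pd.fused_softmax_mask_upper_triangle";
  fluid_op.fluid_op = true;
  fluid_op.legacy_ctx = std::make_shared<LegacyExecutionContext>();
  fluid_op.legacy_ctx->desc = "framework::ExecutionContext";

  RunNode(phi_op);
  RunNode(fluid_op);
}
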
...@@ -1004,6 +1004,25 @@ void BuildOpFuncList(
true,
"not found kernel for [%s]",
kernel_name);
if (kernel_name == "fused_softmax_mask_upper_triangle" ||
kernel_name == "fused_softmax_mask_upper_triangle_grad") {
// build the legacy OperatorBase and RuntimeContext for this op
op_func_node.operator_base_ =
ir::BuildOperatorBase((*it), value_2_name_map, op_yaml_info_parser);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
op_func_node.runtime_ctx_ =
std::make_shared<paddle::framework::RuntimeContext>(
paddle::framework::RuntimeContext(in_map, out_map));
ir::BuildRuntimeContext((*it),
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
op_func_node.runtime_ctx_.get());
op_func_node.fluid_op = true;
} else {
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
...@@ -1015,6 +1034,7 @@ void BuildOpFuncList(
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));
}
VLOG(6) << "finish process kernel context"; VLOG(6) << "finish process kernel context";
op_func_node.kernel_context_.SetDeviceContext( op_func_node.kernel_context_.SetDeviceContext(
......
...@@ -178,6 +178,9 @@ struct OpFuncNode {
phi::InferMetaContext infer_meta_context_;
std::string phi_op_name_;
paddle::dialect::InferMetaInterface::Concept* infer_meta_interface_{nullptr};
bool fluid_op{false};
std::shared_ptr<RuntimeContext> runtime_ctx_{nullptr};
};
class Instruction {
......
...@@ -1043,7 +1043,18 @@ void NewIRInterpreter::RunInstruction(const Instruction& instr_node) {
&(op_func_node->infer_meta_context_));
}
VLOG(5) << "after run infer meta";
if (op_func_node->fluid_op) {
// run fluid op
ExecutionContext exe_ctx(*(op_func_node->operator_base_.get()),
*scope_,
*(op_func_node->dev_ctx_),
*(op_func_node->runtime_ctx_.get()));
(*(op_func_node->phi_kernel_))(&exe_ctx);
} else {
(*(op_func_node->phi_kernel_))(&(op_func_node->kernel_context_));
}
VLOG(5) << "after run kernel"; VLOG(5) << "after run kernel";
} else if (!instr_node.IsArtificial()) { } else if (!instr_node.IsArtificial()) {
RunOperator(instr_node); RunOperator(instr_node);
......
...@@ -65,6 +65,27 @@
inplace: null
backward: null
- name: load_combine
inputs: []
attrs:
- {typename: str, name: file_path}
- {typename: bool, name: load_as_fp16}
- {typename: bool, name: model_from_memory}
outputs:
- {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
no_need_buffer: null
data_transform: null
kernel:
func: [load_combine]
param: [file_path, load_as_fp16, model_from_memory]
backend: null
layout: null
data_type: null
dispatch: {fetch: null}
force_backend: null
inplace: null
backward: null
- name: share_buffer_
inputs:
- typename: Tensor[]
......
...@@ -4,4 +4,4 @@ file(GLOB PHI_KERNEL_ADAPTOR_SRCS "*.cc")
cc_library(
phi_kernel_adaptor
SRCS ${PHI_KERNEL_ADAPTOR_SRCS}
- DEPS ir phi_utils)
+ DEPS program_translator ir phi_utils)
...@@ -37,9 +37,12 @@
#include "paddle/fluid/ir/dialect/kernel_type.h"
#include "paddle/fluid/ir/dialect/pd_attribute.h"
#include "paddle/fluid/ir/interface/op_yaml_info_parser.h"
#include "paddle/fluid/ir_adaptor/translator/op_compat_info.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/operator.h"
namespace ir {
...@@ -451,4 +454,116 @@ void BuildScope(const ir::Block& block,
const_cast<paddle::framework::Scope*>(inner_scope->root()));
}
void BuildRuntimeContext(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
paddle::framework::Scope* scope,
paddle::framework::Scope* local_scope,
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
paddle::framework::RuntimeContext* runtime_ctx) {
paddle::framework::Scope* inner_scope =
local_scope != nullptr ? local_scope : scope;
VLOG(6) << "BuildPhiContext in scope[" << scope << "] inner_scope["
<< inner_scope << "]";
auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
auto& name2id = op_yaml_info.InputName2Id();
auto pd_op_name =
op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().AsString();
auto fluid_op_name = pd_op_name.substr(3);  // pd_op_name starts with "pd.", strip that prefix
auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
for (auto& name : vec_kernel_fn_tensor_params) {
PADDLE_ENFORCE_EQ(
name2id.count(name),
true,
phi::errors::NotFound("param [%s] MUST in name2id map", name));
auto index = op_yaml_info.InputName2Id().at(name);
ir::Value ptr = op->operand(index);
auto in_var_name = name_map.at(ptr);
VLOG(6) << "ctx->EmplaceBackInput: " << name << "\t" << in_var_name;
PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));
auto var = inner_scope->FindVar(in_var_name);
std::vector<paddle::framework::Variable*> vec_tmp = {var};
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
runtime_ctx->inputs[legacy_attr_name].push_back(var);
}
auto& output_name_list = op_yaml_info.OutputNames();
for (size_t i = 0; i < output_name_list.size(); ++i) {
auto name = output_name_list[i];
ir::Value ptr = op->result(i);
auto in_var_name = name_map.at(ptr);
VLOG(6) << "ctx->EmplaceBackInput: " << name << "\t" << in_var_name;
PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));
auto var = inner_scope->FindVar(in_var_name);
std::vector<paddle::framework::Variable*> vec_tmp = {var};
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
runtime_ctx->outputs[legacy_attr_name] = vec_tmp;
}
}
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
paddle::framework::VariableNameMap in_name_map;
paddle::framework::VariableNameMap out_name_map;
paddle::framework::AttributeMap attr_map;
auto& vec_kernel_fn_tensor_params = op_yaml_info.TensorParams(true);
auto& name2id = op_yaml_info.InputName2Id();
auto pd_op_name =
op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().AsString();
auto fluid_op_name = pd_op_name.substr(3);  // pd_op_name starts with "pd.", strip that prefix
auto& op_normalizer = paddle::translator::OpNameNormalizer::instance();
for (auto& name : vec_kernel_fn_tensor_params) {
PADDLE_ENFORCE_EQ(
name2id.count(name),
true,
phi::errors::NotFound("param [%s] MUST in name2id map", name));
auto index = op_yaml_info.InputName2Id().at(name);
ir::Value ptr = op->operand(index);
auto in_var_name = name_map.at(ptr);
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
in_name_map[legacy_attr_name].push_back(in_var_name);
}
// build outputs; attr_map is left empty
auto& output_name_list = op_yaml_info.OutputNames();
for (size_t i = 0; i < output_name_list.size(); ++i) {
auto name = output_name_list[i];
ir::Value ptr = op->result(i);
auto out_var_name = name_map.at(ptr);
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
out_name_map[legacy_attr_name].push_back(out_var_name);
}
auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
auto ptr =
op_info.Creator()(fluid_op_name, in_name_map, out_name_map, attr_map);
std::shared_ptr<paddle::framework::OperatorBase> res(ptr);
return res;
}
} // namespace ir
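
As a reading aid for BuildRuntimeContext above: its core job is to translate each PHI-style parameter name into the legacy fluid argument name (the role of OpNameNormalizer::GetLegacyArgName) and to group the scope variables under those legacy names in a RuntimeContext. Below is a minimal stand-alone model of that mapping, with plain maps in place of ir::Value, Scope and the normalizer; the names "x", "X" and "inner_var_0" are made up for illustration.

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct Variable {};  // stand-in for paddle::framework::Variable

int main() {
  // PHI-style parameter name -> legacy fluid argument name
  // (what GetLegacyArgName provides).
  std::unordered_map<std::string, std::string> legacy_arg_name = {{"x", "X"}};

  // PHI parameter name -> variable name produced by the IR lowering
  // (what the value-to-name map provides).
  std::unordered_map<std::string, std::string> value_to_var_name = {
      {"x", "inner_var_0"}};

  // Scope: variable name -> Variable*.
  Variable x_var;
  std::unordered_map<std::string, Variable*> scope = {{"inner_var_0", &x_var}};

  // RuntimeContext-like inputs: legacy argument name -> list of variables.
  std::unordered_map<std::string, std::vector<Variable*>> runtime_inputs;

  for (const auto& kv : value_to_var_name) {
    Variable* var = scope.at(kv.second);
    runtime_inputs[legacy_arg_name.at(kv.first)].push_back(var);
  }

  std::cout << "variables registered under legacy name X: "
            << runtime_inputs["X"].size() << std::endl;  // prints 1
}
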
...@@ -32,6 +32,7 @@
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/phi/core/kernel_context.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/ir/dialect/kernel_attribute.h" #include "paddle/fluid/ir/dialect/kernel_attribute.h"
#include "paddle/fluid/ir/dialect/kernel_type.h" #include "paddle/fluid/ir/dialect/kernel_type.h"
#include "paddle/fluid/ir/dialect/pd_attribute.h" #include "paddle/fluid/ir/dialect/pd_attribute.h"
...@@ -50,6 +51,19 @@ void BuildScope(const ir::Block& block, ...@@ -50,6 +51,19 @@ void BuildScope(const ir::Block& block,
std::map<std::string, int>* var_name_2_id, std::map<std::string, int>* var_name_2_id,
std::vector<paddle::framework::Variable*>* variable_list); std::vector<paddle::framework::Variable*>* variable_list);
void BuildRuntimeContext(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
paddle::framework::Scope* scope,
paddle::framework::Scope* local_scope,
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
paddle::framework::RuntimeContext* runtime_ctx);
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info);
template <typename Context,
typename InType,
typename OutType,
......
...@@ -84,7 +84,7 @@ class SoftmaxMaskFuseUpperTriangleGradOpMaker
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("fused_softmax_mask_upper_triangle_grad");
op->SetInput("Softmax", this->Output("Out")); op->SetInput("Out", this->Output("Out"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
}
......
...@@ -479,7 +479,7 @@ class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel<T> {
context.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto* grad_y =
context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
- auto* softmax_rst = context.Input<phi::DenseTensor>("Softmax");
+ auto* softmax_rst = context.Input<phi::DenseTensor>("Out");
auto* grad_x_data = grad_x->mutable_data<T>(context.GetPlace());
auto* grad_y_data = grad_y->data<T>();
......
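
For reference on the two "Softmax" -> "Out" renames above: the grad kernel reads the saved forward softmax output and applies the standard softmax backward, dx_i = y_i * (dy_i - sum_j y_j * dy_j); upper-triangle positions that were masked have y_i = 0, so their gradient is zero automatically. Below is a plain CPU sketch of that per-row formula, a reference for the math only, not the fused CUDA kernel; the names are illustrative.

#include <cstddef>
#include <cstdio>
#include <vector>

// dx_i = y_i * (dy_i - sum_j y_j * dy_j), where y is the forward softmax ("Out").
std::vector<float> SoftmaxGradRow(const std::vector<float>& y,
                                  const std::vector<float>& dy) {
  float dot = 0.0f;
  for (std::size_t i = 0; i < y.size(); ++i) dot += y[i] * dy[i];
  std::vector<float> dx(y.size());
  for (std::size_t i = 0; i < y.size(); ++i) dx[i] = y[i] * (dy[i] - dot);
  return dx;
}

int main() {
  // A row whose last position was masked out by the upper triangle (probability 0).
  std::vector<float> y = {0.7f, 0.3f, 0.0f};
  std::vector<float> dy = {1.0f, -1.0f, 5.0f};
  for (float v : SoftmaxGradRow(y, dy)) std::printf("%f ", v);  // 0.42 -0.42 0
  std::printf("\n");
}
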
...@@ -242,6 +242,16 @@
kernel :
func : frobenius_norm_grad
- backward_op : fused_softmax_mask_upper_triangle_grad
forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out)
args: (Tensor Out, Tensor Out_grad)
output : Tensor(X_grad)
infer_meta :
func : UnchangedInferMeta
param : [Out_grad]
kernel:
func : fused_softmax_mask_upper_triangle_grad
- backward_op : hardswish_grad
forward : hardswish (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......
...@@ -396,6 +396,15 @@
optional : skip_update, master_params
inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
- op : fused_softmax_mask_upper_triangle
args : (Tensor X)
output : Tensor(Out)
infer_meta :
func : UnchangedInferMeta
kernel:
func : fused_softmax_mask_upper_triangle
backward: fused_softmax_mask_upper_triangle_grad
- op : gaussian
args : (IntArray shape, float mean, float std, int seed, DataType dtype, Place place={})
output: Tensor(out)
......
...@@ -30,6 +30,7 @@ cc_test_old(
ir_exe_test.cc
DEPS
pd_op_to_kernel_pass
program_translator
pd_dialect
phi_kernel_adaptor
ir
......
...@@ -4,6 +4,7 @@ cc_test_old(
ir_kernel_dialect_pass_test.cc
DEPS
pd_op_to_kernel_pass
program_translator
pd_dialect
phi_kernel_adaptor
pd_trait
......
...@@ -53,6 +53,7 @@ class TestCombineOp(unittest.TestCase):
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
...@@ -104,8 +105,8 @@ class TestFeedOp(unittest.TestCase):
class TestSelectedRows(unittest.TestCase):
def test_with_new_ir(self):
paddle.enable_static()
# TODO(phlrain): support selected rows in GPU
- paddle.enable_static()
place = paddle.CPUPlace() place = paddle.CPUPlace()
exe = paddle.static.Executor(place) exe = paddle.static.Executor(place)
...@@ -143,9 +144,11 @@ class TestAddGradOp(unittest.TestCase):
x = paddle.static.data("x", [2, 2], dtype="float32")
y = paddle.static.data("y", [2, 2], dtype="float32")
x.stop_gradient = False
z = x * y
paddle.static.gradients(z, x)
np_a = np.random.rand(2, 2).astype("float32")
np_b = np.random.rand(2, 2).astype("float32")
out = exe.run(
...@@ -220,6 +223,7 @@ class TestSplitOp(unittest.TestCase):
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
...@@ -245,6 +249,7 @@ class TestJitSaveOp(unittest.TestCase):
linear = paddle.nn.Linear(10, 10)
path = "example_model/linear"
paddle.jit.save(
linear,
path,
......