From 2be20e201ffb706d3295888a4fe2c8323eefbcd5 Mon Sep 17 00:00:00 2001
From: huzhiqiang <912790387@qq.com>
Date: Tue, 8 Feb 2022 20:26:52 -0600
Subject: [PATCH] convert paddle model to mlir paddle dialect (#39216)

---
 .gitignore                                    |   1 +
 paddle/fluid/operators/abs_op.cc              |   6 +-
 paddle/fluid/operators/angle_op.cc            |   6 +-
 paddle/fluid/operators/clip_op.cc             |   6 +-
 paddle/fluid/operators/flatten_op.cc          |   6 +-
 paddle/fluid/operators/mul_op.cc              |   3 +-
 paddle/fluid/operators/renorm_op.cc           |   8 +-
 paddle/fluid/operators/reshape_op.cc          |   3 +-
 paddle/fluid/operators/scale_op.cc            |   3 +-
 paddle/infrt/host_context/CMakeLists.txt      |   2 +
 paddle/infrt/host_context/paddle_mlir.cc      | 400 ++++++++++++++++++
 paddle/infrt/host_context/paddle_mlir.h       | 105 +++++
 .../host_context/paddle_mlir_converter.cc     |  56 +++
 ...rate_pd_op_dialect_from_paddle_op_maker.py |  40 +-
 14 files changed, 628 insertions(+), 17 deletions(-)
 create mode 100644 paddle/infrt/host_context/paddle_mlir.cc
 create mode 100644 paddle/infrt/host_context/paddle_mlir.h
 create mode 100644 paddle/infrt/host_context/paddle_mlir_converter.cc
diff --git a/.gitignore b/.gitignore
index ae61959a4bd..77fe7a9b4cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,7 @@ tools/__pycache__
 # This file is automatically generated.
 # TODO(zhiqiang) Move this file to build directory.
 paddle/infrt/dialect/pd_ops.td
+paddle/infrt/dialect/pd_ops_info.h
 .lit_test_times.txt
 paddle/infrt/tests/dialect/Output
 paddle/infrt/tests/lit.cfg.py
diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc
index 796425a132b..6583ad0a495 100644
--- a/paddle/fluid/operators/abs_op.cc
+++ b/paddle/fluid/operators/abs_op.cc
@@ -47,11 +47,13 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(Tensor), The output tensor of abs op.");
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<bool>("use_cudnn",
                   "(bool, default false) Only used in cudnn kernel, need "
                   "install cudnn")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 Abs Operator.
 
diff --git a/paddle/fluid/operators/angle_op.cc b/paddle/fluid/operators/angle_op.cc
index 3cb01486814..f8ec416c0a0 100644
--- a/paddle/fluid/operators/angle_op.cc
+++ b/paddle/fluid/operators/angle_op.cc
@@ -47,11 +47,13 @@ class AngleOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(Tensor), The output tensor of angle op.");
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<bool>("use_cudnn",
                   "(bool, default false) Only used in cudnn kernel, need "
                   "install cudnn")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 Angle Operator.
 
diff --git a/paddle/fluid/operators/clip_op.cc b/paddle/fluid/operators/clip_op.cc
index 362f955ffc6..436d1edcedf 100644
--- a/paddle/fluid/operators/clip_op.cc
+++ b/paddle/fluid/operators/clip_op.cc
@@ -71,12 +71,14 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<AttrType>("max", "float number, the maximum value to clip by.");
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<std::string>(
         "mkldnn_data_type",
         "(string, default \"float32\"). Data type of mkldnn kernel")
         .SetDefault("float32")
-        .InEnum({"float32", "bfloat16"});
+        .InEnum({"float32", "bfloat16"})
+        .AsExtra();
     AddComment(R"DOC(
 Clip Operator.
 
diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc
index 110e6f1d025..03ee25accc6 100644
--- a/paddle/fluid/operators/flatten_op.cc
+++ b/paddle/fluid/operators/flatten_op.cc
@@ -103,12 +103,14 @@ class FlattenOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(1);
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<std::string>(
         "mkldnn_data_type",
         "(string, default \"float32\"). Data type of mkldnn kernel")
         .SetDefault("float32")
-        .InEnum({"float32", "bfloat16"});
+        .InEnum({"float32", "bfloat16"})
+        .AsExtra();
     AddComment(R"DOC(
 Flatten Operator
 
diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc
index 691c394870a..01fd8604c63 100644
--- a/paddle/fluid/operators/mul_op.cc
+++ b/paddle/fluid/operators/mul_op.cc
@@ -136,7 +136,8 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(Tensor), The output tensor of mul op.");
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<int>(
         "x_num_col_dims",
         R"DOC((int, default 1), The mul_op can take tensors with more than two
diff --git a/paddle/fluid/operators/renorm_op.cc b/paddle/fluid/operators/renorm_op.cc
index b15193e0e99..6196a2445b4 100644
--- a/paddle/fluid/operators/renorm_op.cc
+++ b/paddle/fluid/operators/renorm_op.cc
@@ -52,10 +52,12 @@ class RenormOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<bool>("use_cudnn",
                   "(bool, default false) Only used in cudnn kernel, need "
                   "install cudnn")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
     AddComment(R"DOC(
 Renorm Operator.
 
@@ -114,4 +116,4 @@ REGISTER_OP_CPU_KERNEL(renorm, ops::CPURenormKernel<float>,
                        ops::CPURenormKernel<double>);
 
 REGISTER_OP_CPU_KERNEL(renorm_grad, ops::CPURenormGradKernel<float>,
-                       ops::CPURenormGradKernel<double>);
\ No newline at end of file
+                       ops::CPURenormGradKernel<double>);
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 99e40de3080..43da63aae73 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -507,7 +507,8 @@ class Reshape2OpMaker : public ReshapeOpMaker {
         "mkldnn_data_type",
         "(string, default \"float32\"). Data type of mkldnn kernel")
         .SetDefault("float32")
-        .InEnum({"float32", "int8", "bfloat16"});
+        .InEnum({"float32", "int8", "bfloat16"})
+        .AsExtra();
   }
 };
 
diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index ccf3afe29c7..30e3c978714 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -75,7 +75,8 @@ $$Out = scale*(X + bias)$$
         .SetDefault(true);
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
+        .SetDefault(false)
+        .AsExtra();
   }
 };
 
diff --git a/paddle/infrt/host_context/CMakeLists.txt b/paddle/infrt/host_context/CMakeLists.txt
index f5b4dac3408..11304742ecd 100644
--- a/paddle/infrt/host_context/CMakeLists.txt
+++ b/paddle/infrt/host_context/CMakeLists.txt
@@ -21,5 +21,7 @@ cc_test_tiny(test_infrt_op_executable SRCS op_executable_test.cc DEPS infrt ${ML
 cc_test_tiny(test_infrt_core_runtime SRCS core_runtime_test.cc DEPS infrt ${MLIR_IR_LIBS})
 cc_test_tiny(test_infrt_mlir_to_runtime_translate SRCS mlir_to_runtime_translate_test.cc DEPS infrt ${MLIR_IR_LIBS})
 
+add_executable(paddle-mlir-convert paddle_mlir.cc paddle_mlir_converter.cc)
+target_link_libraries(paddle-mlir-convert infrt ${MLIR_IR_LIBS})
 add_executable(infrtexec mlir_exec.cc)
 target_link_libraries(infrtexec infrt ${MLIR_IR_LIBS})
diff --git a/paddle/infrt/host_context/paddle_mlir.cc b/paddle/infrt/host_context/paddle_mlir.cc
new file mode 100644
index 00000000000..475e1e88168
--- /dev/null
+++ b/paddle/infrt/host_context/paddle_mlir.cc
@@ -0,0 +1,400 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/infrt/host_context/paddle_mlir.h"
+#include "paddle/infrt/dialect/pd_ops_info.h"
+
+MLIRModelGenImpl::MLIRModelGenImpl()  // uses infrt's global MLIRContext
+    : context_(infrt::Global::getMLIRContext()), builder_(context_) {
+  context_->allowUnregisteredDialects();
+  context_->getOrLoadDialect<mlir::StandardOpsDialect>();  // e.g. ReturnOp
+  context_->getOrLoadDialect<infrt::dialect::INFRTDialect>();
+  context_->getOrLoadDialect<infrt::ts::TensorShapeDialect>();
+  context_->getOrLoadDialect<infrt::dt::DTDialect>();  // GetParamOp, TensorMap
+  context_->getOrLoadDialect<mlir::pd::PaddleDialect>();  // the `pd.` ops
+  module_ = mlir::ModuleOp::create(mlir::UnknownLoc::get(context_));  // empty
+}
+
+// Loads the ProgramDesc serialized in `model_file` and returns it by value.
+infrt::paddle::framework_proto::ProgramDesc MLIRModelGenImpl::ParsePaddleModel(
+    const std::string &model_file) {
+  const auto loaded_program = infrt::paddle::LoadProgram(model_file);
+  return *loaded_program;
+}
+
+// Imports the program stored as `<model_dir>/__model__`.
+mlir::ModuleOp MLIRModelGenImpl::ImportPaddleModel(
+    const std::string &model_dir) {
+  const std::string model_path = model_dir + "/__model__";
+  return ImportPaddleModel(ParsePaddleModel(model_path));
+}
+
+// Imports the program in `model_file`; `param_file` is currently not read.
+mlir::ModuleOp MLIRModelGenImpl::ImportPaddleModel(
+    const std::string &model_file, const std::string &param_file) {
+  const auto program_desc = ParsePaddleModel(model_file);
+  return ImportPaddleModel(program_desc);
+}
+
+// Converts block 0 of `program` into a `main_graph` function inside module_.
+mlir::ModuleOp MLIRModelGenImpl::ImportPaddleModel(
+    const infrt::paddle::framework_proto::ProgramDesc &program) {
+  main_block_ = program.blocks(0);  // only the first block is translated
+  mlir::FuncOp main_func = UpdateModelModule(GetModelInputsType(program),
+                                             GetModelOutputsType(program));
+  UpdateModelParams(program, &main_func);
+  UpdateModelOps(program);
+  UpdateModelOutputs(program);
+  return module_;
+}
+
+// Creates the `main_graph` function, appends it to module_, and moves
+// builder_'s insertion point to the start of the function's entry block.
+mlir::FuncOp MLIRModelGenImpl::UpdateModelModule(
+    llvm::SmallVector<mlir::Type, 4> operandTypes,
+    llvm::SmallVector<mlir::Type, 4> resultTypes) {
+  const std::string func_name = "main_graph";
+  const mlir::Location loc = mlir::UnknownLoc::get(context_);
+  const auto fn_type = builder_.getFunctionType({operandTypes}, {resultTypes});
+  auto main_func =
+      mlir::FuncOp::create(loc, func_name, fn_type, /*attrs=*/{});
+  module_.push_back(main_func);
+  main_func.addEntryBlock();
+  builder_.setInsertionPointToStart(&main_func.body().back());
+  return main_func;
+}
+
+// Builds the argument types of the main function. Argument 0 is the
+// TensorMap; it is followed by one ranked tensor type per output variable of
+// every feed op in main_block_, in the order the feed ops appear.
+// `program` itself is not read here.
+llvm::SmallVector<mlir::Type, 4> MLIRModelGenImpl::GetModelInputsType(
+    const infrt::paddle::framework_proto::ProgramDesc &program) {
+  llvm::SmallVector<mlir::Type, 4> arg_types;
+  arg_types.push_back(infrt::dt::TensorMapType::get(context_));
+  for (const auto &op : main_block_.ops()) {
+    if (op.type() != "feed") continue;
+    for (const auto &fed : op.outputs()) {
+      const std::string fed_name = fed.arguments(0);
+      // look the fed variable up by name to get its shape and data type
+      for (const auto &var : main_block_.vars()) {
+        if (var.name() != fed_name) continue;
+        const auto &tensor = var.type().lod_tensor().tensor();
+        const std::vector<int64_t> dims =
+            RepeatedToVector<int64_t>(tensor.dims());
+        // elem_type stays null when the proto data type is unsupported
+        mlir::Type elem_type;
+        ConvertDataType(tensor.data_type(), builder_, &elem_type);
+        arg_types.push_back(mlir::RankedTensorType::get(dims, elem_type));
+      }
+    }
+  }
+  return arg_types;
+}
+
+// Builds the result types of the main function: one ranked tensor type per
+// input variable of every fetch op in main_block_, in fetch-op order.
+// `program` itself is not read here.
+llvm::SmallVector<mlir::Type, 4> MLIRModelGenImpl::GetModelOutputsType(
+    const infrt::paddle::framework_proto::ProgramDesc &program) {
+  llvm::SmallVector<mlir::Type, 4> result_types;
+  for (const auto &op : main_block_.ops()) {
+    if (op.type() != "fetch") continue;
+    for (const auto &fetched : op.inputs()) {
+      const std::string fetched_name = fetched.arguments(0);
+      // look the fetched variable up by name to get its shape and data type
+      for (const auto &var : main_block_.vars()) {
+        if (var.name() != fetched_name) continue;
+        const auto &tensor = var.type().lod_tensor().tensor();
+        const std::vector<int64_t> dims =
+            RepeatedToVector<int64_t>(tensor.dims());
+        // elem_type stays null when the proto data type is unsupported
+        mlir::Type elem_type;
+        ConvertDataType(tensor.data_type(), builder_, &elem_type);
+        result_types.push_back(mlir::RankedTensorType::get(dims, elem_type));
+      }
+    }
+  }
+  return result_types;
+}
+
+// Emits a pd-dialect operation for every op of main_block_ except feed/fetch,
+// which are represented by the main function's arguments and return value.
+void MLIRModelGenImpl::UpdateModelOps(
+    const infrt::paddle::framework_proto::ProgramDesc &program) {
+  for (const auto &op : main_block_.ops()) {
+    const std::string &kind = op.type();
+    if (kind != "feed" && kind != "fetch") buildOperation(op);
+  }
+}
+
+// Seeds params_map_ with every value that exists before the first op runs:
+// the fed inputs (function arguments 1..N, in feed-op order, matching the
+// layout built by GetModelInputsType) and the persistable variables, each
+// materialized by a dt GetParamOp reading from the TensorMap (argument 0).
+void MLIRModelGenImpl::UpdateModelParams(
+    const infrt::paddle::framework_proto::ProgramDesc &program,
+    mlir::FuncOp *mainFunc) {
+  // Argument 0 is the TensorMap, so fed inputs start at 1. The index must
+  // advance per fed variable; a fixed index aliases every input to one arg.
+  unsigned next_input_arg = 1;
+  for (const auto &op_desc : main_block_.ops()) {
+    if (op_desc.type() != "feed") continue;
+    for (const auto &fed : op_desc.outputs()) {
+      params_map_.emplace(fed.arguments(0),
+                          mainFunc->getArgument(next_input_arg++));
+    }
+  }
+
+  // Persistable tensors are fetched by name from the TensorMap.
+  ::mlir::Value map = mainFunc->getArgument(0);
+  for (const auto &var_desc : main_block_.vars()) {
+    const std::string &var_name = var_desc.name();
+    if (params_map_.count(var_name)) continue;  // already bound as an input
+    if (var_name == "feed" || var_name == "fetch" || !var_desc.persistable())
+      continue;
+    const auto &tensor = var_desc.type().lod_tensor().tensor();
+    std::vector<int64_t> dims = RepeatedToVector<int64_t>(tensor.dims());
+    // precision_ stays null if the proto data type is unsupported.
+    mlir::Type precision_;
+    ConvertDataType(tensor.data_type(), builder_, &precision_);
+    mlir::Type type_ = mlir::RankedTensorType::get(dims, precision_);
+    auto op = builder_.create<infrt::dt::GetParamOp>(
+        mlir::UnknownLoc::get(context_),
+        type_,
+        map,
+        builder_.getStringAttr(var_name));
+    params_map_.emplace(var_name, op.getOperation()->getResult(0));
+  }
+}
+
+// Terminates the main function with a single std `return` whose operands are
+// the values fetched by the model, in fetch-op order. This matches the result
+// types produced by GetModelOutputsType; emitting one return per fetched
+// variable would put several terminators into the same block.
+void MLIRModelGenImpl::UpdateModelOutputs(
+    const infrt::paddle::framework_proto::ProgramDesc &program) {
+  // collect the value of every variable consumed by a fetch op
+  llvm::SmallVector<mlir::Value, 4> operands;
+  for (const auto &op_desc : main_block_.ops()) {
+    if (op_desc.type() != "fetch") continue;
+    for (const auto &in : op_desc.inputs()) {
+      const std::string &input_var_name = in.arguments(0);
+      // NOTE: operator[] yields a null Value for a name no op has produced.
+      operands.push_back(params_map_[input_var_name]);
+    }
+  }
+
+  // the return op has no results and no attributes
+  llvm::SmallVector<mlir::Type, 4> resultTypes;
+  llvm::SmallVector<mlir::NamedAttribute, 4> attrs;
+  mlir::OperationState state(mlir::UnknownLoc::get(context_),
+                             mlir::ReturnOp::getOperationName(),
+                             operands,
+                             resultTypes,
+                             attrs);
+  builder_.createOperation(state);
+}
+
+// Translates one paddle OpDesc into a `pd.<type>` operation at builder_'s
+// insertion point and records the op's results in params_map_.
+void MLIRModelGenImpl::buildOperation(
+    const infrt::paddle::framework_proto::OpDesc &op_) {
+  auto inputs = GetOpInputValue(op_);
+  auto output_types = GetOpOutputType(op_);
+  auto attributes = GetOpAttributes(op_);
+  mlir::OperationState state(mlir::UnknownLoc::get(context_),
+                             "pd." + op_.type(),
+                             inputs, output_types, attributes);
+  RegisterOpOutputVars(op_, builder_.createOperation(state));
+}
+
+// Collects the mlir values feeding `op_`. Only input slots listed for this op
+// type in pd_dialect_inputs_info_map_ are kept; for each kept slot the value
+// of its first argument is looked up in params_map_.
+llvm::SmallVector<mlir::Value, 4> MLIRModelGenImpl::GetOpInputValue(
+    const infrt::paddle::framework_proto::OpDesc &op_) {
+  std::vector<std::string> known_slots;
+  const auto info = pd_dialect_inputs_info_map_.find(op_.type());
+  if (info != pd_dialect_inputs_info_map_.end()) known_slots = info->second;
+
+  llvm::SmallVector<mlir::Value, 4> operands;
+  for (const auto &slot : op_.inputs()) {
+    if (slot.arguments().empty()) continue;
+    const bool known = std::find(known_slots.begin(), known_slots.end(),
+                                 slot.parameter()) != known_slots.end();
+    if (known) operands.push_back(params_map_[slot.arguments()[0]]);
+  }
+  return operands;
+}
+
+// Computes the result types of the pd-dialect op generated for `op_`.
+// Output slots are visited in proto order; a slot contributes only when it is
+// listed for this op type in pd_dialect_outputs_info_map_. The type of a slot
+// is the ranked tensor type of the block variable named by its first argument.
+llvm::SmallVector<mlir::Type, 4> MLIRModelGenImpl::GetOpOutputType(
+    const infrt::paddle::framework_proto::OpDesc &op_) {
+  llvm::SmallVector<mlir::Type, 4> resultTypes;
+
+  std::vector<std::string> pd_dialect_outputs_info = {};
+  if (pd_dialect_outputs_info_map_.count(op_.type()))
+    pd_dialect_outputs_info = pd_dialect_outputs_info_map_.at(op_.type());
+
+  // update op outputs info
+  for (const auto &output : op_.outputs()) {
+    if (!std::count(pd_dialect_outputs_info.begin(),
+                    pd_dialect_outputs_info.end(),
+                    output.parameter()))
+      continue;
+    // A slot may carry no variable at all; indexing arguments()[0] on it
+    // would read past the end of an empty repeated field.
+    if (output.arguments().empty()) continue;
+    const std::string &var_name = output.arguments()[0];
+
+    for (const auto &var_desc : main_block_.vars()) {
+      if (var_desc.name() != var_name) continue;
+      const auto &tensor = var_desc.type().lod_tensor().tensor();
+      std::vector<int64_t> dims = RepeatedToVector<int64_t>(tensor.dims());
+      mlir::Type precision_;
+      ConvertDataType(tensor.data_type(), builder_, &precision_);
+      resultTypes.push_back(mlir::RankedTensorType::get(dims, precision_));
+    }
+  }
+  return resultTypes;
+}
+
+// Converts the proto attributes of `op_` into mlir NamedAttributes, keeping
+// only names the registered pd op declares; unsupported kinds are logged.
+llvm::SmallVector<mlir::NamedAttribute, 4> MLIRModelGenImpl::GetOpAttributes(
+    const infrt::paddle::framework_proto::OpDesc &op_) {
+  llvm::SmallVector<mlir::NamedAttribute, 4> attrs;
+
+#define ATTR_IMPL_CASE(PROTO_TYPE, PROTO_TYPE_METHOD, MLIR_TYPE_METHOD) \
+  case infrt::paddle::framework_proto::AttrType::PROTO_TYPE: {          \
+    auto data = op_.attrs(attrs_num).PROTO_TYPE_METHOD();               \
+    auto value_ = builder_.MLIR_TYPE_METHOD(data);                      \
+    auto name_ = builder_.getStringAttr(attr_name_);                    \
+    auto attr_ = mlir::NamedAttribute(name_, value_);                   \
+    attrs.push_back(attr_);                                             \
+    break;                                                              \
+  }
+
+#define REPEATED_ATTR_IMPLE_CASE(                                       \
+    PROTO_TYPE, PROTO_TYPE_METHOD, MLIR_TYPE, MLIR_TYPE_METHOD)         \
+  case infrt::paddle::framework_proto::AttrType::PROTO_TYPE: {          \
+    std::vector<MLIR_TYPE> data;                                        \
+    for (const auto &var : op_.attrs(attrs_num).PROTO_TYPE_METHOD()) {  \
+      data.push_back(MLIR_TYPE(var));                                   \
+    }                                                                   \
+    auto value_ =                                                       \
+        builder_.MLIR_TYPE_METHOD(llvm::makeArrayRef<MLIR_TYPE>(data)); \
+    auto name_ = builder_.getStringAttr(attr_name_);                    \
+    auto attr_ = mlir::NamedAttribute(name_, value_);                   \
+    attrs.push_back(attr_);                                             \
+    break;                                                              \
+  }
+
+#define UNIMPLEMENTED_ATTR_IMPL_CASE(PROTO_TYPE)                        \
+  case infrt::paddle::framework_proto::AttrType::PROTO_TYPE: {          \
+    std::cout << "Unimplemented attr type: framework_proto::AttrType::" \
+              << #PROTO_TYPE << std::endl;                              \
+    break;                                                              \
+  }
+
+  // An op the context has not registered declares no attribute names, so
+  // nothing can be matched; return early instead of unwrapping an empty value.
+  const std::string &op_name = "pd." + op_.type();
+  auto registered_op_ = mlir::RegisteredOperationName::lookup(op_name, context_);
+  if (!registered_op_) return attrs;
+  std::vector<mlir::StringAttr> attr_names_vec_ =
+      registered_op_->getAttributeNames().vec();
+  // update attrs
+  for (int attrs_num = 0; attrs_num < op_.attrs_size(); attrs_num++) {
+    auto attr_name_ = op_.attrs(attrs_num).name();
+    auto type = op_.attrs(attrs_num).type();
+    if (!std::count(attr_names_vec_.begin(), attr_names_vec_.end(), attr_name_))
+      continue;
+    switch (type) {
+      ATTR_IMPL_CASE(FLOAT, f, getF32FloatAttr);
+      ATTR_IMPL_CASE(BOOLEAN, b, getBoolAttr);
+      ATTR_IMPL_CASE(INT, i, getI32IntegerAttr);
+      ATTR_IMPL_CASE(LONG, l, getI64IntegerAttr);
+      ATTR_IMPL_CASE(STRING, s, getStringAttr);
+
+      REPEATED_ATTR_IMPLE_CASE(
+          STRINGS, strings, llvm::StringRef, getStrArrayAttr);
+      REPEATED_ATTR_IMPLE_CASE(FLOATS, floats, float, getF32ArrayAttr);
+      REPEATED_ATTR_IMPLE_CASE(INTS, ints, int32_t, getI32ArrayAttr);
+      REPEATED_ATTR_IMPLE_CASE(LONGS, longs, int64_t, getI64ArrayAttr);
+
+      // Unimplemented attr type, will be supported later @DannyIsFunny
+      // bools attribute is not supported due to bug of llvm.
+      // REPEATED_ATTR_IMPLE_CASE(BOOLEANS, bools, bool, getBoolArrayAttr);
+      UNIMPLEMENTED_ATTR_IMPL_CASE(BOOLEANS);
+      UNIMPLEMENTED_ATTR_IMPL_CASE(BLOCK);
+      UNIMPLEMENTED_ATTR_IMPL_CASE(BLOCKS);
+      default:
+        std::cout << "error attribute" << attr_name_ << std::endl;
+    }
+  }
+  return attrs;
+}
+
+void MLIRModelGenImpl::RegisterOpOutputVars(
+    const infrt::paddle::framework_proto::OpDesc &op_,
+    mlir::Operation *mlir_op_) {
+  // Maps each output slot's first argument name to a result of mlir_op_.
+  for (int var_idx = 0; var_idx < op_.outputs_size(); ++var_idx) {
+    auto &var_name = op_.outputs(var_idx).arguments()[0];
+    // NOTE(review): assumes one result per proto slot; slots may be filtered.
+    auto var_ = mlir_op_->getResult(var_idx);
+    params_map_.insert(std::pair<std::string, mlir::Value>(var_name, var_));
+  }
+}
+
+// Maps a paddle proto data type onto the mlir scalar type used for tensor
+// elements: FP16/FP32/FP64 become f16/f32/f64, BOOL and INT8..INT64 become
+// integers of width 1, 8, 16, 32, 64, and UINT8 an 8-bit integer requested with
+// isSigned=false. Any other type returns false and leaves `*type` untouched.
+bool ConvertDataType(infrt::paddle::framework_proto::VarType::Type dtype,
+                     mlir::Builder builder,
+                     mlir::Type *type) {
+  using Proto = infrt::paddle::framework_proto::VarType::Type;
+  const auto emit = [type](mlir::Type converted) {
+    *type = converted;
+    return true;
+  };
+  switch (dtype) {
+    case Proto::VarType_Type_FP16:
+      return emit(builder.getF16Type());
+    case Proto::VarType_Type_FP32:
+      return emit(builder.getF32Type());
+    case Proto::VarType_Type_FP64:
+      return emit(builder.getF64Type());
+    case Proto::VarType_Type_BOOL:
+      return emit(builder.getIntegerType(1));
+    case Proto::VarType_Type_INT8:
+      return emit(builder.getIntegerType(8));
+    case Proto::VarType_Type_INT16:
+      return emit(builder.getIntegerType(16));
+    case Proto::VarType_Type_INT32:
+      return emit(builder.getIntegerType(32));
+    case Proto::VarType_Type_INT64:
+      return emit(builder.getIntegerType(64));
+    case Proto::VarType_Type_UINT8:
+      return emit(builder.getIntegerType(8, /*isSigned=*/false));
+    default:
+      return false;
+  }
+}
diff --git a/paddle/infrt/host_context/paddle_mlir.h b/paddle/infrt/host_context/paddle_mlir.h
new file mode 100644
index 00000000000..78dfefcfda2
--- /dev/null
+++ b/paddle/infrt/host_context/paddle_mlir.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef PADDLE_INFRT_HOST_CONTEXT_PADDLE_MLIR_H_
+#define PADDLE_INFRT_HOST_CONTEXT_PADDLE_MLIR_H_
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "llvm/Support/CommandLine.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/AsmState.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/MLIRContext.h"
+#include "paddle/infrt/common/global.h"
+#include "paddle/infrt/common/string.h"
+#include "paddle/infrt/dialect/basic_kernels.h"
+#include "paddle/infrt/dialect/dense_tensor.h"
+#include "paddle/infrt/dialect/infrt_base.h"
+#include "paddle/infrt/dialect/init_infrt_dialects.h"
+#include "paddle/infrt/dialect/pd_ops.h"
+#include "paddle/infrt/dialect/tensor_shape.h"
+#include "paddle/infrt/paddle/model_parser.h"
+
+class MLIRModelGenImpl {  // paddle ProgramDesc -> pd-dialect mlir module
+ public:
+  MLIRModelGenImpl();
+  mlir::ModuleOp ImportPaddleModel(const std::string &model_file,
+                                   const std::string &param_file);  // unused
+  mlir::ModuleOp ImportPaddleModel(const std::string &model_dir);
+
+ private:
+  // parse the serialized ProgramDesc stored in a paddle model file
+  infrt::paddle::framework_proto::ProgramDesc ParsePaddleModel(
+      const std::string &model_file);
+
+  // convert paddle model proto into paddle dialect module
+  mlir::ModuleOp ImportPaddleModel(
+      const infrt::paddle::framework_proto::ProgramDesc &program);
+
+  // derive the main function's argument/result types from feed/fetch ops
+  llvm::SmallVector<mlir::Type, 4> GetModelInputsType(
+      const infrt::paddle::framework_proto::ProgramDesc &program);
+  llvm::SmallVector<mlir::Type, 4> GetModelOutputsType(
+      const infrt::paddle::framework_proto::ProgramDesc &program);
+  // create the `main_graph` function and append it to module_
+  mlir::FuncOp UpdateModelModule(llvm::SmallVector<mlir::Type, 4> operandTypes,
+                                 llvm::SmallVector<mlir::Type, 4> resultTypes);
+  // convert paddle ops into paddle dialect ops (in mlir form)
+  void UpdateModelOps(
+      const infrt::paddle::framework_proto::ProgramDesc &program);
+  // register fed inputs and persistable params in params_map_
+  void UpdateModelParams(
+      const infrt::paddle::framework_proto::ProgramDesc &program,
+      mlir::FuncOp *mainFunc);
+  // emit the return operation that yields the variables read by fetch ops
+  void UpdateModelOutputs(
+      const infrt::paddle::framework_proto::ProgramDesc &program);
+
+  // method for converting proto::op into op in paddle dialect
+  void buildOperation(const infrt::paddle::framework_proto::OpDesc &op_);
+
+  llvm::SmallVector<mlir::Value, 4> GetOpInputValue(
+      const infrt::paddle::framework_proto::OpDesc &op_);
+  llvm::SmallVector<mlir::Type, 4> GetOpOutputType(
+      const infrt::paddle::framework_proto::OpDesc &op_);
+  llvm::SmallVector<mlir::NamedAttribute, 4> GetOpAttributes(
+      const infrt::paddle::framework_proto::OpDesc &op_);
+  void RegisterOpOutputVars(const infrt::paddle::framework_proto::OpDesc &op_,
+                            mlir::Operation *mlir_op_);
+
+  mlir::MLIRContext *context_;  // obtained from infrt::Global
+  mlir::OpBuilder builder_;
+  mlir::ModuleOp module_;  // module returned by ImportPaddleModel
+  infrt::paddle::framework_proto::BlockDesc main_block_;  // copy of block 0
+
+  std::map<std::string, mlir::Value> params_map_;  // var name -> mlir value
+};
+
+// Copies the elements of a protobuf RepeatedField into a std::vector,
+// preserving their order.
+template <typename T>
+inline std::vector<T> RepeatedToVector(
+    const google::protobuf::RepeatedField<T> &repeated_field) {
+  std::vector<T> elements;
+  elements.reserve(repeated_field.size());
+  for (const T &item : repeated_field) elements.push_back(item);
+  return elements;
+}
+// convert proto type to mlir type
+bool ConvertDataType(infrt::paddle::framework_proto::VarType::Type dtype,
+                     mlir::Builder builder,
+                     mlir::Type *type);
+#endif  // PADDLE_INFRT_HOST_CONTEXT_PADDLE_MLIR_H_
diff --git a/paddle/infrt/host_context/paddle_mlir_converter.cc b/paddle/infrt/host_context/paddle_mlir_converter.cc
new file mode 100644
index 00000000000..a2808a00cb6
--- /dev/null
+++ b/paddle/infrt/host_context/paddle_mlir_converter.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/infrt/host_context/paddle_mlir.h"
+
+void print_usage() {  // writes the two accepted invocations to stdout
+  std::cout << "Error inputs format, two kinds of inputs are supported:\n"
+            << "    [1] ./paddle-mlir-convert $path_to_model_file "
+               "$path_to_params_file\n"
+            << "    [2] ./paddle-mlir-convert $path_to_model_dir(__model__ + "
+               "params)\n";
+}
+
+// Derives the model and params paths from the command line.
+//   argc == 2: argv[1] is a model directory holding `__model__` and `params`.
+//   argc == 3: argv[1] is the model file and argv[2] the params file.
+// Any other count prints the usage text, returns false, writes no output.
+bool parse_inputs(int argc,
+                  char** argv,
+                  std::string* model_file_name,
+                  std::string* params_file_name) {
+  if (argc == 2) {
+    const std::string model_dir = argv[1];
+    *model_file_name = model_dir + "/__model__";
+    *params_file_name = model_dir + "/params";
+    return true;
+  }
+  if (argc == 3) {
+    *model_file_name = argv[1];
+    *params_file_name = argv[2];
+    return true;
+  }
+  print_usage();  // also covers surplus arguments, which used to fail silently
+  return false;
+}
+
+// Converts the paddle model named on the command line and dumps the module.
+int main(int argc, char** argv) {
+  std::string model_file_name;
+  std::string params_file_name;
+  // Exit non-zero on bad arguments so that scripts can detect the failure.
+  if (!parse_inputs(argc, argv, &model_file_name, &params_file_name)) return 1;
+  MLIRModelGenImpl myGen;
+  myGen.ImportPaddleModel(model_file_name, params_file_name).dump();
+}
diff --git a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
index ed18e8666ae..f77ef86cc6c 100644
--- a/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
+++ b/tools/infrt/generate_pd_op_dialect_from_paddle_op_maker.py
@@ -24,6 +24,7 @@ def get_original_ops():
     all_ops, _, _ = core.op_supported_infos('CPU', core.VarDesc.VarType.FP16)
     grad_ops = []
     original_ops = []
+    necessary_ops = ["scale"]
 
     for op in all_ops:
         if op.endswith("_grad"):
@@ -33,6 +34,8 @@ def get_original_ops():
     for op in all_ops:
         if str(op + "_grad") in grad_ops:
             original_ops.append(op)
+        elif op in necessary_ops:
+            original_ops.append(op)
 
     print("Grad ops num: " + str(len(grad_ops)))
     print("Responded original ops num: " + str(len(original_ops)))
@@ -110,6 +113,7 @@ def get_all_ops_desc():
 # funtion to generate paddle op dialect file
 def convert_op_proto_into_mlir(op_descs):
     dst_dialect_file = "../../paddle/infrt/dialect/pd_ops.td"
+    dialect_info_file = "../../paddle/infrt/dialect/pd_ops_info.h"
     custom_dialect_file = "custom_pdop.td"
 
     # 1. Head files
@@ -144,12 +148,14 @@ def convert_op_proto_into_mlir(op_descs):
         "while", "conditional_block", "set_value", "run_program"
     ]
     skipped_attr_list = [
-        "trainable_statistics", "use_global_stats", "is_test", "use_mkldnn",
-        "use_cudnn"
+        "trainable_statistics", "use_global_stats", "is_test", "use_quantizer"
     ]
 
     original_ops_ = get_original_ops()
     automatically_generated_op_dialect = []
+    ops_inputs_map_ = {}
+    ops_outputs_map_ = {}
+
     for op_type, op_proto in op_descs.items():
         if (op_type in skipped_op_list) or (op_type not in original_ops_):
             continue
@@ -172,13 +178,16 @@ def convert_op_proto_into_mlir(op_descs):
         if (len(op_proto[INPUTS]) > 0 or len(op_proto[ATTRS]) > 0):
             ARGUMENTS = "  let arguments = (ins "
             # 2.3.1 inputs
+            ins_cache_list_ = []
             for input_ in op_proto[INPUTS]:
                 if op_proto[INPUTS][input_][EXTRA] != True and op_proto[INPUTS][
                         input_][INTERMEDIATE] != True:
+                    ins_cache_list_.append(input_)
                     if op_proto[INPUTS][input_][DUPLICABLE] != "true":
                         ARGUMENTS = ARGUMENTS + " PD_Tensor:$" + input_ + ","
                     else:
                         ARGUMENTS = ARGUMENTS + " PD_Tensor_Array:$" + input_ + ","
+            ops_inputs_map_[op_type] = ins_cache_list_
             # unsupported:   BLOCK = 8;  BLOCKS = 10;
             attr_mlir_converter = {
                 0: 'SI32Attr',
@@ -244,15 +253,17 @@ def convert_op_proto_into_mlir(op_descs):
         RESULTS = ""
         if (len(op_proto[OUTPUTS]) > 0):
             RESULTS = "\n  let results = (outs "
+            outs_cache_list_ = []
             for output_ in op_proto[OUTPUTS]:
                 if op_proto[OUTPUTS][output_][EXTRA] != True and op_proto[
                         OUTPUTS][output_][INTERMEDIATE] != True:
+                    outs_cache_list_.append(output_)
                     if op_proto[OUTPUTS][output_][DUPLICABLE] != "true":
                         RESULTS = RESULTS + "PD_Tensor:$" + output_ + ","
                     else:
                         RESULTS = RESULTS + "PD_Tensor_Array:$" + output_ + ","
                         print(HEAD + " PD_Tensor_Array:$" + output_ + ",")
-
+            ops_outputs_map_[op_type] = outs_cache_list_
             RESULTS = RESULTS[:-1] + ");\n"
         with open(dst_dialect_file, 'a') as ops_mlir_file:
             ops_mlir_file.write(HEAD)
@@ -267,6 +278,29 @@ def convert_op_proto_into_mlir(op_descs):
     print("Automatically generated op dialects num: " + str(
         len(automatically_generated_op_dialect)))
 
+    with open(dialect_info_file, 'w') as pd_ops_info_file:
+        pd_ops_info_file.write(
+            "#include<map>\n#include<string>\n#include<vector>\n")
+        pd_ops_info_file.write(
+            "const std::map<std::string, std::vector<std::string>> pd_dialect_inputs_info_map_ = {\n"
+        )
+        for data_ in ops_inputs_map_:
+            pd_ops_info_file.write("  {\"" + data_ + "\", {")
+            for var_ in ops_inputs_map_[data_]:
+                pd_ops_info_file.write("\"" + var_ + "\",")
+            pd_ops_info_file.write("}},\n")
+        pd_ops_info_file.write("};\n")
+
+        pd_ops_info_file.write(
+            "const std::map<std::string, std::vector<std::string>> pd_dialect_outputs_info_map_ = {\n"
+        )
+        for data_ in ops_outputs_map_:
+            pd_ops_info_file.write("  {\"" + data_ + "\", {")
+            for var_ in ops_outputs_map_[data_]:
+                pd_ops_info_file.write("\"" + var_ + "\",")
+            pd_ops_info_file.write("}},\n")
+        pd_ops_info_file.write("};\n")
+
     # 3. custom op dialect and end of file
     with open(dst_dialect_file, 'a') as ops_mlir_file:
         with open(custom_dialect_file, 'r') as custom_ops_file:
-- 
GitLab