Unverified commit 5d40f2a2 authored by kangguangli, committed by GitHub

[IR] refine program translator (#54719)

* refine program translator

* fix warning: not override

* fix bug

* merge new modifications

* modify by reviews

* resolve conflicts

* resolve conflicts

* fix

* fix

* fix conflicts

* add unittest for special op transcriber

* set cpu as default backend

* modify by reviews
Parent 051e55c6
......@@ -1013,9 +1013,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
name in forward_outputs_position_map.keys()
), AssertMessage(name, forward_outputs_position_map.keys())
if is_optional:
set_tensor_wrappers = f"{indent}if({name}) grad_node->SetTensorWrapper{name}(*{name});"
else:
set_tensor_wrappers = (
f"{indent}grad_node->SetTensorWrapper{name}({name});"
)
......
......@@ -35,3 +35,128 @@
force_backend: null
inplace: null
backward: null
- name: share_buffer_
inputs:
- typename: Tensor[]
name: x
optional: false
no_need_buffer: false
data_transform: {}
attrs:
- {typename: 'bool[]', name: share_dims_and_dtype, default_value: '{}'}
outputs:
- {typename: 'Tensor[]', name: out, size: x.size(), optional: false, intermediate: false}
- {typename: 'Tensor[]', name: xout, size: x.size(), optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
inplace: null
backward: null
- name: assert
inputs:
- typename: Tensor
name: cond
optional: false
no_need_buffer: false
data_transform: {}
- typename: Tensor[]
name: data
optional: false
no_need_buffer: false
data_transform: {}
attrs:
- {typename: 'int64_t', name: summarize, default_value: '-1'}
outputs: []
no_need_buffer: null
data_transform: null
inplace: null
backward: null
- name: print
inputs:
- typename: Tensor
name: in
optional: false
no_need_buffer: false
data_transform: {}
attrs:
- {typename: 'int', name: first_n}
- {typename: 'str', name: message}
- {typename: 'int', name: summarize}
- {typename: 'bool', name: print_tensor_name, default_value: 'true'}
- {typename: 'bool', name: print_tensor_type, default_value: 'true'}
- {typename: 'bool', name: print_tensor_shape, default_value: 'true'}
- {typename: 'bool', name: print_tensor_layout, default_value: 'true'}
- {typename: 'bool', name: print_tensor_lod, default_value: 'true'}
- {typename: 'str', name: print_phase, default_value: 'BOTH'}
- {typename: 'bool', name: is_forward, default_value: 'true'}
outputs:
- typename: Tensor
name: out
optional: false
no_need_buffer: false
data_transform: {}
no_need_buffer: null
data_transform: null
inplace: null
backward: null
- name: add_n_
inputs:
- typename: Tensor[]
name: inputs
optional: false
no_need_buffer: false
data_transform: {}
attrs: []
outputs:
- {typename: Tensor, name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
invoke: {func: add_n_impl, args: inputs}
backward: add_n_grad
- name: write_to_array
inputs:
- typename: Tensor
name: i
optional: false
no_need_buffer: false
data_transform: {}
- typename: Tensor
name: x
optional: false
no_need_buffer: false
data_transform: {}
attrs: []
outputs:
- {typename: 'Tensor[]', name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
backward: write_to_array_grad
- name: lod_array_length
inputs:
- typename: Tensor[]
name: x
optional: false
no_need_buffer: false
data_transform: {}
attrs: []
outputs:
- {typename: 'Tensor', name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
- name: py_func_
inputs:
- {typename: 'Tensor', name: x, optional: false, no_need_buffer: false, data_transform: {}}
attrs:
- {typename: 'int', name: forward_callable_id, default_value: '0'}
- {typename: 'int', name: backward_callable_id, default_value: '-1'}
- {typename: 'str[]', name: backward_skip_vars, default_value: '{}'}
outputs:
- {typename: 'Tensor', name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
......@@ -173,6 +173,10 @@ phi::KernelKey GetKernelKey(
}
}
if (kernel_backend == phi::Backend::UNDEFINED) {
kernel_backend = paddle::experimental::ParseBackend(place);
}
phi::KernelKey res(kernel_backend, kernel_layout, kernel_data_type);
return res;
}
......
......@@ -42,6 +42,11 @@ class AttributeVisitor {
return ir::Int32Attribute::get(ctx, i);
}
virtual ir::Attribute operator()(int64_t i) {
VLOG(10) << "translating int";
return ir::Int64Attribute::get(ctx, i);
}
virtual ir::Attribute operator()(float f) {
VLOG(10) << "translating float";
return ir::FloatAttribute::get(ctx, f);
......@@ -146,6 +151,21 @@ class AttributeVisitor {
}
};
class Int64ArrayAttributeVisitor : public AttributeVisitor {
public:
using AttributeVisitor::AttributeVisitor;
ir::Attribute operator()(const std::vector<int>& is) override {
VLOG(10) << "translating vector<int64>";
std::vector<ir::Attribute> attrs;
attrs.reserve(is.size());
for (const auto& v : is) {
attrs.push_back(ir::Int64Attribute::get(ctx, v));
}
return ir::ArrayAttribute::get(ctx, attrs);
}
};
class IntArrayAttributeVisitor : public AttributeVisitor {
public:
using AttributeVisitor::AttributeVisitor;
......@@ -171,6 +191,11 @@ class DataTypeAttributeVisitor : public AttributeVisitor {
auto phi_dtype = phi::TransToPhiDataType(i);
return paddle::dialect::DataTypeAttribute::get(ctx, phi_dtype);
}
ir::Attribute operator()(const paddle::blank& blank) override {
VLOG(10) << "translating paddle::blank to DataType::UNDEFINED";
return paddle::dialect::DataTypeAttribute::get(ctx, phi::DataType());
}
};
class PlaceAttributeVisitor : public AttributeVisitor {
......@@ -178,8 +203,8 @@ class PlaceAttributeVisitor : public AttributeVisitor {
using AttributeVisitor::AttributeVisitor;
ir::Attribute operator()(const paddle::blank& blank) override {
VLOG(10) << "translating paddle::blank";
phi::Place data(phi::AllocationType::CPU);
VLOG(10) << "translating paddle::blank to Place::UNDEFINED";
phi::Place data(phi::AllocationType::UNDEFINED);
return paddle::dialect::PlaceAttribute::get(ctx, data);
}
};
......@@ -192,6 +217,8 @@ AttributeTranslator::AttributeTranslator() {
new DataTypeAttributeVisitor();
special_visitors["paddle::dialect::PlaceAttribute"] =
new PlaceAttributeVisitor();
special_visitors["ir::ArrayAttribute<ir::Int64Attribute>"] =
new Int64ArrayAttributeVisitor();
}
ir::Attribute AttributeTranslator::operator()(
......
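For illustration, a minimal usage sketch of the visitor registered above (hypothetical, not part of this commit; it assumes AttributeTranslator lives in paddle::translator and keeps the two-argument operator() used in op_translator.cc below): translating a legacy std::vector<int> attribute against the target type name "ir::ArrayAttribute<ir::Int64Attribute>" dispatches to Int64ArrayAttributeVisitor and widens each element to ir::Int64Attribute.
// Hypothetical sketch, not part of this diff.
#include <vector>
#include "paddle/fluid/framework/op_desc.h"  // brings in paddle::framework::Attribute
#include "paddle/fluid/ir_adaptor/translator/attribute_translator.h"
ir::Attribute TranslateInt64ArrayExample() {
  // A legacy attribute holding 32-bit ints, as found in an old OpDesc.
  paddle::framework::Attribute legacy_attr = std::vector<int>{1, 2, 3};
  auto& attribute_translator =
      paddle::translator::AttributeTranslator::instance();
  // Dispatches to Int64ArrayAttributeVisitor via the special_visitors map.
  return attribute_translator("ir::ArrayAttribute<ir::Int64Attribute>",
                              legacy_attr);
}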
......@@ -25,6 +25,7 @@
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/ir/dialect/pd_attribute.h"
#include "paddle/fluid/ir/dialect/pd_type.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/interface/op_yaml_info.h"
#include "paddle/fluid/ir_adaptor/translator/attribute_translator.h"
#include "paddle/fluid/ir_adaptor/translator/op_compat_info.h"
......@@ -53,14 +54,20 @@ using BlockDesc = paddle::framework::BlockDesc;
using VarDesc = paddle::framework::VarDesc;
using OpOutputTypeList = std::vector<ir::Type>;
using OpOutputMapping = std::unordered_map<std::string, ResultIdx>;
using OpInputInfo = paddle::dialect::OpInputInfo;
using OpInputInfoList = std::vector<paddle::dialect::OpInputInfo>;
using OpAttributeInfo = paddle::dialect::OpAttributeInfo;
using OpAttributeInfoList = std::vector<paddle::dialect::OpAttributeInfo>;
using OpOutputInfo = paddle::dialect::OpOutputInfo;
using OpOutputInfoList = std::vector<paddle::dialect::OpOutputInfo>;
static const char kTargetDialectPrefix[] = "pd.";
using OpInputInfo = dialect::OpInputInfo;
using OpInputInfoList = std::vector<dialect::OpInputInfo>;
using OpAttributeInfo = dialect::OpAttributeInfo;
using OpAttributeInfoList = std::vector<dialect::OpAttributeInfo>;
using OpOutputInfo = dialect::OpOutputInfo;
using OpOutputInfoList = std::vector<dialect::OpOutputInfo>;
using InputHandleFn = std::function<ir::OpResult(ir::IrContext*,
TranslationContext*,
const OpDesc&,
const std::string&,
const OpInputInfo&,
ir::Program*)>;
constexpr char kTargetDialectPrefix[] = "pd.";
constexpr char kEmptyVarName[] = "@EMPTY@";
static const std::unordered_set<std::string> special_inplace_ops = {
"batch_norm",
......@@ -100,29 +107,11 @@ inline bool IsInplace(const OpDesc& op_desc) {
return inplace;
}
inline std::string OpNamecompatibleMapping(std::string op_name) {
inline std::string OpNameCompatibleMapping(std::string op_name) {
auto& op_normalizer = OpNameNormalizer::instance();
return op_normalizer[op_name];
}
inline ir::OpInfo LoopkUpOpInfo(ir::IrContext* ctx, const OpDesc& op_desc) {
std::string target_op_name =
kTargetDialectPrefix + OpNamecompatibleMapping(op_desc.Type());
if (IsInplace(op_desc)) {
target_op_name += "_";
}
VLOG(6) << "[op name normalizing: " << op_desc.Type() << " to "
<< target_op_name;
auto op_info = ctx->GetRegisteredOpInfo(target_op_name);
if (!op_info) {
IR_THROW("Op %d should have corresponding OpInfo %d",
op_desc.Type(),
target_op_name);
}
return op_info;
}
inline ir::Operation* InsertSliceOperationForTarget(
ir::IrContext* ctx,
TranslationContext* param_map,
......@@ -191,7 +180,7 @@ inline ir::Operation* InsertFullOperationForAttributeInput(ir::IrContext* ctx,
dtype = phi::DataType::BOOL;
}
ir::Builder builder(ctx, program->block());
paddle::dialect::FullOp full_op = builder.Build<paddle::dialect::FullOp>(
dialect::FullOp full_op = builder.Build<dialect::FullOp>(
std::vector<int64_t>{1}, data, dtype, phi::CPUPlace());
return full_op.operation();
......@@ -199,16 +188,15 @@ inline ir::Operation* InsertFullOperationForAttributeInput(ir::IrContext* ctx,
inline ir::Operation* InsertFullArrayOperationForAttributeInput(
ir::IrContext* ctx, ir::Program* program, ir::Attribute attr) {
IR_ENFORCE(attr.isa<paddle::dialect::IntArrayAttribute>(),
IR_ENFORCE(attr.isa<dialect::IntArrayAttribute>(),
"Encounter non IntArray type when trying to insert IntArray "
"mutable attribute");
phi::IntArray int_array =
attr.dyn_cast<paddle::dialect::IntArrayAttribute>().data();
phi::IntArray int_array = attr.dyn_cast<dialect::IntArrayAttribute>().data();
ir::Builder builder(ctx, program->block());
paddle::dialect::FullIntArrayOp full_int_array_op =
builder.Build<paddle::dialect::FullIntArrayOp>(
dialect::FullIntArrayOp full_int_array_op =
builder.Build<dialect::FullIntArrayOp>(
int_array.GetData(), phi::DataType::INT64, phi::CPUPlace());
return full_int_array_op.operation();
}
......@@ -247,13 +235,85 @@ inline ir::OpResult GetAttributeAsInput(ir::IrContext* ctx,
return defining_op->result(0);
}
inline std::vector<ir::OpResult> GenerateOperationInput(
} // namespace
/// @brief This class is used to translate an OpDesc; it's a functor class and
/// should have no non-static data members, since we expect it to be stateless.
struct OpTranscriber {
public:
virtual ~OpTranscriber() = default;
public:
virtual ir::Operation* operator()(ir::IrContext* ctx,
TranslationContext* param_map,
const OpDesc& op_desc,
ir::Program* program);
public:
virtual ir::OpInfo LoopkUpOpInfo(ir::IrContext* ctx, const OpDesc& op_desc);
virtual std::vector<ir::OpResult> GenerateOperationInput(
ir::IrContext* ctx,
TranslationContext* param_map,
const OpDesc& op_desc,
const std::string& normalized_op_name,
const OpInputInfoList& input_infos,
ir::Program* program);
virtual std::tuple<OpOutputTypeList, OpOutputMapping> GenerateOperationOutput(
ir::IrContext* ctx,
const OpDesc& op_desc,
const OpOutputInfoList& output_infos);
virtual void HandleNonexistentAttribute(ir::IrContext*,
ir::AttributeMap* attribute_map,
const OpAttributeInfo& info) {
auto& attribute_translator = AttributeTranslator::instance();
(*attribute_map)[info.name] =
attribute_translator(info.type_name, paddle::framework::Attribute());
}
virtual ir::AttributeMap TranslateOpAttribute(
ir::IrContext* ctx,
const std::string& normalized_op_name,
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc);
virtual void RecordOpResultMapping(TranslationContext* param_map,
const OpDesc& op_desc,
ir::Operation* operation,
const OpOutputMapping& arg_to_idx);
public:
virtual InputHandleFn GetSpecialInputHandlers(std::string input_name) {
return nullptr;
}
};
ir::OpInfo OpTranscriber::LoopkUpOpInfo(ir::IrContext* ctx,
const OpDesc& op_desc) {
std::string target_op_name =
kTargetDialectPrefix + OpNameCompatibleMapping(op_desc.Type());
if (IsInplace(op_desc)) {
target_op_name += "_";
}
VLOG(6) << "[op name normalizing: " << op_desc.Type() << " to "
<< target_op_name;
auto op_info = ctx->GetRegisteredOpInfo(target_op_name);
if (!op_info) {
IR_THROW("Op %d should have corresponding OpInfo %d",
op_desc.Type(),
target_op_name);
}
return op_info;
}
std::vector<ir::OpResult> OpTranscriber::GenerateOperationInput(
ir::IrContext* ctx,
TranslationContext* param_map,
ir::Program* program,
const OpDesc& op_desc,
const std::string& normalized_op_name,
const OpInputInfoList& input_infos) {
const OpInputInfoList& input_infos,
ir::Program* program) {
VLOG(10) << "[op:" << op_desc.Type() << "][input] entrance";
// scan all inputs to see if any of them is generated as a vector<Tensor>
// so need an additional `SliceOp` to take it out.
for (const auto& n : op_desc.Inputs()) {
......@@ -274,12 +334,21 @@ inline std::vector<ir::OpResult> GenerateOperationInput(
}
}
VLOG(10) << "[op:" << op_desc.Type() << "][input] start";
std::vector<ir::OpResult> op_inputs;
auto& op_normalizer = OpNameNormalizer::instance();
const auto* mutable_attributes =
op_normalizer.GetMutableAttributes(op_desc.Type());
for (const auto& info : input_infos) {
if (auto special_handler = this->GetSpecialInputHandlers(info.name)) {
ir::OpResult ret = special_handler(
ctx, param_map, op_desc, normalized_op_name, info, program);
op_inputs.push_back(ret);
continue;
}
std::string legacy_input_name =
op_normalizer.GetLegacyArgName(op_desc.Type(), info.name);
......@@ -332,6 +401,17 @@ inline std::vector<ir::OpResult> GenerateOperationInput(
VLOG(10) << "[op:" << op_desc.Type() << "][input]" << info.name << " "
<< is_vector << " " << info.type_name;
// Specially process TensorArray: this is because we cannot distinguish it
// from Vector<DenseTensor> by other conditions, yet we cannot support it
// like Vector<DenseTensor>.
if (legacy_input_vars.size() == 1) {
VarDesc* var = op_desc.Block()->FindVarRecursive(legacy_input_vars[0]);
if (var->GetType() ==
paddle::framework::proto::VarType::LOD_TENSOR_ARRAY) {
is_vector = false;
}
}
// if src type is Tensor
if (!is_vector) {
auto defining_info = (*param_map)[legacy_input_vars[0]];
......@@ -349,8 +429,8 @@ inline std::vector<ir::OpResult> GenerateOperationInput(
return op_inputs;
}
inline std::tuple<OpOutputTypeList, OpOutputMapping> GenerateOperationOutput(
ir::IrContext* ctx,
std::tuple<OpOutputTypeList, OpOutputMapping>
OpTranscriber::GenerateOperationOutput(ir::IrContext* ctx,
const OpDesc& op_desc,
const OpOutputInfoList& output_infos) {
OpOutputMapping arg_to_idx;
......@@ -367,7 +447,6 @@ inline std::tuple<OpOutputTypeList, OpOutputMapping> GenerateOperationOutput(
op_normalizer.GetLegacyArgName(op_desc.Type(), info.name);
// return empty type if this arg is optional and not shown in OpDesc
// TODO(lyk): HasOutput does not consider variadic attribute
if (!op_desc.HasOutput(legacy_output_name)) {
VLOG(10) << "[output translating]"
<< "[" << op_desc.Type() << "] optional " << info.name << " :"
......@@ -380,14 +459,37 @@ inline std::tuple<OpOutputTypeList, OpOutputMapping> GenerateOperationOutput(
continue;
}
const auto& legacy_output_vars = op_desc.Output(legacy_output_name);
const auto& origin_legacy_output_vars = op_desc.Output(legacy_output_name);
std::vector<std::string> legacy_output_vars;
std::copy_if(
origin_legacy_output_vars.begin(),
origin_legacy_output_vars.end(),
std::back_inserter(legacy_output_vars),
[](const auto& var_name) { return var_name != kEmptyVarName; });
bool is_vector = (info.type_name.find("VectorType") != std::string::npos);
// Specially process TensorArray: this is because we cannot distinguish it
// from Vector<DenseTensor> by other conditions, yet we cannot support it
// like Vector<DenseTensor>.
if (legacy_output_vars.size() == 1) {
VarDesc* var = block->FindVarRecursive(legacy_output_vars[0]);
if (var->GetType() ==
paddle::framework::proto::VarType::LOD_TENSOR_ARRAY) {
ir::Type translated_var_type =
type_translator[var->GetType()](ctx, *var);
op_output_types.push_back(translated_var_type);
arg_to_idx[var->Name()] = cur_output_idx;
continue;
}
}
// if src type is Tensor
if (!is_vector) {
VLOG(10) << "[output translating]"
<< "[" << op_desc.Type() << "]" << info.name << " :"
<< info.type_name << " " << legacy_output_name;
<< info.type_name << " " << legacy_output_name << " "
<< legacy_output_vars.size();
if (legacy_output_vars.size() == 0) {
op_output_types.push_back(ir::Type(nullptr));
continue;
......@@ -427,8 +529,9 @@ inline std::tuple<OpOutputTypeList, OpOutputMapping> GenerateOperationOutput(
return {op_output_types, arg_to_idx};
}
inline ir::AttributeMap TranslateOpAttribute(
std::string normalized_op_name,
ir::AttributeMap OpTranscriber::TranslateOpAttribute(
ir::IrContext* ctx,
const std::string& normalized_op_name,
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc) {
auto& attribute_translator = AttributeTranslator::instance();
......@@ -439,27 +542,29 @@ inline ir::AttributeMap TranslateOpAttribute(
auto legacy_attr_name =
op_normalizer.GetLegacyAttrName(op_desc.Type(), info.name);
paddle::framework::Attribute legacy_attr;
if (op_desc.HasAttr(legacy_attr_name)) {
legacy_attr = op_desc.GetAttr(legacy_attr_name);
}
paddle::framework::Attribute legacy_attr =
op_desc.GetAttr(legacy_attr_name);
VLOG(10) << "attribute in " << op_desc.Type()
<< " name: " << legacy_attr_name << " " << legacy_attr.index();
ir::Attribute new_attr = attribute_translator(info.type_name, legacy_attr);
ir::Attribute new_attr =
attribute_translator(info.type_name, legacy_attr);
attribute_map[info.name] = new_attr;
if (!new_attr) {
VLOG(0) << "empty attribute in " << op_desc.Type()
<< " name: " << info.name;
}
} else {
VLOG(10) << "new attribute in " << op_desc.Type()
<< " name: " << info.name << " " << new_attr.storage();
VLOG(10) << "attribute in " << op_desc.Type()
<< " name: " << legacy_attr_name << " doesn't exist";
this->HandleNonexistentAttribute(ctx, &attribute_map, info);
}
}
return attribute_map;
}
inline void RecordOpResultMapping(TranslationContext* param_map,
void OpTranscriber::RecordOpResultMapping(TranslationContext* param_map,
const OpDesc& op_desc,
ir::Operation* operation,
const OpOutputMapping& arg_to_idx) {
......@@ -470,12 +575,32 @@ inline void RecordOpResultMapping(TranslationContext* param_map,
auto& args = n.second;
size_t idx_in_vector = 0;
for (const auto& arg_name : args) {
auto idx = arg_to_idx.at(arg_name);
if (arg_name == kEmptyVarName) {
continue;
}
auto idx_iter = arg_to_idx.find(arg_name);
if (idx_iter == arg_to_idx.end()) {
VLOG(4) << "[output recording]"
<< "[" << op_desc.Type() << "][skip]" << arg_name;
continue;
}
auto idx = idx_iter->second;
VLOG(10) << "[output recording]"
<< "[" << op_desc.Type() << "]" << arg_name << " " << idx;
ir::OpResult value = operation->result(idx);
bool generated_by_vector = value.type().isa<ir::VectorType>();
// Specially process TensorArray: this is because we cannot distinguish it
// from Vector<DenseTensor> by other conditions, yet we cannot support it
// like Vector<DenseTensor>.
if (args.size() == 1) {
VarDesc* var = op_desc.Block()->FindVarRecursive(args[0]);
if (var->GetType() ==
paddle::framework::proto::VarType::LOD_TENSOR_ARRAY) {
generated_by_vector = false;
}
}
(*param_map)[arg_name] = VariableDefiningInfo(
value, generated_by_vector, generated_by_vector ? idx_in_vector : -1);
idx_in_vector++;
......@@ -483,13 +608,13 @@ inline void RecordOpResultMapping(TranslationContext* param_map,
}
}
ir::Operation* GeneralOpHandler(ir::IrContext* ctx,
ir::Operation* OpTranscriber::operator()(ir::IrContext* ctx,
TranslationContext* param_map,
ir::Program* program,
const OpDesc& op_desc) {
auto op_info = LoopkUpOpInfo(ctx, op_desc);
const OpDesc& op_desc,
ir::Program* program) {
auto op_info = this->LoopkUpOpInfo(ctx, op_desc);
auto* op_info_concept =
op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
op_info.GetInterfaceImpl<dialect::OpYamlInfoInterface>();
OpInputInfoList input_infos;
OpAttributeInfoList attr_infos;
......@@ -497,16 +622,16 @@ ir::Operation* GeneralOpHandler(ir::IrContext* ctx,
std::tie(input_infos, attr_infos, output_infos, std::ignore) =
op_info_concept->get_op_info_();
auto op_inputs = GenerateOperationInput(
ctx, param_map, program, op_desc, op_info.name(), input_infos);
auto op_inputs = this->GenerateOperationInput(
ctx, param_map, op_desc, op_info.name(), input_infos, program);
OpOutputMapping arg_to_idx;
OpOutputTypeList op_output_types;
std::tie(op_output_types, arg_to_idx) =
GenerateOperationOutput(ctx, op_desc, output_infos);
this->GenerateOperationOutput(ctx, op_desc, output_infos);
auto attribute_map =
TranslateOpAttribute(op_info.name(), attr_infos, op_desc);
this->TranslateOpAttribute(ctx, op_info.name(), attr_infos, op_desc);
VLOG(4) << "[general op][" << op_desc.Type() << "] preparation end.";
ir::Operation* operation =
......@@ -515,50 +640,267 @@ ir::Operation* GeneralOpHandler(ir::IrContext* ctx,
program->block()->push_back(operation);
VLOG(4) << "[general op][" << op_desc.Type() << "] opearation insertion end.";
RecordOpResultMapping(param_map, op_desc, operation, arg_to_idx);
this->RecordOpResultMapping(param_map, op_desc, operation, arg_to_idx);
return operation;
}
ir::Operation* FeedOpHandler(ir::IrContext* ctx,
TranslationContext* param_map,
ir::Program* program,
const OpDesc& op_desc) {
auto op_info = LoopkUpOpInfo(ctx, op_desc);
struct CastOpTranscriber : public OpTranscriber {
ir::AttributeMap TranslateOpAttribute(
ir::IrContext*,
const std::string& normalized_op_name,
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc) override {
auto& attribute_translator = AttributeTranslator::instance();
ir::AttributeMap attribute_map = {};
const OpAttributeInfo info = op_attr_infos[0];
std::string legacy_attr_name("out_dtype");
paddle::framework::Attribute legacy_attr;
if (op_desc.HasAttr(legacy_attr_name)) {
legacy_attr = op_desc.GetAttr(legacy_attr_name);
}
VLOG(10) << "attribute in " << op_desc.Type()
<< " name: " << legacy_attr_name << " " << legacy_attr.index();
ir::Attribute new_attr = attribute_translator(info.type_name, legacy_attr);
attribute_map[info.name] = new_attr;
return attribute_map;
}
};
struct EmbeddingOpTranscriber : public OpTranscriber {
void HandleNonexistentAttribute(ir::IrContext* ctx,
ir::AttributeMap* attribute_map,
const OpAttributeInfo& info) override {
if (info.name == "padding_idx") {
(*attribute_map)[info.name] = ir::Int64Attribute::get(ctx, -1);
} else if (info.name == "sparse") {
(*attribute_map)[info.name] = ir::BoolAttribute::get(ctx, false);
}
}
};
struct IncrementOpTranscriber : public OpTranscriber {
ir::AttributeMap TranslateOpAttribute(
ir::IrContext* ctx,
const std::string& normalized_op_name,
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc) override {
auto& attribute_translator = AttributeTranslator::instance();
ir::AttributeMap attribute_map = {};
paddle::framework::Attribute legacy_attr;
if (op_desc.HasAttr("step")) {
legacy_attr = op_desc.GetAttr("step");
VLOG(10) << "attribute in " << op_desc.Type() << " step: "
<< " " << legacy_attr.index();
ir::Attribute new_attr = attribute_translator(legacy_attr);
attribute_map["value"] = new_attr;
} else {
attribute_map["value"] = ir::FloatAttribute::get(ctx, 1.0f);
}
return attribute_map;
}
};
// The `assign_value` in static_ops.yaml is different from the one in
// `legacy_ops.yaml`. For this op we simulate the logic in
// python/paddle/tensor/creation.py::assign(x, output)
struct AssignValueOpTranscriber : public OpTranscriber {
ir::OpInfo LoopkUpOpInfo(ir::IrContext* ctx, const OpDesc& op_desc) override {
std::string target_op_name = "pd.assign_value_";
const auto& op_info = ctx->GetRegisteredOpInfo(target_op_name);
if (!op_info) {
IR_THROW(
"Op assign_value should have corresponding OpInfo pd.assign_value_");
}
return op_info;
}
ir::Operation* operator()(ir::IrContext* ctx,
TranslationContext* param_map,
const OpDesc& op_desc,
ir::Program* program) override {
VLOG(10) << "[op assign_value] start transcribing";
auto op_info = this->LoopkUpOpInfo(ctx, op_desc);
auto* op_info_concept =
op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
op_info.GetInterfaceImpl<dialect::OpYamlInfoInterface>();
OpInputInfoList input_infos;
OpAttributeInfoList attr_infos;
OpOutputInfoList output_infos;
std::tie(input_infos, attr_infos, output_infos, std::ignore) =
op_info_concept->get_op_info_();
std::unordered_map<std::string, OpAttributeInfo> attr_info_maps;
for (auto info : attr_infos) {
attr_info_maps.insert({info.name, info});
}
std::vector<ir::OpResult> op_inputs;
auto& attribute_translator = AttributeTranslator::instance();
ir::AttributeMap attribute_map;
paddle::framework::Attribute legacy_attr;
if (op_desc.HasAttr("shape")) {
legacy_attr = op_desc.GetAttr("shape");
} else {
IR_THROW("Op assign_value should have attribute `shape` but not find");
}
ir::Attribute attr_shape =
attribute_translator(attr_info_maps.at("shape").type_name, legacy_attr);
attribute_map["shape"] = attr_shape;
if (op_desc.HasAttr("dtype")) {
legacy_attr = op_desc.GetAttr("dtype");
} else {
IR_THROW("Op assign_value should have attribute `dtype` but not find");
}
ir::Attribute attr_dtype =
attribute_translator(attr_info_maps.at("dtype").type_name, legacy_attr);
attribute_map["dtype"] = attr_dtype;
ir::Attribute attr_place =
dialect::PlaceAttribute::get(ctx, phi::CPUPlace());
attribute_map["place"] = attr_place;
if (op_desc.HasAttr("bool_values")) {
legacy_attr = op_desc.GetAttr("bool_values");
} else if (op_desc.HasAttr("fp32_values")) {
legacy_attr = op_desc.GetAttr("fp32_values");
} else if (op_desc.HasAttr("int32_values")) {
legacy_attr = op_desc.GetAttr("int32_values");
} else if (op_desc.HasAttr("int64_values")) {
legacy_attr = op_desc.GetAttr("int64_values");
} else {
IR_THROW(
    "Op assign_value should have attribute `**_values` but not found");
}
ir::Attribute attr_values = attribute_translator(
attr_info_maps.at("values").type_name, legacy_attr);
attribute_map["values"] = attr_values;
VLOG(10) << "[op assign_value] attribute translation done";
std::vector<int> src_shape =
paddle::get<std::vector<int>>(op_desc.GetAttr("shape"));
std::vector<int64_t> target_shape(src_shape.begin(), src_shape.end());
ir::Builder builder(ctx, program->block());
dialect::FullOp full_op = builder.Build<dialect::FullOp>(
target_shape,
0.0f,
attr_dtype.dyn_cast<dialect::DataTypeAttribute>().data(),
phi::CPUPlace());
std::vector<ir::OpResult> op_inputs = {full_op->result(0)};
VLOG(10) << "[op assign_value] insert a full op to get input";
OpOutputMapping arg_to_idx;
OpOutputTypeList op_output_types;
std::tie(op_output_types, arg_to_idx) =
GenerateOperationOutput(ctx, op_desc, output_infos);
this->GenerateOperationOutput(ctx, op_desc, output_infos);
ir::Operation* operation = ir::Operation::Create(
op_inputs, attribute_map, op_output_types, op_info);
program->block()->push_back(operation);
RecordOpResultMapping(param_map, op_desc, operation, arg_to_idx);
VLOG(10) << "[op assign_value] translation finished";
return operation;
}
};
// The input `dropout_state_in` does not exist in the static-graph definition,
// so we generate an input via `full` with the same type as the `DropoutState`
// output of the OpDesc. Note that `DropoutState` is an optional output in
// static graph.
ir::OpResult TranslateDropOutStateIn(ir::IrContext* ctx,
TranslationContext* param_map,
const OpDesc& op_desc,
const std::string& normalized_op_name,
const OpInputInfo& input_info,
ir::Program* program) {
const std::string legacy_output_name = "DropoutState";
std::vector<std::string> legacy_output_vars;
if (op_desc.HasOutput(legacy_output_name)) {
legacy_output_vars = op_desc.Output(legacy_output_name);
}
if (legacy_output_vars.size() == 0) {
VLOG(3) << "[input translating] not find output variable: DropoutState";
return ir::OpResult(nullptr);
}
// `DropoutState` is a tensor
VarDesc* dropout_state =
op_desc.Block()->FindVarRecursive(legacy_output_vars[0]);
if (dropout_state == nullptr) {
IR_THROW("Unexpected: Rnn Op should have a non-empty DropoutState");
}
auto& type_translator = TypeTranslator::instance();
ir::Type translated_var_type =
type_translator[dropout_state->GetType()](ctx, *dropout_state);
IR_ENFORCE(
translated_var_type.isa<dialect::DenseTensorType>(),
"Unexpected: Rnn Op's output DropoutState should be a DenseTensor");
auto tensor_type = translated_var_type.dyn_cast<dialect::DenseTensorType>();
ir::Builder builder(ctx, program->block());
dialect::FullOp full_op = builder.Build<dialect::FullOp>(
phi::vectorize(tensor_type.dims()),
0.0f,
dialect::TransToPhiDataType(tensor_type.dtype()),
phi::CPUPlace());
return full_op->result(0);
}
// `rnn` has an additional input in dynamic graph
struct RnnOpTranscriber : public OpTranscriber {
InputHandleFn GetSpecialInputHandlers(std::string input_name) override {
if (input_name != "dropout_state_in") {
return nullptr;
}
return TranslateDropOutStateIn;
};
};
struct FeedOpTranscriber : public OpTranscriber {
ir::AttributeMap TranslateOpAttribute(
ir::IrContext* ctx,
const std::string& normalized_op_name,
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc) override {
ir::AttributeMap attribute_map = {
{"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
{"col",
ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
};
ir::Operation* operation =
ir::Operation::Create(op_inputs, attribute_map, op_output_types, op_info);
program->block()->push_back(operation);
RecordOpResultMapping(param_map, op_desc, operation, arg_to_idx);
return attribute_map;
}
return operation;
}
std::vector<ir::OpResult> GenerateOperationInput(
ir::IrContext* ctx,
TranslationContext* param_map,
const OpDesc& op_desc,
const std::string& normalized_op_name,
const OpInputInfoList& input_infos,
ir::Program* program) override {
return {};
}
};
ir::Operation* FetchOpHandler(ir::IrContext* ctx,
struct FetchOpTranscriber : public OpTranscriber {
ir::Operation* operator()(ir::IrContext* ctx,
TranslationContext* param_map,
ir::Program* program,
const OpDesc& op_desc) {
auto op_info = LoopkUpOpInfo(ctx, op_desc);
const OpDesc& op_desc,
ir::Program* program) override {
auto op_info = this->LoopkUpOpInfo(ctx, op_desc);
auto* op_info_concept =
op_info.GetInterfaceImpl<paddle::dialect::OpYamlInfoInterface>();
......@@ -568,8 +910,8 @@ ir::Operation* FetchOpHandler(ir::IrContext* ctx,
std::tie(input_infos, attr_infos, output_infos, std::ignore) =
op_info_concept->get_op_info_();
auto op_inputs = GenerateOperationInput(
ctx, param_map, program, op_desc, op_info.name(), input_infos);
auto op_inputs = this->GenerateOperationInput(
ctx, param_map, op_desc, op_info.name(), input_infos, program);
OpOutputTypeList op_output_types;
ir::AttributeMap attribute_map = {
......@@ -577,17 +919,23 @@ ir::Operation* FetchOpHandler(ir::IrContext* ctx,
};
op_output_types.push_back(op_inputs[0].type());
ir::Operation* operation =
ir::Operation::Create(op_inputs, attribute_map, op_output_types, op_info);
ir::Operation* operation = ir::Operation::Create(
op_inputs, attribute_map, op_output_types, op_info);
program->block()->push_back(operation);
return operation;
}
} // namespace
}
};
OpTranslator::OpTranslator() : general_handler(GeneralOpHandler) {
special_handlers["feed"] = FeedOpHandler;
special_handlers["fetch_v2"] = FetchOpHandler;
OpTranslator::OpTranslator() {
general_handler = OpTranscriber();
special_handlers["feed"] = FeedOpTranscriber();
special_handlers["fetch_v2"] = FetchOpTranscriber();
special_handlers["cast"] = CastOpTranscriber();
special_handlers["lookup_table_v2"] = EmbeddingOpTranscriber();
special_handlers["assign_value"] = AssignValueOpTranscriber();
special_handlers["increment"] = IncrementOpTranscriber();
special_handlers["rnn"] = RnnOpTranscriber();
}
} // namespace translator
......
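As a usage note, a short hedged sketch (not code from this commit; MyOpTranscriber and "my_op" are hypothetical names): supporting another legacy op now only requires subclassing OpTranscriber, overriding the hook that needs special treatment, and registering the functor in the constructor above, as EmbeddingOpTranscriber does for lookup_table_v2.
// Hypothetical example, not part of this commit.
struct MyOpTranscriber : public OpTranscriber {
  void HandleNonexistentAttribute(ir::IrContext* ctx,
                                  ir::AttributeMap* attribute_map,
                                  const OpAttributeInfo& info) override {
    // Fall back to a fixed default instead of translating a blank attribute.
    (*attribute_map)[info.name] = ir::BoolAttribute::get(ctx, false);
  }
};
// Registered alongside the handlers above:
//   special_handlers["my_op"] = MyOpTranscriber();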
......@@ -36,7 +36,7 @@ class OpTranslator {
using BlockDesc = paddle::framework::BlockDesc;
using VarDesc = paddle::framework::VarDesc;
using OpTranslateFn = std::function<ir::Operation*(
ir::IrContext*, TranslationContext*, ir::Program*, const OpDesc&)>;
ir::IrContext*, TranslationContext*, const OpDesc&, ir::Program*)>;
private:
OpTranslator(); // Disallow instantiation outside of the class.
......
......@@ -111,25 +111,46 @@ void ProgramTranslator::GetParameterForSingleBlock(const BlockDesc& block) {
parameter_name_mappings_[var->Name()] = var;
}
std::unordered_set<std::string> inner_defining_variables;
for (auto op_desc : block.AllOps()) {
for (const auto& n : op_desc->Inputs()) {
const auto& input_var_names = n.second;
for (const auto& var_name : input_var_names) {
bool need_get_parameter_op = (parameter_name_mappings_.find(var_name) !=
if (no_cast_var_names.count(var_name) != 0) continue;
VarDesc* var_desc = nullptr;
bool is_parameter = (parameter_name_mappings_.find(var_name) !=
parameter_name_mappings_.end());
need_get_parameter_op &= (parameter_visited_.count(var_name) == 0);
is_parameter &= (parameter_visited_.count(var_name) == 0);
if (is_parameter) {
var_desc = parameter_name_mappings_[var_name];
}
bool is_unseen_variable =
(inner_defining_variables.count(var_name) == 0);
if (is_unseen_variable) {
var_desc = block.FindVarRecursive(var_name);
}
bool need_get_parameter_op = is_parameter || is_unseen_variable;
if (need_get_parameter_op) {
ir::Operation* op =
InsertGetParamaterOp(ctx_, parameter_name_mappings_[var_name]);
ir::Operation* op = InsertGetParamaterOp(ctx_, var_desc);
program_->block()->push_back(op);
param_map_[var_name] = VariableDefiningInfo(op->result(0));
VLOG(10) << "[op translated][get parameter]" << op;
program_->SetParameter(var_name, nullptr);
parameter_visited_.insert(var_name);
inner_defining_variables.insert(var_name);
}
}
}
for (const auto& n : op_desc->Outputs()) {
const auto& output_var_names = n.second;
for (const auto& var_name : output_var_names) {
inner_defining_variables.insert(var_name);
}
}
}
}
......@@ -137,7 +158,7 @@ void ProgramTranslator::InsertOperationToSingleBlock(const BlockDesc& block) {
auto& op_translator = OpTranslator::instance();
for (auto op : block.AllOps()) {
OpTranslateFn& fn = op_translator[op->Type()];
ir::Operation* operation = fn(ctx_, &param_map_, program_, *op);
ir::Operation* operation = fn(ctx_, &param_map_, *op, program_);
VLOG(10) << "[op translated][special]" << operation;
}
}
......
......@@ -19,6 +19,7 @@
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/ir/dialect/pd_dialect.h"
#include "paddle/fluid/ir_adaptor/translator/program_translator.h"
#include "paddle/ir/core/builtin_dialect.h"
#include "paddle/ir/core/program.h"
namespace paddle {
......@@ -28,7 +29,9 @@ using Program = ::ir::Program;
std::unique_ptr<Program> TranslateLegacyProgramToProgram(
const LegacyProgramDesc& legacy_program) {
auto program = std::make_unique<Program>(ir::IrContext::Instance());
ir::IrContext* ctx = ir::IrContext::Instance();
ctx->GetOrRegisterDialect<dialect::PaddleDialect>();
auto program = std::make_unique<Program>(ctx);
translator::ProgramTranslator program_translator(&legacy_program,
program.get());
......
......@@ -93,6 +93,20 @@ TypeTranslator::TypeTranslator() {
size_t offset = 0;
return DenseTensorType::get(ctx, dtype, dim, layout, lod, offset);
}},
{VarType::LOD_TENSOR_ARRAY,
[&](ir::IrContext* ctx, const VarDesc& var_desc) -> ir::Type {
VLOG(10) << "[vartype translating]"
<< "[" << var_desc.Name() << "] from LOD_TENSOR_ARRAY";
return ir::VectorType::get(ctx, std::vector<ir::Type>{});
}},
{VarType::SELECTED_ROWS,
[&](ir::IrContext* ctx, const VarDesc& var_desc) -> ir::Type {
VLOG(10) << "[vartype translating]"
<< "[" << var_desc.Name() << "] from SELECTED_ROWS";
return this->operator[](VarType::LOD_TENSOR)(ctx, var_desc);
}},
};
}
......
......@@ -84,6 +84,7 @@
kernel :
func : assign
backward : assign_grad
inplace : (x -> out)
- op : assign_out_
args : (Tensor x, Tensor output)
......@@ -120,6 +121,7 @@
data_type : x
view : (mean -> mean_out), (variance -> variance_out)
backward : batch_norm_grad
optional : reserve_space
- op : cast
args : (Tensor x, DataType dtype)
......
......@@ -769,7 +769,7 @@
attrs : ['int[] slots = {}']
- op : divide (elementwise_div)
backward : divide_grad (elementwise_div)
backward : divide_grad (elementwise_div_grad)
inputs :
{x: X, y : Y}
outputs :
......@@ -1776,6 +1776,8 @@
- op : mish
backward : mish_grad
inputs:
lambda: threshold
extra :
attrs : [bool use_mkldnn = false]
......@@ -2839,6 +2841,8 @@
yolo_loss : GetYoloLossExpectedKernelType
yolo_loss_grad : GetYoloLossExpectedKernelType
- op: full_batch_size_like (fill_constant_batch_size_like)
- op: lu
backward: lu_grad
inputs:
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
from paddle.fluid import core
paddle.enable_static()
class TestCastOpTranscriber(unittest.TestCase):
def test_op(self):
place = core.Place()
place.set_place(paddle.CPUPlace())
new_scope = paddle.static.Scope()
main_program = paddle.static.Program()
with paddle.static.scope_guard(new_scope):
with paddle.static.program_guard(main_program):
x = paddle.to_tensor([2, 3, 4], 'float64')
y = paddle.cast(x, 'uint8')
default_job = core.Job("default")
type_to_program = {"default": main_program.desc}
plan = core.Plan([default_job], type_to_program)
new_exe = core.StandaloneExecutor(place, plan, new_scope)
class TestEmbeddingOpTranscriber(unittest.TestCase):
def test_op(self):
place = core.Place()
place.set_place(paddle.CPUPlace())
new_scope = paddle.static.Scope()
main_program = paddle.static.Program()
with paddle.static.scope_guard(new_scope):
with paddle.static.program_guard(main_program):
x = paddle.static.data(name="x", shape=[2, 4], dtype=np.int64)
embedding = paddle.nn.Embedding(
10, 3, weight_attr=paddle.nn.initializer.Constant(value=1.0)
)
output = embedding(x)
default_job = core.Job("default")
type_to_program = {"default": main_program.desc}
plan = core.Plan([default_job], type_to_program)
new_exe = core.StandaloneExecutor(place, plan, new_scope)
class TestIncrementOpTranscriber(unittest.TestCase):
def test_op(self):
place = core.Place()
place.set_place(paddle.CPUPlace())
new_scope = paddle.static.Scope()
main_program = paddle.static.Program()
with paddle.static.scope_guard(new_scope):
with paddle.static.program_guard(main_program):
data = paddle.zeros(shape=[1], dtype='float32')
counter = paddle.increment(data)
default_job = core.Job("default")
type_to_program = {"default": main_program.desc}
plan = core.Plan([default_job], type_to_program)
new_exe = core.StandaloneExecutor(place, plan, new_scope)
class TestAssignValueOpTranscriber(unittest.TestCase):
def test_op(self):
place = core.Place()
place.set_place(paddle.CPUPlace())
new_scope = paddle.static.Scope()
main_program = paddle.static.Program()
with paddle.static.scope_guard(new_scope):
with paddle.static.program_guard(main_program):
x = paddle.to_tensor(
[[0.1, 0.2], [0.3, 0.4]],
place=paddle.CPUPlace(),
stop_gradient=False,
)
default_job = core.Job("default")
type_to_program = {"default": main_program.desc}
plan = core.Plan([default_job], type_to_program)
new_exe = core.StandaloneExecutor(place, plan, new_scope)
class TestRnnOpTranscriber(unittest.TestCase):
def test_op(self):
place = core.Place()
place.set_place(paddle.CPUPlace())
new_scope = paddle.static.Scope()
main_program = paddle.static.Program()
with paddle.static.scope_guard(new_scope):
with paddle.static.program_guard(main_program):
x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))
cell = paddle.nn.SimpleRNNCell(16, 32)
y, h = cell(x, prev_h)
default_job = core.Job("default")
type_to_program = {"default": main_program.desc}
plan = core.Plan([default_job], type_to_program)
new_exe = core.StandaloneExecutor(place, plan, new_scope)
if __name__ == "__main__":
unittest.main()