Unverified commit 471fa1e8, authored by Zhanlue Yang, committed by GitHub

Added fluid dependencies to Eager Dygraph #2 (#37556)

Parent commit: a9608f60
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/legacy/tensor_helper.h"
#include "paddle/fluid/eager/legacy/type_def.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/pten/api/all.h"
#include "paddle/pten/include/core.h"
namespace egr {
// infer var type context for imperative mode
class TensorRuntimeInferVarTypeContext
: public paddle::framework::InferVarTypeContext {
public:
TensorRuntimeInferVarTypeContext(
const NameTensorMap& inputs, const NameTensorMap& outputs,
const paddle::framework::AttributeMap& attrs_map,
const paddle::framework::AttributeMap& default_attrs_map)
: InferVarTypeContext(nullptr, nullptr),
inputs_(inputs),
outputs_(outputs),
attrs_(attrs_map),
default_attrs_(default_attrs_map) {}
virtual ~TensorRuntimeInferVarTypeContext() {}
paddle::framework::Attribute GetAttr(const std::string& name) const override {
auto it = attrs_.find(name);
if (it == attrs_.end()) {
it = default_attrs_.find(name);
if (it == default_attrs_.end()) {
PADDLE_THROW(paddle::platform::errors::NotFound(
"Can not find [%s] in attributes.", name));
}
}
return it->second;
}
bool HasInput(const std::string& name) const override {
auto it = inputs_.find(name);
return (it != inputs_.end() && it->second.size() > 0);
}
bool HasOutput(const std::string& name) const override {
auto it = outputs_.find(name);
return (it != outputs_.end() && it->second.size() > 0);
}
size_t InputSize(const std::string& name) const {
return inputs_.at(name).size();
}
const std::string& InputVarName(const std::string& name,
const int index = 0) const {
// TODO(jiabin): Support this usage inputs_.at(name)[index]->Name()
auto it = inputs_.find(name);
PADDLE_ENFORCE_NE(it, inputs_.end(),
paddle::platform::errors::PreconditionNotMet(
"Can not find [%s] in Input", name));
return inputs_.at(name)[index]->name();
}
bool InputTypeAnyOf(
const std::string& name,
paddle::framework::proto::VarType::Type type) const override {
auto& inputs = inputs_.at(name);
return std::any_of(
inputs.begin(), inputs.end(),
[&type](const std::shared_ptr<egr::EagerTensor>& var) {
return paddle::framework::ToVarType(var->Var().Type()) == type;
});
}
bool InputTypeAllOf(
const std::string& name,
paddle::framework::proto::VarType::Type type) const override {
auto& inputs = inputs_.at(name);
return std::all_of(
inputs.begin(), inputs.end(),
[&type](const std::shared_ptr<egr::EagerTensor>& var) {
return paddle::framework::ToVarType(var->Var().Type()) == type;
});
}
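// Copies the var type of the input at `index` to the paired output when the
// two are distinct tensors.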
void SyncTypeAndDataType(const std::string& input_name,
const std::string& output_name,
int index = 0) override {
auto in_tensor = inputs_.at(input_name)[index];
auto out_tensor = outputs_.at(output_name)[index];
if (in_tensor != out_tensor) {
this->SetTensorType(
out_tensor, paddle::framework::ToVarType(in_tensor->Var().Type()));
}
}
void SetOutputType(const std::string& name,
paddle::framework::proto::VarType::Type type,
int index = 0) override {
if (index == paddle::framework::ALL_ELEMENTS) {
for (auto& item : outputs_.at(name)) {
this->SetTensorType(item, type);
}
} else {
auto& var = outputs_.at(name)[index];
this->SetTensorType(var, type);
}
}
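// Materializes the underlying Variable of `out` according to `type`. Only
// LOD_TENSOR is supported in eager mode; any other type throws NotFound.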
void SetTensorType(std::shared_ptr<egr::EagerTensor> out,
paddle::framework::proto::VarType::Type type) {
switch (type) {
case paddle::framework::proto::VarType::LOD_TENSOR: {
out->MutableVar()->GetMutable<paddle::framework::LoDTensor>();
break;
}
default: {
PADDLE_THROW(paddle::platform::errors::NotFound(
"Cannot found var type: %s while running runtime InferVarType",
paddle::framework::ToTypeName(type)));
}
}
}
paddle::framework::proto::VarType::Type GetInputType(
const std::string& name, const int& index = 0) const override {
return paddle::framework::ToVarType(inputs_.at(name)[index]->Var().Type());
}
paddle::framework::proto::VarType::Type GetOutputType(
const std::string& name, const int& index = 0) const override {
return paddle::framework::ToVarType(outputs_.at(name)[index]->Var().Type());
}
paddle::framework::proto::VarType::Type GetInputDataType(
const std::string& name, const int& index = 0) const override {
return inputs_.at(name)[index]
->Var()
.Get<paddle::framework::LoDTensor>()
.type();
}
void SetOutputDataType(const std::string& name,
paddle::framework::proto::VarType::Type type,
int index = 0) override {
// TODO(jiabin): It seems it doesn't make sense to set data_type in EagerMode.
}
bool IsDygraph() const override { return true; }
protected:
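// The name-based variable accessors below are unavailable in runtime
// (eager) InferVarType: every override throws PermissionDenied.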
bool HasVar(const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"HasVar is not supported in runtime InferVarType"));
}
const std::vector<std::string>& InputVars(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"InputVars is not supported in runtime InferVarType"));
}
const std::vector<std::string>& OutputVars(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"OutputVars is not supported in runtime InferVarType"));
}
paddle::framework::proto::VarType::Type GetVarType(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not manipulate var in runtime InferVarType"));
}
void SetVarType(const std::string& name,
paddle::framework::proto::VarType::Type type) override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not manipulate var in runtime InferVarType"));
}
paddle::framework::proto::VarType::Type GetVarDataType(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not manipulate var in runtime InferVarType"));
}
void SetVarDataType(const std::string& name,
paddle::framework::proto::VarType::Type type) override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not manipulate var in runtime InferVarType"));
}
std::vector<paddle::framework::proto::VarType::Type> GetVarDataTypes(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"GetVarDataTypes is not supported in runtime InferVarType"));
}
void SetVarDataTypes(
const std::string& name,
const std::vector<paddle::framework::proto::VarType::Type>&
multiple_data_type) override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"SetVarDataTypes is not supported in runtime InferVarType"));
}
std::vector<int64_t> GetVarShape(const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not handle Shape in runtime InferVarType"));
}
void SetVarShape(const std::string& name,
const std::vector<int64_t>& dims) override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not handle Shape in runtime InferVarType"));
}
int32_t GetVarLoDLevel(const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not handle LoDLevel in runtime InferVarType"));
}
void SetVarLoDLevel(const std::string& name, int32_t lod_level) override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Do not handle LoDLevel in runtime InferVarType"));
}
private:
const NameTensorMap& inputs_;
const NameTensorMap& outputs_;
const paddle::framework::AttributeMap& attrs_;
const paddle::framework::AttributeMap& default_attrs_;
};
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/legacy/op_runner.h"
#include <map>
#include <set>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/eager/legacy/amp_auto_cast.h"
#include "paddle/fluid/eager/legacy/infer_var_type_context.h"
#include "paddle/fluid/eager/legacy/prepared_operator.h"
#include "paddle/fluid/eager/legacy/tensor_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/denormal.h"
#include "paddle/fluid/string/string_helper.h"
DECLARE_bool(use_mkldnn);
DECLARE_string(tracer_mkldnn_ops_on);
DECLARE_string(tracer_mkldnn_ops_off);
namespace egr {
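// OpRunImpl executes a single kernel-based operator: it runs InferVarType,
// initializes any uninitialized outputs, prepares the kernel and its
// (possibly transformed) inputs, then launches the kernel.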
void OpRunImpl(const paddle::framework::OperatorBase& op,
const NameTensorMap& ins, const NameTensorMap& outs,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs,
const paddle::platform::Place& place) {
auto* op_kernel =
dynamic_cast<const paddle::framework::OperatorWithKernel*>(&op);
PADDLE_ENFORCE_NOT_NULL(
op_kernel, paddle::platform::errors::PermissionDenied(
"Only support operator with kernel in Dygraph mode."));
auto& info = op.Info();
if (info.infer_var_type_) {
egr::TensorRuntimeInferVarTypeContext infer_var_type_ctx(ins, outs, attrs,
default_attrs);
info.infer_var_type_(&infer_var_type_ctx);
}
// Initialize any uninitialized output variables as LoDTensor before running
// the kernel.
for (auto& tensor_pair : outs) {
for (auto& tensor : tensor_pair.second) {
if (tensor && tensor.get() && (!tensor->Var().IsInitialized())) {
InitializeVariable(tensor->MutableVar(),
paddle::framework::proto::VarType::LOD_TENSOR);
}
}
}
/**
* [ Why need temporary inputs here? ]
*
* PrepareData should not change original input tensor inplace.
* Suppose the user defines a tensor(int), enters an op to execute,
* and then this op rewrites GetExpectedKernelForVar, and converts
* this tensor to float type during execution. After the dynamic
* graph is executed, the user-defined variable will be lost, and
* the user cannot get the originally defined int tensor, because
* it has been converted to float; this should be regarded as a bug
* in certain usage scenarios.
*
* In static graph mode, when an op is executed, a temporary scope
* `transfer_scope` is created before PrepareData; the transformed
* data is stored in that temporary scope and discarded after the op
* finishes. The previous dynamic graph implementation, by contrast,
* overwrote the original input directly.
*/
auto prepared_op = egr::PreparedOp::Prepare(ins, outs, *op_kernel, place,
attrs, default_attrs);
auto tmp_ins_ptr =
egr::PrepareData(*op_kernel, ins, prepared_op.kernel_type());
if (tmp_ins_ptr == nullptr) {
prepared_op.Run(ins, outs, attrs, default_attrs);
} else {
prepared_op.Run(*tmp_ins_ptr, outs, attrs, default_attrs);
}
// TODO(jiabin): Set the output var's grad Forward DataType
}
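// RunOp is the eager-mode entry point for a single operator: it resolves the
// MKLDNN on/off lists, creates the OperatorBase, fills default attributes,
// applies AMP input casting when enabled, binds the device for `place`, and
// forwards to OpRunImpl.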
void RunOp(const std::string& type, const NameTensorMap& ins,
const NameTensorMap& outs, paddle::framework::AttributeMap attrs,
const paddle::platform::Place& place,
paddle::framework::AttributeMap* default_attrs,
bool override_default_attr_map,
const std::map<std::string, std::string>& inplace_map) {
VLOG(1) << "Run Op: " << type;
if (FLAGS_use_mkldnn) {
// If both lists are empty, all ops are enabled (the default for
// FLAGS_use_mkldnn=1).
// If the ops_on list is not empty, only ops from that list are enabled.
if (!FLAGS_tracer_mkldnn_ops_on.empty()) {
auto is_on = FLAGS_tracer_mkldnn_ops_on.find(type) != std::string::npos;
attrs["use_mkldnn"] = is_on;
} else {
// If the ops_on list is empty, all ops are enabled except types in the off list.
auto is_off = FLAGS_tracer_mkldnn_ops_off.find(type) != std::string::npos;
attrs["use_mkldnn"] = !is_off;
}
}
auto op = paddle::framework::OpRegistry::CreateOp(type, {}, {}, {}, false);
PADDLE_ENFORCE_NOT_NULL(default_attrs,
paddle::platform::errors::PermissionDenied(
"Detected default_attrs = nullptr."));
if (override_default_attr_map) {
const auto& op_info = op->Info();
auto* attr_checker = op_info.Checker();
if (attr_checker) {
attr_checker->Check(&attrs, true, /*only_check_exist_value=*/true);
}
static paddle::framework::AttributeMap empty_attrs_map = {};
*default_attrs = attr_checker == nullptr
? empty_attrs_map
: attr_checker->GetDefaultAttrMap();
}
auto amp_level = egr::Controller::Instance().GetAMPLevel();
NameTensorMap new_ins = ins;
if (amp_level == 1) {
VLOG(5) << "Auto mixed precision run operator: " << type;
new_ins = AutoCastInputs(type, ins);
} else if (amp_level == 2) {
VLOG(5) << "Pure fp16 run operator: " << type;
new_ins = CastPureFp16Inputs(type, ins);
}
try {
if (paddle::platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle::platform::SetDeviceId(
BOOST_GET_CONST(paddle::platform::CUDAPlace, place).device);
#else
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with GPU if use CUDAPlace."));
#endif
} else if (paddle::platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
paddle::platform::SetXPUDeviceId(
BOOST_GET_CONST(paddle::platform::XPUPlace, place).device);
#else
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with XPU if use XPUPlace."));
#endif
} else if (paddle::platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
paddle::platform::SetNPUDeviceId(
BOOST_GET_CONST(paddle::platform::NPUPlace, place).device);
#else
PADDLE_THROW(paddle::platform::errors::PreconditionNotMet(
"PaddlePaddle should compile with NPU if use NPUPlace."));
#endif
}
OpRunImpl(*op, new_ins, outs, attrs, *default_attrs, place);
} catch (paddle::platform::EnforceNotMet& exception) {
paddle::framework::AppendErrorOpHint(type, &exception);
throw std::move(exception);
} catch (std::exception& ex) {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Operator %s raises an %s exception.\n"
"The exception content is\n:%s.",
type, paddle::platform::demangle(typeid(ex).name()), ex.what()));
} catch (...) {
// NOTE: this branch represents a very serious bug with
// low probability of occurrence, and we can't get its
// exception content here.
PADDLE_THROW(paddle::platform::errors::Fatal(
"Operator %s raises an unknown exception.", type));
}
// TODO(jiabin): Support this later
// if (enable_program_desc_tracing_) {
// VLOG(5) << "Trace op " << type << " into ProgramDesc";
// program_desc_tracer_->InsertOp(type, new_ins, outs, attrs);
// }
}
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/legacy/type_def.h"
// TODO(Jiabin): We should not depend on this header; remove it later.
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/pten/core/tensor_meta.h"
namespace egr {
void RunOp(const std::string& type, const NameTensorMap& ins,
const NameTensorMap& outs, paddle::framework::AttributeMap attrs,
const paddle::platform::Place& place,
paddle::framework::AttributeMap* default_attrs,
bool override_default_attr_map,
const std::map<std::string, std::string>& inplace_map = {});
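// A minimal, hypothetical call-site sketch (the op name, tensor names, and
// the "axis" attribute below are illustrative, not taken from this patch;
// x, y, and out are assumed to be std::shared_ptr<egr::EagerTensor>):
//
//   egr::NameTensorMap ins = {{"X", {x}}, {"Y", {y}}};
//   egr::NameTensorMap outs = {{"Out", {out}}};
//   paddle::framework::AttributeMap attrs = {{"axis", -1}};
//   paddle::framework::AttributeMap default_attrs;
//   egr::RunOp("elementwise_add", ins, outs, attrs,
//              paddle::platform::CPUPlace(), &default_attrs,
//              /*override_default_attr_map=*/true);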
}  // namespace egr
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/legacy/prepared_operator.h"
#include "paddle/fluid/eager/legacy/infer_shape_context.h"
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/details/nan_inf_utils.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/utils/small_vector.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/xpu/xpu_op_list.h"
#endif
DECLARE_bool(check_nan_inf);
DECLARE_bool(run_pten_kernel);
namespace egr {
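// Returns the dense tensor backing `var`: the LoDTensor itself, or the value
// tensor of a SelectedRows; nullptr for any other variable type.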
const paddle::framework::Tensor* GetTensorFromVar(
const paddle::framework::Variable& var) {
if (var.IsType<paddle::framework::LoDTensor>()) {
return &(var.Get<paddle::framework::LoDTensor>());
} else if (var.IsType<paddle::framework::SelectedRows>()) {
return &(var.Get<paddle::framework::SelectedRows>().value());
} else {
return nullptr;
}
}
static const paddle::framework::Attribute& GetAttr(
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs,
const std::string& name) {
auto it = attrs.find(name);
bool found = it != attrs.end();
if (!found) {
it = default_attrs.find(name);
found = it != default_attrs.end();
}
PADDLE_ENFORCE_EQ(found, true,
paddle::platform::errors::NotFound(
"(%s) is not found in AttributeMap.", name));
return it->second;
}
static void HandleComplexGradToRealGrad(const NameTensorMap& outs) {
// TODO(jiabin): Support complex forward datatype later.
}
PreparedOp::PreparedOp(
const paddle::framework::OperatorBase& op,
const paddle::framework::RuntimeContext& ctx,
const paddle::framework::OpKernelType& kernel_type,
const paddle::framework::OperatorWithKernel::OpKernelFunc& func,
paddle::platform::DeviceContext* dev_ctx)
: op_(op),
ctx_(ctx),
kernel_type_(kernel_type),
func_(func),
dev_ctx_(dev_ctx) {}
PreparedOp PrepareImpl(const NameTensorMap& ins, const NameTensorMap& outs,
const paddle::framework::OperatorWithKernel& op,
const paddle::platform::Place& place,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs) {
paddle::platform::DeviceContextPool& pool =
paddle::platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
paddle::framework::RuntimeContext ctx({}, {});
#ifdef PADDLE_WITH_MKLDNN
// The MKLDNN code path reads attributes in some GetKernelTypeForVar and
// GetKernelType implementations, so the attributes must be copied onto the
// op. The const qualifier of Attrs() has to be cast away to overwrite them.
if (FLAGS_use_mkldnn) {
auto& mutable_op_attrs =
const_cast<paddle::framework::AttributeMap&>(op.Attrs());
mutable_op_attrs = default_attrs;
for (auto& attr : attrs) {
mutable_op_attrs[attr.first] = attr.second;
}
}
#endif
// 1. get expected kernel key
auto dygraph_exe_ctx =
egr::EagerExecutionContext(op, paddle::framework::Scope(), *dev_ctx, ctx,
ins, outs, attrs, default_attrs);
auto expected_kernel_key = op.GetExpectedKernelType(dygraph_exe_ctx);
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
// 2. check if op[type] has kernel registered.
auto& all_op_kernels = op.AllOpKernels();
auto kernels_iter = all_op_kernels.find(op.Type());
PADDLE_ENFORCE_NE(
kernels_iter, all_op_kernels.end(),
paddle::platform::errors::NotFound(
"There are no kernels which are registered in the %s operator.",
op.Type()));
auto& kernels = kernels_iter->second;
auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_XPU
if (is_xpu_place(expected_kernel_key.place_) &&
(kernel_iter == kernels.end() ||
!paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key) ||
paddle::platform::is_in_xpu_black_list(op.Type()))) {
VLOG(3) << "missing XPU kernel: " << op.Type()
<< ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!";
expected_kernel_key.place_ = paddle::platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
if (kernel_iter == kernels.end() &&
is_npu_place(expected_kernel_key.place_)) {
VLOG(3) << "missing NPU kernel: " << op.Type()
<< ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!";
expected_kernel_key.place_ = paddle::platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
// TODO(jiabin): Add operator.cc's line 1000 part back when we need that
// case
PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
paddle::platform::errors::NotFound(
"Operator %s does not have kernel for %s.", op.Type(),
KernelTypeToString(expected_kernel_key)));
if (!(expected_kernel_key.place_ == place)) {
dev_ctx = pool.Get(expected_kernel_key.place_);
}
return PreparedOp(op, ctx, expected_kernel_key, kernel_iter->second, dev_ctx);
}
PreparedOp PreparedOp::Prepare(
const NameTensorMap& ins, const NameTensorMap& outs,
const paddle::framework::OperatorWithKernel& op,
const paddle::platform::Place& place,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs) {
return PrepareImpl(ins, outs, op, place, attrs, default_attrs);
}
static void PreparedOpRunImpl(
const paddle::framework::OperatorBase& op,
const paddle::framework::RuntimeContext& ctx,
const paddle::framework::OpKernelType& kernel_type,
const paddle::framework::OperatorWithKernel::OpKernelFunc& func,
paddle::platform::DeviceContext* dev_ctx, const NameTensorMap& ins,
const NameTensorMap& outs, const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs) {
// TODO(zjl): remove scope in dygraph
paddle::framework::Scope scope;
EagerInferShapeContext infer_shape_ctx(&ins, &outs, &attrs, &default_attrs,
op.Type());
static_cast<const paddle::framework::OperatorWithKernel&>(op).InferShape(
&infer_shape_ctx);
func(EagerExecutionContext(op, scope, *dev_ctx, ctx, ins, outs, attrs,
default_attrs));
if (FLAGS_check_nan_inf) {
paddle::framework::details::CheckOpHasNanOrInfInEager<EagerTensor>(
op.Type(), outs, dev_ctx->GetPlace());
}
/**
* [ Why need handle complex gradient to real gradient? ]
*
* After the introduction of complex number calculations, Ops that support
* complex number calculations generally support type promotion, such as
* x(float32) + y(complex64) = out(complex64), then the type of the grad
* tensor should be dout(complex64), dx(float32), dy (complex64).
*
* But because dout is complex64, dx is also complex64 after the
* grad op kernel executes, so we need to recognize this situation
* and convert dx back to float32. HandleComplexGradToRealGrad does this.
*/
if (paddle::framework::IsComplexType(kernel_type.data_type_)) {
HandleComplexGradToRealGrad(outs);
}
}
void PreparedOp::Run(const NameTensorMap& ins, const NameTensorMap& outs,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs) {
PreparedOpRunImpl(op_, ctx_, kernel_type_, func_, dev_ctx_, ins, outs, attrs,
default_attrs);
}
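// PrepareData transforms inputs whose per-variable kernel type differs from
// the expected kernel key (layout/place/dtype). It returns a copy of `ins`
// with replacement tensors only when a dtype transform occurred; otherwise
// it returns nullptr and any transform was applied in place.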
std::shared_ptr<NameTensorMap> PrepareData(
const paddle::framework::OperatorWithKernel& op, const NameTensorMap& ins,
const paddle::framework::OpKernelType& expected_kernel_key) {
std::shared_ptr<NameTensorMap> tmp_ins_ptr = nullptr;
for (const auto& name_pair : ins) {
for (size_t i = 0; i < name_pair.second.size(); ++i) {
auto& egr_tensor = name_pair.second[i];
const auto* tensor = GetTensorFromVar(egr_tensor->Var());
if (tensor && tensor->IsInitialized()) {
auto kernel_type_for_var = op.GetKernelTypeForVar(
name_pair.first, *tensor, expected_kernel_key);
if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) {
continue;
} else {
// TODO(jiabin): Support Cache later
VLOG(3) << "Transform Variable " << egr_tensor->name() << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
paddle::framework::Tensor out;
TransformData(expected_kernel_key, kernel_type_for_var, *tensor,
&out);
if (NeedTransformDataType(kernel_type_for_var, expected_kernel_key)) {
// To avoid NameTensorMap copy-construction overhead in the common
// case, copy the input map lazily: only a dtype transform that
// produces a new tensor forces the copy.
if (tmp_ins_ptr == nullptr) {
tmp_ins_ptr = std::make_shared<NameTensorMap>(ins);
}
auto tmp_egr_tensor =
std::make_shared<EagerTensor>(egr_tensor->name());
SetTensorToVariable(egr_tensor->Var(), out,
tmp_egr_tensor->MutableVar());
(*tmp_ins_ptr)[name_pair.first][i] = tmp_egr_tensor;
} else {
// If the dtype is unchanged, an in-place transform does not alter
// the original value, so transform in place to avoid an extra copy.
SetTensorToVariable(egr_tensor->Var(), out,
egr_tensor->MutableVar());
}
}
}
}
}
return tmp_ins_ptr;
}
} // namespace egr
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/eager/legacy/execution_context.h"
#include "paddle/fluid/eager/legacy/type_def.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/pten/include/core.h"
DECLARE_bool(use_mkldnn);
namespace paddle {
namespace framework {
class Tensor;
class Variable;
} // namespace framework
namespace platform {
class DeviceContext;
} // namespace platform
} // namespace paddle
namespace egr {
const paddle::framework::Tensor* GetTensorFromVar(
const paddle::framework::Variable& var);
std::shared_ptr<NameTensorMap> PrepareData(
const paddle::framework::OperatorWithKernel& op, const NameTensorMap& ins,
const paddle::framework::OpKernelType& expected_kernel_key);
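// PreparedOp bundles a selected kernel function together with its
// OpKernelType and device context, so Run() can launch the kernel without
// repeating kernel selection.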
class PreparedOp {
public:
PreparedOp(const paddle::framework::OperatorBase& op,
const paddle::framework::RuntimeContext& ctx,
const paddle::framework::OpKernelType& kernel_type,
const paddle::framework::OperatorWithKernel::OpKernelFunc& func,
paddle::platform::DeviceContext* dev_ctx);
static PreparedOp Prepare(
const NameTensorMap& ins, const NameTensorMap& outs,
const paddle::framework::OperatorWithKernel& op,
const paddle::platform::Place& place,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs);
void Run(const NameTensorMap& in, const NameTensorMap& out,
const paddle::framework::AttributeMap& attrs,
const paddle::framework::AttributeMap& default_attrs);
const paddle::framework::OpKernelType& kernel_type() const {
return kernel_type_;
}
private:
const paddle::framework::OperatorBase& op_;
const paddle::framework::RuntimeContext& ctx_;
paddle::framework::OpKernelType kernel_type_;
paddle::framework::OperatorWithKernel::OpKernelFunc func_;
paddle::platform::DeviceContext* dev_ctx_;
};
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/legacy/tensor_helper.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/platform/place.h"
namespace egr {
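// Allocates the concrete payload matching `var_type` inside `var`
// (e.g. a LoDTensor for LOD_TENSOR); throws for unsupported types.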
void InitializeVariable(paddle::framework::Variable *var,
paddle::framework::proto::VarType::Type var_type) {
if (var_type == paddle::framework::proto::VarType::LOD_TENSOR) {
var->GetMutable<paddle::framework::LoDTensor>();
} else if (var_type == paddle::framework::proto::VarType::SELECTED_ROWS) {
var->GetMutable<paddle::framework::SelectedRows>();
} else if (var_type == paddle::framework::proto::VarType::FEED_MINIBATCH) {
var->GetMutable<paddle::framework::FeedList>();
} else if (var_type == paddle::framework::proto::VarType::FETCH_LIST) {
var->GetMutable<paddle::framework::FetchList>();
} else if (var_type == paddle::framework::proto::VarType::STEP_SCOPES) {
var->GetMutable<std::vector<paddle::framework::Scope *>>();
} else if (var_type == paddle::framework::proto::VarType::LOD_RANK_TABLE) {
var->GetMutable<paddle::framework::LoDRankTable>();
} else if (var_type == paddle::framework::proto::VarType::LOD_TENSOR_ARRAY) {
var->GetMutable<paddle::framework::LoDTensorArray>();
} else if (var_type == paddle::framework::proto::VarType::STRINGS) {
var->GetMutable<paddle::framework::Strings>();
} else if (var_type == paddle::framework::proto::VarType::VOCAB) {
var->GetMutable<paddle::framework::Vocab>();
} else if (var_type == paddle::framework::proto::VarType::PLACE_LIST) {
var->GetMutable<paddle::platform::PlaceList>();
} else if (var_type == paddle::framework::proto::VarType::READER) {
var->GetMutable<paddle::framework::ReaderHolder>();
} else if (var_type == paddle::framework::proto::VarType::RAW) {
// GetMutable will be called in operator
} else {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"paddle::framework::Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW].",
var_type));
}
}
void CopyVariable(const paddle::framework::Variable &src_var,
paddle::framework::Variable *dst_var) {
// Only CPU is supported for now.
auto cpu_place = paddle::platform::CPUPlace();
if (src_var.IsType<paddle::framework::LoDTensor>()) {
auto *tmp_grad_tensor = dst_var->GetMutable<paddle::framework::LoDTensor>();
auto &src_tensor = src_var.Get<paddle::framework::LoDTensor>();
tmp_grad_tensor->set_lod(src_tensor.lod());
paddle::framework::TensorCopy(src_tensor, cpu_place, tmp_grad_tensor);
} else if (src_var.IsType<paddle::framework::SelectedRows>()) {
auto &src_slr = src_var.Get<paddle::framework::SelectedRows>();
auto *tmp_grad_slr = dst_var->GetMutable<paddle::framework::SelectedRows>();
tmp_grad_slr->set_rows(src_slr.rows());
tmp_grad_slr->set_height(src_slr.height());
auto &src_t = src_slr.value();
auto *dst_t = tmp_grad_slr->mutable_value();
paddle::framework::TensorCopy(src_t, cpu_place, dst_t);
} else {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"Unknown variable type to copy."));
}
}
paddle::framework::proto::VarType::Type GetDtypeFromVar(
const paddle::framework::Variable &var) {
if (var.IsType<paddle::framework::LoDTensor>()) {
return var.Get<paddle::framework::LoDTensor>().type();
} else if (var.IsType<paddle::framework::SelectedRows>()) {
return var.Get<paddle::framework::SelectedRows>().value().type();
} else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Variable type is %s, expect LoDTensor or SelectedRows.",
paddle::framework::ToTypeName(var.Type())));
}
}
const paddle::platform::Place &GetPlaceFromVar(
const paddle::framework::Variable &var) {
if (var.IsType<paddle::framework::LoDTensor>()) {
return var.Get<paddle::framework::LoDTensor>().place();
} else if (var.IsType<paddle::framework::SelectedRows>()) {
return var.Get<paddle::framework::SelectedRows>().place();
} else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Variable type is %s, expect LoDTensor or SelectedRows.",
paddle::framework::ToTypeName(var.Type())));
}
}
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/pten/api/all.h"
#include "paddle/pten/include/core.h"
namespace egr {
void InitializeVariable(paddle::framework::Variable* var,
paddle::framework::proto::VarType::Type var_type);
paddle::framework::proto::VarType::Type GetDtypeFromVar(
const paddle::framework::Variable& var);
const paddle::platform::Place& GetPlaceFromVar(
const paddle::framework::Variable& var);
void CopyVariable(const paddle::framework::Variable& src_var,
paddle::framework::Variable* dst_var);
}  // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/platform/macros.h"
namespace egr {
class EagerTensor;
namespace details {
template <typename T>
struct NameVarMapTrait {};
template <>
struct NameVarMapTrait<EagerTensor> {
using Type =
std::map<std::string, std::vector<std::shared_ptr<egr::EagerTensor>>>;
};
} // namespace details
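// NameMap<EagerTensor> maps an operator parameter name (e.g. "X", "Out") to
// the list of tensors bound to that parameter slot.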
template <typename T>
using NameMap = typename details::NameVarMapTrait<T>::Type;
using NameTensorMap = NameMap<EagerTensor>;
} // namespace egr