Unverified · Commit 7e7e9404 authored by Aurelius84, committed by GitHub

[PTen]Migrate proto::VarType outside of Pten (#39411)

* #1 migrate dist-related type() -> dtype()

* move datatype function from pten -> fluid/framework

* change type() in imperative into convert(dtype())

* modify xx_tensor->type into xx_tensor->dtype

* change the set_type interface and the caller

* modify xx_tensor.type into xx_tensor.dtype

* fix mutable_data(place, dtype())

* change caller of mutable_data in pten and distributed

* change the caller of mutable_data in fluid/framework

* change the caller of mutable_data in imperative directory

* mutable_data: inference

* update the call of mutable_data

* transfer MakePtenScalarArray MakePtenScalar ResetHolderWithType

* pass the compile; the next step is to remove VarType from Pten

* fix all and remove VarType from pten; succeeds on Linux. Next task: the other platforms

* fix conflict with develop

* fix compiled error

* Fix reset conversion

* fix conflict

* fix compiled problem

* fix typo

* Fix << in tensor_utils.cc

* fix type->dtype

* fix unittest

* fix tensor init constructor

* fix DataTypeSize for BFloat16

* fix code style

* fix npu compiled error

* fix npu

* compile npu successfully

* fix conflict

* fix conflict
Co-authored-by: xiongkun <xiongkun03@baidu.com>
Parent 9c2cee1c
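For reference, a minimal, hypothetical sketch (editorial, not part of this commit) of the call-site migration the bullets above describe: proto::VarType-based calls are replaced by the pten DataType-based dtype() accessor plus the framework conversion helpers used throughout the diff below. The function and argument names here are placeholders.

// Editorial illustration only: hypothetical helper showing the migrated
// call patterns; tensor/place are placeholder arguments.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"

void MigrationSketch(paddle::framework::LoDTensor* tensor,
                     const paddle::platform::Place& place) {
  namespace framework = paddle::framework;
  // Old: tensor->type() returned proto::VarType::Type directly.
  // New: tensor->dtype() returns a pten DataType; convert where proto is needed.
  auto proto_type = framework::TransToProtoVarType(tensor->dtype());
  // Old: framework::SizeOfType(tensor->type())
  // New: framework::DataTypeSize(tensor->dtype())
  auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
  // Old: tensor->mutable_data(place, framework::proto::VarType::FP32)
  // New: mutable_data now takes a pten DataType.
  tensor->mutable_data(
      place, framework::TransToPtenDataType(framework::proto::VarType::FP32));
  (void)proto_type;
  (void)data_len;
}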
......@@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector<DistModelTensor> *output_data,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(output_data->at(i));
output->name = idx_to_fetches_[idx];
bool rst = false;
......
......@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/distributed/ps/service/brpc_utils.h"
#include <arpa/inet.h>
#include <netdb.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......@@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var,
}
}
}
var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
if (platform::is_cpu_place(tensor->place())) {
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
} else {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
delete[] temp_ptr;
......@@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var,
var_data->resize(rows->size() * sizeof(int64_t));
char* data_ptr = const_cast<char*>(var_data->data());
memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
var_msg->set_data_type(static_cast<VarMsg::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}
// IO Buffer
if (platform::is_cpu_place(tensor->place())) {
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(tensor->data()), data_len);
} else {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype());
iobuf->append(reinterpret_cast<const char*>(&data_len), 8);
iobuf->append(reinterpret_cast<const char*>(temp_ptr), data_len);
delete[] temp_ptr;
......@@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
void* tensor_data = tensor->mutable_data(
place,
framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
// IO Buffer
if (platform::is_cpu_place(place)) {
......@@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg,
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
unsigned long data_len; // NOLINT
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
place, tensor_data, platform::CPUPlace(), (void*)temp_ptr, // NOLINT
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
delete[] temp_ptr;
#endif
}
......@@ -266,8 +279,9 @@ void DeserializeSelectedRows(
vec_dim.push_back(x);
}
tensor->Resize(framework::make_ddim(vec_dim));
void* tensor_data =
tensor->mutable_data(place, VarMessageToVarType(msg.data_type()));
void* tensor_data = tensor->mutable_data(
place,
framework::TransToPtenDataType(VarMessageToVarType(msg.data_type())));
// IO Buffer
if (platform::is_cpu_place(place)) {
unsigned long data_len; // NOLINT
......@@ -275,15 +289,16 @@ void DeserializeSelectedRows(
io_buffer_itr.copy_and_forward(tensor_data, data_len);
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
char* temp_ptr = new char[tensor->numel() *
framework::SizeOfType(tensor->type())]; // NOLINT
unsigned long data_len; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
char* temp_ptr =
new char[tensor->numel() *
framework::DataTypeSize(tensor->dtype())]; // NOLINT
unsigned long data_len; // NOLINT
io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT
io_buffer_itr.copy_and_forward(temp_ptr, data_len);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr,
tensor->numel() * framework::SizeOfType(tensor->type()),
tensor->numel() * framework::DataTypeSize(tensor->dtype()),
stream);
delete[] temp_ptr;
#endif
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/distributed/ps/service/heter_client.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/string/split.h"
......@@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx,
} else {
#ifdef PADDLE_WITH_CUDA
std::vector<char> temp;
temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
char* temp_ptr = temp.data();
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(
platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(),
tensor->numel() * framework::SizeOfType(tensor->type()), stream);
tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream);
float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
micro_id = static_cast<int>(temp_ptr_float[0]);
#endif
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/pten/kernels/funcs/math_function.h"
......
......@@ -452,4 +452,10 @@ endif()
cc_test(scope_guard_test SRCS scope_guard_test.cc)
cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils)
if(WITH_GPU OR WITH_ROCM)
cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info)
else()
cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place)
endif()
cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace paddle {
namespace framework {
paddle::experimental::DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype) {
// Order the case branches by how frequently each data type is used
switch (dtype) {
case paddle::framework::proto::VarType::FP32:
return DataType::FLOAT32;
case paddle::framework::proto::VarType::FP64:
return DataType::FLOAT64;
case paddle::framework::proto::VarType::INT64:
return DataType::INT64;
case paddle::framework::proto::VarType::INT32:
return DataType::INT32;
case paddle::framework::proto::VarType::INT8:
return DataType::INT8;
case paddle::framework::proto::VarType::UINT8:
return DataType::UINT8;
case paddle::framework::proto::VarType::INT16:
return DataType::INT16;
case paddle::framework::proto::VarType::COMPLEX64:
return DataType::COMPLEX64;
case paddle::framework::proto::VarType::COMPLEX128:
return DataType::COMPLEX128;
case paddle::framework::proto::VarType::FP16:
return DataType::FLOAT16;
case paddle::framework::proto::VarType::BF16:
return DataType::BFLOAT16;
case paddle::framework::proto::VarType::BOOL:
return DataType::BOOL;
default:
return DataType::UNDEFINED;
}
}
paddle::framework::proto::VarType::Type TransToProtoVarType(
const paddle::experimental::DataType& dtype) {
// Order the case branches by how frequently each data type is used
switch (dtype) {
case DataType::FLOAT32:
return paddle::framework::proto::VarType::FP32;
case DataType::FLOAT64:
return paddle::framework::proto::VarType::FP64;
case DataType::INT64:
return paddle::framework::proto::VarType::INT64;
case DataType::INT32:
return paddle::framework::proto::VarType::INT32;
case DataType::INT8:
return paddle::framework::proto::VarType::INT8;
case DataType::UINT8:
return paddle::framework::proto::VarType::UINT8;
case DataType::INT16:
return paddle::framework::proto::VarType::INT16;
case DataType::COMPLEX64:
return paddle::framework::proto::VarType::COMPLEX64;
case DataType::COMPLEX128:
return paddle::framework::proto::VarType::COMPLEX128;
case DataType::FLOAT16:
return paddle::framework::proto::VarType::FP16;
case DataType::BFLOAT16:
return paddle::framework::proto::VarType::BF16;
case DataType::BOOL:
return paddle::framework::proto::VarType::BOOL;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when casting it into "
"paddle data type.",
dtype));
}
}
size_t DataTypeSize(DataType dtype) {
switch (dtype) {
case DataType::UNDEFINED:
return 0;
case DataType::BOOL:
return sizeof(bool);
case DataType::INT8:
return sizeof(int8_t);
case DataType::UINT8:
return sizeof(uint8_t);
case DataType::INT16:
return sizeof(int16_t);
case DataType::INT32:
return sizeof(int);
case DataType::INT64:
return sizeof(int64_t);
case DataType::BFLOAT16:
return sizeof(paddle::platform::bfloat16);
case DataType::FLOAT16:
return sizeof(paddle::platform::float16);
case DataType::FLOAT32:
return sizeof(float);
case DataType::FLOAT64:
return sizeof(double);
case DataType::COMPLEX64:
return sizeof(paddle::platform::complex<float>);
case DataType::COMPLEX128:
return sizeof(paddle::platform::complex<double>);
default:
return 0;
}
}
DataType String2DataType(const std::string& str) {
if (str == "bool") {
return DataType::BOOL;
} else if (str == "float16") {
return DataType::FLOAT16;
} else if (str == "float32") {
return DataType::FLOAT32;
} else if (str == "float64") {
return DataType::FLOAT64;
} else if (str == "int8") {
return DataType::INT8;
} else if (str == "int16") {
return DataType::INT16;
} else if (str == "int32") {
return DataType::INT32;
} else if (str == "int64") {
return DataType::INT64;
} else if (str == "uint8") {
return DataType::UINT8;
} else if (str == "complex64") {
return DataType::COMPLEX64;
} else if (str == "complex128") {
return DataType::COMPLEX128;
} else {
return DataType::UNDEFINED;
}
}
std::string DataType2String(DataType dtype) {
switch (dtype) {
case DataType::BOOL:
return "bool";
case DataType::INT8:
return "int8";
case DataType::UINT8:
return "uint8";
case DataType::INT16:
return "int16";
case DataType::INT32:
return "int32";
case DataType::INT64:
return "int64";
case DataType::FLOAT16:
return "float16";
case DataType::FLOAT32:
return "float32";
case DataType::FLOAT64:
return "float64";
case DataType::COMPLEX64:
return "complex64";
case DataType::COMPLEX128:
return "complex128";
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unknow pten::DataType, the int value = %d.",
static_cast<int>(dtype)));
return "";
}
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/common/backend.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/common/layout.h"
#include "paddle/pten/core/tensor_meta.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/place.h"
// TODO(chenweihang): this file may need to be removed
namespace paddle {
namespace framework {
using DataType = paddle::experimental::DataType;
using DataLayout = paddle::experimental::DataLayout;
DataType TransToPtenDataType(
const paddle::framework::proto::VarType::Type& dtype);
paddle::framework::proto::VarType::Type TransToProtoVarType(
const DataType& dtype);
size_t DataTypeSize(DataType dtype);
DataType String2DataType(const std::string& str);
std::string DataType2String(DataType dtype);
} // namespace framework
} // namespace paddle
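A brief, hypothetical usage sketch of the conversion helpers declared in this header (editorial, illustrative values only; not part of the commit):

// Editorial illustration only: exercising the conversion helpers above.
#include <cassert>
#include "paddle/fluid/framework/convert_utils.h"

void ConvertUtilsSketch() {
  using paddle::framework::DataType;
  // Round-trip between proto::VarType::Type and pten DataType.
  auto pten_dtype = paddle::framework::TransToPtenDataType(
      paddle::framework::proto::VarType::FP32);
  assert(pten_dtype == DataType::FLOAT32);
  assert(paddle::framework::TransToProtoVarType(pten_dtype) ==
         paddle::framework::proto::VarType::FP32);
  // Element size and string conversions for a pten DataType.
  assert(paddle::framework::DataTypeSize(DataType::FLOAT32) == sizeof(float));
  assert(paddle::framework::DataType2String(DataType::FLOAT32) == "float32");
  assert(paddle::framework::String2DataType("float32") == DataType::FLOAT32);
}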
......@@ -26,6 +26,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
......@@ -777,12 +778,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
std::vector<DataType> vec_custom_dtype;
for (size_t i = 0; i < ctx->InputSize(in_name); ++i) {
auto dtype = ctx->GetInputDataType(in_name, i);
vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype));
vec_custom_dtype.emplace_back(
paddle::framework::TransToPtenDataType(dtype));
}
vec_input_dtypes.emplace_back(vec_custom_dtype);
} else {
auto dtype = ctx->GetInputDataType(in_name);
input_dtypes.emplace_back(pten::TransToPtenDataType(dtype));
input_dtypes.emplace_back(
paddle::framework::TransToPtenDataType(dtype));
}
}
......@@ -794,12 +797,14 @@ void RegisterOperatorWithMetaInfo(const std::vector<OpMetaInfo>& op_meta_infos,
auto out_name = op_outputs[i];
if (detail::IsDuplicableVar(out_name)) {
for (size_t j = 0; j < output_dtypes.size(); ++j) {
auto dtype = pten::TransToProtoVarType(output_dtypes[i]);
auto dtype =
paddle::framework::TransToProtoVarType(output_dtypes[i]);
ctx->SetOutputDataType(out_name, dtype, j);
}
} else {
ctx->SetOutputDataType(out_name,
pten::TransToProtoVarType(output_dtypes[i]));
ctx->SetOutputDataType(
out_name,
paddle::framework::TransToProtoVarType(output_dtypes[i]));
}
}
};
......
......@@ -18,6 +18,7 @@
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_reuse.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -79,10 +80,10 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
}
out->Resize(make_ddim(dst_dim));
out->mutable_data(expected_kernel_type.place_, in.type());
out->mutable_data(expected_kernel_type.place_, in.dtype());
framework::VisitDataType(
in.type(),
framework::TransToProtoVarType(in.dtype()),
CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out));
out->set_layout(expected_kernel_type.data_layout_);
......@@ -153,11 +154,13 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
auto in_tz = paddle::framework::vectorize<int64_t>(in.dims());
auto out_tz = in_tz;
memory::data_type in_type = ToMKLDNNDataType(in.type());
PADDLE_ENFORCE_NE(in_type, memory::data_type::undef,
platform::errors::InvalidArgument(
"Input tensor type (%s) is not supported.",
DataTypeToString(in.type())));
memory::data_type in_type =
ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype()));
PADDLE_ENFORCE_NE(
in_type, memory::data_type::undef,
platform::errors::InvalidArgument(
"Input tensor type (%s) is not supported.",
DataTypeToString(framework::TransToProtoVarType(in.dtype()))));
auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
auto out_format =
......@@ -169,8 +172,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
if ((in_format != out_format) || always_copy) {
void* in_data = GetDataFromTensor(in, in_type);
platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type,
cpu_engine);
platform::ReorderMKLDNNHandler handler(
in_tz, framework::TransToProtoVarType(in.dtype()), in_type, cpu_engine);
auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
auto reorder_dst_memory_p =
......
......@@ -16,6 +16,7 @@
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
TEST(DataType, float16) {
......@@ -27,10 +28,11 @@ TEST(DataType, float16) {
Tensor tensor;
CPUPlace cpu;
tensor.mutable_data(cpu, dtype);
tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
// test fp16 tensor
EXPECT_EQ(tensor.type(), f::ToDataType(typeid(float16)));
EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
f::ToDataType(typeid(float16)));
// test fp16 size
EXPECT_EQ(f::SizeOfType(dtype), 2u);
......@@ -49,10 +51,11 @@ TEST(DataType, bfloat16) {
Tensor tensor;
CPUPlace cpu;
tensor.mutable_data(cpu, dtype);
tensor.mutable_data(cpu, f::TransToPtenDataType(dtype));
// test bf16 tensor
EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16)));
EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()),
f::ToDataType(typeid(bfloat16)));
// test bf16 size
EXPECT_EQ(f::SizeOfType(dtype), 2u);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/transform.h"
......@@ -65,12 +66,14 @@ struct CastDataType {
void TransDataType(const OpKernelType& kernel_type_for_var,
const OpKernelType& expected_kernel_type, const Tensor& in,
Tensor* out) {
PADDLE_ENFORCE_EQ(in.type(), kernel_type_for_var.data_type_,
platform::errors::InvalidArgument(
"The src dtype(%s) of input tensor and kernel_type(%s) "
"are not conststent.",
DataTypeToString(in.type()),
DataTypeToString(kernel_type_for_var.data_type_)));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(in.dtype()),
kernel_type_for_var.data_type_,
platform::errors::InvalidArgument(
"The src dtype(%s) of input tensor and kernel_type(%s) "
"are not conststent.",
DataTypeToString(framework::TransToProtoVarType(in.dtype())),
DataTypeToString(kernel_type_for_var.data_type_)));
auto dst_type = expected_kernel_type.data_type_;
TransDataType(in, dst_type, out);
}
......@@ -81,7 +84,7 @@ void TransDataType(const Tensor& in,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
out->Resize(in.dims());
auto src_type = in.type();
auto src_type = framework::TransToProtoVarType(in.dtype());
auto dst_type = type;
auto ctx = pool.Get(in.place());
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/platform/place.h"
......@@ -127,7 +128,7 @@ void AllReduceOpHandle::AllReduceImpl(
platform::errors::PreconditionNotMet(
"The numel of tensor %s should be > 0, but got numel is %d.",
in_var_handles[i]->name(), numel));
dtype = lod_tensor.type();
dtype = framework::TransToProtoVarType(lod_tensor.dtype());
is_gpu_place = platform::is_gpu_place(lod_tensor.place());
#if defined(PADDLE_WITH_XPU_BKCL)
is_xpu_place = platform::is_xpu_place(lod_tensor.place());
......@@ -139,7 +140,7 @@ void AllReduceOpHandle::AllReduceImpl(
"The size of tensors of the same variable in different local "
"scopes should be equal."));
PADDLE_ENFORCE_EQ(
dtype, lod_tensor.type(),
dtype, framework::TransToProtoVarType(lod_tensor.dtype()),
platform::errors::PreconditionNotMet(
"The dtype of tensors of the same variable in different local "
"scopes should be equal."));
......@@ -227,14 +228,15 @@ void AllReduceOpHandle::AllReduceFunc(
// Reduce All Tensor to trg in CPU
ReduceBufferData func(lod_tensor_data, trg.data(), numel);
VisitDataType(trg.type(), func);
VisitDataType(framework::TransToProtoVarType(trg.dtype()), func);
for (size_t i = 1; i < local_exec_scopes_.size(); ++i) {
auto &scope = local_exec_scopes_[i];
auto &p = places[i];
auto *var = scope->FindVar(out_var_names[i]);
size_t size = numel * SizeOfType(trg.type());
size_t size =
numel * SizeOfType(framework::TransToProtoVarType(trg.dtype()));
RunAndRecordEvent(p, [&trg, var, p, size] {
auto dst_ptr = var->GetMutable<framework::LoDTensor>()->data();
platform::CPUPlace cpu_place;
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/place.h"
......@@ -87,7 +88,8 @@ void BroadcastOpHandle::BroadcastOneVar(
int root_id = in_tensor.place().device;
std::vector<std::function<void()>> broadcast_calls;
int type = platform::ToNCCLDataType(in_tensor.type());
int type = platform::ToNCCLDataType(
framework::TransToProtoVarType(in_tensor.dtype()));
size_t numel = static_cast<size_t>(in_tensor.numel());
for (auto out_var_handle : out_var_handles) {
......@@ -147,7 +149,8 @@ void BroadcastOpHandle::BroadcastOneVar(
int root_id = in_tensor.place().device;
std::vector<std::function<void()>> broadcast_calls;
int type = platform::ToBKCLDataType(in_tensor.type());
int type = platform::ToBKCLDataType(
framework::TransToProtoVarType(in_tensor.dtype()));
size_t numel = static_cast<size_t>(in_tensor.numel());
for (auto out_var_handle : out_var_handles) {
......@@ -239,7 +242,7 @@ void BroadcastOpHandle::InitOutputValue(
}
VariableVisitor::ShareDimsAndLoD(*in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(t_out_p,
in_tensor.type());
in_tensor.dtype());
}
}
......
......@@ -16,6 +16,7 @@
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
......@@ -49,7 +50,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
if (tensor->numel() && tensor->IsInitialized()) {
// step1: check type
PADDLE_ENFORCE_EQ(
type, tensor->type(),
type, framework::TransToProtoVarType(tensor->dtype()),
platform::errors::InvalidArgument(
"The data type of fetched Tensors or the items of fetched "
"LoDTensorArray are different from each other on different "
......@@ -57,7 +58,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor,
"(th) fetched variable. Please set the "
"parameter `return_merged = False` when you "
"call the `Executor.run()` method.",
DataTypeToString(type), DataTypeToString(tensor->type()), offset));
DataTypeToString(type), tensor->dtype(), offset));
// step2: check layout
PADDLE_ENFORCE_EQ(
......@@ -139,7 +140,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
for (auto *t : src_lodtensors) {
if (t->numel() && t->IsInitialized()) {
check_dim = t->dims();
new_type = t->type();
new_type = paddle::framework::TransToProtoVarType(t->dtype());
new_layout = t->layout();
break;
}
......@@ -169,10 +170,10 @@ void FetchAsyncOpHandle::FetchMergedLodTensor(
dst_lodtensor->set_lod(src_lodtensors[0]->lod());
if (platform::is_gpu_place(src_lodtensors[0]->place())) {
dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(),
src_lodtensors[0]->type());
src_lodtensors[0]->dtype());
} else {
dst_lodtensor->mutable_data(platform::CPUPlace(),
src_lodtensors[0]->type());
src_lodtensors[0]->dtype());
}
// slice and memcpy
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/device_memory_aligment.h"
......@@ -337,7 +338,8 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
size_t size_of_dtype = 0;
for (size_t i = 0; i < grad_tensor.size(); ++i) {
// Get dtype
auto ele_dtype = grad_tensor.at(i).second->type();
auto ele_dtype =
framework::TransToProtoVarType(grad_tensor.at(i).second->dtype());
if (i == 0) {
*dtype = ele_dtype;
size_of_dtype = framework::SizeOfType(ele_dtype);
......
......@@ -115,7 +115,7 @@ void GatherOpHandle::RunImpl() {
DDim out_dim = pre_in_value.GetCompleteDims();
out_dim[0] = static_cast<int64_t>(rows);
out_value->mutable_value()->Resize(out_dim).mutable_data(
t_out_p, pre_in_value.value().type());
t_out_p, pre_in_value.value().dtype());
Tensor *out_tensor = out_value->mutable_value();
// copy
......
......@@ -19,6 +19,7 @@
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -307,7 +308,7 @@ void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
const platform::Place& place) {
TensorCheckerVisitor<platform::CPUDeviceContext> vistor(op_type, var_name,
tensor, place);
VisitDataType(tensor.type(), vistor);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
void CheckVarHasNanOrInf(const std::string& op_type,
......@@ -348,7 +349,8 @@ void CheckVarHasNanOrInf(const std::string& op_type,
return;
} else if (platform::is_xpu_place(tensor->place())) {
#ifdef PADDLE_WITH_XPU
if (tensor->type() != proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) {
return;
}
......@@ -377,14 +379,15 @@ void CheckVarHasNanOrInf(const std::string& op_type,
return;
} else if (platform::is_npu_place(tensor->place())) {
#ifdef PADDLE_WITH_ASCEND_CL
if (tensor->type() != proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) {
return;
}
framework::LoDTensor cpu_tensor;
cpu_tensor.Resize(tensor->dims());
float* cpu_data = static_cast<float*>(
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type()));
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()));
framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
bool flag = false;
......@@ -475,8 +478,10 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
return;
}
if ((tensor->type() != proto::VarType::FP32) &&
(tensor->type() != proto::VarType::FP16)) {
if ((framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP32) &&
(framework::TransToProtoVarType(tensor->dtype()) !=
proto::VarType::FP16)) {
return;
}
......@@ -490,16 +495,17 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name,
framework::Tensor cpu_tensor;
cpu_tensor.Resize(tensor->dims());
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type());
cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype());
framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
LOG(WARNING) << "print [" << var_name << "] tensor info:";
// use env strategy control in future, -1=print_all.
int print_num = 3;
if (tensor->type() == proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
const float* value = cpu_tensor.data<float>();
PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
} else if (tensor->type() == proto::VarType::FP16) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::FP16) {
const paddle::platform::float16* value =
cpu_tensor.data<paddle::platform::float16>();
PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false);
......
......@@ -19,6 +19,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -208,7 +209,7 @@ void tensor_check<platform::CUDADeviceContext>(const std::string& op_type,
TensorCheckerVisitor<platform::CUDADeviceContext> vistor(op_type, var_name,
tensor, place);
VisitDataType(tensor.type(), vistor);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
} // namespace details
......
......@@ -130,7 +130,8 @@ struct GatherLocalSelectedRowsFunctor {
DDim out_dim = pre_in->GetCompleteDims();
out_dim[0] = static_cast<int64_t>(rows);
dst_tensor.mutable_value()->Resize(out_dim);
dst_tensor.mutable_value()->mutable_data(out_place, pre_in->value().type());
dst_tensor.mutable_value()->mutable_data(out_place,
pre_in->value().dtype());
}
void operator()() {
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
......@@ -150,7 +151,8 @@ void ReduceOpHandle::RunImpl() {
if (!FLAGS_cpu_deterministic) {
ReduceLoDTensor func(lod_tensors,
out_var->GetMutable<framework::LoDTensor>());
VisitDataType(lod_tensors[0]->type(), func);
VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
func);
} else {
// We sum lod_tensors to reduce_sum_trg which is in local_scopes_0
// here, but it doesn't mean reduce_sum_trg must be in local_scopes_0.
......@@ -158,7 +160,8 @@ void ReduceOpHandle::RunImpl() {
->FindVar(out_var_handle->name())
->GetMutable<framework::LoDTensor>();
ReduceLoDTensor func(lod_tensors, &reduce_sum_trg);
VisitDataType(lod_tensors[0]->type(), func);
VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()),
func);
auto trg = out_var->GetMutable<framework::LoDTensor>();
if (reduce_sum_trg.data() != trg->data()) {
......@@ -171,7 +174,7 @@ void ReduceOpHandle::RunImpl() {
auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(
out_var_handle->place(), pre_in.type());
out_var_handle->place(), pre_in.dtype());
auto out_p = out_var_handle->place();
int root_id = out_p.device;
......@@ -191,7 +194,8 @@ void ReduceOpHandle::RunImpl() {
out_var_handle->place());
}
int type = platform::ToNCCLDataType(lod_tensor.type());
int type = platform::ToNCCLDataType(
framework::TransToProtoVarType(lod_tensor.dtype()));
size_t numel = static_cast<size_t>(lod_tensor.numel());
all_reduce_calls.emplace_back(
[buffer, recvbuffer, type, numel, root_id, &nccl_ctx] {
......@@ -217,7 +221,7 @@ void ReduceOpHandle::RunImpl() {
auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(
out_var_handle->place(), pre_in.type());
out_var_handle->place(), pre_in.dtype());
auto out_p = out_var_handle->place();
int root_id = out_p.device;
......@@ -237,7 +241,8 @@ void ReduceOpHandle::RunImpl() {
out_var_handle->place());
}
int type = platform::ToBKCLDataType(lod_tensor.type());
int type = platform::ToBKCLDataType(
framework::TransToProtoVarType(lod_tensor.dtype()));
size_t numel = static_cast<size_t>(lod_tensor.numel());
all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id,
&bkcl_ctx] {
......
......@@ -17,6 +17,7 @@
#include <utility>
#include "dgc/dgc.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
......@@ -151,7 +152,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto &out = *outs[i];
float *out_tensor_buf = out.data<float>();
dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype;
dtype = (dtype == -1) ? platform::ToNCCLDataType(
framework::TransToProtoVarType(in.dtype()))
: dtype;
in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel;
PADDLE_ENFORCE_EQ(in_numel % 2, 0,
platform::errors::InvalidArgument(
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
namespace pten {
......@@ -115,7 +116,7 @@ struct EnforceShapeAndDTypeEQVisitor {
"The place type of the two variables is not equal. The src place "
"is %s, but the dst place is %s",
src.place().DebugString(), tensor.place().DebugString()));
PADDLE_ENFORCE_EQ(src.type(), tensor.type(),
PADDLE_ENFORCE_EQ(src.dtype(), tensor.dtype(),
platform::errors::PreconditionNotMet(
"The dtype of the two variables is not equal."));
PADDLE_ENFORCE_EQ(
......
......@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace pten {
class DenseTensor;
} // namespace pten
......@@ -58,11 +60,13 @@ std::string PrintLodTensorIntType(Tensor* tensor, int64_t start, int64_t end) {
std::string PrintLodTensor(Tensor* tensor, int64_t start, int64_t end) {
std::string out_val;
if (tensor->type() == proto::VarType::FP32) {
if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) {
out_val = PrintLodTensorType<float>(tensor, start, end);
} else if (tensor->type() == proto::VarType::INT64) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::INT64) {
out_val = PrintLodTensorIntType(tensor, start, end);
} else if (tensor->type() == proto::VarType::FP64) {
} else if (framework::TransToProtoVarType(tensor->dtype()) ==
proto::VarType::FP64) {
out_val = PrintLodTensorType<double>(tensor, start, end);
} else {
out_val = "unsupported type";
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
......@@ -153,12 +154,13 @@ void DistMultiTrainer::Finalize() {
}
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/dlpack_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
namespace paddle {
......@@ -134,7 +135,8 @@ DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) {
t_.device = paddle::platform::VisitPlace(place, internal::DLDeviceVisitor());
// init dtype
t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type());
t_.dtype = internal::GetDLDataTypeFromTypeIndex(
framework::TransToProtoVarType(tensor.dtype()));
t_.dtype.lanes = lanes;
// init ndim, tensor rank
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include "google/protobuf/text_format.h"
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
......@@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) {
static void print_fetch_var(Scope* scope, const std::string& var_name) {
auto& tensor = scope->FindVar(var_name)->Get<LoDTensor>();
#define PrintLoDTensorCallback(cpp_type, proto_type) \
do { \
if (tensor.type() == proto_type) { \
print_lod_tensor<cpp_type>(var_name, tensor); \
return; \
} \
#define PrintLoDTensorCallback(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \
print_lod_tensor<cpp_type>(var_name, tensor); \
return; \
} \
} while (0)
_ForEachDataType_(PrintLoDTensorCallback);
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type();
VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.dtype();
}
void ExecutorThreadWorker::TrainFilesWithTimer() {
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
......@@ -146,13 +147,16 @@ class AscendInstance {
// }
ge::Shape shape(vec_dim);
ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND,
VarTypeToGeType(tensor->type()));
ge::TensorDesc tensor_desc(
shape, ge::Format::FORMAT_ND,
VarTypeToGeType(framework::TransToProtoVarType(tensor->dtype())));
tensor_desc.SetRealDimCnt(vec_dim.size());
const uint8_t *data = reinterpret_cast<const uint8_t *>(tensor->data());
std::vector<uint8_t> dst(numel * GeTypeSize(tensor->type()));
memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel);
std::vector<uint8_t> dst(
numel * GeTypeSize(framework::TransToProtoVarType(tensor->dtype())));
memcpy(dst.data(), data,
GeTypeSize(framework::TransToProtoVarType(tensor->dtype())) * numel);
ge::Tensor ge_tensor(tensor_desc, dst);
return ge_tensor;
}
......
......@@ -28,6 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/fleet/heter_wrapper.h"
#if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h"
namespace paddle {
......@@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
LoDTensor* tensor = var->GetMutable<LoDTensor>();
req_var->set_varname(varname);
req_var->set_type(LOD_TENSOR);
req_var->set_data_type(static_cast<VariableMessage::Type>(tensor->type()));
req_var->set_data_type(static_cast<VariableMessage::Type>(
framework::TransToProtoVarType(tensor->dtype())));
for (auto& dim : framework::vectorize(tensor->dims())) {
req_var->add_dims(dim);
......@@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope,
auto* req_data = req_var->mutable_data();
req_data->clear();
req_data->resize(tensor->numel() * SizeOfType(tensor->type()));
req_data->resize(tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
char* data_ptr = const_cast<char*>(req_data->data());
if (platform::is_cpu_place(tensor->place())) {
memcpy(data_ptr, tensor->data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
} else {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(),
tensor->data(), tensor->numel() * SizeOfType(tensor->type()),
nullptr);
memory::Copy(
platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
nullptr);
#endif
#ifdef PADDLE_WITH_XPU
memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(),
tensor->data(), tensor->numel() * SizeOfType(tensor->type()));
memory::Copy(
platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(),
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
}
......@@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, ToVarType(req_var.data_type()));
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()), stream);
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())),
stream);
#else
memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
#endif
......@@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
}
tensor->set_lod(lod);
void* tensor_data =
tensor->mutable_data(place, ToVarType(req_var.data_type()));
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(ToVarType(req_var.data_type())));
#ifdef PADDLE_WITH_XPU
memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#else
memcpy(tensor_data, req_var.data().data(),
tensor->numel() * SizeOfType(tensor->type()));
tensor->numel() *
SizeOfType(framework::TransToProtoVarType(tensor->dtype())));
#endif
}
......
......@@ -11,7 +11,9 @@ limitations under the License. */
#if defined(PADDLE_WITH_PSCORE)
#include <float.h>
#include "paddle/fluid/distributed/ps/service/heter_server.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
......@@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope,
auto* tensor = var->GetMutable<framework::LoDTensor>();
std::vector<int> dims{1};
tensor->Resize(framework::make_ddim(dims));
void* tensor_data =
tensor->mutable_data(place, framework::proto::VarType::FP32);
void* tensor_data = tensor->mutable_data(
place, framework::TransToPtenDataType(framework::proto::VarType::FP32));
if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
std::vector<char> temp;
temp.resize(tensor->numel() * framework::SizeOfType(tensor->type()));
temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype()));
char* temp_ptr = temp.data();
float* temp_ptr_float = reinterpret_cast<float*>(temp_ptr);
temp_ptr_float[0] = micro_id;
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(*dev_ctx).stream();
memory::Copy(place, tensor_data, platform::CPUPlace(),
reinterpret_cast<void*>(temp_ptr),
tensor->numel() * framework::SizeOfType(tensor->type()),
stream);
memory::Copy(
place, tensor_data, platform::CPUPlace(),
reinterpret_cast<void*>(temp_ptr),
tensor->numel() * framework::SizeOfType(
framework::TransToProtoVarType(tensor->dtype())),
stream);
#endif
} else {
float* temp_ptr = reinterpret_cast<float*>(tensor_data);
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "io/fs.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h"
#include "paddle/fluid/framework/device_worker_factory.h"
......@@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) {
InitializeVariable(ptr, proto::VarType::LOD_TENSOR);
LoDTensor* thread_tensor = ptr->GetMutable<LoDTensor>();
#define HeterMemcpyFunc(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \
} \
#define HeterMemcpyFunc(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place, stream); \
} \
} while (0)
#define HeterMemcpyXpuFunc(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \
} \
#define HeterMemcpyXpuFunc(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
HeterMemCpy<cpp_type>(thread_tensor, root_tensor, place); \
} \
} while (0)
#ifdef PADDLE_WITH_CUDA
_ForEachDataType_(HeterMemcpyFunc);
......@@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
// }
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......@@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA
auto dev_id = thread_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id);
cudaMemset(thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
cudaMemset(
thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif
#ifdef PADDLE_WITH_XPU
auto place = thread_tensor->place();
......@@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
xpu::memset(
xpu_ctx->x_context(), thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
thread_tensor->dtype())));
#endif
} else {
memset(thread_tensor->data(), 0,
thread_tensor->numel() * SizeOfType(thread_tensor->type()));
thread_tensor->numel() *
SizeOfType(
framework::TransToProtoVarType(thread_tensor->dtype())));
}
}
auto* merge_var = response->add_vars();
......@@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
#ifdef PADDLE_WITH_CUDA
auto dev_id = root_tensor->place().device;
platform::CUDADeviceGuard guard(dev_id);
cudaMemset(root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
cudaMemset(
root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif
#ifdef PADDLE_WITH_XPU
auto place = root_tensor->place();
......@@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request,
platform::DeviceContext* dev_ctx = pool.Get(place);
const platform::XPUDeviceContext* xpu_ctx =
reinterpret_cast<const platform::XPUDeviceContext*>(dev_ctx);
xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
xpu::memset(
xpu_ctx->x_context(), root_tensor->data(), 0,
root_tensor->numel() *
SizeOfType(framework::TransToProtoVarType(root_tensor->dtype())));
#endif
} else {
memset(root_tensor->data(), 0,
root_tensor->numel() * SizeOfType(root_tensor->type()));
root_tensor->numel() * SizeOfType(framework::TransToProtoVarType(
root_tensor->dtype())));
}
}
return 0;
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <ctime>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
......@@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) {
LoDTensor *thread_tensor = ptr1->GetMutable<LoDTensor>();
LoDTensor *root_tensor =
root_scope_->FindVar(var->Name())->GetMutable<LoDTensor>();
#define MemsetCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \
} \
#define MemsetCallback(cpp_type, proto_type) \
do { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
SetZero<cpp_type>(thread_tensor, root_tensor, tensor_dim); \
} \
} while (0)
_ForEachDataType_(MemsetCallback);
}
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/platform/enforce.h"
......@@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor {
}
} else {
auto* var = BOOST_GET_CONST(VarDesc*, var_);
return pten::TransToPtenDataType(var->GetDataType());
return paddle::framework::TransToPtenDataType(var->GetDataType());
}
}
......@@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor {
}
} else {
auto* var = BOOST_GET(VarDesc*, var_);
var->SetDataType(pten::TransToProtoVarType(dtype));
var->SetDataType(paddle::framework::TransToProtoVarType(dtype));
}
}
......
......@@ -16,6 +16,7 @@
#include <cmath>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace pten {
......@@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
patterns::PDNodeName(name_scope_, "eltwise_y_in"));
// Set shape && datatype manually
eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(ac_bias_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(ac_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
......
......@@ -16,6 +16,7 @@
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
......@@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
VarDesc eltwise_y_in_desc(
patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in"));
eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims()));
eltwise_y_in_desc.SetDataType(bn_bias_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(bn_bias_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
......@@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const {
name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count)));
eltwise_y_in_desc.SetShape(
framework::vectorize(eltwise_y_in_tensor->dims()));
eltwise_y_in_desc.SetDataType(eltwise_y_in_tensor->type());
eltwise_y_in_desc.SetDataType(
framework::TransToProtoVarType(eltwise_y_in_tensor->dtype()));
eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel());
eltwise_y_in_desc.SetPersistable(true);
auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
......
......@@ -14,6 +14,7 @@
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
......@@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
"but has %s elements.",
eps_tensor->numel()));
CHECK_TRUE(
eps_tensor->type() == proto::VarType::FP32,
framework::TransToProtoVarType(eps_tensor->dtype()) ==
proto::VarType::FP32,
::paddle::string::Sprintf("The LayerNorm divisor epsilon value "
"must be of FP32 data type, but is %s.",
eps_tensor->type()));
eps_tensor->dtype()));
CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"),
"Validation of input mean node failed.");
......@@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable<LoDTensor>();
VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale"));
new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]});
new_gamma_desc.SetDataType(gamma_tensor->type());
new_gamma_desc.SetDataType(
framework::TransToProtoVarType(gamma_tensor->dtype()));
new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel());
new_gamma_desc.SetPersistable(true);
auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc);
......@@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const {
auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable<LoDTensor>();
VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias"));
new_beta_desc.SetShape({layer_norm_x_mat_dims[1]});
new_beta_desc.SetDataType(beta_tensor->type());
new_beta_desc.SetDataType(
framework::TransToProtoVarType(beta_tensor->dtype()));
new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel());
new_beta_desc.SetPersistable(true);
auto* new_beta_node = g->CreateVarNode(&new_beta_desc);
......
......@@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void MainTest(bool convWithExistingBias) {
......
......@@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
......
......@@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) {
auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, 1);
tensor->mutable_data(place,
framework::TransToPtenDataType(proto::VarType::FP32), 1);
}
void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <stdint.h>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/version.h"
namespace paddle {
......@@ -327,7 +328,7 @@ std::vector<LoDTensor> SplitLoDTensor(
for (size_t i = 0; i < places.size(); ++i) {
LoDTensor dst;
dst.Resize(src.dims());
dst.mutable_data(places[i], src.type());
dst.mutable_data(places[i], src.dtype());
if (!src.lod().empty()) {
dst.set_lod(src.lod());
}
......@@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target,
for (auto *t : lod_tensors) {
if (t->numel() && t->IsInitialized()) {
new_dim = t->dims();
new_type = t->type();
new_type = framework::TransToProtoVarType(t->dtype());
new_layout = t->layout();
break;
}
......@@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target,
auto *t = lod_tensors[i];
if (t->numel() && t->IsInitialized()) {
PADDLE_ENFORCE_EQ(
new_type, t->type(),
new_type, framework::TransToProtoVarType(t->dtype()),
platform::errors::InvalidArgument(
"LoDTensor data type does not match, expected type is %s, actual "
"type is %s.",
DataTypeToString(new_type), DataTypeToString(t->type())));
DataTypeToString(new_type),
DataTypeToString(framework::TransToProtoVarType(t->dtype()))));
PADDLE_ENFORCE_EQ(
new_layout, t->layout(),
platform::errors::InvalidArgument(
......@@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target,
target->Resize(new_dim);
target->set_layout(new_layout);
target->set_lod(new_lod);
target->mutable_data(dst_place, new_type);
target->mutable_data(dst_place,
paddle::framework::TransToPtenDataType(new_type));
int begin = 0;
for (auto *src : lod_tensors) {
......
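// A minimal standalone sketch of the dtype round-trip that the hunks above rely
// on. It only uses helpers already shown in this diff (TransToProtoVarType,
// TransToPtenDataType, DataTypeToString, DataTypeSize); LogTensorDType is an
// illustrative name, and convert_utils.h / lod_tensor.h are assumed to be
// included.
static void LogTensorDType(const paddle::framework::LoDTensor& t) {
  namespace fw = paddle::framework;
  // pten/experimental DataType -> legacy proto::VarType::Type (for old APIs)
  auto proto_type = fw::TransToProtoVarType(t.dtype());
  // legacy proto type -> pten DataType (e.g. for mutable_data(place, DataType))
  auto pten_type = fw::TransToPtenDataType(proto_type);
  VLOG(3) << "dtype=" << fw::DataTypeToString(proto_type)
          << ", element bytes=" << fw::DataTypeSize(t.dtype());
  (void)pten_type;  // silence unused-variable warnings in this sketch
}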
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
#include "paddle/fluid/platform/lodtensor_printer.h"
......@@ -250,12 +251,13 @@ void MultiTrainer::Finalize() {
LoDTensor* thread_tensor = thread_var->GetMutable<LoDTensor>();
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/new_executor/data_transfer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace framework {
......@@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
continue;
}
// only focus on complex dtype now
auto src_type = grad_tensor->type();
auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!framework::IsComplexType(src_type)) {
VLOG(3) << "skip grad_tensor with not complexType";
continue;
......@@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node,
platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released
auto dst_type = tensor->saved_type();
auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting
if (framework::IsComplexType(dst_type)) {
continue;
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/details/nan_inf_utils.h"
......@@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) {
if (UNLIKELY(!tensor.IsInitialized())) {
return "";
}
return DataTypeToString(tensor.type());
return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
} else if (var->IsType<pten::SelectedRows>()) {
auto tensor = var->Get<pten::SelectedRows>().value();
if (UNLIKELY(!tensor.IsInitialized())) {
return "uninited";
} else {
return DataTypeToString(tensor.type());
return DataTypeToString(framework::TransToProtoVarType(tensor.dtype()));
}
} else if (var->IsType<Strings>()) {
return "strings";
......@@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type,
if (tensor.memory_size() == 0) {
return;
}
if (tensor.type() != proto::VarType::FP32 &&
tensor.type() != proto::VarType::FP64) {
if (framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP32 &&
framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) {
return;
}
PADDLE_ENFORCE_NE(
......@@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
continue;
}
// only focus on complex dtype now
auto src_type = grad_tensor->type();
auto src_type = framework::TransToProtoVarType(grad_tensor->dtype());
if (!IsComplexType(src_type)) {
continue;
}
......@@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad(
platform::errors::Unavailable(
"Forward tensor is nullptr when handle complex data to real."));
// only need record type, the allocation may have been released
auto dst_type = tensor->saved_type();
auto dst_type = framework::TransToProtoVarType(tensor->dtype());
// only focus on real dtype and need casting
if (IsComplexType(dst_type)) {
continue;
......@@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType(
platform::errors::InvalidArgument("The %s Op's Input Variable `%s` "
"contains uninitialized Tensor.",
Type(), name));
proto::VarType::Type tmp = t->type();
proto::VarType::Type tmp =
paddle::framework::TransToProtoVarType(t->dtype());
PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type,
platform::errors::InvalidArgument(
"The DataType of %s Op's duplicable or different "
......@@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes(
auto* tensor_b = GetTensorFormInputSafely(ctx, name2);
// 2. Get two input types
auto type_a = tensor_a->type();
auto type_b = tensor_b->type();
auto type_a = framework::TransToProtoVarType(tensor_a->dtype());
auto type_b = framework::TransToProtoVarType(tensor_b->dtype());
// 3. Get first input type or promote complex types
auto target_type = PromoteTypesIfComplexExists(type_a, type_b);
......@@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext(
pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType(
auto data_type = paddle::framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr)));
pt_kernel_context->EmplaceBackAttr(data_type);
......
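// A minimal sketch of how an integer attribute becomes a pten::DataType for the
// pten kernel context, mirroring the BuildPtenKernelContext hunk above;
// AttrToPtenDType is an illustrative name, and the framework attribute headers
// (providing Attribute and BOOST_GET_CONST) are assumed to be included.
static pten::DataType AttrToPtenDType(const paddle::framework::Attribute& attr) {
  // attributes store dtypes as the integer value of proto::VarType::Type
  return paddle::framework::TransToPtenDataType(
      static_cast<paddle::framework::proto::VarType::Type>(
          BOOST_GET_CONST(int, attr)));
}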
......@@ -40,6 +40,7 @@ limitations under the License. */
#include "paddle/fluid/platform/variant.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/compat/arg_map_context.h"
#include "paddle/pten/core/compat/op_utils.h"
#include "paddle/pten/core/kernel_context.h"
......@@ -422,8 +423,8 @@ class ExecutionContext {
"size(%d).",
allocation_ptr->size(), framework::product(dim) * sizeof(T)));
paddle::framework::Tensor temp_tensor(
framework::ToDataType(std::type_index(typeid(T))));
paddle::framework::Tensor temp_tensor(framework::TransToPtenDataType(
framework::ToDataType(std::type_index(typeid(T)))));
temp_tensor.Resize(dim);
temp_tensor.ResetHolder(std::move(shared_allocation));
return temp_tensor;
......
......@@ -27,6 +27,7 @@ limitations under the License. */
#include "cinn/frontend/op_mappers/use_op_mappers.h"
#include "cinn/frontend/var_type_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
......@@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor(
// op
auto tensor_type = ::paddle::framework::proto::VarType::FP32;
if (!skip_trans_type) {
tensor_type = tensor.type();
tensor_type = framework::TransToProtoVarType(tensor.dtype());
}
auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type);
info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h"
namespace paddle {
......@@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test {
LoDTensor tensor;
DDim dims = {256, 1024};
tensor.Resize(dims);
tensor.mutable_data(platform::CPUPlace(), proto::VarType::FP32);
tensor.mutable_data(
platform::CPUPlace(),
framework::TransToPtenDataType(framework::proto::VarType::FP32));
return tensor;
};
#define FillFeedList(Name) feed_targets[#Name] = create_tensor();
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/details/async_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
......@@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices(
std::vector<void *> buffers;
buffers.reserve(member_->places_.size());
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
ncclDataType_t data_type = platform::ToNCCLDataType(
framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
void *buffer;
......@@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type());
buffer = t->mutable_data(place, main_tensor.dtype());
}
buffers.push_back(buffer);
}
......@@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices(
// but broadcast is equivalent to no type of operation, does not affect
// correctness.
BKCLDataType data_type = BKCL_FLOAT;
// BKCLDataType data_type = platform::ToBKCLDataType(main_tensor.type());
// BKCLDataType data_type =
// platform::ToBKCLDataType(framework::TransToProtoVarType(main_tensor.dtype()));
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
void *buffer;
......@@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type());
buffer = t->mutable_data(place, main_tensor.dtype());
}
buffers.push_back(buffer);
}
......@@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices(
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]);
auto broadcast_numel = numel;
if (main_tensor.type() == framework::proto::VarType::INT64) {
if (framework::TransToProtoVarType(main_tensor.dtype()) ==
framework::proto::VarType::INT64) {
broadcast_numel *= 2;
}
PADDLE_ENFORCE_EQ(
......@@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto copy_memory = [&] {
t->Resize(dims);
t->mutable_data(cpu, main_tensor.type());
t->mutable_data(cpu, main_tensor.dtype());
paddle::framework::TensorCopy(main_tensor, cpu, t);
};
......
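// A minimal sketch (assuming an NCCL/RCCL build with the usual nccl_helper.h
// include) of the pattern used above: allocation now takes the pten DataType
// directly, while NCCL still needs the legacy proto type, hence the conversion.
// BroadcastBufferSketch is an illustrative name, not an API of this commit.
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
static void BroadcastBufferSketch(paddle::framework::LoDTensor* t,
                                  const paddle::platform::Place& place) {
  namespace fw = paddle::framework;
  void* buffer = t->mutable_data(place, t->dtype());   // pten DataType path
  ncclDataType_t nccl_dtype = paddle::platform::ToNCCLDataType(
      fw::TransToProtoVarType(t->dtype()));             // legacy proto path
  (void)buffer;
  (void)nccl_dtype;
}
#endif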
......@@ -15,6 +15,7 @@ limitations under the License. */
#include <cstdlib>
#include <string>
#include <vector>
#include "io/fs.h"
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/data_set.h"
......@@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() {
}
#define MergeCallback(cpp_type, proto_type) \
do { \
if (root_tensor->type() == proto_type) { \
if (thread_tensor->type() != proto_type) { \
if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \
if (framework::TransToProtoVarType(thread_tensor->dtype()) != \
proto_type) { \
VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \
<< "] " << need_merge_var_names_[i] \
<< ", root tensor type=" << root_tensor->type() \
<< ", thread tensor type=" << thread_tensor->type(); \
<< ", root tensor type=" << root_tensor->dtype() \
<< ", thread tensor type=" << thread_tensor->dtype(); \
exit(-1); \
} \
MergeToRootScope<cpp_type>(root_tensor, thread_tensor); \
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <sstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
OpKernelType TransPtenKernelKeyToOpKernelType(
const pten::KernelKey& kernel_key) {
proto::VarType::Type data_type =
pten::TransToProtoVarType(kernel_key.dtype());
paddle::framework::TransToProtoVarType(kernel_key.dtype());
// no need to set current device id here
platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false);
DataLayout data_layout = kernel_key.layout();
......@@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey(
}
paddle::experimental::DataLayout layout = kernel_type.data_layout_;
paddle::experimental::DataType dtype =
pten::TransToPtenDataType(kernel_type.data_type_);
paddle::framework::TransToPtenDataType(kernel_type.data_type_);
return pten::KernelKey(backend, layout, dtype);
}
......
......@@ -16,6 +16,7 @@
#include <fstream>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/layer.h"
namespace paddle {
......@@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name,
auto tensor = itera.second;
proto::VarType::TensorDesc desc;
desc.set_data_type(tensor->type());
desc.set_data_type(framework::TransToProtoVarType(tensor->dtype()));
auto dims = framework::vectorize(tensor->dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
......@@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name,
// save tensor
uint64_t data_size =
tensor->numel() * framework::SizeOfType(tensor->type());
tensor->numel() * framework::DataTypeSize(tensor->dtype());
auto* data_ptr = tensor->data();
if (platform::is_gpu_place(tensor->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/complex.h"
......@@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
// than numel()*size(type())
auto dst_ptr =
src.layout() == DataLayout::kMKLDNN
? dst->mutable_data(dst_place, src.type(), src.memory_size())
: dst->mutable_data(dst_place, src.type());
? dst->mutable_data(dst_place, src.dtype(), src.memory_size())
: dst->mutable_data(dst_place, src.dtype());
#else
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
#endif
if (src_ptr == dst_ptr && src_place == dst_place) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
......@@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
#ifdef PADDLE_WITH_MKLDNN
auto size = src.layout() == DataLayout::kMKLDNN
? src.memory_size()
: src.numel() * SizeOfType(src.type());
: src.numel() * framework::DataTypeSize(src.dtype());
#else
auto size = src.numel() * SizeOfType(src.type());
auto size = src.numel() * framework::DataTypeSize(src.dtype());
#endif
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
......@@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(src.dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, src.type());
npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......@@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
#endif
auto src_place = src.place();
auto src_ptr = src.data();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto dst_ptr = dst->mutable_data(dst_place, src.dtype());
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
if (src_ptr == dst_ptr && src_place == dst_place) {
......@@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
return;
}
auto size = src.numel() * SizeOfType(src.type());
auto size = src.numel() * framework::DataTypeSize(src.dtype());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
}
......@@ -582,8 +583,9 @@ struct AnyDTypeVisitor {
template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AnyDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
VisitDataType(
framework::TransToProtoVarType(tensor.dtype()),
AnyDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
}
template <typename Predicate>
......@@ -722,8 +724,9 @@ struct AllDTypeVisitor {
template <typename Predicate, typename DevCtx>
inline void AllImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(tensor.type(), AllDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
VisitDataType(
framework::TransToProtoVarType(tensor.dtype()),
AllDTypeVisitor<Predicate, DevCtx>(predicate, tensor, ctx, out));
}
template <typename Predicate>
......@@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
// int32_t size
// void* protobuf message
proto::VarType::TensorDesc desc;
desc.set_data_type(tensor.type());
desc.set_data_type(framework::TransToProtoVarType(tensor.dtype()));
auto dims = framework::vectorize(tensor.dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
......@@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
os.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype());
auto* data_ptr = tensor.data();
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
......@@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) {
dev_ctx.Wait();
}
#define PrintTensorCallback(cpp_type, proto_type) \
do { \
if (tensor.type() == proto_type) { \
os << " - dtype: " << proto_type << "\n"; \
paddle::framework::print_tensor<cpp_type>(os, tensor); \
return os; \
} \
#define PrintTensorCallback(cpp_type, proto_type) \
do { \
if (paddle::framework::TransToProtoVarType(tensor.dtype()) == \
proto_type) { \
os << " - dtype: " << proto_type << "\n"; \
paddle::framework::print_tensor<cpp_type>(os, tensor); \
return os; \
} \
} while (0)
_ForEachDataType_(PrintTensorCallback);
......
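// A minimal sketch of the byte-size computation the TensorCopy hunks above now
// use: the destination is allocated with the source's pten DataType and the raw
// size comes from DataTypeSize(dtype()) instead of SizeOfType(type()).
// CpuCopySketch is an illustrative name, both tensors are assumed to live on
// CPU, and <cstring> plus the framework headers are assumed to be included.
static void CpuCopySketch(const paddle::framework::Tensor& src,
                          paddle::framework::Tensor* dst) {
  namespace fw = paddle::framework;
  dst->Resize(src.dims());
  void* dst_ptr = dst->mutable_data(paddle::platform::CPUPlace(), src.dtype());
  auto size = src.numel() * fw::DataTypeSize(src.dtype());  // bytes to copy
  std::memcpy(dst_ptr, src.data(), size);  // stands in for memory::Copy(...)
}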
......@@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type());
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......@@ -211,7 +211,7 @@ void TensorFromVector(const std::vector<T>& src,
// so pass nullptr as stream to memory::Copy().
else if (platform::is_npu_place(dst_place)) { // NOLINT
// 1. vector -> npu pinned tensor
Tensor npu_pinned_tensor(dst->type());
Tensor npu_pinned_tensor(dst->dtype());
platform::NPUPinnedPlace npu_pinned_place;
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data<T>(dst->dims(), npu_pinned_place);
......@@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector<bool>& src,
Tensor npu_pinned_tensor;
npu_pinned_tensor.Resize(dst->dims());
auto npu_pinned_ptr =
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type());
npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype());
memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size);
// 2. async copy npu pinned tensor -> npu tensor
......
......@@ -15,6 +15,7 @@
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/imperative/all_reduce.h"
#include "paddle/fluid/framework/convert_utils.h"
#ifdef PADDLE_WITH_NCCL
#include <nccl.h>
......@@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data();
dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type());
auto nccl_dtype = platform::ToNCCLDataType(src.type());
auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto nccl_dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(),
stream));
......@@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
platform::errors::Unimplemented(
"Imperative mode does not support multi-CPU training yet."));
auto dtype = src_tensor.type();
auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
auto nccl_dtype = platform::ToNCCLDataType(dtype);
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
......@@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst,
dims[0] = rows_num;
auto feature_size = framework::product(dims) / dims[0];
dst_tensor->Resize(dims);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
const auto *src_tensor_ptr = src_tensor.data();
auto sizeof_dtype = framework::SizeOfType(dtype);
......
......@@ -24,6 +24,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) {
// correct. var->DataType() returns the default dtype, which is float32.
    // Here, we use the corresponding forward data type instead.
tensor->mutable_data(op.place(), var->ForwardDataType());
tensor->mutable_data(
op.place(), framework::TransToPtenDataType(var->ForwardDataType()));
VLOG(6) << "Set ungenerated Grad: " << var->Name()
<< " as zero with dtype "
<< framework::DataTypeToString(var->ForwardDataType());
......
......@@ -13,13 +13,14 @@
// limitations under the License.
#if defined(PADDLE_WITH_XPU_BKCL)
#include "paddle/fluid/imperative/bkcl_context.h"
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/bkcl_context.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/xpu/bkcl_helper.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
const void *src_ptr = src.data();
dst->Resize(src.dims());
auto *dst_ptr = dst->mutable_data(src.place(), src.type());
auto bkcl_dtype = platform::ToBKCLDataType(src.type());
auto *dst_ptr = dst->mutable_data(src.place(), src.dtype());
auto bkcl_dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(),
bkcl_dtype, BKCL_ADD, stream),
......@@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
XPUStream stream = comm->stream();
void *src_ptr = src_tensor->data();
auto data_type = platform::ToBKCLDataType(src_tensor->type());
auto data_type = platform::ToBKCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr,
src_tensor->numel(), data_type, 0, stream),
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/gloo_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor,
framework::Tensor *dst_tensor) {
auto gloo_wrapper = framework::GlooWrapper::GetInstance();
dst_tensor->Resize(src_tensor.dims());
switch (src_tensor.type()) {
switch (framework::TransToProtoVarType(src_tensor.dtype())) {
GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper);
GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper);
......@@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
VLOG(3) << "SelectedRows AllReduce start";
const auto &src_tensor = src.value();
const auto &place = src_tensor.place();
auto dtype = src_tensor.type();
auto dtype = framework::TransToProtoVarType(src_tensor.dtype());
  // 1. Gather rows number from all workers. Here we use ncclAllGather to do this,
  // but we can use other ways to implement it in the future
const auto &src_rows = src.rows();
......@@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src,
std::for_each(element_nums.begin(), element_nums.end(),
[feature_size](size_t &x) { x = x * feature_size; });
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype);
auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype());
gloo_wrapper->AllGatherVector<int64_t>(const_cast<int64_t *>(src_rows_ptr),
static_cast<int64_t *>(dst_rows_ptr),
rows_num_vector);
......
......@@ -18,6 +18,7 @@
#include <memory>
#include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h"
......@@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) {
"%zu and the number of elements of destination tensor is %zu.",
numel, dst_tensor->numel()));
auto data_type = src_tensor.type();
auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto place = src_tensor.place();
PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type,
PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(dst_tensor->dtype()),
data_type,
platform::errors::PreconditionNotMet(
"The data type of source tensor and destination tensor "
"should be equal, Otherwise, the calculation results "
......@@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
const pten::SelectedRows& src_selected_rows =
GetInnerTensor<pten::SelectedRows>(src);
auto place = dst_tensor->place();
auto data_type = src_selected_rows.value().type();
auto data_type =
framework::TransToProtoVarType(src_selected_rows.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
......@@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
const pten::DenseTensor& src_tensor =
GetInnerTensor<pten::DenseTensor>(src_tensor_var);
const auto& place = src_tensor.place();
auto data_type = src_tensor.type();
auto data_type = framework::TransToProtoVarType(src_tensor.dtype());
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
pten::DenseTensor* dst_tensor =
GetInnerMutableTensor<pten::DenseTensor>(dst_tensor_var);
dst_tensor->Resize(src_tensor.dims());
dst_tensor->mutable_data(place, data_type);
dst_tensor->mutable_data(place, src_tensor.dtype());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
......@@ -477,7 +481,8 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
auto place = src_selected_rows1.value().place();
auto data_type = src_selected_rows1.value().type();
auto data_type =
framework::TransToProtoVarType(src_selected_rows1.value().dtype());
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
std::vector<const pten::SelectedRows*> src_selected_rows;
......@@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
}
}
......@@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr<VariableWrapper> var,
VLOG(6) << "Dims of " << dst_var->Name() << " is set as: "
<< var->Var().Get<framework::LoDTensor>().dims();
tensor->Resize(var->Var().Get<framework::LoDTensor>().dims());
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
} else {
auto* tensor =
dst_var->MutableVar()->GetMutable<framework::LoDTensor>();
tensor->mutable_data(place, var->DataType());
tensor->mutable_data(place,
framework::TransToPtenDataType(var->DataType()));
pten::funcs::set_constant(*dev_ctx, tensor, 0.0);
}
}
......
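// A minimal sketch of the dtype-equality guard used repeatedly above before one
// tensor is accumulated into another; CheckSameDType is an illustrative name and
// the framework/platform headers are assumed to be included.
static void CheckSameDType(const pten::DenseTensor& src,
                           const pten::DenseTensor& dst) {
  namespace fw = paddle::framework;
  auto src_type = fw::TransToProtoVarType(src.dtype());
  auto dst_type = fw::TransToProtoVarType(dst.dtype());
  PADDLE_ENFORCE_EQ(src_type, dst_type,
                    paddle::platform::errors::PreconditionNotMet(
                        "The data type of source tensor (%s) and destination "
                        "tensor (%s) should be equal.",
                        fw::DataTypeToString(src_type),
                        fw::DataTypeToString(dst_type)));
}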
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/hccl_context.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
......@@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst,
void *src_ptr = const_cast<void *>(src.data());
dst->Resize(src.dims());
void *dst_ptr = dst->mutable_data(src.place(), src.type());
HcclDataType hccl_dtype = platform::ToHCCLDataType(src.type());
void *dst_ptr = dst->mutable_data(src.place(), src.dtype());
HcclDataType hccl_dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(src.dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce(
src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(),
......@@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
void *src_ptr =
reinterpret_cast<void *>(const_cast<void *>(src_tensor->data()));
auto hccl_dtype = platform::ToHCCLDataType(src_tensor->type());
auto hccl_dtype = platform::ToHCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast(
src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(),
reinterpret_cast<void *>(stream)));
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/jit/program_desc_tracer.h"
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace imperative {
......@@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist(
new_var_desc->SetShape(framework::vectorize<int64_t>(tensor.dims()));
new_var_desc->SetLoDLevel(tensor.lod().size());
if (tensor.IsInitialized()) {
new_var_desc->SetDataType(tensor.type());
new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype()));
} else {
new_var_desc->SetDataType(framework::proto::VarType::FP32);
}
......
......@@ -15,6 +15,8 @@
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/infer_var_type_context.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -99,7 +101,9 @@ static std::string DebugString(
auto& tensor = var.Get<framework::LoDTensor>();
ss << "LoDTensor<";
if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", ";
ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", ";
ss << "(" << tensor.dims() << ")";
} else {
......@@ -112,7 +116,9 @@ static std::string DebugString(
auto& tensor = selected_rows.value();
auto& rows = selected_rows.rows();
if (tensor.IsInitialized()) {
ss << framework::DataTypeToString(tensor.type()) << ", ";
ss << framework::DataTypeToString(
framework::TransToProtoVarType(tensor.dtype()))
<< ", ";
ss << tensor.place() << ", ";
ss << "height(" << selected_rows.height() << "), rows(";
std::for_each(rows.cbegin(), rows.cend(),
......
......@@ -25,6 +25,7 @@
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
......@@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) {
gpuStream_t stream = comm->stream();
void *src_ptr = src_tensor->data();
auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type());
auto nccl_dtype = platform::ToNCCLDataType(
framework::TransToProtoVarType(src_tensor->dtype()));
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream));
}
......
......@@ -24,6 +24,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/op_base.h"
......@@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var,
// we can't get data_type_ directly. We need to check if we can only use
// default data_type for now.
if (ref_var.ForwardDataType() != -1) {
dst_tensor->mutable_data(place, ref_var.ForwardDataType());
dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.ForwardDataType()));
} else {
dst_tensor->mutable_data(place, ref_var.DataType());
dst_tensor->mutable_data(
place, framework::TransToPtenDataType(ref_var.DataType()));
}
pten::funcs::set_constant(*dev_ctx, dst_tensor, value);
}
......@@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask(
platform::errors::InvalidArgument(
"The %d-th grad_output's shape does not match the %d-th output",
i, i));
PADDLE_ENFORCE_EQ(grad_tensor.type(), out_tensor.type(),
PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(grad_tensor.dtype()),
framework::TransToProtoVarType(out_tensor.dtype()),
platform::errors::InvalidArgument(
"The %d-th grad_output's data type does not "
"match the %d-th output",
......
......@@ -29,6 +29,7 @@
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/selected_rows.h"
......@@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr));
} else if (attr_defs[i].type_index ==
std::type_index(typeid(pten::DataType))) {
auto data_type = pten::TransToPtenDataType(
auto data_type = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(
BOOST_GET_CONST(int, attr)));
kernel_ctx->EmplaceBackAttr(data_type);
......
......@@ -446,7 +446,7 @@ void Reducer::InitializeGroups(
InitializeDenseGroups(variable_indices_, &group);
auto tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place_, group.dtype_);
.mutable_data(place_, framework::TransToPtenDataType(group.dtype_));
}
// map variables to this group by VariableLocator
......@@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) {
// by avoiding tensor construction
if (!group_tensor.IsInitialized()) {
group_tensor.Resize({static_cast<int64_t>(length)});
group_tensor.mutable_data(place_, group.dtype_);
group_tensor.mutable_data(place_,
framework::TransToPtenDataType(group.dtype_));
}
#ifdef PADDLE_WITH_XPU_BKCL
......
......@@ -17,6 +17,7 @@
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/memory/memcpy.h"
......@@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1,
auto* t1_p = t1.data();
auto* t2_p = t2.data();
return std::memcmp(t1_p, t2_p,
t1.numel() * framework::SizeOfType(t1.type())) == 0;
return std::memcmp(
t1_p, t2_p,
t1.numel() * framework::SizeOfType(
framework::TransToProtoVarType(t1.dtype()))) == 0;
}
template <typename T>
......
......@@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) {
tmp.ShareDataWith(*tensor).Resize({static_cast<int64_t>(len)});
group.dense_tensors_.push_back(std::move(tmp));
group.all_length_ += len;
group.dtype_ = tensor->type();
group.dtype_ = framework::TransToProtoVarType(tensor->dtype());
}
paddle::platform::DeviceContextPool& pool =
......@@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) {
{ // concat
auto* tensor = group.dense_contents_.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim({group.all_length_}))
.mutable_data(place, group.dtype_);
.mutable_data(place, framework::TransToPtenDataType(group.dtype_));
group.ConcatTensors(*dev_ctx);
group.DivNRanks(*dev_ctx, 1);
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -170,9 +171,11 @@ template <>
framework::proto::VarType::Type GetDataType<egr::EagerTensor>(
std::shared_ptr<egr::EagerTensor> var) {
if (var->Var().IsType<pten::SelectedRows>()) {
return var->Var().Get<pten::SelectedRows>().value().type();
return framework::TransToProtoVarType(
var->Var().Get<pten::SelectedRows>().value().type());
} else if (var->Var().IsType<framework::LoDTensor>()) {
return var->Var().Get<framework::LoDTensor>().type();
return framework::TransToProtoVarType(
var->Var().Get<framework::LoDTensor>().type());
} else {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"We only support pten::SelectedRows and framework::LoDTensor in "
......
......@@ -19,6 +19,7 @@
#include <string>
#include <utility>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/framework/variable.h"
......@@ -169,7 +170,7 @@ class VariableWrapper {
}
}
if (tensor && tensor->IsInitialized()) {
return tensor->type();
return framework::TransToProtoVarType(tensor->dtype());
} else {
VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
......
......@@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
output->name = fetches_[idx]->Input("X")[0];
if (type == framework::proto::VarType::FP32) {
......
......@@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
auto type = fetch.type();
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
output->name = fetchs_[idx]->Input("X")[0];
if (type == framework::DataTypeTrait<float>::DataType()) {
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <thread> // NOLINT
#include "gflags/gflags.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
......@@ -36,13 +37,16 @@ namespace paddle {
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
PaddleTensor pt;
if (t->type() == framework::proto::VarType::INT64) {
if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT64) {
pt.data.Reset(t->data(), t->numel() * sizeof(int64_t));
pt.dtype = PaddleDType::INT64;
} else if (t->type() == framework::proto::VarType::FP32) {
} else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::FP32) {
pt.data.Reset(t->data(), t->numel() * sizeof(float));
pt.dtype = PaddleDType::FLOAT32;
} else if (t->type() == framework::proto::VarType::INT32) {
} else if (framework::TransToProtoVarType(t->dtype()) ==
framework::proto::VarType::INT32) {
pt.data.Reset(t->data(), t->numel() * sizeof(int32_t));
pt.dtype = PaddleDType::INT32;
} else {
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
......@@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const {
DataType Tensor::type() const {
EAGER_GET_TENSOR(paddle::framework::LoDTensor);
auto type = tensor->type();
auto type = paddle::framework::TransToProtoVarType(tensor->dtype());
if (type == paddle::framework::proto::VarType::FP32) {
return DataType::FLOAT32;
} else if (type == paddle::framework::proto::VarType::FP16) {
......
......@@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
snprintf(output_i.name, output_names[i].length() + 1, "%s",
output_names[i].c_str());
auto output_t = predictor->GetOutputTensor(output_names[i]);
output_i.dtype = ConvertToPDDataType(output_t->type());
output_i.dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output_t->dtype()));
std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()];
memmove(output_i.shape, output_shape.data(),
......@@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor,
void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
auto output = predictor->predictor->GetOutputTensor(tensor->name);
tensor->dtype = ConvertToPDDataType(output->type());
tensor->dtype =
ConvertToPDDataType(framework::TransToProtoVarType(output->dtype()));
auto shape = output->shape();
size_t shape_size = shape.size();
if (tensor->shape.capacity < shape_size * sizeof(int)) {
......@@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
int n =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
size_t length = n * paddle::PaddleDtypeSize(output->type());
size_t length = n * paddle::PaddleDtypeSize(
framework::TransToProtoVarType(output->dtype()));
if (tensor->data.capacity < length) {
if (tensor->data.data) {
std::free(tensor->data.data);
......
......@@ -16,6 +16,7 @@
#include <functional>
#include <map>
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/memory/allocation/allocator.h"
......@@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
// the input tensor.
constexpr int empty_size = 0;
dst->Resize({empty_size});
GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst->SetPrecision(GetLitePrecisionType(src.type()));
GetLiteTensorDataPtr(
dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
GetLiteTargetType(src.place()));
dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src.lod());
dst->SetLoD(lite_lod);
......@@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst,
void InitDstTensor(framework::LoDTensor* dst,
const paddle::lite_api::Tensor& src) {
dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
GetNativePrecisionType(src.precision()));
dst->mutable_data(
inference::lite::utils::GetNativePlace(src.target()),
framework::TransToPtenDataType(GetNativePrecisionType(src.precision())));
SetLoD(dst->mutable_lod(), src.lod());
}
......@@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst,
const platform::Place& src_place = src.place();
const platform::Place& dst_place = GetNativePlace(dst->target());
const size_t bytes =
static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
static_cast<size_t>(src.numel()) * framework::DataTypeSize(src.dtype());
dst->Resize(framework::vectorize(src.dims()));
const void* src_data = src.data();
void* dst_data{nullptr};
dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()),
GetLiteTargetType(src.place()));
dst_data = GetLiteTensorDataPtr(
dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())),
GetLiteTargetType(src.place()));
VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << src.type();
<< ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(src.dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
......@@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst,
const platform::Place& src_place = GetNativePlace(src.target());
const platform::Place& dst_place = dst->place();
int64_t src_numel = GetLiteTensorNumel(src);
const size_t bytes = src_numel * framework::SizeOfType(dst->type());
const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype());
const void* src_data = src.data<void>();
// When Lite is ready, the source type needs to be modified here.
void* dst_data = dst->mutable_data(dst_place, dst->type());
void* dst_data = dst->mutable_data(dst_place, dst->dtype());
VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src
<< ", dst = " << dst << ", src_type = " << dst->type();
<< ", dst = " << dst
<< ", src_type = " << framework::TransToProtoVarType(dst->dtype());
MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
VLOG(3) << "[Lite memory size] Bytes = " << bytes;
}
......@@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) {
dst->Resize(framework::vectorize(src->dims()));
dst->ShareExternalMemory(src->data(), src->memory_size(),
GetLiteTargetType(src->place()));
dst->SetPrecision(GetLitePrecisionType(src->type()));
dst->SetPrecision(
GetLitePrecisionType(framework::TransToProtoVarType(src->dtype())));
paddle::lite::LoD lite_lod;
SetLoD(&lite_lod, src->lod());
dst->SetLoD(lite_lod);
......@@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) {
src_raw_data, memory_size, GetNativePlace(src->target())));
dst->Resize(paddle::framework::make_ddim(src->shape()));
SetLoD(dst->mutable_lod(), src->lod());
dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision()));
dst->ResetHolderWithType(
holder,
framework::TransToPtenDataType(GetNativePrecisionType(src->precision())));
}
} // namespace utils
......
......@@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
}
for (size_t i = 0; i < a_size; i++) {
if (a.type() == VarType::FP32) {
if (framework::TransToProtoVarType(a.dtype()) == VarType::FP32) {
const auto *a_data = a.data<float>();
const auto *b_data = b.data<float>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......@@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
b_data[i]);
return false;
}
} else if (a.type() == VarType::INT64) {
} else if (framework::TransToProtoVarType(a.dtype()) == VarType::INT64) {
const auto *a_data = a.data<int64_t>();
const auto *b_data = b.data<int64_t>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......
......@@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel {
framework::OpKernelType GetKernelTypeForVar(
const std::string& var_name, const framework::Tensor& tensor,
const framework::OpKernelType& expected_kernel_type) const {
return framework::OpKernelType(tensor.type(), tensor.place(),
tensor.layout());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
tensor.layout());
}
};
......
......@@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel<T> {
output->mutable_data<T>(ctx.GetPlace());
MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(input->type()));
MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(output->type()));
MLUCnnlTensorDesc input_desc(
*input, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(input->dtype())));
MLUCnnlTensorDesc output_desc(
*output, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(output->dtype())));
MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(),
reinterpret_cast<const void*>(input->data<T>()),
......@@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel<T> {
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc dout_desc(*dout, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(dout->type()));
MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(out->type()));
MLUCnnlTensorDesc dx_desc(*dx, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(dx->type()));
MLUCnnlTensorDesc dout_desc(
*dout, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(dout->dtype())));
MLUCnnlTensorDesc out_desc(
*out, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(out->dtype())));
MLUCnnlTensorDesc dx_desc(
*dx, CNNL_LAYOUT_ARRAY,
ToCnnlDataType(framework::TransToProtoVarType(dx->dtype())));
MLUCnnlActivationDesc act_desc(act_mode, alpha);
MLUCnnl::ActiveGrad(
ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr,
......
......@@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel<T> {
// Step 2: Construct a broadcast factor, which has the same shape with x.
// 2.1 Get a factor tensor with shape [1].
Tensor factor_tensor(framework::proto::VarType::FP32);
Tensor factor_tensor(experimental::DataType::FLOAT32);
factor_tensor.mutable_data<float>({1}, place);
FillNpuTensorWithConstant<float>(&factor_tensor, factor);
// 2.2 Get the factor which has the shape with x and the same value with
// factor.
Tensor factor_bc_tensor(framework::proto::VarType::FP32);
Tensor factor_bc_tensor(experimental::DataType::FLOAT32);
factor_bc_tensor.mutable_data<float>(x_dims, place);
const auto& runner_bc =
NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor},
......@@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel<T> {
{{"dims", framework::vectorize(x->dims())}});
runner_fill.Run(stream);
Tensor tmp_bool(framework::proto::VarType::BOOL);
Tensor tmp_bool(experimental::DataType::BOOL);
tmp_bool.mutable_data<bool>(x->dims(), place);
const auto& runner_less =
NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool});
runner_less.Run(stream);
Tensor tmp4(x->type());
tmp4.mutable_data<T>(x->dims(), place);
auto dst_dtype = ConvertToNpuDtype(x->type());
auto dst_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(x->type()));
const auto& runner_cast =
NpuOpRunner("Cast", {tmp_bool}, {tmp4},
{{"dst_type", static_cast<int>(dst_dtype)}});
......
......@@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
rtol->numel(), 1,
platform::errors::InvalidArgument(
"Input(Rtol) size must be 1, but get %d.", rtol->numel()));
PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Rtol) type must be double, but get %s.",
framework::DataTypeToString(rtol->type())));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(rtol->dtype()),
framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Rtol) type must be double, but get %s.",
framework::DataTypeToString(
framework::TransToProtoVarType(rtol->dtype()))));
rtol_v = get_tensor_value(dev_ctx, *rtol);
}
if (ctx.HasInput("Atol")) {
......@@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel<T> {
atol->numel(), 1,
platform::errors::InvalidArgument(
"Input(Atol) size must be 1, but get %d", atol->numel()));
PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Atol) type must be double, but get %s",
framework::DataTypeToString(atol->type())));
PADDLE_ENFORCE_EQ(
framework::TransToProtoVarType(atol->dtype()),
framework::proto::VarType::FP64,
platform::errors::InvalidArgument(
"Input(Atol) type must be double, but get %s",
framework::DataTypeToString(
framework::TransToProtoVarType(atol->dtype()))));
atol_v = get_tensor_value(dev_ctx, *atol);
}
......
......@@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx,
platform::NPUMemsetAsync(static_cast<void*>(g), 0,
good_out_tensor->numel() * sizeof(int), stream);
// bad_out_data = bad_in_data + 1
Tensor factor_tensor(bad_out_tensor->type());
Tensor factor_tensor(bad_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor},
......@@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx,
bad_out_tensor->numel() * sizeof(int), stream);
// good_out_data = good_in_data + 1
Tensor factor_tensor(good_out_tensor->type());
Tensor factor_tensor(good_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor},
......@@ -188,7 +188,7 @@ class LazyZerosNPU {
if (!found_inf_vec[0]) {
framework::TensorCopy(*x, place, dev_ctx, out);
} else if (zero_ptr != dst_ptr) {
auto size = out->numel() * framework::SizeOfType(out->type());
auto size = out->numel() * framework::DataTypeSize(out->dtype());
memory::Copy(place, dst_ptr, place, zero_ptr, size, stream);
}
}
......
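Byte-size computations change in the same mechanical way: `numel() * framework::SizeOfType(tensor->type())` becomes `numel() * framework::DataTypeSize(tensor->dtype())`, as in the LazyZerosNPU hunk above. A minimal sketch; `TensorBytes` is a hypothetical helper name, not an API added by this PR.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Hypothetical helper: size of a tensor's payload in bytes, using the new
// DataTypeSize(dtype()) form instead of SizeOfType(type()).
inline size_t TensorBytes(const framework::Tensor& t) {
  return static_cast<size_t>(t.numel()) * framework::DataTypeSize(t.dtype());
}

}  // namespace operators
}  // namespace paddle
```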
......@@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
framework::NPUAttributeMap attr = {{"axis", -1},
{"descending", descending}};
Tensor indices_tmp(framework::proto::VarType::INT32);
Tensor indices_tmp(experimental::DataType::INT32);
indices_tmp.Resize(indices->dims());
if (input->type() == framework::proto::VarType::INT64) {
Tensor input_fp32(framework::proto::VarType::FP32);
if (framework::TransToProtoVarType(input->dtype()) ==
framework::proto::VarType::INT64) {
Tensor input_fp32(experimental::DataType::FLOAT32);
input_fp32.Resize(input->dims());
CastToFP32(ctx, stream, *input, &input_fp32);
Tensor output_fp32(framework::proto::VarType::FP32);
Tensor output_fp32(experimental::DataType::FLOAT32);
output_fp32.Resize(output->dims());
if (axis == -1 || axis + 1 == in_dims.size()) {
......@@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);
Tensor trans_output(input_fp32.type());
Tensor trans_indices(framework::proto::VarType::INT32);
Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......@@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);
Tensor trans_output(input->type());
Tensor trans_indices(framework::proto::VarType::INT32);
Tensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......
......@@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> {
ArrayToLoDFunctorImpl<DeviceContext> functor;
functor.dev_ctx_ = dev_ctx;
functor.prev_functor_ = this;
framework::VisitDataType(out->type(), functor);
framework::VisitDataType(framework::TransToProtoVarType(out->dtype()),
functor);
}
};
......@@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"There's no element in the input array."));
int rank = x[0].dims().size();
platform::Place place = x[0].place();
auto data_type = x[0].type();
auto data_type = x[0].dtype();
int64_t batch_size = x[0].dims()[0];
framework::DDim ins_dims = rank > 1
? framework::slice_ddim(x[0].dims(), 1, rank)
......@@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
"The current place is %d, and the previous place is %d.",
i, x[i].place(), place));
PADDLE_ENFORCE_EQ(
x[i].type(), data_type,
x[i].dtype(), data_type,
platform::errors::InvalidArgument(
"The date type of the %zu'th element in LoDTensorArray "
"differs from previous ones."
"The current data type is %d, and the previous data type is %d.",
i, x[i].type(), data_type));
i, x[i].dtype(), data_type));
batch_size += x[i].dims()[0];
}
auto ins_dim_vec = framework::vectorize(ins_dims);
......
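Callers of `framework::VisitDataType` keep passing the proto enum, so the conversion happens at the call site, as in the ArrayToLoDFunctor hunk above. The sketch below assumes the standard Paddle visitor interface (`apply<T>()` instantiated for the runtime dtype); the functor itself is a stand-in with an empty body.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Stand-in functor: VisitDataType instantiates apply<T>() for the runtime
// dtype of the visited tensor.
struct ExampleDtypeVisitor {
  framework::Tensor* out;
  template <typename T>
  void apply() const {
    // illustrative body only
  }
};

void VisitByDtype(framework::Tensor* out) {
  framework::VisitDataType(framework::TransToProtoVarType(out->dtype()),
                           ExampleDtypeVisitor{out});
}

}  // namespace operators
}  // namespace paddle
```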
......@@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
bn_param_type = framework::proto::VarType::FP64;
}
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Scale")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Scale")->dtype()),
platform::errors::InvalidArgument("Scale input should be of float type"));
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Bias")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Bias")->dtype()),
platform::errors::InvalidArgument("Bias input should be of float type"));
PADDLE_ENFORCE_EQ(
bn_param_type, ctx.Input<Tensor>("Mean")->type(),
bn_param_type,
framework::TransToProtoVarType(ctx.Input<Tensor>("Mean")->dtype()),
platform::errors::InvalidArgument("Mean input should be of float type"));
PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input<Tensor>("Variance")->type(),
PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType(
ctx.Input<Tensor>("Variance")->dtype()),
platform::errors::InvalidArgument(
"Variance input should be of float type"));
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/operators/beam_search_decode_op.h"
#include "paddle/fluid/platform/device_context.h"
......@@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
framework::VisitDataType(
scores->at(0).type(),
framework::TransToProtoVarType(scores->at(0).dtype()),
BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores,
beam_size, end_id));
}
......
......@@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) {
PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
input_data, input_numel, has_weights, weights_data, output_data);
} else {
const auto& weights_type = weights->type();
const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace());
......@@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type();
const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) {
BincountCUDAInner<DeviceContext, T, int>(context);
......
......@@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) {
if (has_weights) {
const T* weights_data = weights->data<T>();
const auto& weights_type = weights->type();
const auto& weights_type = framework::TransToProtoVarType(weights->dtype());
if (weights_type == framework::proto::VarType::FP32) {
float* output_data = output->mutable_data<float>(context.GetPlace());
pten::funcs::SetConstant<DeviceContext, float>()(
......@@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<framework::Tensor>("X");
const auto& input_type = input->type();
const auto& input_type = framework::TransToProtoVarType(input->dtype());
if (input_type == framework::proto::VarType::INT32) {
BincountInner<DeviceContext, T, int>(context);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/cast_op.h"
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
#ifdef PADDLE_WITH_MLU
......@@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel {
auto &tensor_place = tensor->place();
// NOTE: cuda pinned tensor need to copy its data to target place
if (platform::is_cuda_pinned_place(tensor_place)) {
return framework::OpKernelType(tensor->type(), ctx.device_context());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
ctx.device_context());
}
#ifdef PADDLE_WITH_MKLDNN
......@@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel {
return true;
};
if (this->CanMKLDNNBeUsed(ctx, tensor->type()) && MKLDNNSupportsCast()) {
return framework::OpKernelType(tensor->type(), ctx.GetPlace(),
framework::DataLayout::kMKLDNN,
framework::LibraryType::kMKLDNN);
if (this->CanMKLDNNBeUsed(
ctx, framework::TransToProtoVarType(tensor->dtype())) &&
MKLDNNSupportsCast()) {
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), ctx.GetPlace(),
framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN);
}
#endif
#ifdef PADDLE_WITH_MLU
auto src_type = static_cast<VT::Type>(ctx.Attr<int>("in_dtype"));
auto dst_type = static_cast<VT::Type>(ctx.Attr<int>("out_dtype"));
if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) {
return framework::OpKernelType(tensor->type(), tensor_place);
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
} else {
VLOG(3) << "MLU not support cast type: "
<< framework::DataTypeToString(src_type)
<< " to type: " << framework::DataTypeToString(dst_type)
<< ", fallbacking to CPU one!";
return framework::OpKernelType(tensor->type(), platform::CPUPlace());
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()),
platform::CPUPlace());
}
#endif
return framework::OpKernelType(tensor->type(), tensor_place);
return framework::OpKernelType(
framework::TransToProtoVarType(tensor->dtype()), tensor_place);
}
};
......
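Wherever an `OpKernelType` is built from a tensor, its first argument now comes from `TransToProtoVarType(tensor->dtype())` rather than `tensor->type()`, as the MKLDNN, MLU, and fallback branches of the cast_op hunk above show. A minimal sketch of just that construction; `KernelTypeFromTensor` is an illustrative name.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Illustrative only: derive the expected kernel type from an input tensor,
// mirroring the fallback branch of CastOp::GetExpectedKernelType above.
framework::OpKernelType KernelTypeFromTensor(const framework::Tensor& tensor,
                                             const platform::Place& place) {
  return framework::OpKernelType(
      framework::TransToProtoVarType(tensor.dtype()), place);
}

}  // namespace operators
}  // namespace paddle
```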
......@@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType(
auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype));
// call new kernel
......
......@@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<Tensor>("Out");
auto place = ctx.GetPlace();
if (x->type() == dtype) {
if (framework::TransToProtoVarType(x->dtype()) == dtype) {
// NOTE(zhiqiu): NPU cast op may result in wrong value, so
// add special case here.
VLOG(4) << "cast to same dtype:" << dtype;
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <memory>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cast_op.h"
#include "paddle/fluid/platform/float16.h"
......@@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel<InT> {
out->mutable_data(dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(out_dtype));
auto pt_out_dtype = pten::TransToPtenDataType(
auto pt_out_dtype = framework::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype));
// call pten kernel
pten::CastKernel<InT>(
......
......@@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce(
num_classes_per_device_ptr, num_classes_per_device_ptr,
num_classes_per_device.numel(),
platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum,
comm->comm(), calcu_stream));
platform::ToNCCLDataType(
framework::TransToProtoVarType(num_classes_per_device.dtype())),
ncclSum, comm->comm(), calcu_stream));
}
#endif
......
......@@ -23,6 +23,7 @@
#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace operators {
......@@ -53,7 +54,7 @@ struct FillConstantVisitor {
* = nullptr) const {
#ifdef PADDLE_WITH_ASCEND_CL
if (platform::is_npu_place(dev_ctx_.GetPlace())) {
Tensor tensor_tmp(dtype_);
Tensor tensor_tmp(framework::TransToPtenDataType(dtype_));
tensor_tmp.mutable_data<T>({1}, context_.GetPlace());
FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_));
......@@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel<T> {
void *fused_tensor_ptr =
fused_tensor
->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
.mutable_data(context.GetPlace(), dtype);
.mutable_data(context.GetPlace(),
framework::TransToPtenDataType(dtype));
VLOG(10) << "Fused tensor addr " << fused_tensor_ptr;
// Init the continuous space
......
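The conversion also runs in the other direction. `Tensor(dtype)` and `Tensor::mutable_data(place, dtype)` now take the pten `DataType`, so code that still holds a `proto::VarType::Type` (for example, from an `int` op attribute) converts with `framework::TransToPtenDataType` first, as in the coalesce_tensor hunk above. A hedged sketch; the function name is illustrative.

```cpp
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Illustrative only: allocate a tensor from an attribute that carries the
// dtype as a proto::VarType enum (stored as int in the attribute map).
void* AllocFromProtoDtype(framework::Tensor* out,
                          const platform::Place& place, int dtype_attr) {
  auto proto_dtype = static_cast<framework::proto::VarType::Type>(dtype_attr);
  // mutable_data now expects the pten/experimental DataType.
  return out->mutable_data(place, framework::TransToPtenDataType(proto_dtype));
}

}  // namespace operators
}  // namespace paddle
```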
......@@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
int dtype = platform::ToNCCLDataType(in->type());
int dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
auto* sendbuff = in->data();
out->Resize(in->dims());
......
......@@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out");
int send_numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int ring_id = ctx.Attr<int>("ring_id");
PADDLE_ENFORCE_GE(
......
......@@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data();
void* recvbuff = out->mutable_data<T>(place);
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
......@@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
send_recv_buffer, static_cast<size_t>(in->numel()),
platform::ToNCCLDataType(in->type()), root_dev_id, comm, stream));
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())),
root_dev_id, comm, stream));
VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")"
<< " From " << root_dev_id << " to " << dev_id;
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
namespace paddle {
namespace operators {
......@@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int nranks = ctx.Attr<int>("nranks");
int rid = ctx.Attr<int>("ring_id");
......
......@@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel<T> {
#if defined(PADDLE_WITH_ASCEND_CL)
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
HcclDataType dtype = platform::ToHCCLDataType(in->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int ring_id = ctx.Attr<int>("ring_id");
std::string group =
......
......@@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
auto in = ctx.Input<framework::Tensor>("X");
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
HcclDataType dtype = platform::ToHCCLDataType(in->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
void* sendbuff = reinterpret_cast<void*>(const_cast<T*>(in->data<T>()));
......@@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
bool found_nan = false;
auto d_type = in->type();
auto d_type = framework::TransToProtoVarType(in->dtype());
switch (d_type) {
case framework::proto::VarType::FP16: {
break;
......@@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
BKCLDataType dtype = platform::ToBKCLDataType(in->type());
BKCLDataType dtype =
platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data<T>();
out->Resize(in->dims());
......@@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel<T> {
auto out = ctx.Output<framework::Tensor>("Out");
auto place = ctx.GetPlace();
ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype()));
int64_t numel = in->numel();
const void* sendbuff = in->data<T>();
out->Resize(in->dims());
......
......@@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
auto out = ctx.Output<framework::LoDTensor>("Out");
int numel = x->numel();
ncclDataType_t dtype = platform::ToNCCLDataType(x->type());
ncclDataType_t dtype =
platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
int rid = ctx.Attr<int>("ring_id");
auto place = ctx.GetPlace();
......
......@@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel<T> {
auto x = ctx.Input<framework::LoDTensor>("X");
void* ptr = reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
int numel = x->numel();
HcclDataType dtype = platform::ToHCCLDataType(x->type());
HcclDataType dtype =
platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype()));
auto out = ctx.Output<framework::LoDTensor>("Out");
......
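The collective kernels keep their `ToNCCLDataType` / `ToHCCLDataType` / `ToBKCLDataType` helpers working on the proto enum, so every call site wraps the tensor dtype as in the hunks above. A sketch for the NCCL case only, assuming an NCCL-enabled build; `NcclDtypeOf` is an illustrative name.

```cpp
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"

namespace paddle {
namespace operators {

// Illustrative only: map a tensor's runtime dtype to the corresponding
// ncclDataType_t via the proto enum, as the c_allreduce/c_broadcast kernels
// above now do.
inline ncclDataType_t NcclDtypeOf(const framework::Tensor& t) {
  return platform::ToNCCLDataType(framework::TransToProtoVarType(t.dtype()));
}

}  // namespace operators
}  // namespace paddle
#endif
```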
(The diffs for the remaining files in this commit are collapsed and not shown.)