Unverified commit 551ff882, authored by jjyaoao and committed by GitHub

[Test Mv] remove infrt (#52270)

Parent 54497c47
@@ -7,7 +7,6 @@ add_subdirectory(scripts)
 add_subdirectory(testing)
 add_subdirectory(phi)
-add_subdirectory(infrt)
 add_subdirectory(fluid)
 add_subdirectory(ir)
......
if(NOT WITH_INFRT)
return()
endif()
option(INFRT_WITH_PHI "Compile INFRT with PHI" ON)
option(INFRT_WITH_GPU "Compile INFRT with GPU" OFF)
option(INFRT_WITH_TRT "Compile INFRT with TensorRT" OFF)
# TODO(xiaowei): remove fluid
include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
if(WITH_GPU)
set(INFRT_WITH_GPU ON)
endif()
if(INFRT_WITH_PHI)
add_definitions("-DINFRT_WITH_PHI")
# TODO(wilber): Infrt gpu/trt currently depends on phi's components; modify the compile dependency options later.
if(INFRT_WITH_GPU)
add_definitions("-DINFRT_WITH_GPU")
if(INFRT_WITH_TRT)
add_definitions("-DINFRT_WITH_TRT")
endif()
endif()
endif()
# compile flags
set(INFRT_FLAGS -Wno-comment)
foreach(flag ${INFRT_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
set(INFRT_SOURCE_DIR "${PADDLE_SOURCE_DIR}/paddle/infrt")
set(INFRT_BINARY_DIR "${PADDLE_BINARY_DIR}/paddle/infrt")
set(INFRT_TEST_TARGETS CACHE INTERNAL "")
include(infrt_lib)
set(infrt_src CACHE INTERNAL "" FORCE)
# Gather headers for publishing the library.
function(core_gather_headers)
file(
GLOB includes
LIST_DIRECTORIES false
RELATIVE ${CMAKE_SOURCE_DIR}
*.h)
foreach(header ${includes})
set(core_includes
"${core_includes};${header}"
CACHE INTERNAL "")
endforeach()
endfunction()
function(gather_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(cpp ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${cpp}"
CACHE INTERNAL "")
endforeach()
endfunction()
# This function is similar to the global cc_test, but discards the huge number
# of default dependencies that are not needed by INFRT.
function(cc_test_tiny TARGET_NAME)
if(WITH_TESTING)
set(options SERIAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test_tiny "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_tiny_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${cc_test_tiny_DEPS}
${os_dependency_modules} infrt_gtest_main gtest)
add_dependencies(${TARGET_NAME} ${cc_test_tiny_DEPS} infrt_gtest_main gtest
extern_gtest)
add_test(
NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} "${cc_test_tiny_ARGS}"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(${cc_test_tiny_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
endif()
set(INFRT_TEST_TARGETS
${INFRT_TEST_TARGETS} ${TARGET_NAME}
CACHE INTERNAL "")
endif()
endfunction()
if(WITH_TESTING)
cc_library(
infrt_gtest_main
SRCS gtest_main.cc
DEPS gtest glog gflags)
endif()
add_subdirectory(api)
add_subdirectory(backends)
add_subdirectory(common)
add_subdirectory(dialect)
add_subdirectory(host_context)
add_subdirectory(kernel)
add_subdirectory(tensor)
add_subdirectory(support)
add_subdirectory(external_kernels)
add_subdirectory(paddle)
# MLIR td file generation
set(infrt_mlir_incs basic_kernels_inc test_kernels_inc tensor_shape_inc
dense_tensor_inc pd_extra_ops_inc trt_ops_inc)
if(INFRT_WITH_PHI)
set(phi_libs phi)
set(infrt_mlir_incs ${infrt_mlir_incs} MLIRinfrt_phi_tensorIncGen
MLIRinfrt_phi_baseIncGen)
endif()
cc_library(
infrt SHARED
SRCS ${infrt_src}
DEPS glog ${mlir_libs} ${phi_libs} paddle_framework_proto infrt_naive)
cc_library(
infrt_static
SRCS ${infrt_src}
DEPS glog ${mlir_libs} ${phi_libs} paddle_framework_proto)
add_dependencies(infrt ${infrt_mlir_incs} mlir-headers)
add_custom_target(test_infrt_exec DEPENDS ${INFRT_TEST_TARGETS})
core_gather_headers()
gather_srcs(infrt_src SRCS infrt_api.cc)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc.in
${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc)
# Disabled temporarily because the external-kernel's mkldnn is outdated.
cc_test_tiny(test_infrt_api SRCS infrt_api_test.cc DEPS infrt ${MLIR_IR_LIBS})
# TODO(inference): remove after optimizing weight unfold.
set_tests_properties(test_infrt_api PROPERTIES TIMEOUT 200)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/api/infrt_api.h"
#include <llvm/ADT/SmallVector.h>
#include <llvm/Support/DynamicLibrary.h>
#include <mlir/Dialect/StandardOps/IR/Ops.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/Parser.h>
#include <mlir/Pass/PassManager.h>
#include <mlir/Transforms/Passes.h>
#include <unordered_map>
#include <vector>
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/infrt/common/global.h"
#include "paddle/infrt/dialect/dense_tensor.h"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#include "paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.h"
#include "paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.h"
#include "paddle/infrt/dialect/mlir_loader.h"
#include "paddle/infrt/dialect/phi/ir/phi_base.h"
#include "paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h"
#include "paddle/infrt/host_context/core_runtime.h"
#include "paddle/infrt/host_context/kernel_registry.h"
#include "paddle/infrt/host_context/mlir_function_executable.h"
#include "paddle/infrt/host_context/mlir_to_runtime_translate.h"
#include "paddle/infrt/host_context/op_executable.h"
#include "paddle/infrt/host_context/paddle_mlir.h"
#include "paddle/infrt/host_context/value.h"
#include "paddle/infrt/kernel/basic_kernels.h"
#include "paddle/infrt/kernel/control_flow_kernels.h"
#include "paddle/infrt/kernel/phi/dense_tensor_kernels.h"
#include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.h"
#include "paddle/infrt/kernel/phi/registry.h"
#include "paddle/infrt/kernel/tensor_kernels.h"
#include "paddle/infrt/kernel/tensor_shape_kernels.h"
#include "paddle/infrt/kernel/test_kernels.h"
#include "paddle/infrt/tensor/tensor_map.h"
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
#include "paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_op_converter_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h"
#include "paddle/infrt/dialect/tensorrt/trt_type_convert_pass.h"
#include "paddle/infrt/kernel/tensorrt/registry.h"
#endif
using namespace infrt::host_context; // NOLINT
using namespace infrt::tensor; // NOLINT
namespace infrt {
template <typename T>
std::string DumpToString(T& op) { // NOLINT
std::string buffer;
llvm::raw_string_ostream os(buffer);
op.print(os);
os.flush();
return buffer;
}
struct MlirToRuntimeTranslator::Impl {
mlir::ModuleOp module;
// The runtime for a function call.
CoreRuntimeBuilder* runtime{};
// The current working op. The translator processes the ops one by one and
// updates `cur_op` here to the op it is currently working on.
OpExecutableBuilder* cur_op{};
// record the current function name.
std::string cur_func_name;
// Name to function definitions.
std::unordered_map<std::string, mlir::FuncOp> func_defs;
// Map from an operation to its results.
std::unordered_map<const mlir::Operation*, std::vector<ValueRef>> op_results;
llvm::DenseMap<mlir::Value, ValueRef> value_map;
};
/**
* Execute the mlir program in predict mode.
*/
class PredictExecutor : public MlirToRuntimeTranslator {
public:
CoreRuntimeBuilder core_runtime;
PredictExecutor(mlir::ModuleOp module,
KernelRegistry* registry,
::infrt::phi::DenseTensorMap&& map)
: MlirToRuntimeTranslator(module, &core_runtime),
core_runtime(registry),
registry_(registry) {
CHECK(registry_);
Init(std::move(map));
}
void Run() {
auto arguments = llvm::makeArrayRef(arguments_);
auto results = llvm::makeMutableArrayRef(results_.begin(), results_.size());
function_executable_->Execute(arguments, results);
}
int GetInputNum() { return inputs_.size(); }
::Tensor* GetInput(int i) { return inputs_[i]; }
int GetOutputNum() { return outputs_.size(); }
::Tensor* GetOutput(int i) { return outputs_[i]; }
private:
void Init(::infrt::phi::DenseTensorMap&& map) {
EmitFunctions();
llvm::Optional<mlir::FuncOp> predict_func_ = llvm::None;
for (auto func_op : impl_->module.getOps<mlir::FuncOp>()) {
if (func_op.getName().str() != "main_graph") continue;
predict_func_ = func_op;
break;
}
if (!predict_func_) {
std::cout << "ERROR: init failed, no predict function found in mlir."
<< std::endl;
return;
}
auto& predict_func = predict_func_.getValue();
function_executable_ =
new MlirFunctionExecutable(predict_func, registry_, impl_->func_defs);
// process parameters
VLOG(3) << "Arguments num of predict func: "
<< predict_func.getNumArguments();
for (size_t i = 0; i < predict_func.getNumArguments(); ++i) {
auto arg = predict_func.getArgument(i);
auto type = arg.getType();
// this param is TensorMap
if (type.isa<::infrt::phi::DenseTensorMapType>()) {
auto* value = new host_context::Value(std::move(map));
arguments_.push_back(value);
AddValue(predict_func.getArgument(i), value);
} else if (type.isa<::infrt::DenseTensorType>()) {
// this param is an input Tensor
auto dht = ::Tensor();
auto* value = new host_context::Value(std::move(dht));
arguments_.push_back(value);
inputs_.push_back(&(value->get<::Tensor>()));
} else {
llvm_unreachable("The input type has not been supported by predictor.");
}
}
// process results
auto& last_op = predict_func.front().back();
if (last_op.getName().getStringRef() == "infrt.return") {
for (size_t i = 0; i < last_op.getNumOperands(); ++i) {
auto operand = last_op.getOperand(i);
if (operand.getType().isa<::infrt::DenseTensorType>()) {
auto r = impl_->value_map.try_emplace(
operand, ValueRef(new host_context::Value(::Tensor())));
CHECK(r.second) << "Duplicate add mlir value ["
<< DumpToString(operand) << "]";
auto* value = r.first->second.get();
results_.push_back(ValueRef(value));
outputs_.push_back(&(value->get<::Tensor>()));
} else {
llvm_unreachable("infrt.return only supports DenseTensor now.");
}
}
}
}
protected:
std::unordered_map<std::string, mlir::FuncOp> func_def_table;
void EmitFunction(mlir::FuncOp op) override {
CHECK(!impl_->func_defs.count(op.getName().str()))
<< "Duplicate function defition found for function ["
<< op.getName().str();
impl_->func_defs.emplace(op.getName().str(), op);
}
private:
KernelRegistry* registry_{};
MlirFunctionExecutable* function_executable_;
llvm::SmallVector<::Tensor*, 1> inputs_;
llvm::SmallVector<host_context::Value*, 2> arguments_;
llvm::SmallVector<::Tensor*, 1> outputs_;
llvm::SmallVector<ValueRef, 1> results_;
};
std::unique_ptr<InfRtPredictor> CreateInfRtPredictor(
const InfRtConfig& config) {
auto x = std::make_unique<InfRtPredictor>();
x->Init(config);
return x;
}
struct InfRtPredictor::Impl {
std::unique_ptr<PredictExecutor> executor;
MLIRModelGenImpl module_gen_;
};
InfRtPredictor::InfRtPredictor() : impl_(new Impl) {}
InfRtPredictor::~InfRtPredictor() {}
void InfRtPredictor::Run() { impl_->executor->Run(); }
int InfRtPredictor::Init(const InfRtConfig& config) {
mlir::MLIRContext* context = ::infrt::Global::getMLIRContext();
KernelRegistry* registry = new KernelRegistry();
kernel::RegisterBasicKernels(registry);
kernel::RegisterTestKernels(registry);
kernel::RegisterTensorShapeKernels(registry);
kernel::RegisterTensorKernels(registry);
kernel::RegisterControlFlowKernels(registry);
#ifdef INFRT_WITH_PHI
kernel::RegisterPhiKernels(registry);
kernel::RegisterInferShapeLaunchers(registry);
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
kernel::RegisterTrtKernels(registry);
#endif // INFRT_WITH_GPU && INFRT_WITH_TRT
#endif
mlir::ModuleOp module_op;
if (config.tensorrt_enabled()) {
module_op = impl_->module_gen_.ImportPaddleModel(
config.model_dir(), config.param_dir(), false);
} else {
module_op = impl_->module_gen_.ImportPaddleModel(config.model_dir(),
config.param_dir());
}
context->loadAllAvailableDialects();
::mlir::PassManager pm(context);
::mlir::OpPassManager& pass_manager = pm.nest<::mlir::FuncOp>();
if (config.tensorrt_enabled()) {
pass_manager.addPass(::infrt::CreateInfrtWeightsUnfoldPass());
#if defined(INFRT_WITH_GPU) && defined(INFRT_WITH_TRT)
pass_manager.addPass(::infrt::trt::CreateTrtOpTellerPass());
pass_manager.addPass(::infrt::trt::CreateTrtGraphFusePass());
pass_manager.addPass(::infrt::trt::CreateTrtGraphSplitPass(1));
pass_manager.addPass(::infrt::trt::CreateTrtOpConverterPass());
pass_manager.addPass(::infrt::trt::CreateTrtTypeConvertPass());
#endif
pass_manager.addPass(::mlir::createCanonicalizerPass());
} else {
std::vector<::infrt::Place> valid_places = {
{::infrt::TargetType::CPU,
::infrt::PrecisionType::FLOAT32,
::infrt::LayoutType::NCHW}};
if (config.gpu_enabled()) {
valid_places.insert(valid_places.begin(),
::infrt::Place(::infrt::TargetType::GPU,
::infrt::PrecisionType::FLOAT32,
::infrt::LayoutType::NCHW));
}
pass_manager.addPass(CreatePhiOpCvtPass(valid_places));
pass_manager.addPass(CreateInfrtOpFusePass());
}
if (mlir::failed(pm.run(module_op))) {
std::cout << "\npass failed!\n" << std::endl;
return 4;
}
#ifndef NDEBUG
module_op.dump();
#endif // NDEBUG
// load extra shared library
for (const std::string& lib_path : config.shared_libs()) {
std::string err;
llvm::sys::DynamicLibrary dynLib =
llvm::sys::DynamicLibrary::getPermanentLibrary(lib_path.c_str(), &err);
if (!dynLib.isValid()) {
llvm::errs() << "Load shared library failed. Error: " << err << "\n";
return 1;
}
if (auto reg_sym = dynLib.SearchForAddressOfSymbol("RegisterKernels")) {
auto reg_func = reinterpret_cast<void (*)(KernelRegistry*)>(reg_sym);
reg_func(registry);
} else {
llvm::outs() << "Symbol \"RegisterKernels\" not found in \"" << lib_path
<< "\". Skip.\n";
}
}
// Load params
if (config.gpu_enabled() && !config.tensorrt_enabled()) {
auto tensor_map = ::infrt::kernel::phi::LoadCombinedParamsToGpu(
config.model_dir(), config.param_dir());
impl_->executor.reset(
new PredictExecutor(module_op, registry, std::move(tensor_map)));
} else {
auto tensor_map = ::infrt::kernel::phi::LoadCombinedParameters(
config.model_dir(), config.param_dir());
impl_->executor.reset(
new PredictExecutor(module_op, registry, std::move(tensor_map)));
}
return 0;
}
int InfRtPredictor::GetInputNum() { return impl_->executor->GetInputNum(); }
::Tensor* InfRtPredictor::GetInput(int i) {
return impl_->executor->GetInput(i);
}
int InfRtPredictor::GetOutputNum() { return impl_->executor->GetOutputNum(); }
::Tensor* InfRtPredictor::GetOutput(int i) {
return impl_->executor->GetOutput(i);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
class InfRtConfig {
std::string model_dir_;
std::string param_dir_;
std::vector<std::string> shared_libs_;
// TODO(wilber): Design an easy-to-use interface.
bool gpu_enabled_{false};
bool tensorrt_enabled_{false};
public:
InfRtConfig() = default;
void set_model_dir(const std::string& model_dir) { model_dir_ = model_dir; }
const std::string& model_dir() const { return model_dir_; }
void set_param_dir(const std::string& param_dir) { param_dir_ = param_dir; }
const std::string& param_dir() const { return param_dir_; }
void set_shared_libs(const std::vector<std::string>& shared_libs) {
shared_libs_ = shared_libs;
}
const std::vector<std::string>& shared_libs() const { return shared_libs_; }
void enable_gpu() { gpu_enabled_ = true; }
bool gpu_enabled() const { return gpu_enabled_; }
// TODO(wilber): Design an easy-to-use interface.
void enable_tensorrt() { tensorrt_enabled_ = true; }
void disable_tensorrt() { tensorrt_enabled_ = false; }
bool tensorrt_enabled() const { return tensorrt_enabled_; }
virtual ~InfRtConfig() = default;
};
class InfRtPredictor {
public:
InfRtPredictor();
~InfRtPredictor();
void Run();
int Init(const InfRtConfig& config);
int GetInputNum();
::Tensor* GetInput(int i);
int GetOutputNum();
::Tensor* GetOutput(int i);
protected:
struct Impl;
std::unique_ptr<Impl> impl_;
};
std::unique_ptr<InfRtPredictor> CreateInfRtPredictor(const InfRtConfig& config);
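// A minimal usage sketch (illustrative only; the model/param paths below are
// placeholders, not files shipped with this change):
//
//   InfRtConfig config;
//   config.set_model_dir("/path/to/model.pdmodel");
//   config.set_param_dir("/path/to/model.pdiparams");
//   std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
//   ::Tensor* input = predictor->GetInput(0);
//   // ... resize and fill `input` ...
//   predictor->Run();
//   ::Tensor* output = predictor->GetOutput(0);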
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <chrono>
#include <iostream>
#include <vector>
#include "llvm/Support/raw_ostream.h"
#include "paddle/infrt/api/infrt_api.h"
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/infrt/common/buffer.h"
#include "paddle/infrt/common/dtype.h"
using infrt::InfRtConfig;
using infrt::InfRtPredictor;
using infrt::CreateInfRtPredictor;
namespace infrt {
TEST(InfRtPredictor, predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.set_model_dir("@CMAKE_BINARY_DIR@/linear/linear.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/linear/linear.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({16, 784});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
predictor->Run();
// get and print output tensor
auto* output = predictor->GetOutput(0);
// TODO(Shixiaowei02): Automatic result validation for training then inference.
// auto* output_data = reinterpret_cast<float*>(output->data());
ASSERT_EQ(output->dims(), ::phi::DDim({16, 10}));
}
TEST(InfRtPredictor, cpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "One predict period costs " << msec.count() / 10 << "ms.\n";
// get and print output tensor
auto* output = predictor->GetOutput(0);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output->data<float>()[i * 100], true_vals[i], 1e-5);
}
}
#ifdef INFRT_WITH_TRT
TEST(InfRtPredictor, trt_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.enable_tensorrt();
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
predictor->Run();
// get and print output tensor
auto* output = predictor->GetOutput(0);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output->data<float>()[i * 100], true_vals[i], 1e-5);
}
}
#endif
#ifdef INFRT_WITH_GPU
TEST(InfRtPredictor, gpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.enable_gpu();
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::GpuPhiAllocator gpu_allocator;
::Tensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&gpu_allocator, ::phi::DataType::FLOAT32);
auto* data = reinterpret_cast<float*>(input->data());
std::vector<float> input_data(2 * 3 * 256 * 256, 1.0);
cudaMemcpy(data,
input_data.data(),
sizeof(float) * input->numel(),
cudaMemcpyHostToDevice);
for (int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 1000; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::cout << "One predict period costs " << msec.count() / 1000 << "ms.\n";
auto* output = predictor->GetOutput(0);
std::vector<float> output_data(output->numel());
cudaMemcpy(output_data.data(),
output->data<float>(),
sizeof(float) * output->numel(),
cudaMemcpyDeviceToHost);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i++) {
CHECK_NEAR(output_data[i * 100], true_vals[i], 1e-5);
}
}
#endif
} // namespace infrt
if(INFRT_WITH_PHI
AND WITH_GPU
AND WITH_TENSORRT)
add_subdirectory(tensorrt)
endif()
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/memory/malloc.h"
#include "paddle/phi/core/allocator.h"
#ifdef INFRT_WITH_GPU
#include <cuda_runtime.h>
#endif
namespace infrt {
namespace backends {
class CpuPhiAllocator : public phi::Allocator {
public:
static void deleter(phi::Allocation* ptr) { ::operator delete(ptr); }
AllocationPtr Allocate(size_t bytes_size) {
return AllocationPtr(
new phi::Allocation(::operator new(bytes_size),
bytes_size,
phi::Place(phi::AllocationType::CPU)),
deleter);
}
};
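// Illustrative sketch (mirrors how the API tests use this allocator; not part
// of the original header): a tensor can draw its storage from it, e.g.
//
//   CpuPhiAllocator cpu_allocator;
//   ::Tensor t;
//   t.Resize({16, 784});
//   t.AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);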
#ifdef INFRT_WITH_GPU
// TODO(wilber): Just for the demo test; we need a more efficient gpu allocator.
class GpuPhiAllocator : public phi::Allocator {
public:
static void deleter(phi::Allocation* ptr) { cudaFree(ptr->ptr()); }
AllocationPtr Allocate(size_t bytes_size) {
return paddle::memory::Alloc(phi::Place(phi::AllocationType::GPU),
bytes_size);
}
};
#endif
} // namespace backends
} // namespace infrt
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/infrt/backends/host/phi_allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
namespace infrt {
namespace backends {
class CpuPhiContext : public ::phi::CPUContext {
public:
using Base = ::phi::CPUContext;
using ::phi::CPUContext::SetEigenDevice;
CpuPhiContext() {
Init();
SetAllocator(alloc_.get());
}
private:
std::unique_ptr<::phi::Allocator> alloc_{std::make_unique<CpuPhiAllocator>()};
};
class GpuPhiContext : public ::phi::GPUContext {
public:
using Base = ::phi::GPUContext;
using ::phi::GPUContext::SetBlasHandle;
using ::phi::GPUContext::SetDnnHandle;
using ::phi::GPUContext::SetEigenDevice;
using ::phi::GPUContext::SetSolverHandle;
using ::phi::GPUContext::SetSparseHandle;
using ::phi::GPUContext::SetStream;
};
} // namespace backends
} // namespace infrt
add_subdirectory(plugin)
core_gather_headers()
gather_srcs(infrt_src SRCS trt_engine.cc)
cc_test_tiny(
test_infrt_trt
SRCS
test_trt_engine.cc
DEPS
infrt
phi_dynload_cuda
tensorrt_converter)
gather_srcs(infrt_src SRCS pool_op_plugin.cu)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <cassert>
#include <cstring>
#include <string>
#include <type_traits>
#include <vector>
#include "paddle/phi/backends/dynload/tensorrt.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
template <typename T>
inline void SerializeValue(void** buffer, T const& value);
template <typename T>
inline void DeserializeValue(void const** buffer,
size_t* buffer_size,
T* value);
namespace details {
template <typename T, class Enable = void>
struct Serializer {};
template <typename T>
struct Serializer<T,
typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_enum<T>::value ||
std::is_pod<T>::value>::type> {
static size_t SerializedSize(T const& value) { return sizeof(T); }
static void Serialize(void** buffer, T const& value) {
std::memcpy(*buffer, &value, sizeof(T));
reinterpret_cast<char*&>(*buffer) += sizeof(T);
}
static void Deserialize(void const** buffer, size_t* buffer_size, T* value) {
assert(*buffer_size >= sizeof(T));
std::memcpy(value, *buffer, sizeof(T));
reinterpret_cast<char const*&>(*buffer) += sizeof(T);
*buffer_size -= sizeof(T);
}
};
template <>
struct Serializer<const char*> {
static size_t SerializedSize(const char* value) { return strlen(value) + 1; }
static void Serialize(void** buffer, const char* value) {
std::strcpy(static_cast<char*>(*buffer), value); // NOLINT
reinterpret_cast<char*&>(*buffer) += strlen(value) + 1;
}
static void Deserialize(void const** buffer,
size_t* buffer_size,
const char** value) {
*value = static_cast<char const*>(*buffer);
size_t data_size = strnlen(*value, *buffer_size) + 1;
assert(*buffer_size >= data_size);
reinterpret_cast<char const*&>(*buffer) += data_size;
*buffer_size -= data_size;
}
};
template <typename T>
struct Serializer<std::vector<T>,
typename std::enable_if<std::is_arithmetic<T>::value ||
std::is_enum<T>::value ||
std::is_pod<T>::value>::type> {
static size_t SerializedSize(std::vector<T> const& value) {
return sizeof(value.size()) + value.size() * sizeof(T);
}
static void Serialize(void** buffer, std::vector<T> const& value) {
SerializeValue(buffer, value.size());
size_t nbyte = value.size() * sizeof(T);
std::memcpy(*buffer, value.data(), nbyte);
reinterpret_cast<char*&>(*buffer) += nbyte;
}
static void Deserialize(void const** buffer,
size_t* buffer_size,
std::vector<T>* value) {
size_t size;
DeserializeValue(buffer, buffer_size, &size);
value->resize(size);
size_t nbyte = value->size() * sizeof(T);
CHECK_GE(*buffer_size, nbyte);
std::memcpy(value->data(), *buffer, nbyte);
reinterpret_cast<char const*&>(*buffer) += nbyte;
*buffer_size -= nbyte;
}
};
} // namespace details
template <typename T>
inline size_t SerializedSize(T const& value) {
return details::Serializer<T>::SerializedSize(value);
}
template <typename T>
inline void SerializeValue(void** buffer, T const& value) {
return details::Serializer<T>::Serialize(buffer, value);
}
template <typename T>
inline void DeserializeValue(void const** buffer,
size_t* buffer_size,
T* value) {
return details::Serializer<T>::Deserialize(buffer, buffer_size, value);
}
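// Illustrative sketch of how these helpers round-trip a field through a raw
// byte buffer (the concrete values are examples only, not from the codebase):
//
//   std::vector<int> strides{2, 2};
//   std::vector<char> storage(SerializedSize(strides));
//   void* write_ptr = storage.data();
//   SerializeValue(&write_ptr, strides);                 // advances write_ptr
//   void const* read_ptr = storage.data();
//   size_t remaining = storage.size();
//   std::vector<int> restored;
//   DeserializeValue(&read_ptr, &remaining, &restored);  // remaining becomes 0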
template <typename T>
class TrtPluginRegistrar {
public:
TrtPluginRegistrar() {
static auto func_ptr = static_cast<nvinfer1::IPluginRegistry*>(
::phi::dynload::getPluginRegistry());
func_ptr->registerCreator(instance, "");
}
private:
//! Plugin instance.
T instance{};
};
#define REGISTER_TRT_PLUGIN(name) \
static TrtPluginRegistrar<name> pluginRegistrar##name {}
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/infrt/backends/tensorrt/plugin/plugin_utils.h"
#include "paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h"
#include "paddle/phi/kernels/funcs/pooling.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
PoolPlugin::PoolPlugin(bool ceil_mode,
PoolType pool_type,
bool adaptive,
bool exclusive,
std::vector<int> ksize,
std::vector<int> strides,
std::vector<int> paddings,
std::vector<int> input_shape,
std::vector<int> real_paddings)
: ceil_mode_(ceil_mode),
pool_type_(pool_type),
adaptive_(adaptive),
exclusive_(exclusive),
ksize_(ksize),
strides_(strides),
paddings_(paddings),
real_paddings_(real_paddings),
input_shape_(input_shape) {
output_shape_ = input_shape_;
std::vector<int> output_shape =
CalcOutputSize({input_shape_[1], input_shape_[2]},
ceil_mode_,
adaptive_,
ksize_,
strides_,
real_paddings_);
output_shape_[1] = output_shape[0];
output_shape_[2] = output_shape[1];
}
PoolPlugin::PoolPlugin(void const* serialData, size_t serialLength) {
// deserializeBase(serialData, serialLength);
DeserializeValue(&serialData, &serialLength, &ceil_mode_);
DeserializeValue(&serialData, &serialLength, &pool_type_);
DeserializeValue(&serialData, &serialLength, &adaptive_);
DeserializeValue(&serialData, &serialLength, &exclusive_);
DeserializeValue(&serialData, &serialLength, &ksize_);
DeserializeValue(&serialData, &serialLength, &strides_);
DeserializeValue(&serialData, &serialLength, &paddings_);
DeserializeValue(&serialData, &serialLength, &real_paddings_);
DeserializeValue(&serialData, &serialLength, &input_shape_);
DeserializeValue(&serialData, &serialLength, &output_shape_);
}
const char* PoolPlugin::getPluginType() const noexcept { return "pool_plugin"; }
const char* PoolPlugin::getPluginVersion() const noexcept { return "1"; }
int PoolPlugin::getNbOutputs() const noexcept { return 1; }
nvinfer1::Dims PoolPlugin::getOutputDimensions(int outputIndex,
const nvinfer1::Dims* inputs,
int nbInputs) noexcept {
assert(nbInputs == 1);
assert(outputIndex == 0);
assert(inputs[0].nbDims == 3);
nvinfer1::Dims const& input_dims = inputs[0];
nvinfer1::Dims output_dims = input_dims;
output_dims.d[1] = output_shape_[1];
output_dims.d[2] = output_shape_[2];
return output_dims;
}
int32_t PoolPlugin::initialize() noexcept { return 0; }
void PoolPlugin::terminate() noexcept {}
size_t PoolPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept {
return 0;
}
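// Note: TensorRT 8.0 changed IPluginV2::enqueue so that `outputs` is passed as
// `void* const*` instead of `void**`, hence the version guard below.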
#if IS_TRT_VERSION_LT(8000)
int PoolPlugin::enqueue(int batch_size,
const void* const* inputs,
void** outputs,
#else
int PoolPlugin::enqueue(int batch_size,
const void* const* inputs,
void* const* outputs,
#endif
void* workspace,
cudaStream_t stream) noexcept {
// TODO(wilber)
int input_size = 0;
float const* idata = reinterpret_cast<float const*>(inputs[0]);
float* const* odatas = reinterpret_cast<float* const*>(outputs);
std::vector<int> input_shape = input_shape_;
std::vector<int> output_shape = output_shape_;
input_shape.insert(input_shape.begin(), batch_size);
output_shape.insert(output_shape.begin(), batch_size);
if (pool_type_ == PoolType::max) {
::phi::funcs::MaxPool<float> pool_process;
::phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::MaxPool<float>, float>
pool2d_forward;
pool2d_forward(idata,
input_shape,
output_shape,
ksize_,
strides_,
paddings_,
true,
false,
odatas[0],
stream,
pool_process);
} else if (pool_type_ == PoolType::avg) {
::phi::funcs::AvgPool<float> pool_process;
::phi::funcs::Pool2dDirectCUDAFunctor<phi::funcs::AvgPool<float>, float>
pool2d_forward;
pool2d_forward(idata,
input_shape,
output_shape,
ksize_,
strides_,
paddings_,
exclusive_,
adaptive_,
odatas[0],
stream,
pool_process);
}
return cudaGetLastError() != cudaSuccess;
}
// TODO(wilber): serialize base info?
size_t PoolPlugin::getSerializationSize() const noexcept {
return SerializedSize(ceil_mode_) + SerializedSize(pool_type_) +
SerializedSize(adaptive_) + SerializedSize(exclusive_) +
SerializedSize(ksize_) + SerializedSize(strides_) +
SerializedSize(paddings_) + SerializedSize(real_paddings_) +
SerializedSize(input_shape_) + SerializedSize(output_shape_);
}
// TODO(wilber): serialize base info?
void PoolPlugin::serialize(void* buffer) const noexcept {
// serializeBase(buffer);
SerializeValue(&buffer, ceil_mode_);
SerializeValue(&buffer, pool_type_);
SerializeValue(&buffer, adaptive_);
SerializeValue(&buffer, exclusive_);
SerializeValue(&buffer, ksize_);
SerializeValue(&buffer, strides_);
SerializeValue(&buffer, paddings_);
SerializeValue(&buffer, real_paddings_);
SerializeValue(&buffer, input_shape_);
SerializeValue(&buffer, output_shape_);
}
void PoolPlugin::destroy() noexcept { delete this; }
void PoolPlugin::setPluginNamespace(char const* plugin_namespace) noexcept {
namespace_ = plugin_namespace;
}
char const* PoolPlugin::getPluginNamespace() const noexcept {
return namespace_.c_str();
}
nvinfer1::DataType PoolPlugin::getOutputDataType(
int32_t index,
nvinfer1::DataType const* input_types,
int32_t nbInputs) const noexcept {
CHECK_EQ(index, 0);
CHECK_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), true);
return input_types[0];
}
bool PoolPlugin::isOutputBroadcastAcrossBatch(int32_t outputIndex,
bool const* inputIsBroadcasted,
int32_t nbInputs) const noexcept {
return false;
}
bool PoolPlugin::canBroadcastInputAcrossBatch(
int32_t inputIndex) const noexcept {
return false;
}
nvinfer1::IPluginV2Ext* PoolPlugin::clone() const noexcept {
auto* plugin = new PoolPlugin(ceil_mode_,
pool_type_,
adaptive_,
exclusive_,
ksize_,
strides_,
paddings_,
input_shape_,
real_paddings_);
plugin->setPluginNamespace(namespace_.c_str());
return plugin;
}
void PoolPlugin::configurePlugin(nvinfer1::PluginTensorDesc const* in,
int32_t nb_input,
nvinfer1::PluginTensorDesc const* out,
int32_t nb_output) noexcept {
CHECK_EQ(nb_input, 1);
CHECK_EQ(nb_output, 1);
input_dims_ = in[0].dims;
data_format_ = in[0].format;
data_type_ = in[0].type;
}
bool PoolPlugin::supportsFormatCombination(
int32_t pos,
nvinfer1::PluginTensorDesc const* in_out,
int32_t nb_inputs,
int32_t nb_outputs) const noexcept {
CHECK_LT(pos, nb_inputs + nb_outputs);
CHECK_NOTNULL(in_out);
return ((in_out[pos].type == nvinfer1::DataType::kFLOAT) &&
in_out[pos].format == nvinfer1::PluginFormat::kLINEAR);
}
nvinfer1::IPluginV2* PoolPluginCreator::createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) noexcept {
// auto* plugin = new UffPoolPluginV2(*fc);
field_collection_ = *fc;
plugin_name_ = name;
const nvinfer1::PluginField* fields = fc->fields;
bool ceil_mode;
PoolPlugin::PoolType pool_type;
bool adaptive;
bool exclusive;
std::vector<int> ksize;
std::vector<int> strides;
std::vector<int> paddings;
std::vector<int> real_paddings;
std::vector<int> input_shape;
std::vector<int> output_shape;
// TODO(wilber): add implement.
CHECK(false) << "not implement";
// for (int i = 0; i < fc->nbFields; ++i) {
// const char* attr_name = fields[i].name;
// if (!strcmp(attr_name, "ceil_mode")) {
// CHECK_EQ(fields[i].type == nvinfer1::PluginFieldType::kINT8, true);
// ceil_mode = *static_cast<const bool*>(fields[i].data);
// // mParam.numOutputBoxesPerClass =
// // *(static_cast<const int*>(fields[i].data));
// }
// }
return nullptr;
}
nvinfer1::IPluginV2* PoolPluginCreator::deserializePlugin(
const char* name, const void* serialData, size_t serialLength) noexcept {
auto* plugin = new PoolPlugin(serialData, serialLength);
plugin_name_ = name;
return plugin;
}
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <stdio.h>
#include <cassert>
#include <string>
#include <vector>
#include "paddle/infrt/backends/tensorrt/plugin/plugin_utils.h"
#include "paddle/infrt/backends/tensorrt/trt_utils.h"
namespace infrt {
namespace backends {
namespace tensorrt {
namespace plugin {
static std::vector<int> CalcOutputSize(const std::vector<int>& input_shape,
const bool& ceil_mode,
const bool& adaptive,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& real_paddings) {
std::vector<int> output_shape = input_shape;
if (adaptive) {
output_shape[0] = ksize[0];
output_shape[1] = ksize[1];
} else {
int output_h = 0, output_w = 0;
if (ceil_mode) {
output_h = (input_shape[0] - ksize[0] + real_paddings[0] +
real_paddings[1] + strides[0] - 1) /
strides[0] +
1;
output_w = (input_shape[1] - ksize[1] + real_paddings[2] +
real_paddings[3] + strides[1] - 1) /
strides[1] +
1;
}
// TRT will use the native layer when ceil_mode=false
/*
else{
output_h = (input_shape[0] - ksize[0] + real_paddings[0] +
real_paddings[1]) / strides[0] + 1;
output_w = (input_shape[1] - ksize[1] + real_paddings[2] +
real_paddings[3]) / strides[1] + 1;
}
*/
output_shape[0] = output_h;
output_shape[1] = output_w;
}
return output_shape;
}
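// Worked example for CalcOutputSize above (illustrative values only;
// ceil_mode = true, adaptive = false):
//   input_shape = {28, 28}, ksize = {3, 3}, strides = {2, 2},
//   real_paddings = {0, 0, 0, 0}
//   output_h = (28 - 3 + 0 + 0 + 2 - 1) / 2 + 1 = 13 + 1 = 14
//   output_w is computed the same way, so the result is {14, 14}.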
class PoolPlugin : public nvinfer1::IPluginV2IOExt {
public:
enum class PoolType {
max = 0,
avg,
};
PoolPlugin() {}
PoolPlugin(bool ceil_mode,
PoolType pool_type,
bool adaptive,
bool exclusive,
std::vector<int> ksize,
std::vector<int> strides,
std::vector<int> paddings,
std::vector<int> input_shape,
std::vector<int> real_paddings);
PoolPlugin(void const* serialData, size_t serialLength);
// IPluginV2 methods
const char* getPluginType() const noexcept override;
const char* getPluginVersion() const noexcept override;
int getNbOutputs() const noexcept override;
nvinfer1::Dims getOutputDimensions(int outputIndex,
const nvinfer1::Dims* inputs,
int nbInputs) noexcept override;
int32_t initialize() noexcept override;
void terminate() noexcept override;
size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize,
const void* const* inputs,
void** outputs,
#else
int enqueue(int batchSize,
const void* const* inputs,
void* const* outputs,
#endif
void* workspace,
cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void* buffer) const noexcept override;
void destroy() noexcept override;
void setPluginNamespace(char const* pluginNamespace) noexcept override;
char const* getPluginNamespace() const noexcept override;
// IPluginV2Ext methods
nvinfer1::DataType getOutputDataType(
int32_t index,
nvinfer1::DataType const* inputTypes,
int32_t nbInputs) const noexcept override;
bool isOutputBroadcastAcrossBatch(int32_t outputIndex,
bool const* inputIsBroadcasted,
int32_t nbInputs) const noexcept override;
bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept override;
// void attachToContext(cudnnContext*,
// cublasContext*,
// IGpuAllocator*) noexcept override;
// void detachFromContext() noexcept override;
IPluginV2Ext* clone() const noexcept override;
// IPluginV2IOExt methods
void configurePlugin(nvinfer1::PluginTensorDesc const* in,
int32_t nb_input,
nvinfer1::PluginTensorDesc const* out,
int32_t nb_output) noexcept override;
bool supportsFormatCombination(int32_t pos,
nvinfer1::PluginTensorDesc const* inOut,
int32_t nb_inputs,
int32_t nb_outputs) const noexcept override;
private:
bool ceil_mode_;
PoolType pool_type_;
bool adaptive_;
bool exclusive_;
std::vector<int> ksize_;
std::vector<int> strides_;
std::vector<int> paddings_;
std::vector<int> real_paddings_;
std::vector<int> input_shape_;
std::vector<int> output_shape_;
private:
nvinfer1::Dims input_dims_;
nvinfer1::DataType data_type_;
nvinfer1::PluginFormat data_format_;
std::string namespace_;
};
class PoolPluginCreator : public nvinfer1::IPluginCreator {
public:
const char* getPluginName() const noexcept override { return "pool_plugin"; }
const char* getPluginVersion() const noexcept override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override {
return &field_collection_;
}
nvinfer1::IPluginV2* createPlugin(
const char* name,
const nvinfer1::PluginFieldCollection* fc) noexcept override;
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serialData,
size_t serialLength) noexcept override;
void setPluginNamespace(const char* plugin_namespace) noexcept override {
plugin_namespace_ = plugin_namespace;
}
const char* getPluginNamespace() const noexcept override {
return plugin_namespace_.c_str();
}
private:
std::string plugin_namespace_;
std::string plugin_name_;
nvinfer1::PluginFieldCollection field_collection_{0, nullptr};
};
REGISTER_TRT_PLUGIN(PoolPluginCreator);
} // namespace plugin
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <math.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
const char* model_input = "input_0";
const char* model_output = "output_0";
const char* model_output2 = "output_1";
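// Builds a small network: input -> sigmoid -> split plugin that splits the
// 3-channel activation into a 1-channel and a 2-channel output, so the tests
// can exercise both a plugin layer and multiple outputs.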
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::IActivationLayer* act =
network->addActivation(*data, nvinfer1::ActivationType::kSIGMOID);
CHECK_NOTNULL(act);
auto* act_out = act->getOutput(0);
std::vector<int> output_length{1, 2};
int axis;
nvinfer1::IPluginV2Layer* split_layer;
if (is_static_shape) {
axis = 0;
paddle::inference::tensorrt::plugin::SplitPlugin plugin(
axis, output_length, false);
split_layer = network->addPluginV2(&act_out, 1, plugin);
} else {
axis = 1;
paddle::inference::tensorrt::plugin::SplitPluginDynamic plugin(
axis, output_length, false);
split_layer = network->addPluginV2(&act_out, 1, plugin);
}
split_layer->getOutput(0)->setName(model_output);
split_layer->getOutput(1)->setName(model_output2);
network->markOutput(*split_layer->getOutput(0));
network->markOutput(*split_layer->getOutput(1));
return network;
}
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructFCNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::Weights kernel_weights;
kernel_weights.type = nvinfer1::DataType::kFLOAT;
kernel_weights.count = 7840;
std::vector<float> weight_data(kernel_weights.count);
for (size_t i = 0; i < weight_data.size(); ++i) {
weight_data[i] = i % 255 * 0.02f;
}
kernel_weights.values = weight_data.data();
auto* layer = network->addFullyConnected(
*data, 10, kernel_weights, nvinfer1::Weights{});
CHECK_NOTNULL(layer);
auto* out = layer->getOutput(0);
out->setName(model_output);
network->markOutput(*out);
return network;
}
TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructConvNetwork(
nvinfer1::IBuilder* builder, nvinfer1::Dims dims, bool is_static_shape) {
TrtUniquePtr<nvinfer1::INetworkDefinition> network;
if (is_static_shape) {
network.reset(builder->createNetworkV2(0U));
} else {
auto networkFlags =
1U << static_cast<uint32_t>(
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
network.reset(builder->createNetworkV2(networkFlags));
}
nvinfer1::ITensor* data =
network->addInput(model_input, nvinfer1::DataType::kFLOAT, dims);
CHECK_NOTNULL(data);
nvinfer1::Weights kernel_weights, bias_weights;
kernel_weights.type = nvinfer1::DataType::kFLOAT;
bias_weights.type = nvinfer1::DataType::kFLOAT;
kernel_weights.count = 81;
bias_weights.count = 3;
std::vector<float> weight_data(kernel_weights.count);
for (size_t i = 0; i < weight_data.size(); ++i) {
weight_data[i] = i * 0.02f;
}
std::vector<float> bias_data(bias_weights.count);
for (size_t i = 0; i < bias_data.size(); ++i) {
bias_data[i] = i * 0.5f;
}
kernel_weights.values = weight_data.data();
bias_weights.values = bias_data.data();
nvinfer1::Dims ksize;
ksize.nbDims = 2;
ksize.d[0] = 3;
ksize.d[1] = 3;
auto* layer =
network->addConvolutionNd(*data, 3, ksize, kernel_weights, bias_weights);
CHECK_NOTNULL(layer);
auto* out = layer->getOutput(0);
out->setName(model_output);
network->markOutput(*out);
return network;
}
// sigmoid(x) = 1 / (1 + exp(-x))
inline float sigmoid(float x) { return 1.f / (1.f + exp(-1 * x)); }
TEST(trt, run_fc_static) {
TrtEngine engine(0);
auto net = ConstructFCNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims3{1, 28, 28}, true);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 1024;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 1;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 1, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 1 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
cudaStreamSynchronize(context.stream());
}
TEST(trt, run_conv_static) {
TrtEngine engine(0);
auto net = ConstructConvNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims3{3, 28, 28}, true);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 1024;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 1;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
cudaStreamSynchronize(context.stream());
}
TEST(trt, run_static) {
TrtEngine static_trt_engine(0);
auto net = ConstructNetwork(
static_trt_engine.GetTrtBuilder(), nvinfer1::Dims3{3, 28, 28}, true);
BuildOptions static_build_options;
static_build_options.max_batch = 4;
static_trt_engine.Build(std::move(net), static_build_options);
InferenceOptions inference_options;
inference_options.batch = 2;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 28, 28}));
phi::DenseTensor input;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 28 * 28, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
static_trt_engine.PrepareOutputHandle("output_0");
static_trt_engine.PrepareOutputHandle("output_1");
static_trt_engine.SetUpInference(inference_options, inputs);
static_trt_engine.GetEngineInfo();
static_trt_engine.Run(context);
phi::DenseTensor* output0 = static_trt_engine.GetOutput("output_0");
phi::DenseTensor* output1 = static_trt_engine.GetOutput("output_1");
std::vector<float> output_data1(inference_options.batch * 1 * 28 * 28, 0);
std::vector<float> output_data2(inference_options.batch * 2 * 28 * 28, 0);
paddle::memory::Copy(phi::CPUPlace(),
output_data1.data(),
place,
output0->data<float>(),
sizeof(float) * output_data1.size(),
context.stream());
paddle::memory::Copy(phi::CPUPlace(),
output_data2.data(),
place,
output1->data<float>(),
sizeof(float) * output_data2.size(),
context.stream());
cudaStreamSynchronize(context.stream());
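// The input is laid out as NCHW ({batch, 3, 28, 28}); recover (n, c, h, w)
// from the flat index i. The split layer routes channel 0 to output_0 and
// channels 1-2 to output_1, and every element should equal sigmoid(input).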
for (size_t i = 0; i < host_data.size(); ++i) {
int w = i % 28;
int h = (i / 28) % 28;
int c = i / (28 * 28) % 3;
int n = i / (28 * 28 * 3);
if (c == 0) {
CHECK_NEAR(
sigmoid(host_data[i]), output_data1[n * 28 * 28 + h * 28 + w], 1e-5);
} else {
CHECK_NEAR(sigmoid(host_data[i]),
output_data2[n * 28 * 28 * 2 + (c - 1) * 28 * 28 + h * 28 + w],
1e-5);
}
}
}
TEST(trt, run_dynamic) {
TrtEngine engine(0);
auto net = ConstructNetwork(
engine.GetTrtBuilder(), nvinfer1::Dims4{-1, 3, -1, -1}, false);
BuildOptions build_options;
build_options.max_batch = 4;
build_options.workspace = 32;
// build_options.fp16 = true;
std::vector<int32_t> min_shape{1, 3, 16, 16};
std::vector<int32_t> opt_shape{2, 3, 28, 28};
std::vector<int32_t> max_shape{4, 3, 28, 28};
build_options.shapes[model_input][0] = min_shape;
build_options.shapes[model_input][1] = opt_shape;
build_options.shapes[model_input][2] = max_shape;
engine.Build(std::move(net), build_options);
InferenceOptions inference_options;
inference_options.batch = 2;
phi::GPUPlace place;
phi::GPUContext context;
context.PartialInitWithoutAllocator();
context.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(place, context.stream())
.get());
context.PartialInitWithAllocator();
phi::DenseTensorMeta meta(
phi::DataType::FLOAT32,
phi::make_ddim({inference_options.batch, 3, 16, 16}));
phi::DenseTensor input, output, output2;
input.set_meta(meta);
context.Alloc<float>(&input, input.numel() * sizeof(float));
std::vector<float> host_data(inference_options.batch * 3 * 16 * 16, 0);
for (size_t i = 0; i < host_data.size(); ++i) {
host_data[i] = i % 100 * 0.016f;
}
paddle::memory::Copy(place,
input.data<float>(),
phi::CPUPlace(),
host_data.data(),
sizeof(float) * host_data.size(),
context.stream());
std::unordered_map<std::string, phi::DenseTensor*> inputs;
inputs.emplace(std::make_pair(model_input, &input));
engine.PrepareOutputHandle("output_0");
engine.PrepareOutputHandle("output_1");
engine.SetUpInference(inference_options, inputs);
engine.GetEngineInfo();
engine.Run(context);
phi::DenseTensor* output0 = engine.GetOutput("output_0");
phi::DenseTensor* output1 = engine.GetOutput("output_1");
std::vector<float> output_data1(inference_options.batch * 1 * 16 * 16, 0);
std::vector<float> output_data2(inference_options.batch * 2 * 16 * 16, 0);
paddle::memory::Copy(phi::CPUPlace(),
output_data1.data(),
place,
output0->data<float>(),
sizeof(float) * output_data1.size(),
context.stream());
paddle::memory::Copy(phi::CPUPlace(),
output_data2.data(),
place,
output1->data<float>(),
sizeof(float) * output_data2.size(),
context.stream());
cudaStreamSynchronize(context.stream());
for (size_t i = 0; i < host_data.size(); ++i) {
int w = i % 16;
int h = (i / 16) % 16;
int c = i / (16 * 16) % 3;
int n = i / (16 * 16 * 3);
if (c == 0) {
CHECK_NEAR(
sigmoid(host_data[i]), output_data1[n * 16 * 16 + h * 16 + w], 1e-5);
} else {
CHECK_NEAR(sigmoid(host_data[i]),
output_data2[n * 16 * 16 * 2 + (c - 1) * 16 * 16 + h * 16 + w],
1e-5);
}
}
}
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include "paddle/phi/backends/dynload/tensorrt.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
// The following two APIs are implemented inline in TensorRT's headers and
// cannot be loaded from the dynamic library, so we provide our own wrappers
// that call the corresponding *_INTERNAL symbols from the dynamic library.
static nvinfer1::IBuilder* createInferBuilder(
nvinfer1::ILogger& logger) { // NOLINT
return static_cast<nvinfer1::IBuilder*>(
::phi::dynload::createInferBuilder_INTERNAL(&logger,
NV_TENSORRT_VERSION));
}
static nvinfer1::IRuntime* createInferRuntime(
nvinfer1::ILogger& logger) { // NOLINT
return static_cast<nvinfer1::IRuntime*>(
::phi::dynload::createInferRuntime_INTERNAL(&logger,
NV_TENSORRT_VERSION));
}
TrtEngine::TrtEngine(int device_id) : device_id_(device_id) {
FreshDeviceId();
logger_.reset(new TrtLogger());
builder_.reset(createInferBuilder(logger_->GetTrtLogger()));
::phi::dynload::initLibNvInferPlugins(&logger_->GetTrtLogger(), "");
}
nvinfer1::IBuilder* TrtEngine::GetTrtBuilder() {
CHECK_NOTNULL(builder_);
return builder_.get();
}
void TrtEngine::Build(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build_options) {
FreshDeviceId();
ModelToBuildEnv(std::move(network), build_options);
CHECK_NOTNULL(engine_);
}
bool TrtEngine::ModelToBuildEnv(
TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build) {
CHECK_NOTNULL(builder_);
std::swap(network, network_);
CHECK_NOTNULL(network_);
// ModelToNetwork(network_, logger);
NetworkToEngine(build);
return true;
}
bool TrtEngine::NetworkToEngine(const BuildOptions& build) {
TrtUniquePtr<IBuilderConfig> config{builder_->createBuilderConfig()};
CHECK_NOTNULL(config);
CHECK(SetupNetworkAndConfig(build, *network_, *config));
#if IS_TRT_VERSION_LT(8000)
engine_.reset(builder_->buildEngineWithConfig(*network_, *config));
#else
serialized_engine_.reset(
builder_->buildSerializedNetwork(*network_, *config));
CHECK_NOTNULL(serialized_engine_);
TrtUniquePtr<IRuntime> runtime{createInferRuntime(logger_->GetTrtLogger())};
CHECK_NOTNULL(runtime);
engine_.reset(runtime->deserializeCudaEngine(serialized_engine_->data(),
serialized_engine_->size()));
CHECK_NOTNULL(engine_);
#endif
return true;
}
bool TrtEngine::SetupNetworkAndConfig(const BuildOptions& build,
INetworkDefinition& network,
IBuilderConfig& config) {
builder_->setMaxBatchSize(build.max_batch);
// TODO(wilber): handle one engine - multi execution context case.
IOptimizationProfile* profile{nullptr};
if (!build.shapes.empty()) {
profile = builder_->createOptimizationProfile();
CHECK_NOTNULL(profile);
}
// Set formats and data types of inputs
for (int32_t i = 0; i < network.getNbInputs(); ++i) {
auto* input = network.getInput(i);
if (!build.input_formats.empty()) {
input->setType(build.input_formats[i].first);
input->setAllowedFormats(build.input_formats[i].second);
} else {
switch (input->getType()) {
case DataType::kINT32:
case DataType::kBOOL:
case DataType::kHALF:
// Leave these as is.
break;
case DataType::kFLOAT:
case DataType::kINT8:
// User did not specify a floating-point format. Default to kFLOAT.
input->setType(DataType::kFLOAT);
break;
}
input->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
}
if (profile) {
Dims dims = input->getDimensions();
// TODO(wilber): shape tensor.
const bool is_dynamic_input = std::any_of(
dims.d, dims.d + dims.nbDims, [](int dim) { return dim == -1; });
if (is_dynamic_input) {
is_dynamic_shape_ = true;
auto shape = build.shapes.find(input->getName());
// If no shape is provided
if (shape == build.shapes.end()) {
        // TODO(wilber): add information.
CHECK(false);
}
LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
std::vector<int> profile_dims{};
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kMIN)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kMIN,
VecToDims(profile_dims)));
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kOPT)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kOPT,
VecToDims(profile_dims)));
profile_dims =
shape->second[static_cast<size_t>(OptProfileSelector::kMAX)];
CHECK(profile->setDimensions(input->getName(),
OptProfileSelector::kMAX,
VecToDims(profile_dims)));
}
}
}
if (profile && is_dynamic_shape_) {
CHECK(profile->isValid()); // Required optimization profile is invalid
CHECK_NE(config.addOptimizationProfile(profile), -1);
}
// Set formats and data types of outputs
for (int32_t i = 0, n = network.getNbOutputs(); i < n; i++) {
auto* output = network.getOutput(i);
if (!build.output_formats.empty()) {
// int outputFormatIndex = broadcastOutputFormats ? 0 : i;
output->setType(build.output_formats[i].first);
output->setAllowedFormats(build.output_formats[i].second);
} else {
output->setAllowedFormats(1U << static_cast<int>(TensorFormat::kLINEAR));
}
}
config.setMaxWorkspaceSize(static_cast<size_t>(build.workspace) << 20);
if (build.fp16) {
config.setFlag(BuilderFlag::kFP16);
bool support_fp16 = builder_->platformHasFastFp16();
if (support_fp16) {
LOG(INFO) << "Run INFRT-TRT FP16 mode";
} else {
LOG(INFO) << "You specify FP16 mode, but the hardware do not support "
"FP16 speed up, use FP32 instead.";
}
}
if (build.tf32) {
config.setFlag(BuilderFlag::kTF32);
bool support_tf32 = builder_->platformHasTf32();
if (support_tf32) {
LOG(INFO) << "Run INFRT-TRT TF32 mode";
} else {
LOG(INFO) << "You specify TF32 mode, but the hardware do not support "
"TF32 speed up, use FP32 instead.";
}
}
// TODO(wilber): other precision.
// TODO(wilber): precision config.
switch (build.precision_constraints) {
case PrecisionConstraints::kNONE:
// It's the default for TensorRT.
break;
#if IS_TRT_VERSION_GE(8200)
case PrecisionConstraints::kOBEY:
config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS);
break;
case PrecisionConstraints::kPREFER:
config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
break;
#endif // IS_TRT_VERSION_GE(8200)
default:
break;
}
// TODO(TRT): DLA config.
// TODO(TRT): int8 config.
// TODO(TRT): support int8
if (build.int8) {
assert(false);
config.setFlag(BuilderFlag::kINT8);
bool support_int8 = builder_->platformHasFastInt8();
if (support_int8) {
LOG(INFO) << "Run INFRT-TRT FP16 mode";
}
}
// TODO(TRT): calib config.
// TODO(TRT): sparse config.
return true;
}
void TrtEngine::PrepareOutputHandle(const std::string& out_name) {
::Tensor t;
outputs_.emplace(out_name, t);
}
::Tensor* TrtEngine::GetOutput(const std::string& name) {
return &outputs_[name];
}
size_t TrtEngine::GetOutputNum() const { return outputs_.size(); }
bool TrtEngine::SetUpInference(
const InferenceOptions& inference,
const std::unordered_map<std::string, ::Tensor*>& inputs) {
// TODO(wilber): now only create one exec_context
FreshDeviceId();
CHECK(engine_ != nullptr);
nvinfer1::IExecutionContext* ec = engine_->createExecutionContext();
CHECK(ec != nullptr);
contexts_.emplace_back(ec);
bindings_.emplace_back(new Bindings());
for (const auto& it : inputs) {
const int bind_index = engine_->getBindingIndex(it.first.c_str());
bindings_.front()->AddBinding(
bind_index, it.first, true, it.second, nvinfer1::DataType::kFLOAT);
}
for (auto& it : outputs_) {
const int bind_index = engine_->getBindingIndex(it.first.c_str());
bindings_.front()->AddBinding(
bind_index, it.first, false, &it.second, nvinfer1::DataType::kFLOAT);
}
return true;
}
void TrtEngine::Run(const ::phi::GPUContext& ctx) {
if (is_dynamic_shape_) {
DynamicRun(ctx);
} else {
StaticRun(ctx);
}
}
void TrtEngine::StaticRun(const ::phi::GPUContext& ctx) {
const int num_bindings = engine_->getNbBindings();
std::vector<void*> buffers(num_bindings, nullptr);
int runtime_batch = -1;
auto input_binds = bindings_.front()->GetInputBindings();
for (auto bind : input_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
buffers[bind_index] =
const_cast<void*>(static_cast<const void*>(bind.buffer->data<float>()));
if (runtime_batch != -1) {
CHECK_EQ(runtime_batch,
::phi::vectorize<int64_t>(bind.buffer->dims())[0]);
}
runtime_batch = bind.buffer->dims()[0];
}
auto output_binds = bindings_.front()->GetOutputBindings();
for (auto bind : output_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
std::vector<int32_t> ddim;
auto dims = engine_->getBindingDimensions(bind_index);
CHECK_NE(runtime_batch, -1) << "runtime_batch should not be -1.";
ddim.push_back(runtime_batch);
for (int i = 0; i < dims.nbDims; ++i) {
ddim.push_back(dims.d[i]);
}
bind.buffer->Resize(::phi::make_ddim(ddim));
// TODO(wilber): now only support float output.
ctx.Alloc<float>(bind.buffer, sizeof(float) * bind.buffer->numel());
buffers[bind_index] = static_cast<void*>(bind.buffer->data<float>());
}
contexts_.front()->enqueue(
runtime_batch, buffers.data(), ctx.stream(), nullptr);
}
void TrtEngine::DynamicRun(const ::phi::GPUContext& ctx) {
const int num_bindings = engine_->getNbBindings();
std::vector<void*> buffers(num_bindings, nullptr);
auto input_binds = bindings_.front()->GetInputBindings();
for (auto bind : input_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
buffers[bind_index] =
const_cast<void*>(static_cast<const void*>(bind.buffer->data<float>()));
nvinfer1::Dims trt_dims;
trt_dims.nbDims = bind.buffer->dims().size();
for (int i = 0; i < trt_dims.nbDims; ++i) {
trt_dims.d[i] = bind.buffer->dims()[i];
}
contexts_.front()->setBindingDimensions(bind_index, trt_dims);
}
CHECK(contexts_.front()->allInputDimensionsSpecified());
auto output_binds = bindings_.front()->GetOutputBindings();
for (auto bind : output_binds) {
const int bind_index = engine_->getBindingIndex(bind.name.c_str());
auto dims = contexts_.front()->getBindingDimensions(bind_index);
std::vector<int32_t> ddim(dims.nbDims);
for (int i = 0; i < dims.nbDims; ++i) {
ddim[i] = dims.d[i];
}
bind.buffer->Resize(::phi::make_ddim(ddim));
ctx.Alloc<float>(bind.buffer, sizeof(float) * bind.buffer->numel());
buffers[bind_index] = static_cast<void*>(bind.buffer->data<float>());
}
contexts_.front()->enqueueV2(buffers.data(), ctx.stream(), nullptr);
}
void TrtEngine::FreshDeviceId() {
int count;
cudaGetDeviceCount(&count);
CHECK_LT(device_id_, count);
::phi::backends::gpu::SetDeviceId(device_id_);
}
void TrtEngine::GetEngineInfo() {
#if IS_TRT_VERSION_GE(8200)
LOG(INFO) << "====== engine info ======";
std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
engine_->createEngineInspector());
infer_inspector->setExecutionContext(contexts_.front().get());
LOG(INFO) << infer_inspector->getEngineInformation(
nvinfer1::LayerInformationFormat::kONELINE);
LOG(INFO) << "====== engine info end ======";
#else
LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
#endif
}
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/infrt/backends/tensorrt/trt_utils.h"
#include "paddle/phi/backends/dynload/tensorrt.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
using namespace nvinfer1; // NOLINT
// The TensorRT programming model is as follows:
// 1. The build phase:
// IBuilder* builder = createInferBuilder(&logger_);
// 2. Create a network definition:
// INetworkDefinition* network = builder->createNetworkV2(...);
// 3. Build network:
// network->AddLayer(...)
// 4. Configure network:
// IBuilderConfig* config = builder->createBuilderConfig();
// config->setMaxWorkspaceSize(...)
// 5. Build a serialized plan and deserialize it into a cuda engine:
// IHostMemory* serialized_model = builder->buildSerializedNetwork(...);
// IRuntime* runtime = createInferRuntime(&logger_);
// ICudaEngine* engine = runtime->deserializeCudaEngine(...);
// 6. Get execution context:
// IExecutionContext* exec_context = engine->createExecutionContext();
// 7. Set input data:
// int32_t input_index = engine->getBindingIndex("input");
// int32_t output_index = engine->getBindingIndex("output");
// void* buffers[2];
// buffers[input_index] = input_buffer;
// buffers[output_index] = output_buffer;
// 8. Perform inference:
// exec_context->enqueueV2(buffers, stream, nullptr);
//
// We have encapsulated this logic; please use the following programming model
// (a fuller usage sketch follows the class declaration below).
//
// TrtEngine trt_engine;
// trt_engine.Build(...);
// trt_engine.SetUpInference(...);
// trt_engine.Run(...);
class TrtEngine {
public:
explicit TrtEngine(int device_id = 0);
TrtEngine(const TrtEngine&) = delete;
TrtEngine& operator=(const TrtEngine&) = delete;
TrtEngine(TrtEngine&&) = default;
TrtEngine& operator=(TrtEngine&&) = default;
nvinfer1::IBuilder* GetTrtBuilder();
// TODO(wilber): Modify signature after infrt-trt ready.
void Build(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build_options);
// TODO(wilber): Modify signature after infrt-trt ready.
void Run(const ::phi::GPUContext& ctx);
// TODO(wilber): How to support multiple execution contexts?
bool SetUpInference(const InferenceOptions& inference,
const std::unordered_map<std::string, ::Tensor*>& inputs);
void GetEngineInfo();
void PrepareOutputHandle(const std::string& out_name);
// TODO(wilber): The output tensor names are: output_0, output_1, ...
::Tensor* GetOutput(const std::string&);
size_t GetOutputNum() const;
private:
void FreshDeviceId();
bool SetupNetworkAndConfig(const BuildOptions& build,
INetworkDefinition& network, // NOLINT
IBuilderConfig& config); // NOLINT
bool NetworkToEngine(const BuildOptions& build);
bool ModelToBuildEnv(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
const BuildOptions& build);
void StaticRun(const ::phi::GPUContext& ctx);
void DynamicRun(const ::phi::GPUContext& ctx);
private:
std::unique_ptr<TrtLogger> logger_{nullptr};
TrtUniquePtr<nvinfer1::IBuilder> builder_{nullptr};
TrtUniquePtr<INetworkDefinition> network_{nullptr};
std::unique_ptr<IHostMemory> serialized_engine_{nullptr};
TrtUniquePtr<nvinfer1::ICudaEngine> engine_{nullptr};
std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> contexts_;
std::vector<std::unique_ptr<Bindings>> bindings_;
int device_id_{0};
bool is_dynamic_shape_{false};
std::unordered_map<std::string, ::Tensor> outputs_;
};
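// A fuller usage sketch (illustrative only, distilled from the unit tests in
// this directory; the network-construction helper and the tensor names
// "input_0"/"output_0" are assumptions, not part of this header):
//
//   TrtEngine engine(/*device_id=*/0);
//   auto net = ConstructNetwork(engine.GetTrtBuilder(), ...);  // hypothetical
//   BuildOptions build_options;
//   build_options.max_batch = 4;
//   engine.Build(std::move(net), build_options);
//
//   InferenceOptions inference_options;
//   inference_options.batch = 2;
//   std::unordered_map<std::string, ::Tensor*> inputs{{"input_0", &input}};
//   engine.PrepareOutputHandle("output_0");
//   engine.SetUpInference(inference_options, inputs);
//   engine.Run(gpu_context);                       // a ::phi::GPUContext
//   ::Tensor* out = engine.GetOutput("output_0");  // device memory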
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <string>
#include <unordered_map>
#include <vector>
namespace infrt {
namespace backends {
namespace tensorrt {
// Build default params
constexpr int32_t max_batch_not_provided{0};
constexpr int32_t default_workspace{16};
// Inference default params
constexpr int32_t default_batch{1};
constexpr int32_t batch_not_provided{0};
enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
using ShapeRange =
std::array<std::vector<int32_t>,
nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
struct BuildOptions {
// Set max batch size.
int32_t max_batch{max_batch_not_provided};
// Set workspace size in megabytes (default = 16)
int32_t workspace{default_workspace};
// Enable tf32 precision, in addition to fp32 (default = disabled)
bool tf32{false};
// Enable fp16 precision, in addition to fp32 (default = disabled)
bool fp16{false};
// Enable int8 precision, in addition to fp32 (default = disabled)
bool int8{false};
// Control precision constraints. (default = none)
  // Precision constraints: none, obey, prefer
// none = no constraints
// prefer = meet precision constraints if possible
// obey = meet precision constraints or fail otherwise
PrecisionConstraints precision_constraints{PrecisionConstraints::kNONE};
// Save the serialized engine.
bool save{false};
// Load a serialized engine.
bool load{false};
// Build with dynamic shapes using a profile with the min, max and opt shapes
// provided
std::unordered_map<std::string, ShapeRange> shapes;
// Type and format of each of the input tensors (default = all inputs in
// fp32:chw)
std::vector<IOFormat> input_formats;
// Type and format of each of the output tensors (default = all outputs in
// fp32:chw)
std::vector<IOFormat> output_formats;
};
struct InferenceOptions {
int32_t batch{batch_not_provided};
std::unordered_map<std::string, std::vector<int32_t>> shapes;
};
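// A minimal configuration sketch (illustrative only, mirroring the dynamic
// shape unit test; "input_0" is an assumed tensor name). The three ShapeRange
// slots are indexed by nvinfer1::OptProfileSelector, i.e. kMIN, kOPT, kMAX:
//   BuildOptions build_options;
//   build_options.max_batch = 4;
//   build_options.workspace = 32;  // MiB
//   build_options.shapes["input_0"][0] = {1, 3, 16, 16};  // kMIN
//   build_options.shapes["input_0"][1] = {2, 3, 28, 28};  // kOPT
//   build_options.shapes["input_0"][2] = {4, 3, 28, 28};  // kMAX
//
//   InferenceOptions inference_options;
//   inference_options.batch = 2;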
} // namespace tensorrt
} // namespace backends
} // namespace infrt
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include <glog/logging.h>
#include <algorithm>
#include <cassert>
#include <functional>
#include <memory>
#include <unordered_map>
#include "paddle/phi/core/dense_tensor.h"
namespace infrt {
namespace backends {
namespace tensorrt {
#define IS_TRT_VERSION_GE(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)
#define IS_TRT_VERSION_LT(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) < version)
#define TRT_VERSION \
NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD
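// For example (illustrative arithmetic only): with TensorRT 8.2.1.8,
// TRT_VERSION = 8 * 1000 + 2 * 100 + 1 * 10 + 8 = 8218, so
// IS_TRT_VERSION_GE(8200) holds and IS_TRT_VERSION_LT(8000) does not.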
inline nvinfer1::Dims VecToDims(const std::vector<int>& vec) {
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int>(vec.size()) > limit) {
assert(false);
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims;
dims.nbDims = std::min(static_cast<int>(vec.size()), limit);
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
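// A minimal usage sketch of VecToDims (illustrative only):
//   std::vector<int> shape{1, 3, 28, 28};
//   nvinfer1::Dims dims = VecToDims(shape);
//   // dims.nbDims == 4 and dims.d[0..3] == {1, 3, 28, 28}.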
template <typename T>
struct TrtDestroyer {
void operator()(T* t) { t->destroy(); }
};
template <typename T>
using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
class TrtLogger : public nvinfer1::ILogger {
public:
void log(nvinfer1::ILogger::Severity severity,
const char* msg) noexcept override {
switch (severity) {
case Severity::kVERBOSE:
VLOG(3) << msg;
break;
case Severity::kINFO:
VLOG(2) << msg;
break;
case Severity::kWARNING:
LOG(WARNING) << msg;
break;
case Severity::kINTERNAL_ERROR:
case Severity::kERROR:
LOG(ERROR) << msg;
break;
default:
break;
}
}
nvinfer1::ILogger& GetTrtLogger() noexcept { return *this; }
~TrtLogger() override = default;
};
struct Binding {
bool is_input{false};
nvinfer1::DataType data_type{nvinfer1::DataType::kFLOAT};
::Tensor* buffer{nullptr};
std::string name;
};
class Bindings {
public:
Bindings() = default;
void AddBinding(int32_t b,
const std::string& name,
bool is_input,
::Tensor* buffer,
nvinfer1::DataType data_type) {
while (bindings_.size() <= static_cast<size_t>(b)) {
bindings_.emplace_back();
}
names_[name] = b;
bindings_[b].buffer = buffer;
bindings_[b].is_input = is_input;
bindings_[b].data_type = data_type;
bindings_[b].name = name;
}
std::vector<Binding> GetInputBindings() {
return GetBindings([](const Binding& b) -> bool { return b.is_input; });
}
std::vector<Binding> GetOutputBindings() {
return GetBindings([](const Binding& b) -> bool { return !b.is_input; });
}
std::vector<Binding> GetBindings() {
return GetBindings([](const Binding& b) -> bool { return true; });
}
std::vector<Binding> GetBindings(
std::function<bool(const Binding& b)> predicate) {
std::vector<Binding> bindings;
for (const auto& b : bindings_) {
if (predicate(b)) {
bindings.push_back(b);
}
}
return bindings;
}
private:
std::unordered_map<std::string, int32_t> names_;
std::vector<Binding> bindings_;
};
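// A minimal usage sketch of Bindings (illustrative only; `input` and `output`
// are assumed ::Tensor instances, and in the real code path the binding
// indices come from ICudaEngine::getBindingIndex):
//   Bindings bindings;
//   bindings.AddBinding(
//       0, "input_0", /*is_input=*/true, &input, nvinfer1::DataType::kFLOAT);
//   bindings.AddBinding(
//       1, "output_0", /*is_input=*/false, &output, nvinfer1::DataType::kFLOAT);
//   auto ins = bindings.GetInputBindings();    // one element: "input_0"
//   auto outs = bindings.GetOutputBindings();  // one element: "output_0"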
} // namespace tensorrt
} // namespace backends
} // namespace infrt
core_gather_headers()
set(core_includes
"${core_includes};infrt/common/dtype.def"
CACHE INTERNAL "")
gather_srcs(
infrt_src
SRCS
dtype.cc
global.cc
target.cc
type.cc
shared.cc
object.cc
string.cc
buffer.cc
memory.cc)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/buffer.h"
#include <stdarg.h>
#include <stdio.h>
#include <cmath>
namespace infrt {
void Buffer::Resize(uint32_t size) {
if (size_ > 0) {
Free();
size_ = 0;
}
if (size_ != size) {
data_.memory = reinterpret_cast<uint8_t*>(Malloc(size));
size_ = size;
}
}
void Buffer::Resize(uint32_t alignment, uint32_t size) {
if (size_ > 0) {
Free();
size_ = 0;
}
if (size_ != size) {
data_.memory = reinterpret_cast<uint8_t*>(AlignedAlloc(alignment, size));
size_ = size;
}
}
void Buffer::SetTarget(const infrt::common::Target& target) {
target_ = target;
memory_mng_cache_ = MemoryManager::Global().RetrieveSafely(target_.arch);
}
void Buffer::ResizeLazy(uint32_t size) {
if (size <= size_) return;
Resize(size);
}
void Buffer::ResizeLazy(uint32_t alignment, uint32_t size) {
if (size <= size_) return;
Resize(alignment, size);
}
void Buffer::Resize(uint32_t size, const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
Resize(size);
}
void Buffer::Resize(uint32_t alignment,
uint32_t size,
const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
Resize(alignment, size);
}
void Buffer::ResizeLazy(uint32_t size, const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
ResizeLazy(size);
}
void Buffer::ResizeLazy(uint32_t alignment,
uint32_t size,
const infrt::common::Target& target) {
if (target.arch != target_.arch) {
Free();
SetTarget(target);
}
ResizeLazy(alignment, size);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/memory.h"
#include "paddle/infrt/common/target.h"
namespace infrt {
#ifdef __cplusplus
extern "C" {
#endif
#define INFRT_ALWAYS_INLINE __attribute__((always_inline)) inline
//! Code for the primitive types supported in INFRT.
typedef enum infrt_type_code_t {
infrt_type_unk = -1, //! Unknown type
infrt_type_int = 0, //! signed int
infrt_type_uint = 1, //! unsigned int
infrt_type_float = 2, //! floating point
infrt_type_handle = 3 //! void*
} infrt_type_code_t;
#ifndef INFRT_ATTRIBUTE_ALIGN
#define INFRT_ATTRIBUTE_ALIGN(n) __attribute__((aligned(n)))
#endif
/**
 * A runtime tag for a type in the INFRT system.
*/
typedef struct infrt_type_t {
#if __cplusplus >= 201103L
INFRT_ATTRIBUTE_ALIGN(1) infrt_type_code_t code;
#else
uint8_t code;
#endif
//! Number of bits.
uint8_t bits;
//! Number of elements in a vector, 1 for scalar.
uint16_t lanes;
  //! Number of '*'s, e.g. for `float*` num_asterisks is 1, and for `float**`
  //! it is 2.
uint8_t num_asterisks{0};
#ifdef __cplusplus
INFRT_ALWAYS_INLINE infrt_type_t()
: code(infrt_type_int), bits(0), lanes(0) {}
INFRT_ALWAYS_INLINE infrt_type_t(infrt_type_code_t code,
uint8_t bits,
uint16_t lanes = 1,
uint8_t num_asterisks = 0)
: code(code), bits(bits), lanes(lanes), num_asterisks(num_asterisks) {}
INFRT_ALWAYS_INLINE bool operator==(const infrt_type_t& other) const {
return code == other.code && bits == other.bits && lanes == other.lanes;
}
INFRT_ALWAYS_INLINE bool operator!=(const infrt_type_t& other) const {
return !(*this == other);
}
INFRT_ALWAYS_INLINE uint16_t bytes() const { return (bits + 7) / 8; }
#endif // __cplusplus
} infrt_type_t;
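// A minimal usage sketch (illustrative only):
//   infrt_type_t f32(infrt_type_float, /*bits=*/32);         // scalar float
//   // f32.bytes() == 4
//   infrt_type_t f32x4(infrt_type_float, 32, /*lanes=*/4);   // 4-lane vector
//   infrt_type_t f32_ptr(infrt_type_float, 32, /*lanes=*/1,
//                        /*num_asterisks=*/1);               // float*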
//! Helps to define the size of a dimension. Due to the polyhedral
//! representation, there is no need to record the extent or the min
//! (which defaults to 0).
typedef int infrt_dimension_t;
//! Helps to tell the kind of device.
typedef enum infrt_device_kind_t {
infrt_unk_device = -1, // Undefined device.
infrt_x86_device = 0, // X86 device
infrt_opencl_device = 1, // OpenCL device
infrt_arm_device = 2 // ARM device
} infrt_device_kind_t;
struct infrt_buffer_t;
/**
* All INFRT backends implementation should provide an interface to be used.
*/
struct infrt_device_interface_impl_t;
struct infrt_device_interface_t {
int (*malloc)(void* context, struct infrt_buffer_t* buf);
int (*free)(void* context, struct infrt_buffer_t* buf);
int (*sync)(void* context, struct infrt_buffer_t* buf);
int (*release)(void* context,
const struct infrt_device_interface_t* device_interface);
int (*copy_to_host)(void* context, struct infrt_buffer_t* buf);
int (*copy_to_device)(void* context, struct infrt_buffer_t* buf);
int (*buffer_copy)(void* context,
struct infrt_buffer_t* src,
struct infrt_buffer_t* dst);
struct infrt_device_interface_impl_t* impl;
};
//! The raw representation of a buffer, used in the generated code/lib.
#define INFRT_BUFFER_MAX_DIMS 8
typedef struct infrt_buffer_t {
//! Tell which kind of device this buffer locates.
infrt_device_kind_t device;
//! The interface used to operate on device.
const struct infrt_device_interface_t* device_interface;
//! A pointer to the memory in host.
uint8_t* memory;
//! Extra flags.
uint64_t flag;
//! Data type.
infrt_type_t type;
//! Number of dimensions.
int32_t dimensions;
infrt_dimension_t dims[INFRT_BUFFER_MAX_DIMS];
//! Allocate and deallocate lazily, default true.
char lazy;
  //! The actual memory size (in bytes).
uint64_t memory_size;
uint16_t align;
#ifdef __cplusplus
infrt_buffer_t()
: device(infrt_unk_device),
device_interface(NULL),
memory(NULL),
flag(0UL),
type(infrt_type_t()),
dimensions(0),
lazy(true),
memory_size(0),
align(0) {}
static void delete_(struct infrt_buffer_t* x) { delete x; }
~infrt_buffer_t() {}
// NOTE the buffer should be resized first.
static void alloc(struct infrt_buffer_t*);
  //! Set the shape of the buffer. NOTE this just records the shape; it does
  //! not allocate the memory.
INFRT_ALWAYS_INLINE void resize(const infrt_dimension_t* dims,
int dimensions) {
this->dimensions = dimensions;
memcpy(this->dims, dims, dimensions * sizeof(infrt_dimension_t));
}
INFRT_ALWAYS_INLINE uint64_t num_elements() const {
uint64_t res = 1;
for (int i = 0; i < dimensions; i++) {
res *= dims[i];
}
return res;
}
INFRT_ALWAYS_INLINE int device_sync(void* ctx = NULL) {
if (device_interface && device_interface->sync) {
return device_interface->sync(ctx, this);
}
return 0;
}
INFRT_ALWAYS_INLINE uint8_t* begin() const { return 0; }
INFRT_ALWAYS_INLINE uint8_t* end() const {
return memory + num_elements() * type.bytes();
}
#endif // __cplusplus
} infrt_buffer_t;
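// A minimal usage sketch (illustrative only); resize() only records the shape,
// the payload memory is allocated separately:
//   infrt_buffer_t buf;
//   buf.type = infrt_type_t(infrt_type_float, 32);
//   infrt_dimension_t dims[] = {2, 3};
//   buf.resize(dims, 2);
//   // buf.num_elements() == 6, so the payload needs
//   // buf.num_elements() * buf.type.bytes() == 24 bytes.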
#ifdef __cplusplus
struct infrt_device_interface_impl_t {
int (*malloc)(void* context, struct infrt_buffer_t* buf);
int (*free)(void* context, struct infrt_buffer_t* buf);
int (*sync)(void* context, struct infrt_buffer_t* buf);
int (*release)(void* context);
int (*copy_to_host)(void* context, struct infrt_buffer_t* buf);
int (*copy_to_device)(void* context, struct infrt_buffer_t* buf);
int (*buffer_copy)(void* context,
struct infrt_buffer_t* src,
struct infrt_buffer_t* dst);
};
// The device implementations
extern struct infrt_device_interface_t* infrt_x86_device_interface();
#endif // __cplusplus
#ifdef __cplusplus
} // extern "C"
#endif
#define INFRT_LOG(fmt, ...) \
do { \
fprintf(stderr, \
"%s:%d:%s(): " fmt, \
__FILE__, \
__LINE__, \
__func__, \
__VA_ARGS__); \
} while (0)
#define INFRT_CHECK(cond) \
if (!(cond)) { \
INFRT_LOG("check %s failed", #cond); \
abort(); \
}
/**
* Buffer helps to hold the memory, and offers a set of methods to help manage
* the memory.
*/
struct Buffer final {
Buffer() = default;
explicit Buffer(const common::Target& target) { SetTarget(target); }
  //! Resize the memory held by this buffer *exactly* to \p size.
void Resize(uint32_t size);
void Resize(uint32_t alignment, uint32_t size);
//! Lazily resize the memory.
void ResizeLazy(uint32_t size);
void ResizeLazy(uint32_t alignment, uint32_t size);
//! Resize the memory to \p size in target \p target.
void Resize(uint32_t size, const common::Target& target);
void Resize(uint32_t alignment, uint32_t size, const common::Target& target);
//! Lazily resize the memory to \p size in target \p target.
void ResizeLazy(uint32_t size, const common::Target& target);
void ResizeLazy(uint32_t alignment,
uint32_t size,
const common::Target& target);
void SetTarget(const common::Target& target);
const infrt_buffer_t* data() const { return &data_; }
infrt_buffer_t* data() { return &data_; }
//! Free all the memory owned by this buffer.
void Free() {
if (!data_.memory) return;
memory_mng_cache_->free(data_.memory);
}
private:
inline void* Malloc(uint32_t size) INFRT_RESULT_SHOULD_USE {
CHECK(memory_mng_cache_) << "Should set target first";
return memory_mng_cache_->malloc(size);
}
inline void* AlignedAlloc(uint32_t alignment,
uint32_t size) INFRT_RESULT_SHOULD_USE {
CHECK(memory_mng_cache_) << "Should set target first";
return memory_mng_cache_->aligned_alloc(alignment, size);
}
private:
infrt_buffer_t data_;
//! The place where this buffer locates.
common::Target target_;
//! Number of bytes of this buffer.
uint32_t size_{};
//! Hold the corresponding memory manager for speed.
MemoryInterface* memory_mng_cache_{};
};
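// A minimal usage sketch (illustrative only; assumes a host-side
// common::Target instance named `x86_target` whose arch is X86):
//   Buffer buf;
//   buf.SetTarget(x86_target);  // must be set before any allocation
//   buf.Resize(256);            // allocates exactly 256 bytes
//   buf.ResizeLazy(128);        // no-op, 128 <= current size
//   buf.ResizeLazy(512);        // grows the buffer to 512 bytes
//   buf.Free();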
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/shared.h"
#include "paddle/infrt/common/target.h"
#include "paddle/infrt/common/type.h"
namespace infrt {
// export some general concepts.
using common::make_shared;
using common::Object;
using common::ref_count;
using common::Shared;
// Type related.
using common::Bool;
using common::Float;
using common::Int;
using common::UInt;
using common::Void;
using common::type_of;
using common::Target;
using common::Type;
using common::UnkTarget;
template <typename T>
T& Reference(const T* x) {
return *const_cast<T*>(x);
}
static void CheckVarNameValid(const std::string& name) {
CHECK(!name.empty());
CHECK(name.find(' ') == std::string::npos && //
name.find('.') == std::string::npos && //
name.find('/') == std::string::npos && //
name.find('\t') == std::string::npos && //
name.find('\n') == std::string::npos && //
name.find('\r') == std::string::npos)
<< "Some invalid character found";
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/dtype.h"
namespace infrt {
const char* DType::name() const {
switch (kind_) {
case Kind::Unk:
return "Unk";
break;
#define INFRT_DTYPE(enum__, value__) \
case Kind::enum__: \
return #enum__; \
break;
#include "paddle/infrt/common/dtype.def"
#undef INFRT_DTYPE
}
return "";
}
size_t DType::GetHostSize() const {
switch (kind_) {
#define INFRT_DTYPE(enum__, value__) \
case DType::Kind::enum__: \
return sizeof(DTypeInternal<DType::Kind::enum__>::type);
#include "paddle/infrt/common/dtype.def" // NOLINT
#undef INFRT_DTYPE
case Kind::Unk:
return 0;
break;
}
return 0;
}
} // namespace infrt
// Define all INFRT dtypes
// DTYPE(ENUM, VALUE)
#ifdef INFRT_DTYPE
INFRT_DTYPE(UI8, 1)
INFRT_DTYPE(UI16, 2)
INFRT_DTYPE(UI32, 3)
INFRT_DTYPE(UI64, 4)
INFRT_DTYPE(I1, 5)
INFRT_DTYPE(I8, 6)
INFRT_DTYPE(I16, 7)
INFRT_DTYPE(I32, 8)
INFRT_DTYPE(I64, 9)
INFRT_DTYPE(F32, 10)
INFRT_DTYPE(F64, 11)
INFRT_DTYPE(STRING, 12)
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
namespace infrt {
class DType {
public:
enum class Kind : uint8_t {
Unk = 0,
// Automatically generate the enum definition
#define INFRT_DTYPE(enum__, value__) enum__ = value__,
#include "paddle/infrt/common/dtype.def"
#undef INFRT_DTYPE
BOOL = I1,
};
DType() = default;
explicit constexpr DType(Kind kind) : kind_(kind) { assert(IsValid()); }
DType(const DType&) = default;
DType& operator=(const DType&) = default;
bool operator==(DType other) const { return kind_ == other.kind_; }
bool operator!=(DType other) const { return !(*this == other); }
constexpr Kind kind() const { return kind_; }
bool IsValid() const { return kind_ != Kind::Unk; }
bool IsInvalid() const { return !IsValid(); }
const char* name() const;
size_t GetHostSize() const;
private:
Kind kind_{Kind::Unk};
};
template <typename T>
constexpr DType GetDType();
template <DType::Kind kind>
struct DTypeInternal;
#define INFRT_IMPL_GET_DTYPE(cpp_type__, enum__) \
template <> \
inline constexpr DType GetDType<cpp_type__>() { \
return DType{DType::Kind::enum__}; \
} \
template <> \
struct DTypeInternal<DType::Kind::enum__> { \
using type = cpp_type__; \
};
INFRT_IMPL_GET_DTYPE(bool, I1);
INFRT_IMPL_GET_DTYPE(int8_t, I8);
INFRT_IMPL_GET_DTYPE(int16_t, I16);
INFRT_IMPL_GET_DTYPE(int32_t, I32);
INFRT_IMPL_GET_DTYPE(int64_t, I64);
INFRT_IMPL_GET_DTYPE(uint8_t, UI8);
INFRT_IMPL_GET_DTYPE(uint16_t, UI16);
INFRT_IMPL_GET_DTYPE(uint32_t, UI32);
INFRT_IMPL_GET_DTYPE(uint64_t, UI64);
INFRT_IMPL_GET_DTYPE(float, F32);
INFRT_IMPL_GET_DTYPE(double, F64);
INFRT_IMPL_GET_DTYPE(std::string, STRING);
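// A minimal usage sketch (illustrative only):
//   DType f32 = GetDType<float>();
//   // f32.kind() == DType::Kind::F32, f32.name() == "F32",
//   // f32.GetHostSize() == sizeof(float) == 4.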
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/global.h"
namespace infrt {
Global::Global() {}
mlir::MLIRContext* Global::context = nullptr;
mlir::MLIRContext* Global::getMLIRContext() {
if (nullptr == context) {
context = new mlir::MLIRContext();
}
return context;
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/IR/MLIRContext.h>
#include "paddle/infrt/tensor/dense_host_tensor.h"
namespace infrt {
// global variables
class Global {
private:
static mlir::MLIRContext *context;
Global();
public:
static mlir::MLIRContext *getMLIRContext();
}; // class Global
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if !defined(NDEBUG)
#define INFRT_DEBUG
#endif
#define INFRT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
void operator=(const TypeName&) = delete
#ifndef INFRT_NOT_IMPLEMENTED
#define INFRT_NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented";
#endif
#define INFRT_RESULT_SHOULD_USE __attribute__((warn_unused_result))
/**
 * A trick to force the registration code to run.
*
* usage:
*
* INFRT_REGISTER_HELPER(some_key) {
* // register methods
* }
*
* INFRT_USE_REGISTER(some_key);
*/
#define INFRT_REGISTER_HELPER(symbol__) bool __infrt__##symbol__##__registrar()
#define INFRT_USE_REGISTER(symbol__) \
extern bool __infrt__##symbol__##__registrar(); \
[[maybe_unused]] static bool __infrt_extern_registrar_##symbol__ = \
__infrt__##symbol__##__registrar();
#if __cplusplus >= 201703L
#define INFRT_NODISCARD [[nodiscard]]
#else
#define INFRT_NODISCARD
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/memory.h"
namespace infrt {
using infrt::common::Target;
namespace {
class X86MemoryMng : public MemoryInterface {
public:
void* malloc(size_t nbytes) override { return ::malloc(nbytes); }
void free(void* data) override {
if (!data) return;
::free(data);
}
void* aligned_alloc(size_t alignment, size_t nbytes) override {
return ::aligned_alloc(alignment, nbytes);
}
};
} // namespace
MemoryManager::MemoryManager() {
Register(Target::Arch::Unk, new X86MemoryMng);
Register(Target::Arch::X86, new X86MemoryMng);
}
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include <unordered_map>
#include "paddle/infrt/common/macros.h"
#include "paddle/infrt/common/target.h"
namespace infrt {
class MemoryInterface {
public:
virtual void* malloc(size_t nbytes) = 0;
virtual void free(void* data) = 0;
virtual void* aligned_alloc(size_t alignment, size_t nbytes) {
return nullptr;
}
virtual ~MemoryInterface() {}
};
/**
 * MemoryManager holds a map of MemoryInterface instances, one per architecture.
*/
class MemoryManager final {
public:
using key_t = common::Target::Arch;
static MemoryManager& Global() {
static auto* x = new MemoryManager;
return *x;
}
MemoryInterface* Retrieve(key_t key) INFRT_RESULT_SHOULD_USE {
auto it = memory_mngs_.find(key);
if (it != memory_mngs_.end()) return it->second.get();
return nullptr;
}
MemoryInterface* RetrieveSafely(key_t key) {
auto* res = Retrieve(key);
CHECK(res) << "no MemoryInterface for architecture [" << key << "]";
return res;
}
MemoryInterface* Register(key_t key, MemoryInterface* item) {
CHECK(!memory_mngs_.count(key)) << "Duplicate register [" << key << "]";
memory_mngs_[key].reset(item);
return item;
}
private:
MemoryManager();
std::unordered_map<common::Target::Arch, std::unique_ptr<MemoryInterface>>
memory_mngs_;
INFRT_DISALLOW_COPY_AND_ASSIGN(MemoryManager);
};
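// A minimal usage sketch (illustrative only):
//   MemoryInterface* mem =
//       MemoryManager::Global().RetrieveSafely(common::Target::Arch::X86);
//   void* p = mem->malloc(64);
//   mem->free(p);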
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/object.h"
namespace infrt {
namespace common {} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include "paddle/infrt/common/shared.h"
namespace infrt {
namespace common {
template <typename T>
class Shared;
/**
 * Object is the basic element in INFRT; with the `Shared` wrapper, an object
 * can be shared across the system.
*/
struct Object {
//! Get the type representation of this object.
virtual const char* type_info() const = 0;
virtual ~Object() {}
//! Cast to a derived type.
template <typename T>
T* as() {
return static_cast<T*>(this);
}
//! Cast to a derived type.
template <typename T>
const T* as() const {
return static_cast<const T*>(this);
}
//! Type safe cast.
template <typename T>
T* safe_as() {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return static_cast<T*>(this);
}
return nullptr;
}
//! Type safe cast.
template <typename T>
const T* safe_as() const {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return static_cast<const T*>(this);
}
return nullptr;
}
//! Check if the type is right.
template <typename T>
bool is_type() const {
if (std::strcmp(type_info(), T::__type_info__) == 0) {
return true;
}
return false;
}
  //! The reference count, which makes all derived types shareable.
mutable RefCount __ref_count__;
};
using object_ptr = Object*;
using shared_object = Shared<Object>;
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/shared.h"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <atomic>
#include <string>
#include <type_traits>
namespace infrt {
namespace common {
class RefCount {
public:
using value_type = int32_t;
RefCount() = default;
value_type Inc() { return ++count_; }
value_type Dec() { return --count_; }
bool is_zero() const { return 0 == count_; }
std::string to_string() { return std::to_string(count_.load()); }
int32_t val() const { return count_; }
private:
std::atomic<value_type> count_{0};
};
class Object;
/**
* The templated methods are used to unify the way to get the RefCount instance
* in client classes.
*/
template <typename T>
RefCount& ref_count(const T* t) {
static_assert(std::is_base_of<Object, T>::value, "T is not a Object");
return t->__ref_count__;
}
template <typename T>
void Destroy(const T* t) {
delete t;
}
template <typename T>
struct Shared {
using object_ptr = T*;
Shared() = default;
explicit Shared(T* p) : p_(p) {
if (p) IncRef(p);
}
Shared(const Shared& other) : p_(other.p_) { IncRef(p_); }
Shared(Shared&& other) : p_(other.p_) { other.p_ = nullptr; }
Shared<T>& operator=(const Shared<T>& other);
//! Reset to another pointer \p x.
void Reset(T* x = nullptr);
//! Access the pointer in various ways.
// @{
inline T* get() const { return p_; }
inline T& operator*() const { return *p_; }
inline T* operator->() const { return p_; }
inline T* self() { return p_; }
inline const T* self() const { return p_; }
// @}
inline bool same_as(const Shared& other) { return p_ == other.p_; }
inline bool defined() const { return p_; }
inline bool operator<(const Shared& other) const { return p_ < other.p_; }
inline Shared<T>& operator=(T* x);
inline bool operator==(const Shared& other) const { return p_ == other.p_; }
~Shared();
private:
//! Increase the share count.
void IncRef(T* p);
//! Decrease the share count.
void DecRef(T* p);
protected:
T* p_{};
};
template <typename T>
void Shared<T>::IncRef(T* p) {
if (p) {
ref_count(p).Inc();
}
}
template <typename T>
void Shared<T>::DecRef(T* p) {
if (p) {
if (ref_count(p).Dec() == 0) {
Destroy(p);
}
}
}
template <typename T>
Shared<T>& Shared<T>::operator=(const Shared<T>& other) {
if (other.p_ == p_) return *this;
  // `other` may live inside something owned by `*this`, so be careful to
  // incref `other` before we decref ourselves.
T* tmp = other.p_;
IncRef(tmp);
DecRef(p_);
p_ = tmp;
return *this;
}
template <typename T, typename... Args>
T* make_shared(Args&&... args) {
return new T(args...);
}
template <typename T>
Shared<T>& Shared<T>::operator=(T* x) {
if (p_ == x) return *this;
T* tmp = x;
IncRef(tmp);
DecRef(p_);
p_ = tmp;
return *this;
}
template <typename T>
Shared<T>::~Shared() {
DecRef(p_);
p_ = nullptr;
}
template <typename T>
void Shared<T>::Reset(T* x) {
if (x) IncRef(x);
DecRef(p_);
p_ = x;
}
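// A minimal usage sketch (illustrative only; `Foo` is an assumed type derived
// from Object, which supplies the embedded __ref_count__):
//   struct Foo : public Object {
//     const char* type_info() const override { return "Foo"; }
//   };
//   Shared<Foo> a(make_shared<Foo>());  // ref count: 1
//   Shared<Foo> b = a;                  // ref count: 2
//   a.Reset();                          // ref count: 1
//   // When b is destroyed the count reaches 0 and the Foo is deleted.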
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/string.h"
#include <stdarg.h>
#include <cstring>
namespace infrt {
namespace infrt {
std::string StringFormat(const std::string &fmt_str, ...) {
  /* Reserve twice the length of fmt_str */
int final_n, n = (static_cast<int>(fmt_str.size())) * 2;
std::unique_ptr<char[]> formatted;
va_list ap;
while (1) {
formatted.reset(
new char[n]); /* Wrap the plain char array into the unique_ptr */
std::strcpy(&formatted[0], fmt_str.c_str()); // NOLINT
va_start(ap, fmt_str);
final_n = vsnprintf(&formatted[0], n, fmt_str.c_str(), ap);
va_end(ap);
if (final_n < 0 || final_n >= n)
n += abs(final_n - n + 1);
else
break;
}
return std::string(formatted.get());
}
std::string Trim(const std::string &s, const char *empty) {
if (s.empty()) return s;
auto start = s.find_first_not_of(empty);
if (start == std::string::npos) return "";
auto end = s.find_last_not_of(empty);
return s.substr(start, end - start + 1);
}
std::string Uppercase(const std::string &x) {
auto res = x;
for (auto &c : res) {
c = toupper(c);
}
return res;
}
bool Startswith(const std::string &x, const std::string &str) {
return x.find(str) == 0;
}
bool Endswith(const std::string &x, const std::string &str) {
if (x.length() >= str.length()) {
return std::equal(str.rbegin(), str.rend(), x.rbegin());
}
return false;
}
std::vector<std::string> Split(const std::string &str,
const std::string &splitter) {
std::vector<std::string> results;
std::string::size_type pos1, pos2;
pos2 = str.find(splitter);
pos1 = 0;
while (std::string::npos != pos2) {
results.push_back(str.substr(pos1, pos2 - pos1));
pos1 = pos2 + splitter.size();
pos2 = str.find(splitter, pos1);
}
if (pos1 != str.length()) {
results.push_back(str.substr(pos1));
}
return results;
}
void Replace(std::string *s, const std::string &from, const std::string &to) {
size_t pos = 0;
while ((pos = s->find(from, pos)) != std::string::npos) {
s->replace(pos, from.size(), to);
pos += to.length();
}
}
size_t Count(std::string *s, const std::string &sub) {
size_t pos = 0;
size_t times = 0;
while ((pos = s->find(sub, pos)) != std::string::npos) {
if ((pos == 0 || !IsPrefix(s->at(pos - 1))) &&
(pos + sub.length() == s->size() ||
!IsSuffix(s->at(pos + sub.length())))) {
pos += sub.length();
times++;
} else {
pos++;
}
}
return times;
}
bool IsPrefix(const char &c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
}
bool IsSuffix(const char &c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_') ||
(c >= '0' && c <= '9') || (c == '\'');
}
std::string TransValidVarName(std::string name) {
Replace(&name, ".", "__");
Replace(&name, "/", "___");
name.erase(0, name.find_first_not_of("_"));
return name;
}
} // namespace infrt
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace infrt {
namespace infrt {
//! Get the content of a stream.
template <typename T>
std::string GetStreamCnt(const T& x);
/**
 * Construct a formatted string from a printf-style format and arguments.
 * @param fmt_str The format.
 * @param ... The parameters of the format.
 * @return The formatted string.
 */
std::string StringFormat(const std::string& fmt_str, ...);
/**
 * Join multiple fields into a single string, similar to Python's str.join.
 */
template <typename T = std::string>
std::string Join(const std::vector<T>& fields, const std::string& splitter) {
if (fields.empty()) return "";
std::stringstream ss;
for (int i = 0; i < fields.size() - 1; i++) ss << fields[i] << splitter;
ss << fields.back();
return ss.str();
}
std::vector<std::string> Split(const std::string& str,
const std::string& splitter);
std::string Trim(const std::string& s, const char* empty = " \n\r\t");
//! Convert a string to uppercase.
std::string Uppercase(const std::string& x);
//! Replace every occurrence of substring 'from' in string s with 'to'.
void Replace(std::string* s, const std::string& from, const std::string& to);
//! Count how many times substring 'sub' appears in string s as a standalone
//! identifier; occurrences embedded in a longer identifier are skipped.
size_t Count(std::string* s, const std::string& sub);
//! Tell whether a char can be the leading character of a tensor's name.
bool IsPrefix(const char& c);
//! Tell whether a char can appear in the rest of a tensor's name.
bool IsSuffix(const char& c);
//! Tell if a string \p x starts with \p str.
bool Startswith(const std::string& x, const std::string& str);
//! Tell if a string \p x ends with \p str.
bool Endswith(const std::string& x, const std::string& str);
template <typename T>
std::string GetStreamCnt(const T& x) {
std::stringstream os;
os << x;
return os.str();
}
std::string TransValidVarName(std::string name);
} // namespace infrt
} // namespace infrt
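As a quick illustration of the string helpers declared above, here is a minimal, self-contained sketch (the main() function and the sample strings are ours, not part of the removed code; the helpers live in the nested infrt::infrt namespace as declared in this header):

#include <iostream>
#include "paddle/infrt/common/string.h"

int main() {
  using namespace infrt::infrt;

  // printf-style formatting backed by vsnprintf.
  std::string msg = StringFormat("%d kernels in %s", 3, "phi");  // "3 kernels in phi"

  // Splitting and joining round-trip.
  std::vector<std::string> parts = Split("a,b,c", ",");          // {"a", "b", "c"}
  std::string joined = Join(parts, " | ");                       // "a | b | c"

  std::string trimmed = Trim("  name\n");                        // "name"

  // Count() only counts stand-alone identifier occurrences, so "x_1" is skipped.
  std::string code = "x + x_1 + x";
  size_t times = Count(&code, "x");                              // 2

  std::cout << msg << "; " << joined << "; " << trimmed << "; " << times << "\n";
  return 0;
}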
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/target.h"
#include <glog/logging.h>
namespace infrt {
namespace common {
bool Target::operator==(const Target &other) const {
return os == other.os && //
arch == other.arch && //
bits == other.bits && //
features == other.features;
}
int Target::max_num_threads() const {
CHECK(arch == Arch::NVGPU)
<< "The target is not NVGPU! Cannot get max number of threads.";
return 1024;
}
std::vector<Target::Lib> Target::get_target_libs() const { return libs; }
int Target::get_target_bits() const {
switch (bits) {
case Bit::k32:
return 32;
case Bit::k64:
return 64;
case Bit::Unk:
return 0;
default:
LOG(FATAL) << "Not supported Bit";
}
return -1;
}
std::ostream &operator<<(std::ostream &os, const Target &target) {
os << "Target<";
switch (target.os) {
case Target::OS::Linux:
os << "linux";
break;
case Target::OS::Windows:
os << "windows";
break;
case Target::OS::Unk:
os << "unk";
break;
}
os << ",";
switch (target.arch) {
case Target::Arch::X86:
os << "x86";
break;
case Target::Arch::ARM:
os << "arm";
break;
case Target::Arch::NVGPU:
os << "nvgpu";
break;
case Target::Arch::Unk:
os << "unk";
break;
}
os << ",";
switch (target.bits) {
case Target::Bit::k32:
os << "32";
break;
case Target::Bit::k64:
os << "64";
break;
case Target::Bit::Unk:
os << "unk";
break;
}
os << ">";
return os;
}
std::ostream &operator<<(std::ostream &os, Target::Arch arch) {
switch (arch) {
case Target::Arch::Unk:
os << "Unk";
break;
case Target::Arch::X86:
os << "X86";
break;
case Target::Arch::ARM:
os << "ARM";
break;
case Target::Arch::NVGPU:
os << "NVGPU";
break;
}
return os;
}
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ostream>
#include <vector>
namespace infrt {
namespace common {
struct Target {
/**
* The operating system used by the target. Determines which system calls to
* generate.
*/
enum class OS : int {
Unk = -1,
Linux,
Windows,
};
/**
* The architecture used by the target. Determines the instruction set to use.
*/
enum class Arch : int {
Unk = -1,
X86,
ARM,
NVGPU,
};
enum class Bit : int {
Unk = -1,
k32,
k64,
};
OS os{OS::Unk};
Arch arch{Arch::Unk};
Bit bits{Bit::Unk};
enum class Feature : int {
JIT = 0,
Debug,
};
/**
* The library used by the target.
*/
enum class Lib : int {
Unk = -1,
MKL,
};
std::vector<Feature> features;
std::vector<Lib> libs;
explicit Target(OS o = OS::Linux,
Arch a = Arch::Unk,
Bit b = Bit::Unk,
const std::vector<Feature>& features = {},
const std::vector<Lib>& libs = {})
: os(o), arch(a), bits(b), features(features), libs(libs) {}
bool defined() const {
return os != OS::Unk && arch != Arch::Unk && bits != Bit::Unk;
}
int max_num_threads() const;
int get_target_bits() const;
std::vector<Lib> get_target_libs() const;
bool operator==(const Target& other) const;
bool operator!=(const Target& other) const { return !(*this == other); }
friend std::ostream& operator<<(std::ostream& os, const Target& target);
};
static const Target& UnkTarget() {
static Target target(
Target::OS::Unk, Target::Arch::Unk, Target::Bit::Unk, {}, {});
return target;
}
static const Target& DefaultHostTarget() {
static Target target(
Target::OS::Linux, Target::Arch::X86, Target::Bit::k64, {}, {});
return target;
}
static const Target& DefaultNVGPUTarget() {
static Target target(
Target::OS::Linux, Target::Arch::NVGPU, Target::Bit::k64, {}, {});
return target;
}
std::ostream& operator<<(std::ostream& os, Target::Arch arch);
} // namespace common
} // namespace infrt
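A short usage sketch for the Target description above (our own example; it relies only on the constructors, singletons, and stream operators declared in this header and defined in target.cc):

#include <iostream>
#include "paddle/infrt/common/target.h"

int main() {
  using infrt::common::Target;

  // Prebuilt singletons declared above.
  const Target& host = infrt::common::DefaultHostTarget();  // linux / x86 / 64-bit
  const Target& gpu = infrt::common::DefaultNVGPUTarget();  // linux / nvgpu / 64-bit

  std::cout << host << "\n";                    // Target<linux,x86,64>
  std::cout << host.get_target_bits() << "\n";  // 64
  std::cout << gpu.max_num_threads() << "\n";   // 1024 (CHECK-fails for non-NVGPU targets)

  // A custom target assembled from the enum fields.
  Target arm32(Target::OS::Linux, Target::Arch::ARM, Target::Bit::k32);
  std::cout << std::boolalpha << arm32.defined() << "\n";  // true
  return 0;
}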
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/infrt/common/type.h"
#include <utility>
namespace infrt {
namespace common {
struct Type::Storage {
Storage() = default;
Storage(type_t t, int b, int w) : type_(t), bits_(b), lanes_(w) {}
type_t type_{type_t::Unk};
cpp_type_t cpp_type_{cpp_type_t::None};
//! How many bits per element.
int bits_{};
//! How many elements(if a vector type), for scalar types, it should be 1.
int lanes_{1};
//! Name of the customized type.
std::string customized_type_;
};
Type::~Type() {}
std::ostream &operator<<(std::ostream &os, const Type &t) {
if (t.is_cpp_const()) os << "const ";
switch (t.type()) {
case Type::type_t::Int:
if (t.bits() == 1) {
os << "bool";
} else {
os << "int" << t.bits();
}
break;
case Type::type_t::UInt:
os << "uint" << t.bits();
break;
case Type::type_t::Float:
os << "float" << t.bits();
break;
case Type::type_t::Void:
os << "void";
break;
case Type::type_t::Customized:
os << t.customized_type();
break;
case Type::type_t::String:
os << "string";
break;
case Type::type_t::Unk:
os << "unk";
break;
}
if (t.lanes() > 1) os << "<" << t.lanes() << ">";
if (t.is_cpp_handle()) os << "*";
if (t.is_cpp_handle2()) os << "**";
return os;
}
std::ostream &operator<<(std::ostream &os, Type::type_t t) {
switch (t) {
case Type::type_t::String:
os << "String";
break;
case Type::type_t::Void:
os << "Void";
break;
case Type::type_t::UInt:
os << "UInt";
break;
case Type::type_t::Int:
os << "Int";
break;
case Type::type_t::Float:
os << "Float";
break;
case Type::type_t::Unk:
os << "Unk";
break;
case Type::type_t::Customized:
os << "Customized";
}
return os;
}
Type &Type::set_cpp_handle(bool x) {
auto &v = (*reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_));
// unset the other handle-related bits.
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
if (x)
v |= static_cast<uint8_t>(cpp_type_t::Handle);
else
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
return *this;
}
Type &Type::set_cpp_handle2(bool x) {
auto &v = (*reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_));
// unset the other handle-related bits.
v &= ~static_cast<uint8_t>(cpp_type_t::Handle);
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
if (x)
v |= static_cast<uint8_t>(cpp_type_t::HandleHandle);
else
v &= ~static_cast<uint8_t>(cpp_type_t::HandleHandle);
return *this;
}
Type Type::VectorOf(int w) const {
CheckTypeValid();
// Keep the element type and bit-width; only the lane count changes.
return Type(type(), bits(), w);
}
Type::Type(const Type &other) {
if (other.storage_) storage_.reset(new Storage(*other.storage_));
}
Type Type::ElementOf() const {
CheckTypeValid();
auto type = *this;
type.storage_->lanes_ = 1;
return type;
}
void Type::CheckTypeValid() const { CHECK_NE(GetStorage().type_, type_t::Unk); }
Type Type::PointerOf() const {
CheckTypeValid();
auto x = *this;
CHECK(!x.is_cpp_handle2()) << "Not support three level of PointerOf";
if (x.is_cpp_handle())
x.set_cpp_handle2();
else
x.set_cpp_handle();
return x;
}
Type Type::ConstOf() const {
CheckTypeValid();
auto x = *this;
x.set_cpp_const();
return x;
}
Type Type::IgnoreConst() const {
CheckTypeValid();
auto x = *this;
x.set_cpp_const(false);
return x;
}
Type Type::with_bits(int x) const {
CHECK(is_primitive());
Type type = *this;
type.GetStorage().bits_ = x;
return type;
}
Type Type::with_type(Type::type_t x) const {
Type type = *this;
type.GetStorage().type_ = x;
return type;
}
Type Type::with_lanes(int x) const {
CHECK(valid());
Type type = *this;
type.GetStorage().lanes_ = x;
return type;
}
Type Type::with_cpp_const(bool x) const {
Type type = *this;
type.set_cpp_const(x);
return type;
}
Type &Type::set_cpp_const(bool is_const) {
uint8_t &data = *reinterpret_cast<uint8_t *>(&GetStorage().cpp_type_);
if (is_const) {
data |= static_cast<uint8_t>(cpp_type_t::Const);
} else {
data &= ~(static_cast<uint8_t>(cpp_type_t::Const));
}
return *this;
}
Type &Type::set_customized_type(const std::string &t) {
GetStorage().type_ = type_t::Customized;
GetStorage().customized_type_ = t;
return *this;
}
bool Type::valid() const {
if (is_unk()) return false;
if (is_customized()) {
return !GetStorage().customized_type_.empty();
}
if (is_primitive()) {
return bits() != 0;
}
return true;
}
Type::Type(Type::type_t t, int b, int w) : storage_(new Storage(t, b, w)) {}
bool Type::is_primitive() const {
return !is_unk() && type() != type_t::Customized;
}
bool Type::is_customized() const {
return !is_unk() && type() == type_t::Customized;
}
bool Type::is_unk() const { return type() == type_t::Unk; }
bool Type::is_bool() const { return type() == type_t::UInt && bits() == 1; }
bool Type::is_void() const { return type() == type_t::Void; }
bool Type::is_vector() const { return lanes() > 1; }
bool Type::is_scalar() const { return lanes() == 1; }
bool Type::is_float(int bits) const {
return type() == type_t::Float && (bits < 0 || bits == this->bits());
}
bool Type::is_uint(int bits) const {
return type() == type_t::UInt && (bits < 0 || bits == this->bits());
}
bool Type::is_int(int bits) const {
return type() == type_t::Int && (bits < 0 || bits == this->bits());
}
bool Type::is_integer(int bits) const {
return (type() == type_t::Int || type() == type_t::UInt) &&
(bits < 0 || bits == this->bits());
}
bool Type::is_index_type() {
return is_int() && lanes() == 1 && (bits() == 32 || bits() == 64);
}
bool Type::is_cpp_handle() const {
return static_cast<uint8_t>(GetStorage().cpp_type_) &
static_cast<uint8_t>(cpp_type_t::Handle);
}
bool Type::is_cpp_handle2() const {
return static_cast<uint8_t>(GetStorage().cpp_type_) &
static_cast<uint8_t>(cpp_type_t::HandleHandle);
}
bool Type::is_cpp_const() const {
return static_cast<uint8_t>(cpp_type_t::Const) &
static_cast<uint8_t>(GetStorage().cpp_type_);
}
const std::string &Type::customized_type() const {
return GetStorage().customized_type_;
}
bool Type::is_customized_type() const {
return !GetStorage().customized_type_.empty();
}
Type::type_t Type::type() const { return GetStorage().type_; }
int Type::bits() const { return GetStorage().bits_; }
int Type::lanes() const { return GetStorage().lanes_; }
Type::cpp_type_t Type::cpp_type() const { return GetStorage().cpp_type_; }
bool Type::operator==(const Type &other) const {
return type() == other.type() && bits() == other.bits() &&
lanes() == other.lanes() &&
GetStorage().cpp_type_ == other.GetStorage().cpp_type_ &&
customized_type() == other.customized_type();
}
bool Type::is_string() const { return type() == type_t::String; }
Type &Type::operator=(const Type &other) {
if (other.storage_)
storage_.reset(new Storage(*other.storage_));
else
storage_.reset();
return *this;
}
Type::Storage &Type::GetStorage() { return *storage_; }
const Type::Storage &Type::GetStorage() const { return *storage_; }
Type::Type() : storage_(new Storage) {}
Type::Type(Type &&other) : storage_(std::move(other.storage_)) {}
const Type &F16() {
static auto t = Float(16);
return t;
}
const Type &F32() {
static auto t = Float(32);
return t;
}
const Type &F64() {
static auto t = Float(64);
return t;
}
const Type &I8() {
static auto t = Int(8);
return t;
}
const Type &I16() {
static auto t = Int(16);
return t;
}
const Type &I32() {
static auto t = Int(32);
return t;
}
const Type &I64() {
static auto t = Int(64);
return t;
}
const Type &UI8() {
static auto t = UInt(8);
return t;
}
const Type &UI16() {
static auto t = UInt(16);
return t;
}
const Type &UI32() {
static auto t = UInt(32);
return t;
}
const Type &UI64() {
static auto t = UInt(64);
return t;
}
const Type &I1() {
static auto t = Int(1);
return t;
}
const Type &UI1() {
static auto t = UInt(1);
return t;
}
} // namespace common
} // namespace infrt
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <memory>
#include <string>
#include "paddle/infrt/common/macros.h"
//! Many of the concepts here are borrowed from the Halide project.
namespace infrt {
namespace common {
/**
 * Types in the INFRT type system. They can be ints, unsigned ints, or floats
 * of various bit-widths.
 * They can also be vectors of the same element type (by setting the `lanes`
 * field to something larger than one).
 * NOTE: Front-end code other than vectorize shouldn't use vector types.
 */
struct Type {
enum class type_t {
Unk = -1,
Int,
UInt,
Float,
String,
Void,
// Mixing Customized with the other primitive types is awkward; a larger
// refactor is needed here.
Customized, // Customized type
};
//! Type decorators in C++; the decorators are bit flags and can be combined.
enum class cpp_type_t : uint8_t {
None = 0, // None information.
Const = 1, // const.
Handle = 1 << 1, // pointer type, such as `infrt_buffer_t*`.
HandleHandle = 1 << 2, // pointer of pointer, such as `infrt_buffer_t**`.
};
Type();
Type(type_t t, int b, int w);
Type(const Type& other);
explicit Type(Type&& other);
Type& operator=(const Type& other);
INFRT_NODISCARD bool is_primitive() const;
INFRT_NODISCARD bool is_customized() const;
INFRT_NODISCARD bool valid() const;
//! Some helper functions to check a type.
// @{
INFRT_NODISCARD bool is_unk() const;
INFRT_NODISCARD bool is_void() const;
INFRT_NODISCARD bool is_bool() const;
INFRT_NODISCARD bool is_vector() const;
INFRT_NODISCARD bool is_scalar() const;
INFRT_NODISCARD bool is_float(int bits = -1) const;
INFRT_NODISCARD bool is_int(int bits = -1) const;
INFRT_NODISCARD bool is_integer(int bits = -1) const;
INFRT_NODISCARD bool is_uint(int bits = -1) const;
INFRT_NODISCARD bool is_string() const;
INFRT_NODISCARD bool is_index_type();
// @}
Type& set_cpp_handle(bool x = true);
INFRT_NODISCARD bool is_cpp_handle() const;
Type& set_cpp_handle2(bool x = true);
INFRT_NODISCARD bool is_cpp_handle2() const;
Type& set_cpp_const(bool is_const = true);
INFRT_NODISCARD bool is_cpp_const() const;
Type& set_customized_type(const std::string& t);
const std::string& customized_type() const;
INFRT_NODISCARD bool is_customized_type() const;
// Get a new type with bits set to \p x.
Type with_bits(int x) const;
// Get a new type with type set to \p x.
Type with_type(type_t x) const;
// Get a new type with lanes set to \p x.
Type with_lanes(int x) const;
// Get a new type with cpp_const set to \p x.
Type with_cpp_const(bool x = true) const;
//! Getters
// @{
type_t type() const;
int bits() const;
int lanes() const;
cpp_type_t cpp_type() const;
// @}
//! Compare two types for equality.
bool operator==(const Type& other) const;
//! Compare two types for inequality.
bool operator!=(const Type& other) const { return !(*this == other); }
//! Generate a vector of this type, with `w` elements.
Type VectorOf(int w) const;
//! Generate a element type of this type.
Type ElementOf() const;
//! Generate the address type.
Type PointerOf() const;
//! Ignore const.
Type IgnoreConst() const;
//! Add const.
Type ConstOf() const;
friend std::ostream& operator<<(std::ostream& os, const Type& t);
~Type();
private:
void CheckTypeValid() const;
struct Storage;
Storage& GetStorage();
const Storage& GetStorage() const;
std::unique_ptr<Storage> storage_;
}; // struct Type
inline Type Void() { return Type(Type::type_t::Void, 1, 0); }
inline Type Int(int bits, int lanes = 1) {
return Type(Type::type_t::Int, bits, lanes);
}
inline Type UInt(int bits, int lanes = 1) {
return Type(Type::type_t::UInt, bits, lanes);
}
inline Type Float(int bits, int lanes = 1) {
return Type(Type::type_t::Float, bits, lanes);
}
inline Type Bool(int lanes = 1) { return Type(Type::type_t::UInt, 1, lanes); }
inline Type String() { return Type(Type::type_t::String, 1, 1); }
//! Builtin native types as global singletons.
// @{
const Type& F16();
const Type& F32();
const Type& F64();
const Type& I8();
const Type& I16();
const Type& I32();
const Type& I64();
const Type& UI8();
const Type& UI16();
const Type& UI32();
const Type& UI64();
const Type& I1();
const Type& UI1();
// @}
template <typename T>
Type type_of();
template <>
inline Type type_of<float>() {
return F32();
}
template <>
inline Type type_of<double>() {
return F64();
}
template <>
inline Type type_of<unsigned char>() {
return UI8();
}
template <>
inline Type type_of<int16_t>() {
return I16();
}
template <>
inline Type type_of<int32_t>() {
return I32();
}
template <>
inline Type type_of<uint32_t>() {
return UI32();
}
template <>
inline Type type_of<bool>() {
return UI1();
}
template <>
inline Type type_of<char>() {
return I8();
}
template <>
inline Type type_of<int64_t>() {
return I64();
}
template <>
inline Type type_of<uint64_t>() {
return UI64();
}
template <>
inline Type type_of<signed char>() {
return I8();
}
template <>
inline Type type_of<void>() {
return Void();
}
template <>
inline Type type_of<int8_t*>() {
Type x = Int(8);
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<void*>() {
Type x = type_of<void>();
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<void**>() {
Type x = type_of<void>();
x.set_cpp_handle2();
return x;
}
template <>
inline Type type_of<float*>() {
Type x = type_of<float>();
x.set_cpp_handle();
return x;
}
template <>
inline Type type_of<double*>() {
Type x = type_of<double>();
x.set_cpp_handle();
return x;
}
std::ostream& operator<<(std::ostream& os, Type::type_t t);
} // namespace common
} // namespace infrt
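A compact sketch of how the Type helpers above compose (again our own example, using only functions declared in this header and defined in type.cc):

#include <cstdint>
#include <iostream>
#include "paddle/infrt/common/type.h"

int main() {
  using namespace infrt::common;

  Type f32 = F32();                // 32-bit float, 1 lane
  Type f32x4 = f32.VectorOf(4);    // same element type, 4 lanes
  Type f32_ptr = f32.PointerOf();  // float32*
  Type cf32 = f32.ConstOf();       // const float32

  std::cout << f32 << "\n";        // float32
  std::cout << f32x4 << "\n";      // float32<4>
  std::cout << f32_ptr << "\n";    // float32*
  std::cout << cf32 << "\n";       // const float32

  // type_of<T>() maps C++ types onto the INFRT type system.
  std::cout << std::boolalpha
            << (type_of<int32_t>() == I32()) << " "       // true
            << type_of<void*>().is_cpp_handle() << "\n";  // true
  return 0;
}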
core_gather_headers()
gather_srcs(
infrt_src
SRCS
dialect.cc
init_dialects.cc
tensor_shape.cc
dense_tensor.cc
mlir_loader.cc
diagnostic_utils.cc)
mlir_tablegen_on(tensor_shape DIALECT ts)
mlir_tablegen_on(dense_tensor DIALECT dt)
# TODO(Superjomn) add a cmake function cc_executable to encapsulate the following code
add_executable(infrtopt opt.cc)
target_link_libraries(infrtopt infrt)
add_executable(print-ir print_ir.cc)
target_link_libraries(print-ir infrt ${mlir_libs})
cc_test_tiny(test_infrt_mlir_loader SRCS mlir_loader_test.cc DEPS infrt
${MLIR_IR_LIBS})
add_subdirectory(infrt)
add_subdirectory(pd)
add_subdirectory(tensorrt)
if(INFRT_WITH_PHI)
add_subdirectory(phi)
endif()
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <mlir/IR/Dialect.h>
#include <mlir/IR/OpDefinition.h>
#include <mlir/Interfaces/SideEffectInterfaces.h>
#include <string>
#include "paddle/infrt/dialect/dense_tensor_dialect.hpp.inc"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#define GET_OP_CLASSES
#include "paddle/infrt/dialect/dense_tensor.hpp.inc"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <llvm/Support/SourceMgr.h>
#include <mlir/IR/Diagnostics.h>
#include <memory>
namespace infrt {
namespace dialect {
/**
 * A scoped diagnostic handler that helps debug MLIR processing.
 */
class MyScopedDiagnosicHandler : public mlir::SourceMgrDiagnosticHandler {
public:
MyScopedDiagnosicHandler(mlir::MLIRContext* ctx, bool propagate);
mlir::LogicalResult handler(mlir::Diagnostic* diag);
~MyScopedDiagnosicHandler();
private:
class Impl;
std::unique_ptr<Impl> impl_;
};
} // namespace dialect
} // namespace infrt
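A minimal usage sketch for the scoped handler above (our own illustration; the MLIRContext setup and the parsing call are assumed context, and our reading of the propagate flag is an assumption, since its behavior lives in the Impl class defined in the .cc file):

#include <mlir/IR/MLIRContext.h>
#include "paddle/infrt/dialect/diagnostic_utils.h"

void ParseWithDiagnostics() {
  mlir::MLIRContext context;
  // While the handler is in scope, diagnostics emitted on `context` are routed
  // through MyScopedDiagnosicHandler::handler(). We assume propagate=false means
  // the diagnostics are reported by this handler rather than re-emitted downstream.
  infrt::dialect::MyScopedDiagnosicHandler diag_handler(&context, /*propagate=*/false);

  // ... load dialects and run mlir::parseSourceFile / pass pipelines here; parse
  // errors will surface through the scoped handler while it is alive.
}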
add_subdirectory(common)
add_subdirectory(ir)
add_subdirectory(pass)