Unverified commit 18d3e2ca, authored by winter-wang, committed by GitHub

refactor paddle inference c api. test=develop (#32225)

Parent 9bf90922
......@@ -211,11 +211,11 @@ set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
if(WIN32)
set(paddle_inference_c_lib $<TARGET_FILE_DIR:paddle_inference_c>/paddle_inference_c.*)
else(WIN32)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_inference_c.*)
set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi_exp/libpaddle_inference_c.*)
endif(WIN32)
copy(inference_lib_dist
SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_inference_c_lib}
SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib}
DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib)
# fluid library for both train and inference
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pd_inference_api.h>
#include <stdio.h>
#include <stdlib.h>
void ReadData(float* data, int size);
int main(int argc, char* argv[]) {
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
PD_ConfigDisableGlogInfo(config);
PD_Predictor* predictor = PD_PredictorCreate(config);
// config has been destroyed inside PD_PredictorCreate
config = NULL;
size_t input_num = PD_PredictorGetInputNum(predictor);
printf("Input num: %zu\n", input_num);
size_t output_num = PD_PredictorGetOutputNum(predictor);
printf("Output num: %zu\n", output_num);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* input_tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
PD_OneDimArrayCstrDestroy(input_names);
input_names = NULL;
int32_t shape[] = {1, 3, 300, 300};
float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300); // NOLINT
ReadData(data, 1 * 3 * 300 * 300); // NOLINT
PD_TensorReshape(input_tensor, 4, shape);
PD_TensorCopyFromCpuFloat(input_tensor, data);
free(data);
data = NULL;
PD_PredictorRun(predictor);
PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_OneDimArrayCstrDestroy(output_names);
output_names = NULL;
PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
int32_t size = 1;
for (size_t index = 0; index < out_shape->size; ++index) {
size = size * out_shape->data[index];
}
PD_OneDimArrayInt32Destroy(out_shape);
out_shape = NULL;
data = (float*)malloc(sizeof(float) * size); // NOLINT
PD_TensorCopyToCpuFloat(output_tensor, data);
free(data);
data = NULL;
PD_TensorDestroy(output_tensor);
output_tensor = NULL;
PD_TensorDestroy(input_tensor);
input_tensor = NULL;
PD_PredictorDestroy(predictor);
predictor = NULL;
return 0;
}
void ReadData(float* data, int n) {
FILE* fp = fopen("data/data.txt", "r");
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
......@@ -33,7 +33,7 @@ if (WITH_LITE)
add_subdirectory(lite)
endif()
# fluid_modules exclude API-interface of inference/api and inference/capi
# fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
# Adapt to custom op mechanism: Include the header files related to the data type
......@@ -61,7 +61,7 @@ if(NOT APPLE)
endif()
# C inference API
add_subdirectory(capi)
add_subdirectory(capi_exp)
if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
add_subdirectory(tests/api)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set(C_API_SRCS pd_config.cc pd_predictor.cc pd_tensor.cc pd_utils.cc)
cc_library(paddle_inference_c SRCS ${C_API_SRCS} DEPS paddle_inference)
if(NOT ON_INFER)
return()
endif()
# Create inference capi shared library
cc_library(paddle_inference_c_shared SHARED SRCS ${C_API_SRCS} DEPS paddle_inference)
set_target_properties(paddle_inference_c_shared PROPERTIES OUTPUT_NAME paddle_inference_c)
if(WIN32)
target_link_libraries(paddle_inference_c_shared shlwapi.lib)
endif()
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file lod_demo.cc
///
/// \brief A demo for users to learn how to run inference through the C API.
/// It is adapted from
/// paddle/fluid/inference/tests/api/analyzer_capi_exp_ner_tester.cc.
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
// gflags / glog are required for FLAGS_infer_model and LOG(INFO) below.
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
DEFINE_string(infer_model, "", "Directory of the inference model.");
int main(int argc, char *argv[]) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
auto model_dir = FLAGS_infer_model;
PD_Config *config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/param").c_str());
PD_ConfigDisableGpu(config);
PD_Predictor *predictor = PD_PredictorCreate(config);
size_t input_num = PD_PredictorGetInputNum(predictor);
size_t output_num = PD_PredictorGetOutputNum(predictor);
PD_OneDimArrayCstr *input_names = PD_PredictorGetInputNames(predictor);
LOG(INFO) << "Predictor start run!";
PD_Tensor *inputs[2];
inputs[0] = PD_PredictorGetInputHandle(predictor, input_names->data[0]);
inputs[1] = PD_PredictorGetInputHandle(predictor, input_names->data[1]);
LOG(INFO) << "Predictor start run!";
// inputs[0]: word, with its LoD memory allocated on the stack
int32_t shape_0[2] = {11, 1};
int64_t data_0[11 * 1] = {12673, 9763, 905, 284, 45, 7474, 20, 17, 1, 4, 9};
size_t lod_layer_0[2] = {0, 11};
PD_OneDimArraySize layer_0;
layer_0.size = 2;
layer_0.data = lod_layer_0;
PD_OneDimArraySize *layer_0_ptr = &layer_0;
PD_TwoDimArraySize lod_0;
lod_0.size = 1;
lod_0.data = &layer_0_ptr;
PD_TensorReshape(inputs[0], 2, shape_0);
PD_TensorCopyFromCpuInt64(inputs[0], data_0);
PD_TensorSetLod(inputs[0], &lod_0);
// inputs[1]: mention, with its LoD memory allocated on the heap
int32_t shape_1[2] = {11, 1};
int64_t data_1[11 * 1] = {27, 0, 0, 33, 34, 33, 0, 0, 0, 1, 2};
PD_TwoDimArraySize *lod_1_ptr = new PD_TwoDimArraySize();
lod_1_ptr->size = 1;
lod_1_ptr->data = new PD_OneDimArraySize *[1];
lod_1_ptr->data[0] = new PD_OneDimArraySize();
lod_1_ptr->data[0]->size = 2;
lod_1_ptr->data[0]->data = new size_t[2];
lod_1_ptr->data[0]->data[0] = 0;
lod_1_ptr->data[0]->data[1] = 11;
PD_TensorReshape(inputs[1], 2, shape_1);
PD_TensorCopyFromCpuInt64(inputs[1], data_1);
PD_TensorSetLod(inputs[1], lod_1_ptr);
// release the LoD memory
delete[] lod_1_ptr->data[0]->data;
delete lod_1_ptr->data[0];
delete[] lod_1_ptr->data;
delete lod_1_ptr;
lod_1_ptr = nullptr;
PD_PredictorRun(predictor);
PD_OneDimArrayCstr *output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor *output =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_TwoDimArraySize *output_lod = PD_TensorGetLod(output);
PD_TwoDimArraySizeDestroy(output_lod);
PD_TensorDestroy(output);
PD_OneDimArrayCstrDestroy(output_names);
PD_TensorDestroy(inputs[0]);
PD_TensorDestroy(inputs[1]);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#if defined(_WIN32)
#ifdef PADDLE_DLL_INFERENCE
#define PADDLE_CAPI_EXPORT __declspec(dllexport)
#else
#define PADDLE_CAPI_EXPORT __declspec(dllimport)
#endif // PADDLE_DLL_INFERENCE
#else
#define PADDLE_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
///
/// __pd_give means that a new object is returned. The user should make sure
/// that the returned pointer is used exactly once as a value for an __pd_take
/// argument. In between, it can be used as a value for as many __pd_keep
/// arguments as the user likes.
///
#ifndef __pd_give
#define __pd_give
#endif
///
/// __pd_take means that the object the argument points to is taken over by the
/// function and may no longer be used by the user as an argument to any other
/// function. The pointer value must be one returned by a function returning an
/// __pd_give pointer.
///
#ifndef __pd_take
#define __pd_take
#endif
///
/// __pd_keep means that the function will only use the object temporarily. The
/// object which the argument points to is not taken over by the function. After
/// the function has finished, the user can still use it as an argument to other
/// functions.
///
#ifndef __pd_keep
#define __pd_keep
#endif
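///
/// A minimal usage sketch of these annotations (the model directory below is
/// only an illustrative value):
///
/// \code
///   PD_Config* config = PD_ConfigCreate();            // returned __pd_give: the caller owns it
///   PD_ConfigSetModelDir(config, "./my_model_dir");   // passed __pd_keep: config is only borrowed
///   PD_Predictor* pred = PD_PredictorCreate(config);  // passed __pd_take: config is consumed here
///   config = NULL;                                    // must not be used or destroyed again
///   PD_PredictorDestroy(pred);                        // passed __pd_take: the predictor is released
/// \endcode
///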
typedef int8_t PD_Bool;
#define TRUE 1
#define FALSE 0
#define PD_ENUM(type) \
typedef int32_t type; \
enum
PD_ENUM(PD_PrecisionType){PD_PRECISION_FLOAT32 = 0, PD_PRECISION_INT8,
PD_PRECISION_HALF};
PD_ENUM(PD_PlaceType){PD_PLACE_UNK = -1, PD_PLACE_CPU, PD_PLACE_GPU,
PD_PLACE_XPU};
PD_ENUM(PD_DataType){
PD_DATA_UNK = -1, PD_DATA_FLOAT32, PD_DATA_INT32,
PD_DATA_INT64, PD_DATA_UINT8,
};
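///
/// For reference, the PD_ENUM macro makes every enum an int32_t at the ABI
/// level; e.g. PD_ENUM(PD_PlaceType){...} above expands to:
///
/// \code
///   typedef int32_t PD_PlaceType;
///   enum { PD_PLACE_UNK = -1, PD_PLACE_CPU, PD_PLACE_GPU, PD_PLACE_XPU };
/// \endcode
///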
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/capi_exp/pd_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#define CHECK_NULL_POINTER_PARM(param) \
PADDLE_ENFORCE_NOT_NULL( \
param, paddle::platform::errors::InvalidArgument( \
"The pointer of " #param " shouldn't be nullptr"))
#define CHECK_AND_CONVERT_PD_CONFIG \
PADDLE_ENFORCE_NOT_NULL( \
pd_config, paddle::platform::errors::InvalidArgument( \
"The pointer of paddle config shouldn't be nullptr")); \
Config* config = reinterpret_cast<Config*>(pd_config)
using paddle_infer::Config;
static Config::Precision ConvertToCxxPrecisionType(PD_PrecisionType precision) {
switch (precision) {
case PD_PRECISION_FLOAT32:
return Config::Precision::kFloat32;
case PD_PRECISION_INT8:
return Config::Precision::kInt8;
case PD_PRECISION_HALF:
return Config::Precision::kHalf;
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupport paddle precision type %d.", precision));
return Config::Precision::kFloat32;
}
}
extern "C" {
__pd_give PD_Config* PD_ConfigCreate() {
return reinterpret_cast<PD_Config*>(new Config());
}
void PD_ConfigDestroy(__pd_take PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
delete reinterpret_cast<Config*>(config);
}
void PD_ConfigSetModel(__pd_keep PD_Config* pd_config,
const char* prog_file_path,
const char* params_file_path) {
CHECK_AND_CONVERT_PD_CONFIG;
CHECK_NULL_POINTER_PARM(prog_file_path);
CHECK_NULL_POINTER_PARM(params_file_path);
config->SetModel(prog_file_path, params_file_path);
}
void PD_ConfigSetProgFile(__pd_keep PD_Config* pd_config,
const char* prog_file_path) {
CHECK_AND_CONVERT_PD_CONFIG;
CHECK_NULL_POINTER_PARM(prog_file_path);
config->SetProgFile(prog_file_path);
}
void PD_ConfigSetParamsFile(__pd_keep PD_Config* pd_config,
const char* params_file_path) {
CHECK_AND_CONVERT_PD_CONFIG;
CHECK_NULL_POINTER_PARM(params_file_path);
config->SetParamsFile(params_file_path);
}
void PD_ConfigSetOptimCacheDir(__pd_keep PD_Config* pd_config,
const char* opt_cache_dir) {
CHECK_AND_CONVERT_PD_CONFIG;
CHECK_NULL_POINTER_PARM(opt_cache_dir);
config->SetOptimCacheDir(opt_cache_dir);
}
void PD_ConfigSetModelDir(__pd_keep PD_Config* pd_config,
const char* model_dir) {
CHECK_AND_CONVERT_PD_CONFIG;
CHECK_NULL_POINTER_PARM(model_dir);
config->SetModel(model_dir);
}
const char* PD_ConfigGetModelDir(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->model_dir().c_str();
}
const char* PD_ConfigGetProgFile(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->prog_file().c_str();
}
const char* PD_ConfigGetParamsFile(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->params_file().c_str();
}
void PD_ConfigDisableFCPadding(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->DisableFCPadding();
}
PD_Bool PD_ConfigUseFcPadding(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_fc_padding();
}
void PD_ConfigEnableUseGpu(__pd_keep PD_Config* pd_config,
uint64_t memory_pool_init_size_mb,
int32_t device_id) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableUseGpu(memory_pool_init_size_mb, device_id);
}
void PD_ConfigDisableGpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->DisableGpu();
}
PD_Bool PD_ConfigUseGpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_gpu();
}
void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
int32_t l3_workspace_size) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableXpu(l3_workspace_size);
}
PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_xpu();
}
int32_t PD_ConfigGpuDeviceId(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->gpu_device_id();
}
int32_t PD_ConfigXpuDeviceId(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->xpu_device_id();
}
int32_t PD_ConfigMemoryPoolInitSizeMb(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->memory_pool_init_size_mb();
}
float PD_ConfigFractionOfGpuMemoryForPool(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->fraction_of_gpu_memory_for_pool();
}
void PD_ConfigEnableCudnn(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableCUDNN();
}
PD_Bool PD_ConfigCudnnEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->cudnn_enabled();
}
void PD_ConfigSwitchIrOptim(__pd_keep PD_Config* pd_config, PD_Bool x) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SwitchIrOptim(x);
}
PD_Bool PD_ConfigIrOptim(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->ir_optim();
}
void PD_ConfigEnableTensorRtEngine(__pd_keep PD_Config* pd_config,
int32_t workspace_size,
int32_t max_batch_size,
int32_t min_subgraph_size,
PD_PrecisionType precision,
PD_Bool use_static, PD_Bool use_calib_mode) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableTensorRtEngine(
workspace_size, max_batch_size, min_subgraph_size,
ConvertToCxxPrecisionType(precision), use_static, use_calib_mode);
}
PD_Bool PD_ConfigTensorRtEngineEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->tensorrt_engine_enabled();
}
void PD_ConfigSetTrtDynamicShapeInfo(__pd_keep PD_Config* pd_config,
size_t tensor_num,
const char** tensor_name,
size_t* shapes_num, int32_t** min_shape,
int32_t** max_shape, int32_t** optim_shape,
PD_Bool disable_trt_plugin_fp16) {
CHECK_AND_CONVERT_PD_CONFIG;
std::map<std::string, std::vector<int>> min_input_shapes;
std::map<std::string, std::vector<int>> max_input_shapes;
std::map<std::string, std::vector<int>> optim_input_shapes;
for (size_t tensor_index = 0; tensor_index < tensor_num; ++tensor_index) {
std::string name(tensor_name[tensor_index]);
std::vector<int> min_input_shape, max_input_shape, optim_input_shape;
for (size_t shape_index = 0; shape_index < shapes_num[tensor_index];
++shape_index) {
min_input_shape.emplace_back(min_shape[tensor_index][shape_index]);
max_input_shape.emplace_back(max_shape[tensor_index][shape_index]);
optim_input_shape.emplace_back(optim_shape[tensor_index][shape_index]);
}
min_input_shapes[name] = std::move(min_input_shape);
max_input_shapes[name] = std::move(max_input_shape);
optim_input_shapes[name] = std::move(optim_input_shape);
}
config->SetTRTDynamicShapeInfo(min_input_shapes, max_input_shapes,
optim_input_shapes, disable_trt_plugin_fp16);
}
void PD_ConfigDisableTensorRtOPs(__pd_keep PD_Config* pd_config, size_t ops_num,
const char** ops_name) {
CHECK_AND_CONVERT_PD_CONFIG;
std::vector<std::string> ops_list;
for (size_t index = 0; index < ops_num; ++index) {
ops_list.emplace_back(ops_name[index]);
}
config->Exp_DisableTensorRtOPs(ops_list);
}
void PD_ConfigEnableTensorRtOSS(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableTensorRtOSS();
}
PD_Bool PD_ConfigTensorRtOssEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->tensorrt_oss_enabled();
}
void PD_ConfigEnableTensorRtDla(__pd_keep PD_Config* pd_config,
int32_t dla_core) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableTensorRtDLA(dla_core);
}
PD_Bool PD_ConfigTensorRtDlaEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->tensorrt_dla_enabled();
}
void PD_ConfigEnableLiteEngine(__pd_keep PD_Config* pd_config,
PD_PrecisionType precision, PD_Bool zero_copy,
size_t passes_filter_num,
const char** passes_filter,
size_t ops_filter_num, const char** ops_filter) {
CHECK_AND_CONVERT_PD_CONFIG;
std::vector<std::string> passes_filters, ops_filters;
for (size_t index = 0; index < passes_filter_num; ++index) {
passes_filters.emplace_back(passes_filter[index]);
}
for (size_t index = 0; index < ops_filter_num; ++index) {
ops_filters.emplace_back(ops_filter[index]);
}
config->EnableLiteEngine(ConvertToCxxPrecisionType(precision), zero_copy,
passes_filters, ops_filters);
}
PD_Bool PD_ConfigLiteEngineEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->lite_engine_enabled();
}
void PD_ConfigSwitchIrDebug(__pd_keep PD_Config* pd_config, PD_Bool x) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SwitchIrDebug(x);
}
void PD_ConfigEnableMKLDNN(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableMKLDNN();
}
void PD_ConfigSetMkldnnCacheCapacity(__pd_keep PD_Config* pd_config,
int32_t capacity) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SetMkldnnCacheCapacity(capacity);
}
PD_Bool PD_ConfigMkldnnEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->mkldnn_enabled();
}
void PD_ConfigSetCpuMathLibraryNumThreads(
__pd_keep PD_Config* pd_config, int32_t cpu_math_library_num_threads) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SetCpuMathLibraryNumThreads(cpu_math_library_num_threads);
}
int32_t PD_ConfigGetCpuMathLibraryNumThreads(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->cpu_math_library_num_threads();
}
void PD_ConfigSetMkldnnOp(__pd_keep PD_Config* pd_config, size_t ops_num,
const char** op_list) {
CHECK_AND_CONVERT_PD_CONFIG;
std::unordered_set<std::string> op_names;
for (size_t index = 0; index < ops_num; ++index) {
op_names.emplace(op_list[index]);
}
config->SetMKLDNNOp(std::move(op_names));
}
void PD_ConfigEnableMkldnnQuantizer(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableMkldnnQuantizer();
}
void PD_ConfigEnableMkldnnBfloat16(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableMkldnnBfloat16();
}
PD_Bool PD_ConfigMkldnnBfloat16Enabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->mkldnn_bfloat16_enabled();
}
void PD_ConfigSetBfloat16Op(__pd_keep PD_Config* pd_config, size_t ops_num,
const char** op_list) {
CHECK_AND_CONVERT_PD_CONFIG;
std::unordered_set<std::string> op_names;
for (size_t index = 0; index < ops_num; ++index) {
op_names.emplace(op_list[index]);
}
config->SetBfloat16Op(std::move(op_names));
}
PD_Bool PD_ConfigThreadLocalStreamEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->thread_local_stream_enabled();
}
PD_Bool PD_ConfigMkldnnQuantizerEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->mkldnn_quantizer_enabled();
}
void PD_ConfigSetModelBuffer(__pd_keep PD_Config* pd_config,
const char* prog_buffer, size_t prog_buffer_size,
const char* params_buffer,
size_t params_buffer_size) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SetModelBuffer(prog_buffer, prog_buffer_size, params_buffer,
params_buffer_size);
}
PD_Bool PD_ConfigModelFromMemory(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->model_from_memory();
}
void PD_ConfigEnableMemoryOptim(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableMemoryOptim();
}
PD_Bool PD_ConfigMemoryOptimEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->enable_memory_optim();
}
void PD_ConfigEnableProfile(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableProfile();
}
PD_Bool PD_ConfigProfileEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->profile_enabled();
}
void PD_ConfigDisableGlogInfo(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->DisableGlogInfo();
}
PD_Bool PD_ConfigGlogInfoDisabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->glog_info_disabled();
}
void PD_ConfigSetInvalid(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->SetInValid();
}
PD_Bool PD_ConfigIsValid(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->is_valid();
}
void PD_ConfigEnableGpuMultiStream(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableGpuMultiStream();
}
void PD_ConfigPartiallyRelease(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->PartiallyRelease();
}
} // extern "C"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file pd_config.h
///
/// \brief interface for paddle config
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
#pragma once
#include "pd_common.h" // NOLINT
typedef struct PD_Config PD_Config;
#ifdef __cplusplus
extern "C" {
#endif
///
/// \brief Create a paddle config
///
/// \return new config.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_Config* PD_ConfigCreate();
///
/// \brief Destroy the paddle config
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDestroy(__pd_take PD_Config* pd_config);
///
/// \brief Set the combined model with two specific paths for program and
/// parameters.
///
/// \param[in] pd_config config
/// \param[in] prog_file_path model file path of the combined model.
/// \param[in] params_file_path params file path of the combined model.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetModel(__pd_keep PD_Config* pd_config,
const char* prog_file_path,
const char* params_file_path);
///
/// \brief Set the model file path of a combined model.
///
/// \param[in] pd_config config
/// \param[in] prog_file_path model file path.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetProgFile(
__pd_keep PD_Config* pd_config, const char* prog_file_path);
///
/// \brief Set the params file path of a combined model.
///
/// \param[in] pd_config config
/// \param[in] params_file_path params file path.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetParamsFile(
__pd_keep PD_Config* pd_config, const char* params_file_path);
///
/// \brief Set the path of optimization cache directory.
/// \param[in] pd_config config
/// \param[in] opt_cache_dir the path of optimization cache directory.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetOptimCacheDir(
__pd_keep PD_Config* pd_config, const char* opt_cache_dir);
///
/// \brief Set the no-combined model dir path.
/// \param[in] pd_config config
/// \param[in] model_dir model dir path.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetModelDir(
__pd_keep PD_Config* pd_config, const char* model_dir);
///
/// \brief Get the model directory path.
///
/// \param[in] pd_config config
/// \return The model directory path.
///
PADDLE_CAPI_EXPORT extern const char* PD_ConfigGetModelDir(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the program file path.
///
/// \param[in] pd_config config
/// \return The program file path.
///
PADDLE_CAPI_EXPORT extern const char* PD_ConfigGetProgFile(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the params file path.
///
/// \param[in] pd_config config
/// \return The params file path.
///
PADDLE_CAPI_EXPORT extern const char* PD_ConfigGetParamsFile(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn off FC Padding.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDisableFCPadding(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether fc padding is used.
///
/// \param[in] pd_config config
/// \return Whether fc padding is used.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseFcPadding(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on GPU.
///
/// \param[in] pd_config config
/// \param[in] memory_pool_init_size_mb initial size of the GPU memory pool in
/// MB.
/// \param[in] device_id The GPU card to use.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableUseGpu(
__pd_keep PD_Config* pd_config, uint64_t memory_pool_init_size_mb,
int32_t device_id);
///
/// \brief Turn off GPU.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDisableGpu(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the GPU is turned on.
///
/// \param[in] pd_config config
/// \return Whether the GPU is turned on.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseGpu(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on XPU.
///
/// \param[in] pd_config config
/// \param[in] l3_workspace_size l3 workspace size.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
__pd_keep PD_Config* pd_config, int32_t l3_workspace_size);
///
/// \brief A boolean state telling whether the XPU is turned on.
///
/// \param[in] pd_config config
/// \return Whether the XPU is turned on.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseXpu(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the GPU device id.
///
/// \param[in] pd_config config
/// \return The GPU device id.
///
PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the XPU device id.
///
/// \param[in] pd_config config
/// \return The XPU device id.
///
PADDLE_CAPI_EXPORT extern int32_t PD_ConfigXpuDeviceId(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \param[in] pd_config config
/// \return The initial size in MB of the GPU memory pool.
///
PADDLE_CAPI_EXPORT extern int32_t PD_ConfigMemoryPoolInitSizeMb(
__pd_keep PD_Config* pd_config);
///
/// \brief Get the proportion of the initial memory pool size compared to the
/// device.
///
/// \param[in] pd_config config
/// \return The proportion of the initial memory pool size.
///
PADDLE_CAPI_EXPORT extern float PD_ConfigFractionOfGpuMemoryForPool(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on CUDNN.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableCudnn(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether to use CUDNN.
///
/// \param[in] pd_config config
/// \return Whether to use CUDNN.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigCudnnEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Control whether to perform IR graph optimization.
/// If turned off, the AnalysisConfig will act just like a NativeConfig.
///
/// \param[in] pd_config config
/// \param[in] x Whether the ir graph optimization is activated.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSwitchIrOptim(
__pd_keep PD_Config* pd_config, PD_Bool x);
///
/// \brief A boolean state telling whether the ir graph optimization is
/// activated.
///
/// \param[in] pd_config config
/// \return Whether to use ir graph optimization.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIrOptim(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on the TensorRT engine.
/// The TensorRT engine will accelerate some subgraphs in the original Fluid
/// computation graph. In some models, such as ResNet50 and GoogLeNet, it
/// delivers significant performance acceleration.
///
/// \param[in] pd_config config
/// \param[in] workspace_size The memory size (in bytes) used for the TensorRT
/// workspace.
/// \param[in] max_batch_size The maximum batch size of this prediction task;
/// it is better to set it as small as possible for less performance loss.
/// \param[in] min_subgraph_size The minimum TensorRT subgraph size needed; if a
/// subgraph is smaller than this, it will not be transferred to the TensorRT
/// engine.
/// \param[in] precision The precision used in TensorRT.
/// \param[in] use_static Serialize optimization information to disk for
/// reusing.
/// \param[in] use_calib_mode Use TRT int8 calibration (post-training
/// quantization).
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtEngine(
__pd_keep PD_Config* pd_config, int32_t workspace_size,
int32_t max_batch_size, int32_t min_subgraph_size,
PD_PrecisionType precision, PD_Bool use_static, PD_Bool use_calib_mode);
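///
/// A typical call might look like the sketch below (the workspace size, batch
/// size and subgraph size are illustrative values, not defaults):
///
/// \code
///   PD_ConfigEnableTensorRtEngine(config,
///                                 1 << 30,               // workspace_size in bytes
///                                 1,                     // max_batch_size
///                                 3,                     // min_subgraph_size
///                                 PD_PRECISION_FLOAT32,  // precision
///                                 FALSE,                 // use_static
///                                 FALSE);                // use_calib_mode
/// \endcode
///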
///
/// \brief A boolean state telling whether the TensorRT engine is used.
///
/// \param[in] pd_config config
/// \return Whether the TensorRT engine is used.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTensorRtEngineEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
///
/// \param[in] pd_config config
/// \param[in] tensor_num The number of subgraph inputs.
/// \param[in] tensor_name The name of every subgraph input.
/// \param[in] shapes_num The shape size of every subgraph input.
/// \param[in] min_shape The min input shape of every subgraph input.
/// \param[in] max_shape The max input shape of every subgraph input.
/// \param[in] optim_shape The opt input shape of every subgraph input.
/// \param[in] disable_trt_plugin_fp16 Setting this parameter to true means that
/// the TRT plugins will not run in fp16.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetTrtDynamicShapeInfo(
__pd_keep PD_Config* pd_config, size_t tensor_num, const char** tensor_name,
size_t* shapes_num, int32_t** min_shape, int32_t** max_shape,
int32_t** optim_shape, PD_Bool disable_trt_plugin_fp16);
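///
/// A sketch for a single 4-D input named "image" (the name and shapes are
/// illustrative):
///
/// \code
///   const char* names[1] = {"image"};
///   size_t shapes_num[1] = {4};
///   int32_t min[4] = {1, 3, 112, 112};
///   int32_t max[4] = {1, 3, 448, 448};
///   int32_t opt[4] = {1, 3, 224, 224};
///   int32_t* min_shape[1] = {min};
///   int32_t* max_shape[1] = {max};
///   int32_t* optim_shape[1] = {opt};
///   PD_ConfigSetTrtDynamicShapeInfo(config, 1, names, shapes_num, min_shape,
///                                   max_shape, optim_shape, FALSE);
/// \endcode
///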
///
/// \brief Prevent ops running in Paddle-TRT
/// NOTE: just experimental, not an official stable API, easy to be broken.
///
/// \param[in] pd_config config
/// \param[in] ops_num The number of ops.
/// \param[in] ops_name The names of the ops.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDisableTensorRtOPs(
__pd_keep PD_Config* pd_config, size_t ops_num, const char** ops_name);
///
/// \brief Replace some TensorRT plugins with TensorRT OSS (
/// https://github.com/NVIDIA/TensorRT), which can make some models' inference
/// more performant. A libnvinfer_plugin.so newer than
/// v7.2.1 is needed.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtOSS(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether to use the TensorRT OSS.
///
/// \param[in] pd_config config
/// \return Whether to use the TensorRT OSS.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTensorRtOssEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Enable TensorRT DLA
///
/// \param[in] pd_config config
/// \param[in] dla_core ID of DLACore, which should be 0, 1,
/// ..., IBuilder.getNbDLACores() - 1
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTensorRtDla(
__pd_keep PD_Config* pd_config, int32_t dla_core);
///
/// \brief A boolean state telling whether to use the TensorRT DLA.
///
/// \param[in] pd_config config
/// \return Whether to use the TensorRT DLA.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTensorRtDlaEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on the usage of Lite sub-graph engine.
///
/// \param[in] pd_config config
/// \param[in] precision Precision used in the Lite sub-graph engine.
/// \param[in] zero_copy Whether to use zero copy.
/// \param[in] passes_filter_num The number of passes used in Lite sub-graph
/// engine.
/// \param[in] passes_filter The name of passes used in Lite sub-graph engine.
/// \param[in] ops_filter_num The number of operators not supported by Lite.
/// \param[in] ops_filter The name of operators not supported by Lite.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableLiteEngine(
__pd_keep PD_Config* pd_config, PD_PrecisionType precision,
PD_Bool zero_copy, size_t passes_filter_num, const char** passes_filter,
size_t ops_filter_num, const char** ops_filter);
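///
/// A sketch that enables the Lite engine without filtering any passes or ops:
///
/// \code
///   PD_ConfigEnableLiteEngine(config, PD_PRECISION_FLOAT32, FALSE,
///                             0, NULL,   // no passes filtered
///                             0, NULL);  // no ops filtered
/// \endcode
///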
///
/// \brief A boolean state indicating whether the Lite sub-graph engine is
/// used.
///
/// \param[in] pd_config config
/// \return Whether the Lite sub-graph engine is used.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigLiteEngineEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Control whether to debug IR graph analysis phase.
/// This will generate DOT files for visualizing the computation graph after
/// each analysis pass is applied.
///
/// \param[in] pd_config config
/// \param[in] x whether to debug IR graph analysis phase.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSwitchIrDebug(
__pd_keep PD_Config* pd_config, PD_Bool x);
///
/// \brief Turn on MKLDNN.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMKLDNN(
__pd_keep PD_Config* pd_config);
///
/// \brief Set the cache capacity of different input shapes for MKLDNN.
/// Default value 0 means not caching any shape.
/// Please see MKL-DNN Data Caching Design Document:
/// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md
///
/// \param[in] pd_config config
/// \param[in] capacity The cache capacity.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnCacheCapacity(
__pd_keep PD_Config* pd_config, int32_t capacity);
///
/// \brief A boolean state telling whether to use the MKLDNN.
///
/// \param[in] pd_config config
/// \return Whether to use the MKLDNN.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Set the number of cpu math library threads.
///
/// \param[in] pd_config config
/// \param[in] cpu_math_library_num_threads The number of cpu math library
/// threads.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetCpuMathLibraryNumThreads(
__pd_keep PD_Config* pd_config, int32_t cpu_math_library_num_threads);
///
/// \brief An int state telling how many threads are used in the CPU math
/// library.
///
/// \param[in] pd_config config
/// \return The number of threads used in the CPU math library.
///
PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGetCpuMathLibraryNumThreads(
__pd_keep PD_Config* pd_config);
///
/// \brief Specify the operator type list to use MKLDNN acceleration.
///
/// \param[in] pd_config config
/// \param[in] ops_num The number of operator types in the list.
/// \param[in] op_list The names of the operator types.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetMkldnnOp(
__pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list);
///
/// \brief Turn on MKLDNN quantization.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnQuantizer(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the MKLDNN quantization is enabled.
///
/// \param[in] pd_config config
/// \return Whether the MKLDNN quantization is enabled.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnQuantizerEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on MKLDNN bfloat16.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnBfloat16(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether to use the MKLDNN Bfloat16.
///
/// \param[in] pd_config config
/// \return Whether to use the MKLDNN Bfloat16.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled(
__pd_keep PD_Config* pd_config);
/// \brief Specify the operator type list to use Bfloat16 acceleration.
///
/// \param[in] pd_config config
/// \param[in] ops_num The number of operator types in the list.
/// \param[in] op_list The names of the operator types.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op(
__pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list);
///
/// \brief Enable the GPU multi-computing stream feature.
/// NOTE: The current behavior of this interface is to bind the computation
/// stream to the thread, and this behavior may be changed in the future.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableGpuMultiStream(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the thread local CUDA stream is
/// enabled.
///
/// \param[in] pd_config config
/// \return Whether the thread local CUDA stream is enabled.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigThreadLocalStreamEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Specify the memory buffer of program and parameter.
/// Used when model and params are loaded directly from memory.
///
/// \param[in] pd_config config
/// \param[in] prog_buffer The memory buffer of program.
/// \param[in] prog_buffer_size The size of the model data.
/// \param[in] params_buffer The memory buffer of the combined parameters file.
/// \param[in] params_buffer_size The size of the combined parameters data.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetModelBuffer(
__pd_keep PD_Config* pd_config, const char* prog_buffer,
size_t prog_buffer_size, const char* params_buffer,
size_t params_buffer_size);
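///
/// A sketch of loading a model from memory; ReadFileToBuffer is a hypothetical
/// helper that reads a whole file into a malloc'ed buffer and returns its size,
/// and the file names are illustrative:
///
/// \code
///   size_t prog_size = 0, params_size = 0;
///   char* prog_buf = ReadFileToBuffer("model.pdmodel", &prog_size);
///   char* params_buf = ReadFileToBuffer("model.pdiparams", &params_size);
///   PD_ConfigSetModelBuffer(config, prog_buf, prog_size, params_buf, params_size);
/// \endcode
///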
///
/// \brief A boolean state telling whether the model is set from the CPU
/// memory.
///
/// \param[in] pd_config config
/// \return Whether model and params are loaded directly from memory.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigModelFromMemory(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on memory optimization.
/// NOTE: still in development.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMemoryOptim(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the memory optimization is
/// activated.
///
/// \param[in] pd_config config
/// \return Whether the memory optimization is activated.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMemoryOptimEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on profiling report.
/// If not turned on, no profiling report will be generated.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableProfile(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the profiler is activated.
///
/// \param[in] pd_config config
/// \return Whether the profiler is activated.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigProfileEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Mute all logs in Paddle inference.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDisableGlogInfo(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether logs in Paddle inference are muted.
///
/// \param[in] pd_config config
/// \return Whether logs in Paddle inference are muted.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigGlogInfoDisabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Set the Config to be invalid.
/// This is to ensure that a Config can only be used in one
/// Predictor.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetInvalid(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the Config is valid.
///
/// \param[in] pd_config config
/// \return Whether the Config is valid.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIsValid(
__pd_keep PD_Config* pd_config);
///
/// \brief Partially release the memory
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigPartiallyRelease(
__pd_keep PD_Config* pd_config);
#ifdef __cplusplus
} // extern "C"
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pd_common.h" // NOLINT
#include "pd_config.h" // NOLINT
#include "pd_predictor.h" // NOLINT
#include "pd_tensor.h" // NOLINT
#include "pd_types.h" // NOLINT
#include "pd_utils.h" // NOLINT
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/capi_exp/pd_predictor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/types_internal.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
#define CHECK_AND_CONVERT_PD_PREDICTOR \
PADDLE_ENFORCE_NOT_NULL( \
pd_predictor, \
paddle::platform::errors::InvalidArgument( \
"The pointer of paddle predictor shouldn't be nullptr")); \
auto& predictor = pd_predictor->predictor
extern "C" {
__pd_give PD_Predictor* PD_PredictorCreate(__pd_take PD_Config* pd_config) {
PADDLE_ENFORCE_NOT_NULL(
pd_config, paddle::platform::errors::InvalidArgument(
"The pointer of paddle predictor shouldn't be nullptr"));
PD_Predictor* pd_predictor = new PD_Predictor();
paddle_infer::Config* config =
reinterpret_cast<paddle_infer::Config*>(pd_config);
pd_predictor->predictor = paddle_infer::CreatePredictor(*config);
delete config;
return pd_predictor;
}
__pd_give PD_Predictor* PD_PredictorClone(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
PD_Predictor* new_predictor = new PD_Predictor();
new_predictor->predictor = predictor->Clone();
return new_predictor;
}
__pd_give PD_OneDimArrayCstr* PD_PredictorGetInputNames(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
std::vector<std::string> names = predictor->GetInputNames();
return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
__pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
std::vector<std::string> names = predictor->GetOutputNames();
return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
size_t PD_PredictorGetInputNum(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
return predictor->GetInputNames().size();
}
size_t PD_PredictorGetOutputNum(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
return predictor->GetOutputNames().size();
}
__pd_give PD_Tensor* PD_PredictorGetInputHandle(
__pd_keep PD_Predictor* pd_predictor, const char* name) {
CHECK_AND_CONVERT_PD_PREDICTOR;
PD_Tensor* pd_tensor = new PD_Tensor();
pd_tensor->tensor = predictor->GetInputHandle(name);
return pd_tensor;
}
__pd_give PD_Tensor* PD_PredictorGetOutputHandle(
__pd_keep PD_Predictor* pd_predictor, const char* name) {
CHECK_AND_CONVERT_PD_PREDICTOR;
PD_Tensor* pd_tensor = new PD_Tensor();
pd_tensor->tensor = predictor->GetOutputHandle(name);
return pd_tensor;
}
PD_Bool PD_PredictorRun(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
return predictor->Run();
}
void PD_PredictorClearIntermediateTensor(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
predictor->ClearIntermediateTensor();
}
uint64_t PD_PredictorTryShrinkMemory(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
return predictor->TryShrinkMemory();
}
void PD_PredictorDestroy(__pd_take PD_Predictor* pd_predictor) {
delete pd_predictor;
}
} // extern "C"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file pd_predictor.h
///
/// \brief interface for paddle predictor
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
#pragma once
#include "pd_common.h" // NOLINT
typedef struct PD_Predictor PD_Predictor;
typedef struct PD_Config PD_Config;
typedef struct PD_Tensor PD_Tensor;
typedef struct PD_OneDimArrayCstr PD_OneDimArrayCstr;
#ifdef __cplusplus
extern "C" {
#endif
///
/// \brief Create a new Predictor
///
/// \param[in] pd_config config
/// \return new predictor.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_Predictor* PD_PredictorCreate(
__pd_take PD_Config* pd_config);
///
/// \brief Clone a new Predictor
///
/// \param[in] pd_predictor predictor
/// \return new predictor.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_Predictor* PD_PredictorClone(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the input names
///
/// \param[in] pd_predictor predictor
/// \return input names
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the output names
///
/// \param[in] pd_predictor predictor
/// \return output names
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetOutputNames(__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the input number
///
/// \param[in] pd_predictor predictor
/// \return input number
///
PADDLE_CAPI_EXPORT extern size_t PD_PredictorGetInputNum(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the output number
///
/// \param[in] pd_predictor predictor
/// \return output number
///
PADDLE_CAPI_EXPORT extern size_t PD_PredictorGetOutputNum(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the Input Tensor object
///
/// \param[in] pd_predictor predictor
/// \param[in] name input name
/// \return input tensor
///
PADDLE_CAPI_EXPORT extern __pd_give PD_Tensor* PD_PredictorGetInputHandle(
__pd_keep PD_Predictor* pd_predictor, const char* name);
///
/// \brief Get the Output Tensor object
///
/// \param[in] pd_predictor predictor
/// \param[in] name output name
/// \return output tensor
///
PADDLE_CAPI_EXPORT extern __pd_give PD_Tensor* PD_PredictorGetOutputHandle(
__pd_keep PD_Predictor* pd_predictor, const char* name);
///
/// \brief Run the prediction engine
///
/// \param[in] pd_predictor predictor
/// \return Whether the function executed successfully
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_PredictorRun(
__pd_keep PD_Predictor* pd_predictor);
/// \brief Clear the intermediate tensors of the predictor
///
/// \param[in] pd_predictor predictor
///
PADDLE_CAPI_EXPORT extern void PD_PredictorClearIntermediateTensor(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Release all temporary tensors to compress the size of the memory pool.
/// The memory pool is considered to be composed of a list of chunks; if a
/// chunk is not occupied, it can be released.
///
/// \param[in] pd_predictor predictor
/// \return Number of bytes released. It may be smaller than the actual
/// released memory, because part of the memory is not managed by the
/// MemoryPool.
///
PADDLE_CAPI_EXPORT extern uint64_t PD_PredictorTryShrinkMemory(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Destroy a predictor object
///
/// \param[in] pd_predictor predictor
///
PADDLE_CAPI_EXPORT extern void PD_PredictorDestroy(
__pd_take PD_Predictor* pd_predictor);
#ifdef __cplusplus
} // extern "C"
#endif
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/capi_exp/pd_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/types_internal.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
#define CHECK_AND_CONVERT_PD_TENSOR \
PADDLE_ENFORCE_NOT_NULL( \
pd_tensor, paddle::platform::errors::InvalidArgument( \
"The pointer of paddle tensor shouldn't be nullptr")); \
auto& tensor = pd_tensor->tensor
extern "C" {
void PD_TensorDestroy(__pd_take PD_Tensor* pd_tensor) { delete pd_tensor; }
void PD_TensorReshape(__pd_keep PD_Tensor* pd_tensor, size_t shape_size,
int32_t* shape) {
CHECK_AND_CONVERT_PD_TENSOR;
std::vector<int> shapes(shape_size);
for (size_t index = 0; index < shape_size; ++index) {
shapes[index] = shape[index];
}
tensor->Reshape(shapes);
}
#define REPEAT_ALL_DATA_TYPE(func) \
func(float, Float) func(int64_t, Int64) func(int32_t, Int32) \
func(uint8_t, Uint8) func(int8_t, Int8)
#define PD_TENSOR_MUTABLE_DATA_IMPL(type, Type) \
type* PD_TensorMutableData##Type(__pd_keep PD_Tensor* pd_tensor, \
PD_PlaceType place) { \
CHECK_AND_CONVERT_PD_TENSOR; \
return tensor->mutable_data<type>(paddle_infer::CvtToCxxPlaceType(place)); \
}
REPEAT_ALL_DATA_TYPE(PD_TENSOR_MUTABLE_DATA_IMPL)
#undef PD_TENSOR_MUTABLE_DATA_IMPL
#define PD_TENSOR_DATA_IMPL(type, Type) \
type* PD_TensorData##Type(__pd_keep PD_Tensor* pd_tensor, \
PD_PlaceType* place, int32_t* size) { \
CHECK_AND_CONVERT_PD_TENSOR; \
PADDLE_ENFORCE_NOT_NULL(place, \
paddle::platform::errors::InvalidArgument( \
"The pointer of place shouldn't be nullptr")); \
PADDLE_ENFORCE_NOT_NULL(size, \
paddle::platform::errors::InvalidArgument( \
"The pointer of size shouldn't be nullptr")); \
paddle_infer::PlaceType cxx_place_type; \
int cxx_size; \
type* data = tensor->data<type>(&cxx_place_type, &cxx_size); \
*place = paddle_infer::CvtFromCxxPlaceType(cxx_place_type); \
*size = static_cast<int32_t>(cxx_size); \
return data; \
}
REPEAT_ALL_DATA_TYPE(PD_TENSOR_DATA_IMPL)
#undef PD_TENSOR_DATA_IMPL
#define PD_TENSOR_COPY_FROM_CPU_IMPL(type, Type) \
void PD_TensorCopyFromCpu##Type(__pd_keep PD_Tensor* pd_tensor, \
const type* data) { \
CHECK_AND_CONVERT_PD_TENSOR; \
tensor->CopyFromCpu<type>(data); \
}
REPEAT_ALL_DATA_TYPE(PD_TENSOR_COPY_FROM_CPU_IMPL)
#undef PD_TENSOR_COPY_FROM_CPU_IMPL
#define PD_TENSOR_COPY_TO_CPU_IMPL(type, Type) \
void PD_TensorCopyToCpu##Type(__pd_keep PD_Tensor* pd_tensor, type* data) { \
CHECK_AND_CONVERT_PD_TENSOR; \
tensor->CopyToCpu<type>(data); \
}
REPEAT_ALL_DATA_TYPE(PD_TENSOR_COPY_TO_CPU_IMPL)
#undef PD_TENSOR_COPY_TO_CPU_IMPL
#undef REPEAT_ALL_DATA_TYPE
__pd_give PD_OneDimArrayInt32* PD_TensorGetShape(
__pd_keep PD_Tensor* pd_tensor) {
CHECK_AND_CONVERT_PD_TENSOR;
return paddle_infer::CvtVecToOneDimArrayInt32(tensor->shape());
}
void PD_TensorSetLod(__pd_keep PD_Tensor* pd_tensor,
__pd_keep PD_TwoDimArraySize* lod) {
CHECK_AND_CONVERT_PD_TENSOR;
tensor->SetLoD(paddle_infer::CvtTwoDimArrayToVecSize(lod));
}
__pd_give PD_TwoDimArraySize* PD_TensorGetLod(__pd_keep PD_Tensor* pd_tensor) {
CHECK_AND_CONVERT_PD_TENSOR;
return paddle_infer::CvtVecToTwoDimArraySize(tensor->lod());
}
const char* PD_TensorGetName(__pd_keep PD_Tensor* pd_tensor) {
CHECK_AND_CONVERT_PD_TENSOR;
return tensor->name().c_str();
}
PD_DataType PD_TensorGetDataType(__pd_keep PD_Tensor* pd_tensor) {
CHECK_AND_CONVERT_PD_TENSOR;
return paddle_infer::CvtFromCxxDatatype(tensor->type());
}
} // extern "C"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file pd_tensor.h
///
/// \brief interface for paddle tensor
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
#pragma once
#include "pd_common.h" // NOLINT
typedef struct PD_Tensor PD_Tensor;
typedef struct PD_OneDimArrayInt32 PD_OneDimArrayInt32;
typedef struct PD_TwoDimArraySize PD_TwoDimArraySize;
#ifdef __cplusplus
extern "C" {
#endif
///
/// \brief Destroy the paddle tensor
///
/// \param[in] pd_tensor tensor
///
PADDLE_CAPI_EXPORT extern void PD_TensorDestroy(__pd_take PD_Tensor* pd_tensor);
///
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
/// Reshape must be called before calling PD_TensorMutableData*() or
/// PD_TensorCopyFromCpu*()
///
/// \param[in] pd_tensor tensor.
/// \param[in] shape_size The size of shape.
/// \param[in] shape The shape to set.
///
PADDLE_CAPI_EXPORT extern void PD_TensorReshape(__pd_keep PD_Tensor* pd_tensor,
size_t shape_size,
int32_t* shape);
///
/// \brief Get the memory pointer in CPU or GPU with 'float' data type.
/// Please Reshape the tensor first before calling this.
/// It's usually used to get input data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[in] place The place of the tensor.
/// \return Memory pointer of pd_tensor
///
PADDLE_CAPI_EXPORT extern float* PD_TensorMutableDataFloat(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType place);
///
/// \brief Get the memory pointer in CPU or GPU with 'int64_t' data type.
/// Please reshape the tensor before calling this.
/// It's usually used to get input data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[in] place The place of the tensor.
/// \return Memory pointer of pd_tensor
///
PADDLE_CAPI_EXPORT extern int64_t* PD_TensorMutableDataInt64(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType place);
///
/// \brief Get the memory pointer in CPU or GPU with 'int32_t' data type.
/// Please reshape the tensor before calling this.
/// It's usually used to get input data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[in] place The place of the tensor.
/// \return Memory pointer of pd_tensor
///
PADDLE_CAPI_EXPORT extern int32_t* PD_TensorMutableDataInt32(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType place);
///
/// \brief Get the memory pointer in CPU or GPU with 'uint8_t' data type.
/// Please reshape the tensor before calling this.
/// It's usually used to get input data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[in] place The place of the tensor.
/// \return Memory pointer of pd_tensor
///
PADDLE_CAPI_EXPORT extern uint8_t* PD_TensorMutableDataUint8(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType place);
///
/// \brief Get the memory pointer in CPU or GPU with 'int8_t' data type.
/// Please reshape the tensor before calling this.
/// It's usually used to get input data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[in] place The place of the tensor.
/// \return Memory pointer of pd_tensor
///
PADDLE_CAPI_EXPORT extern int8_t* PD_TensorMutableDataInt8(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType place);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
PADDLE_CAPI_EXPORT extern float* PD_TensorDataFloat(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType* place, int32_t* size);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
PADDLE_CAPI_EXPORT extern int64_t* PD_TensorDataInt64(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType* place, int32_t* size);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
PADDLE_CAPI_EXPORT extern int32_t* PD_TensorDataInt32(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType* place, int32_t* size);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
PADDLE_CAPI_EXPORT extern uint8_t* PD_TensorDataUint8(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType* place, int32_t* size);
///
/// \brief Get the memory pointer directly.
/// It's usually used to get the output data pointer.
///
/// \param[in] pd_tensor tensor.
/// \param[out] place To get the device type of the tensor.
/// \param[out] size To get the data size of the tensor.
/// \return The tensor data buffer pointer.
///
PADDLE_CAPI_EXPORT extern int8_t* PD_TensorDataInt8(
__pd_keep PD_Tensor* pd_tensor, PD_PlaceType* place, int32_t* size);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param[in] pd_tensor tensor.
/// \param[in] data The pointer of the data, from which the tensor will copy.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyFromCpuFloat(
__pd_keep PD_Tensor* pd_tensor, const float* data);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param[in] pd_tensor tensor.
/// \param[in] data The pointer of the data, from which the tensor will copy.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyFromCpuInt64(
__pd_keep PD_Tensor* pd_tensor, const int64_t* data);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param[in] pd_tensor tensor.
/// \param[in] data The pointer of the data, from which the tensor will copy.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyFromCpuInt32(
__pd_keep PD_Tensor* pd_tensor, const int32_t* data);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param[in] pd_tensor tensor.
/// \param[in] data The pointer of the data, from which the tensor will copy.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyFromCpuUint8(
__pd_keep PD_Tensor* pd_tensor, const uint8_t* data);
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
/// \param[in] pd_tensor tensor.
/// \param[in] data The pointer of the data, from which the tensor will copy.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyFromCpuInt8(
__pd_keep PD_Tensor* pd_tensor, const int8_t* data);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[in] pd_tensor tensor.
/// \param[out] data The tensor will copy the data to the address.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyToCpuFloat(
__pd_keep PD_Tensor* pd_tensor, float* data);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[in] pd_tensor tensor.
/// \param[out] data The tensor will copy the data to the address.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyToCpuInt64(
__pd_keep PD_Tensor* pd_tensor, int64_t* data);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[in] pd_tensor tensor.
/// \param[out] data The tensor will copy the data to the address.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyToCpuInt32(
__pd_keep PD_Tensor* pd_tensor, int32_t* data);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[in] pd_tensor tensor.
/// \param[out] data The tensor will copy the data to the address.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyToCpuUint8(
__pd_keep PD_Tensor* pd_tensor, uint8_t* data);
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
/// \param[in] pd_tensor tensor.
/// \param[out] data The tensor will copy the data to the address.
///
PADDLE_CAPI_EXPORT extern void PD_TensorCopyToCpuInt8(
__pd_keep PD_Tensor* pd_tensor, int8_t* data);
///
/// \brief Get the tensor shape
/// \param[in] pd_tensor tensor.
/// \return The tensor shape.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayInt32* PD_TensorGetShape(
__pd_keep PD_Tensor* pd_tensor);
///
/// \brief Set the tensor lod information
/// \param[in] pd_tensor tensor.
/// \param[in] lod lod information.
///
PADDLE_CAPI_EXPORT extern void PD_TensorSetLod(
__pd_keep PD_Tensor* pd_tensor, __pd_keep PD_TwoDimArraySize* lod);
///
/// \brief Get the tensor lod information
/// \param[in] pd_tensor tensor.
/// \return the lod information.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_TwoDimArraySize* PD_TensorGetLod(
__pd_keep PD_Tensor* pd_tensor);
///
/// \brief Get the tensor name
/// \param[in] pd_tensor tensor.
/// \return the tensor name.
///
PADDLE_CAPI_EXPORT extern const char* PD_TensorGetName(
__pd_keep PD_Tensor* pd_tensor);
///
/// \brief Get the tensor data type
/// \param[in] pd_tensor tensor.
/// \return the tensor data type.
///
PADDLE_CAPI_EXPORT extern PD_DataType PD_TensorGetDataType(
__pd_keep PD_Tensor* pd_tensor);
#ifdef __cplusplus
} // extern "C"
#endif
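// Usage sketch (an illustration, not part of the header): the pointer-based
// access path declared above, as an alternative to the CopyFromCpu/CopyToCpu
// pair. It assumes pd_inference_api.h is included and that `predictor` was
// created elsewhere with a CPU config; the 1x3x224x224 shape is a placeholder.
void FillAndReadInPlace(PD_Predictor* predictor) {
  PD_OneDimArrayCstr* in_names = PD_PredictorGetInputNames(predictor);
  PD_Tensor* in = PD_PredictorGetInputHandle(predictor, in_names->data[0]);
  int32_t shape[4] = {1, 3, 224, 224};
  PD_TensorReshape(in, 4, shape);  // reshape first, then ask for the buffer
  float* in_buf = PD_TensorMutableDataFloat(in, PD_PLACE_CPU);
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i) in_buf[i] = 0.0f;
  PD_PredictorRun(predictor);
  PD_OneDimArrayCstr* out_names = PD_PredictorGetOutputNames(predictor);
  PD_Tensor* out = PD_PredictorGetOutputHandle(predictor, out_names->data[0]);
  PD_PlaceType place;
  int32_t size;  // element count of the output buffer
  float* out_buf = PD_TensorDataFloat(out, &place, &size);
  (void)out_buf;  // read-only view owned by the tensor; do not free it
  PD_TensorDestroy(out);
  PD_OneDimArrayCstrDestroy(out_names);
  PD_TensorDestroy(in);
  PD_OneDimArrayCstrDestroy(in_names);
}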
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdio.h>
#include "pd_common.h" // NOLINT
typedef struct PD_OneDimArrayInt32 {
size_t size;
int32_t* data;
} PD_OneDimArrayInt32; // std::vector<int32_t>
typedef struct PD_OneDimArraySize {
size_t size;
size_t* data;
} PD_OneDimArraySize; // std::vector<size_t>
typedef struct PD_OneDimArrayCstr {
size_t size;
char** data;
} PD_OneDimArrayCstr; // std::vector<std::string>
typedef struct PD_TwoDimArraySize {
size_t size;
PD_OneDimArraySize** data;
} PD_TwoDimArraySize; // std::vector<std::vector<size_t>>
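// Usage sketch (illustration only, assumes pd_inference_api.h is included):
// these plain C structs mirror the C++ containers named in the comments above.
// A name array is walked like a std::vector<std::string>, and a one-level lod
// can live on the stack because PD_TensorSetLod takes its argument as
// __pd_keep and copies it into the tensor.
void LodSketch(PD_Tensor* tensor, const PD_OneDimArrayCstr* names) {
  for (size_t i = 0; i < names->size; ++i) {
    printf("input[%zu] = %s\n", i, names->data[i]);
  }
  size_t offsets[2] = {0, 11};              // one sequence covering rows [0, 11)
  PD_OneDimArraySize level = {2, offsets};  // {size, data}
  PD_OneDimArraySize* level_ptr = &level;
  PD_TwoDimArraySize lod = {1, &level_ptr};
  PD_TensorSetLod(tensor, &lod);
}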
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
#define DESTROY_ONE_DIM_ARRAY(type) \
void PD_OneDimArray##type##Destroy(__pd_take PD_OneDimArray##type* array) { \
if (array != NULL) { \
delete[] array->data; \
delete array; \
} \
}
#define CONVERT_VEC_TO_ONE_DIM_ARRAY(type, Type, vec_type) \
__pd_give PD_OneDimArray##Type* CvtVecToOneDimArray##Type( \
const std::vector<vec_type>& vec) { \
PD_OneDimArray##Type* array = new PD_OneDimArray##Type; \
array->size = vec.size(); \
array->data = vec.empty() ? NULL : new type[vec.size()]; \
for (size_t index = 0; index < vec.size(); ++index) { \
array->data[index] = vec[index]; \
} \
return array; \
}
#define CONVERT_ONE_DIM_ARRAY_TO_VEC(type, Type, vec_type) \
std::vector<vec_type> CvtOneDimArrayToVec##Type( \
__pd_keep const PD_OneDimArray##Type* array) { \
std::vector<vec_type> vec; \
if (array != NULL) { \
vec.resize(array->size); \
for (size_t index = 0; index < array->size; ++index) { \
vec[index] = array->data[index]; \
} \
} \
return vec; \
}
#define ONE_DIM_ARRAY_UTILS_FUNC_IMPL(type, Type, vec_type) \
extern "C" { \
DESTROY_ONE_DIM_ARRAY(Type); \
} \
namespace paddle_infer { \
CONVERT_VEC_TO_ONE_DIM_ARRAY(type, Type, vec_type) \
CONVERT_ONE_DIM_ARRAY_TO_VEC(type, Type, vec_type) \
}
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int32_t, Int32, int)
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
#undef ONE_DIM_ARRAY_UTILS_FUNC_IMPL
#undef CONVERT_ONE_DIM_ARRAY_TO_VEC
#undef CONVERT_VEC_TO_ONE_DIM_ARRAY
#undef DESTROY_ONE_DIM_ARRAY
void PD_OneDimArrayCstrDestroy(__pd_take PD_OneDimArrayCstr* array) {
if (array != NULL) {
if (array->size != 0) {
for (size_t index = 0; index < array->size; ++index) {
delete[] array->data[index];
}
}
delete[] array->data;
delete array;
}
}
namespace paddle_infer {
__pd_give PD_OneDimArrayCstr* CvtVecToOneDimArrayCstr(
const std::vector<std::string>& vec) {
PD_OneDimArrayCstr* array = new PD_OneDimArrayCstr;
array->size = vec.size();
array->data = vec.empty() ? NULL : new char*[vec.size()];
for (size_t index = 0u; index < vec.size(); ++index) {
array->data[index] = new char[vec[index].size() + 1];
memcpy(array->data[index], vec[index].c_str(), vec[index].size() + 1);
}
return array;
}
std::vector<std::string> CvtOneDimArrayToVecCstr(
__pd_keep const PD_OneDimArrayCstr* array) {
std::vector<std::string> vec;
for (size_t index = 0; index < array->size; ++index) {
vec.emplace_back(array->data[index]);
}
return vec;
}
} // namespace paddle_infer
#define DESTROY_TWO_DIM_ARRAY(type) \
void PD_TwoDimArray##type##Destroy(__pd_take PD_TwoDimArray##type* array) { \
if (array != NULL) { \
if (array->size != 0) { \
for (size_t index = 0; index < array->size; ++index) { \
PD_OneDimArray##type##Destroy(array->data[index]); \
} \
} \
delete[] array->data; \
delete array; \
} \
}
#define CONVERT_VEC_TO_TWO_DIM_ARRAY(type, Type, vec_type) \
__pd_give PD_TwoDimArray##Type* CvtVecToTwoDimArray##Type( \
const std::vector<std::vector<vec_type>>& vec) { \
PD_TwoDimArray##Type* array = new PD_TwoDimArray##Type; \
array->size = vec.size(); \
array->data = vec.empty() ? NULL : new PD_OneDimArray##Type*[vec.size()]; \
for (size_t index = 0; index < vec.size(); ++index) { \
array->data[index] = CvtVecToOneDimArray##Type(vec[index]); \
} \
return array; \
}
#define CONVERT_TWO_DIM_ARRAY_TO_VEC(type, Type, vec_type) \
std::vector<std::vector<vec_type>> CvtTwoDimArrayToVec##Type( \
__pd_keep const PD_TwoDimArray##Type* array) { \
std::vector<std::vector<vec_type>> vec; \
if (array != NULL && array->size != 0) { \
vec.resize(array->size); \
for (size_t index = 0; index < array->size; ++index) { \
vec[index] = CvtOneDimArrayToVec##Type((array->data)[index]); \
} \
} \
return vec; \
}
#define TWO_DIM_ARRAY_UTILS_FUNC_IMPL(type, Type, vec_type) \
extern "C" { \
DESTROY_TWO_DIM_ARRAY(Type); \
} \
namespace paddle_infer { \
CONVERT_VEC_TO_TWO_DIM_ARRAY(type, Type, vec_type) \
CONVERT_TWO_DIM_ARRAY_TO_VEC(type, Type, vec_type) \
}
TWO_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
#undef TWO_DIM_ARRAY_UTILS_FUNC_IMPL
#undef CONVERT_TWO_DIM_ARRAY_TO_VEC
#undef CONVERT_VEC_TO_TWO_DIM_ARRAY
#undef DESTROY_TWO_DIM_ARRAY
namespace paddle_infer {
PlaceType CvtToCxxPlaceType(PD_PlaceType place_type) {
switch (place_type) {
case PD_PLACE_UNK:
return PlaceType::kUNK;
case PD_PLACE_CPU:
return PlaceType::kCPU;
case PD_PLACE_GPU:
return PlaceType::kGPU;
case PD_PLACE_XPU:
return PlaceType::kXPU;
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupport paddle place type %d.", place_type));
return PlaceType::kUNK;
}
}
PD_PlaceType CvtFromCxxPlaceType(PlaceType place_type) {
switch (place_type) {
case PlaceType::kCPU:
return PD_PLACE_CPU;
case PlaceType::kGPU:
return PD_PLACE_GPU;
case PlaceType::kXPU:
return PD_PLACE_XPU;
default:
return PD_PLACE_UNK;
}
}
DataType CvtToCxxDatatype(PD_DataType data_type) {
switch (data_type) {
case PD_DATA_FLOAT32:
return DataType::FLOAT32;
case PD_DATA_INT64:
return DataType::INT64;
case PD_DATA_INT32:
return DataType::INT32;
case PD_DATA_UINT8:
return DataType::UINT8;
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupport paddle data type %d.", data_type));
return DataType::FLOAT32;
}
}
PD_DataType CvtFromCxxDatatype(DataType data_type) {
switch (data_type) {
case DataType::FLOAT32:
return PD_DATA_FLOAT32;
case DataType::INT64:
return PD_DATA_INT64;
case DataType::INT32:
return PD_DATA_INT32;
case DataType::UINT8:
return PD_DATA_UINT8;
default:
return PD_DATA_UNK;
}
}
} // namespace paddle_infer
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file pd_utils.h
///
/// \brief Some utility functions to destroy the paddle C structs.
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
#pragma once
#include <stdint.h>
#include <stdio.h>
#include "pd_types.h" // NOLINT
#ifdef __cplusplus
extern "C" {
#endif
///
/// \brief Destroy the PD_OneDimArrayInt32 object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_OneDimArrayInt32 object.
///
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt32Destroy(
__pd_take PD_OneDimArrayInt32* array);
///
/// \brief Destroy the PD_OneDimArrayCstr object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_OneDimArrayCstr object.
///
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayCstrDestroy(
__pd_take PD_OneDimArrayCstr* array);
///
/// \brief Destroy the PD_OneDimArraySize object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_OneDimArraySize object.
///
PADDLE_CAPI_EXPORT extern void PD_OneDimArraySizeDestroy(
__pd_take PD_OneDimArraySize* array);
///
/// \brief Destroy the PD_TwoDimArraySize object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_TwoDimArraySize object.
///
PADDLE_CAPI_EXPORT extern void PD_TwoDimArraySizeDestroy(
__pd_take PD_TwoDimArraySize* array);
#ifdef __cplusplus
} // extern "C"
#endif
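// Ownership sketch (illustration only, assumes pd_inference_api.h is
// included): every pointer the C API returns as __pd_give must be released
// with the matching destroy function declared above, while __pd_keep
// arguments stay owned by the caller. The predictor and output tensor are
// assumed to exist already.
void ReleaseExamples(PD_Predictor* predictor, PD_Tensor* output) {
  PD_OneDimArrayCstr* names = PD_PredictorGetOutputNames(predictor);  // __pd_give
  PD_OneDimArrayInt32* shape = PD_TensorGetShape(output);             // __pd_give
  PD_TwoDimArraySize* lod = PD_TensorGetLod(output);                  // __pd_give
  PD_TwoDimArraySizeDestroy(lod);
  PD_OneDimArrayInt32Destroy(shape);
  PD_OneDimArrayCstrDestroy(names);
}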
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <cstdio>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_common.h"
typedef struct PD_Tensor {
std::unique_ptr<paddle_infer::Tensor> tensor;
} PD_Tensor;
typedef struct PD_Predictor {
std::shared_ptr<paddle_infer::Predictor> predictor;
} PD_Predictor;
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file utils_internal.h
///
/// \brief Some utility functions used to convert objects between C structs and
/// C++ classes.
///
/// \author paddle-infer@baidu.com
/// \date 2021-04-21
/// \since 2.1
///
#pragma once
#include <cstdint>
#include <cstdio>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
namespace paddle_infer {
///
/// \brief Convert the 'std::vector<int>' object to a 'PD_OneDimArrayInt32'
/// object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32(
const std::vector<int>& vec);
///
/// \brief Convert the 'PD_OneDimArrayInt32' object to a 'std::vector<int>'
/// object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<int> CvtOneDimArrayToVecInt32(
__pd_keep const PD_OneDimArrayInt32* array);
///
/// \brief Convert the 'std::vector<size_t>' object to a 'PD_OneDimArraySize'
/// object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_OneDimArraySize* CvtVecToOneDimArraySize(
const std::vector<size_t>& vec);
///
/// \brief Convert the 'PD_OneDimArraySize' object to a 'std::vector<size_t>'
/// object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<size_t> CvtOneDimArrayToVecSize(
__pd_keep const PD_OneDimArraySize* array);
///
/// \brief Convert the 'std::vector<std::string>' object to a
/// 'PD_OneDimArrayCstr' object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_OneDimArrayCstr* CvtVecToOneDimArrayCstr(
const std::vector<std::string>& vec);
///
/// \brief Convert the 'PD_OneDimArrayCstr' object to a
/// 'std::vector<std::string>' object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<std::string> CvtOneDimArrayToVecCstr(
__pd_keep const PD_OneDimArrayCstr* array);
///
/// \brief Convert the 'std::vector<std::vector<size_t>>' object to a
/// 'PD_TwoDimArraySize' object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_TwoDimArraySize* CvtVecToTwoDimArraySize(
const std::vector<std::vector<size_t>>& vec);
///
/// \brief Convert the 'PD_TwoDimArraySize' object to a
/// 'std::vector<std::vector<size_t>>' object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<std::vector<size_t>> CvtTwoDimArrayToVecSize(
__pd_keep const PD_TwoDimArraySize* array);
///
/// \brief Convert the 'PD_PlaceType' object to a 'paddle_infer::PlaceType'
/// object.
///
/// \param[in] place_type source object.
/// \return target object.
///
PlaceType CvtToCxxPlaceType(PD_PlaceType place_type);
///
/// \brief Convert the 'paddle_infer::PlaceType' object to a 'PD_PlaceType'
/// object.
///
/// \param[in] place_type source object.
/// \return target object.
///
PD_PlaceType CvtFromCxxPlaceType(PlaceType place_type);
///
/// \brief Convert the 'PD_DataType' object to a 'paddle_infer::DataType'
/// object.
///
/// \param[in] data_type source object.
/// \return target object.
///
DataType CvtToCxxDatatype(PD_DataType data_type);
///
/// \brief Convert the 'paddle_infer::DataType' object to a 'PD_DataType'
/// object.
///
/// \param[in] data_type source object.
/// \return target object.
///
PD_DataType CvtFromCxxDatatype(DataType data_type);
} // namespace paddle_infer
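// Internal usage sketch (illustration only): round-tripping a shape through
// the C struct and back with the helpers declared above, the way the wrapper
// .cc files do at the C boundary. PD_OneDimArrayInt32Destroy comes from
// pd_utils.h, which is assumed to be included.
namespace paddle_infer {
inline std::vector<int> RoundTripShape(const std::vector<int>& shape) {
  PD_OneDimArrayInt32* c_shape = CvtVecToOneDimArrayInt32(shape);
  std::vector<int> back = CvtOneDimArrayToVecInt32(c_shape);
  PD_OneDimArrayInt32Destroy(c_shape);  // the array was allocated by the Cvt call
  return back;
}
}  // namespace paddle_infer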
......@@ -522,10 +522,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_analysis_test(trt_instance_norm_test SRCS trt_instance_norm_converter_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${TEST_INSTANCE_NORM_MODEL}/)
inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
inference_analysis_test(test_analyzer_capi_exp_gpu SRCS analyzer_capi_exp_gpu_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(test_analyzer_capi_xpu SRCS analyzer_capi_xpu_tester.cc
inference_analysis_test(test_analyzer_capi_exp_xpu SRCS analyzer_capi_exp_xpu_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
......@@ -604,14 +604,23 @@ inference_analysis_test(lite_resnet50_test SRCS lite_resnet50_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${RESNET50_MODEL_DIR})
inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
inference_analysis_test(test_analyzer_capi_exp SRCS analyzer_capi_exp_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
inference_analysis_test(test_analyzer_capi_exp_pd_config SRCS analyzer_capi_exp_pd_config_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model)
inference_analysis_test(test_analyzer_capi_exp_pd_tensor SRCS analyzer_capi_exp_pd_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model)
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
if (NOT APPLE AND NOT WIN32)
inference_analysis_test(test_analyzer_capi_exp_pd_threads SRCS analyzer_capi_exp_pd_threads_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model)
endif()
inference_analysis_test(test_analyzer_zerocopytensor_tensor SRCS analyzer_zerocopy_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${OCR_INSTALL_DIR}/model)
......@@ -621,17 +630,17 @@ inference_analysis_test(test_analyzer_paddletensor_tensor SRCS analyzer_paddle_t
ARGS --infer_model=${OCR_INSTALL_DIR}/model --infer_data=${OCR_INSTALL_DIR}/data.txt --refer_result=${OCR_INSTALL_DIR}/result.txt)
if(WITH_MKLDNN)
inference_analysis_test(test_analyzer_capi_int SRCS analyzer_capi_int_tester.cc
inference_analysis_test(test_analyzer_capi_exp_int SRCS analyzer_capi_exp_int_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${INT8_DATA_DIR}/resnet50/model)
endif()
endif()
inference_analysis_test(test_analyzer_capi_ner SRCS analyzer_capi_ner_tester.cc
inference_analysis_test(test_analyzer_capi_exp_ner SRCS analyzer_capi_exp_ner_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c
ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model)
if(WITH_GPU)
inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc
inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${RESNET50_MODEL_DIR})
endif()
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST(PD_Config, gpu_interface) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
std::string prog_file = model_dir + "/__model__";
std::string param_file = model_dir + "/__params__";
std::string opt_cache_dir = FLAGS_infer_model + "/OptimCacheDir";
const char* ops_name = "conv_2d";
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, prog_file.c_str(), param_file.c_str());
PD_ConfigSetOptimCacheDir(config, opt_cache_dir.c_str());
PD_ConfigEnableUseGpu(config, 100, 0);
bool use_gpu = PD_ConfigUseGpu(config);
EXPECT_TRUE(use_gpu);
int init_size = PD_ConfigMemoryPoolInitSizeMb(config);
EXPECT_EQ(init_size, 100);
int gpu_device_id = PD_ConfigGpuDeviceId(config);
EXPECT_EQ(gpu_device_id, 0);
float frac = PD_ConfigFractionOfGpuMemoryForPool(config);
LOG(INFO) << frac;
PD_ConfigEnableCudnn(config);
bool cudnn = PD_ConfigCudnnEnabled(config);
EXPECT_TRUE(cudnn);
PD_ConfigEnableTensorRtEngine(config, 1 << 20, 1, 3, PD_PRECISION_INT8, FALSE,
TRUE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
EXPECT_TRUE(trt_enable);
const char* tensor_name = "image";
size_t shapes_num[1] = {4};
int32_t min_shape[4] = {1, 3, 36, 36};
int32_t max_shape[4] = {1, 3, 224, 224};
int32_t opt_shape[4] = {1, 3, 224, 224};
int32_t* min_shape_ptr = min_shape;
int32_t* max_shape_ptr = max_shape;
int32_t* opt_shape_ptr = opt_shape;
PD_ConfigSetTrtDynamicShapeInfo(config, 1, &tensor_name, shapes_num,
&min_shape_ptr, &max_shape_ptr,
&opt_shape_ptr, FALSE);
PD_ConfigDisableTensorRtOPs(config, 1, &ops_name);
PD_ConfigEnableTensorRtOSS(config);
bool oss_enabled = PD_ConfigTensorRtOssEnabled(config);
EXPECT_TRUE(oss_enabled);
PD_ConfigEnableTensorRtDla(config, 4);
bool dla_enabled = PD_ConfigTensorRtDlaEnabled(config);
EXPECT_TRUE(dla_enabled);
PD_ConfigEnableGpuMultiStream(config);
  bool thread_local_stream = PD_ConfigThreadLocalStreamEnabled(config);
  EXPECT_TRUE(thread_local_stream);
PD_ConfigDisableGpu(config);
PD_ConfigDestroy(config);
}
TEST(PD_Config, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
PD_Config* config = PD_ConfigCreate();
PD_ConfigDisableGpu(config);
PD_ConfigSetCpuMathLibraryNumThreads(config, 10);
int num_thread = PD_ConfigGetCpuMathLibraryNumThreads(config);
EXPECT_EQ(num_thread, 10);
PD_ConfigSwitchIrDebug(config, TRUE);
PD_ConfigSetModelDir(config, model_dir.c_str());
PD_ConfigSetOptimCacheDir(config,
(FLAGS_infer_model + "/OptimCacheDir").c_str());
const char* model_dir_ = PD_ConfigGetModelDir(config);
LOG(INFO) << model_dir_;
PD_ConfigEnableUseGpu(config, 100, 0);
bool use_gpu = PD_ConfigUseGpu(config);
EXPECT_TRUE(use_gpu);
int device_id = PD_ConfigGpuDeviceId(config);
EXPECT_EQ(device_id, 0);
int init_size = PD_ConfigMemoryPoolInitSizeMb(config);
EXPECT_EQ(init_size, 100);
float frac = PD_ConfigFractionOfGpuMemoryForPool(config);
LOG(INFO) << frac;
PD_ConfigEnableCudnn(config);
bool cudnn = PD_ConfigCudnnEnabled(config);
EXPECT_TRUE(cudnn);
PD_ConfigSwitchIrOptim(config, TRUE);
bool ir_optim = PD_ConfigIrOptim(config);
EXPECT_TRUE(ir_optim);
PD_ConfigEnableTensorRtEngine(config, 1 << 20, 1, 3, PD_PRECISION_FLOAT32,
FALSE, FALSE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
EXPECT_TRUE(trt_enable);
PD_ConfigEnableMemoryOptim(config);
bool memory_optim_enable = PD_ConfigMemoryOptimEnabled(config);
EXPECT_TRUE(memory_optim_enable);
PD_ConfigEnableProfile(config);
bool profiler_enable = PD_ConfigProfileEnabled(config);
EXPECT_TRUE(profiler_enable);
PD_ConfigSetInvalid(config);
bool is_valid = PD_ConfigIsValid(config);
EXPECT_FALSE(is_valid);
PD_ConfigDestroy(config);
}
TEST(PD_Config, trt_int8) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
PD_Config* config = PD_ConfigCreate();
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableTensorRtEngine(config, 1 << 20, 1, 3, PD_PRECISION_INT8, FALSE,
TRUE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
EXPECT_TRUE(trt_enable);
PD_ConfigDestroy(config);
}
TEST(PD_Config, trt_fp16) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
PD_Config* config = PD_ConfigCreate();
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableTensorRtEngine(config, 1 << 20, 1, 3, PD_PRECISION_HALF, FALSE,
FALSE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
EXPECT_TRUE(trt_enable);
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_PredictorDestroy(predictor);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
void predictor_run() {
std::string model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigDisableGpu(config);
PD_ConfigSetCpuMathLibraryNumThreads(config, 10);
PD_ConfigSwitchIrDebug(config, TRUE);
PD_ConfigSetModelDir(config, model_dir.c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
LOG(INFO) << "The inputs' size is: " << input_names->size;
EXPECT_EQ(input_names->size, 2u);
int32_t shape_0[4] = {1, 3, 224, 224};
float data_0[1 * 3 * 224 * 224] = {0};
PD_Tensor* input_0 = PD_PredictorGetInputHandle(predictor, "image");
PD_TensorReshape(input_0, 4, shape_0);
PD_TensorCopyFromCpuFloat(input_0, data_0);
int32_t shape_1[2] = {1, 1};
int64_t data_1[1] = {0};
PD_Tensor* input_1 = PD_PredictorGetInputHandle(predictor, "label");
PD_TensorReshape(input_1, 2, shape_1);
PD_TensorCopyFromCpuInt64(input_1, data_1);
LOG(INFO) << "Run Inference in CAPI encapsulation. ";
EXPECT_TRUE(PD_PredictorRun(predictor));
PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
LOG(INFO) << "output size is: " << output_names->size;
for (size_t index = 0; index < output_names->size; ++index) {
LOG(INFO) << "output[" << index
<< "]'s name is: " << output_names->data[index];
PD_Tensor* output =
PD_PredictorGetOutputHandle(predictor, output_names->data[index]);
PD_OneDimArrayInt32* shape = PD_TensorGetShape(output);
LOG(INFO) << "output[" << index << "]'s shape_size is: " << shape->size;
int32_t out_size = 1;
for (size_t i = 0; i < shape->size; ++i) {
LOG(INFO) << "output[" << index << "]'s shape is: " << shape->data[i];
out_size = out_size * shape->data[i];
}
float* out_data = new float[out_size];
PD_TensorCopyToCpuFloat(output, out_data);
LOG(INFO) << "output[" << index << "]'s DATA is: " << out_data[0];
delete[] out_data;
PD_OneDimArrayInt32Destroy(shape);
PD_TensorDestroy(output);
}
PD_PredictorClearIntermediateTensor(predictor);
PD_PredictorTryShrinkMemory(predictor);
PD_OneDimArrayCstrDestroy(output_names);
PD_TensorDestroy(input_1);
PD_TensorDestroy(input_0);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
#ifdef PADDLE_WITH_MKLDNN
TEST(PD_PredictorRun, predictor_run) { predictor_run(); }
#endif
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST(PD_PredictorRun, predictor_run) {
auto model_dir = FLAGS_infer_model;
PD_Config *config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/param").c_str());
PD_ConfigDisableGpu(config);
PD_Predictor *predictor = PD_PredictorCreate(config);
size_t input_num = PD_PredictorGetInputNum(predictor);
LOG(INFO) << "Input num: " << input_num;
size_t output_num = PD_PredictorGetOutputNum(predictor);
LOG(INFO) << "Output num: " << output_num;
PD_OneDimArrayCstr *input_names = PD_PredictorGetInputNames(predictor);
EXPECT_EQ(input_names->size, 2u);
LOG(INFO) << "Predictor start run!";
PD_Tensor *inputs[2];
inputs[0] = PD_PredictorGetInputHandle(predictor, input_names->data[0]);
inputs[1] = PD_PredictorGetInputHandle(predictor, input_names->data[1]);
LOG(INFO) << "Predictor start run!";
  // inputs[0]: word, lod memory allocated on the stack
int32_t shape_0[2] = {11, 1};
int64_t data_0[11 * 1] = {12673, 9763, 905, 284, 45, 7474, 20, 17, 1, 4, 9};
size_t lod_layer_0[2] = {0, 11};
PD_OneDimArraySize layer_0;
layer_0.size = 2;
layer_0.data = lod_layer_0;
PD_OneDimArraySize *layer_0_ptr = &layer_0;
PD_TwoDimArraySize lod_0;
lod_0.size = 1;
lod_0.data = &layer_0_ptr;
PD_TensorReshape(inputs[0], 2, shape_0);
PD_TensorCopyFromCpuInt64(inputs[0], data_0);
PD_TensorSetLod(inputs[0], &lod_0);
  // inputs[1]: mention, lod memory allocated on the heap
int32_t shape_1[2] = {11, 1};
int64_t data_1[11 * 1] = {27, 0, 0, 33, 34, 33, 0, 0, 0, 1, 2};
PD_TwoDimArraySize *lod_1_ptr = new PD_TwoDimArraySize();
lod_1_ptr->size = 1;
lod_1_ptr->data = new PD_OneDimArraySize *[1];
lod_1_ptr->data[0] = new PD_OneDimArraySize();
lod_1_ptr->data[0]->size = 2;
lod_1_ptr->data[0]->data = new size_t[2];
lod_1_ptr->data[0]->data[0] = 0;
lod_1_ptr->data[0]->data[1] = 11;
PD_TensorReshape(inputs[1], 2, shape_1);
PD_TensorCopyFromCpuInt64(inputs[1], data_1);
PD_TensorSetLod(inputs[1], lod_1_ptr);
  // release the heap lod memory; PD_TensorSetLod has already copied it
delete[] lod_1_ptr->data[0]->data;
delete lod_1_ptr->data[0];
delete[] lod_1_ptr->data;
delete lod_1_ptr;
lod_1_ptr = nullptr;
LOG(INFO) << "Predictor start run!";
bool success = PD_PredictorRun(predictor);
EXPECT_TRUE(success);
LOG(INFO) << "Predictor run success!";
PD_OneDimArrayCstr *output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor *output =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_TwoDimArraySize *output_lod = PD_TensorGetLod(output);
PD_TwoDimArraySizeDestroy(output_lod);
PD_TensorDestroy(output);
PD_OneDimArrayCstrDestroy(output_names);
PD_TensorDestroy(inputs[0]);
PD_TensorDestroy(inputs[1]);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST(PD_Config, interface) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
std::string prog_file = model_dir + "/__model__";
std::string param_file = model_dir + "/__params__";
std::string opt_cache_dir = FLAGS_infer_model + "/OptimCacheDir";
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModelDir(config, model_dir.c_str());
std::string model_dir_ = PD_ConfigGetModelDir(config);
EXPECT_EQ(model_dir, model_dir_);
PD_ConfigSetModel(config, prog_file.c_str(), param_file.c_str());
PD_ConfigSetProgFile(config, prog_file.c_str());
PD_ConfigSetParamsFile(config, param_file.c_str());
PD_ConfigSetOptimCacheDir(config, opt_cache_dir.c_str());
std::string prog_file_ = PD_ConfigGetProgFile(config);
std::string param_file_ = PD_ConfigGetParamsFile(config);
EXPECT_EQ(prog_file, prog_file_);
EXPECT_EQ(param_file, param_file_);
PD_ConfigDisableFCPadding(config);
bool fc_padding = PD_ConfigUseFcPadding(config);
EXPECT_FALSE(fc_padding);
PD_ConfigDisableGpu(config);
PD_ConfigSwitchIrOptim(config, TRUE);
bool ir_optim = PD_ConfigIrOptim(config);
EXPECT_TRUE(ir_optim);
#ifndef PADDLE_WITH_LITE
PD_ConfigEnableLiteEngine(config, PD_PRECISION_FLOAT32, TRUE, 0, nullptr, 0,
nullptr);
bool lite_enabled = PD_ConfigLiteEngineEnabled(config);
EXPECT_TRUE(lite_enabled);
#endif
PD_ConfigSwitchIrDebug(config, TRUE);
#ifdef PADDLE_WITH_MKLDNN
const char* ops_name = "conv_2d";
PD_ConfigEnableMKLDNN(config);
PD_ConfigSetMkldnnOp(config, 1, &ops_name);
PD_ConfigSetMkldnnCacheCapacity(config, 100);
bool mkldnn_enabled = PD_ConfigMkldnnEnabled(config);
EXPECT_TRUE(mkldnn_enabled);
PD_ConfigSetCpuMathLibraryNumThreads(config, 10);
int32_t cpu_threads = PD_ConfigGetCpuMathLibraryNumThreads(config);
EXPECT_EQ(cpu_threads, 10);
PD_ConfigEnableMkldnnQuantizer(config);
bool mkldnn_qt_enabled = PD_ConfigMkldnnQuantizerEnabled(config);
EXPECT_TRUE(mkldnn_qt_enabled);
PD_ConfigEnableMkldnnBfloat16(config);
PD_ConfigSetBfloat16Op(config, 1, &ops_name);
bool mkldnn_bf16_enabled = PD_ConfigMkldnnBfloat16Enabled(config);
EXPECT_TRUE(mkldnn_bf16_enabled);
#endif
PD_ConfigEnableMemoryOptim(config);
bool memory_enabled = PD_ConfigMemoryOptimEnabled(config);
EXPECT_TRUE(memory_enabled);
PD_ConfigEnableProfile(config);
bool profile_enabled = PD_ConfigProfileEnabled(config);
EXPECT_TRUE(profile_enabled);
PD_ConfigDisableGlogInfo(config);
  bool glog_disabled = PD_ConfigGlogInfoDisabled(config);
  EXPECT_TRUE(glog_disabled);
PD_ConfigSetInvalid(config);
bool is_valid = PD_ConfigIsValid(config);
EXPECT_FALSE(is_valid);
PD_ConfigPartiallyRelease(config);
PD_ConfigDestroy(config);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
void PD_run() {
auto model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/__params__").c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
int32_t shapes[4] = {1, 3, 300, 300};
std::vector<float> input(1 * 3 * 300 * 300, 0);
int32_t size;
PD_PlaceType place;
PD_TensorReshape(tensor, 4, shapes);
PD_TensorCopyFromCpuFloat(tensor, input.data());
PD_TensorDataFloat(tensor, &place, &size);
PD_TensorMutableDataFloat(tensor, place);
PD_TwoDimArraySize lod;
lod.size = 0;
lod.data = NULL;
PD_TensorSetLod(tensor, &lod);
PD_PredictorRun(predictor);
std::vector<float> out_data;
PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_OneDimArrayInt32* output_shape = PD_TensorGetShape(output_tensor);
int32_t out_num = std::accumulate(output_shape->data,
output_shape->data + output_shape->size, 1,
std::multiplies<int32_t>());
out_data.resize(out_num);
PD_TensorCopyToCpuFloat(output_tensor, out_data.data());
LOG(INFO) << "Output tensor name is: " << PD_TensorGetName(output_tensor);
PD_DataType data_type = PD_TensorGetDataType(output_tensor);
EXPECT_EQ(data_type, PD_DATA_FLOAT32);
PD_TwoDimArraySize* out_lod = PD_TensorGetLod(output_tensor);
PD_TwoDimArraySizeDestroy(out_lod);
PD_OneDimArrayInt32Destroy(output_shape);
PD_TensorDestroy(output_tensor);
PD_OneDimArrayCstrDestroy(output_names);
PD_TensorDestroy(tensor);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
TEST(PD_Tensor, PD_run) { PD_run(); }
TEST(PD_Tensor, int32) {
auto model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/__params__").c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
int32_t shapes[4] = {1, 3, 300, 300};
std::vector<int32_t> input(1 * 3 * 300 * 300, 0);
int32_t size;
PD_PlaceType place;
PD_TensorReshape(tensor, 4, shapes);
PD_TensorCopyFromCpuInt32(tensor, input.data());
int32_t* data_ptr = PD_TensorDataInt32(tensor, &place, &size);
EXPECT_EQ(place, PD_PLACE_CPU);
EXPECT_EQ(size, 1 * 3 * 300 * 300);
int32_t* mutable_data_ptr = PD_TensorMutableDataInt32(tensor, place);
EXPECT_EQ(data_ptr, mutable_data_ptr);
PD_DataType data_type = PD_TensorGetDataType(tensor);
EXPECT_EQ(data_type, PD_DATA_INT32);
PD_TensorCopyToCpuInt32(tensor, input.data());
PD_TensorDestroy(tensor);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
TEST(PD_Tensor, int64) {
auto model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/__params__").c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
int32_t shapes[4] = {1, 3, 300, 300};
std::vector<int64_t> input(1 * 3 * 300 * 300, 0);
int32_t size;
PD_PlaceType place;
PD_TensorReshape(tensor, 4, shapes);
PD_TensorCopyFromCpuInt64(tensor, input.data());
int64_t* data_ptr = PD_TensorDataInt64(tensor, &place, &size);
EXPECT_EQ(place, PD_PLACE_CPU);
EXPECT_EQ(size, 1 * 3 * 300 * 300);
int64_t* mutable_data_ptr = PD_TensorMutableDataInt64(tensor, place);
EXPECT_EQ(data_ptr, mutable_data_ptr);
PD_DataType data_type = PD_TensorGetDataType(tensor);
EXPECT_EQ(data_type, PD_DATA_INT64);
PD_TensorCopyToCpuInt64(tensor, input.data());
PD_TensorDestroy(tensor);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
TEST(PD_Tensor, uint8) {
auto model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/__params__").c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
int32_t shapes[4] = {1, 3, 300, 300};
uint8_t input[1 * 3 * 300 * 300] = {0};
int32_t size;
PD_PlaceType place;
PD_TensorReshape(tensor, 4, shapes);
PD_TensorCopyFromCpuUint8(tensor, input);
uint8_t* data_ptr = PD_TensorDataUint8(tensor, &place, &size);
EXPECT_EQ(place, PD_PLACE_CPU);
EXPECT_EQ(size, 1 * 3 * 300 * 300);
uint8_t* mutable_data_ptr = PD_TensorMutableDataUint8(tensor, place);
EXPECT_EQ(data_ptr, mutable_data_ptr);
PD_DataType data_type = PD_TensorGetDataType(tensor);
EXPECT_EQ(data_type, PD_DATA_UINT8);
PD_TensorCopyToCpuUint8(tensor, input);
PD_TensorDestroy(tensor);
PD_OneDimArrayCstrDestroy(input_names);
PD_PredictorDestroy(predictor);
}
std::string read_file(std::string filename) {
std::ifstream file(filename);
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}
TEST(PD_Tensor, from_buffer) {
PD_Config* config = PD_ConfigCreate();
std::string prog_file = FLAGS_infer_model + "/__model__";
std::string params_file = FLAGS_infer_model + "/__params__";
std::string prog_str = read_file(prog_file);
std::string params_str = read_file(params_file);
PD_ConfigSetModelBuffer(config, prog_str.c_str(), prog_str.size(),
params_str.c_str(), params_str.size());
bool model_from_memory = PD_ConfigModelFromMemory(config);
EXPECT_TRUE(model_from_memory);
PD_ConfigDestroy(config);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
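// Sketch extending the from_buffer check above (illustration only): once the
// program and parameter buffers are handed over with PD_ConfigSetModelBuffer,
// the config builds a predictor just like a file-based one. PD_PredictorCreate
// takes ownership of the config, so it is not destroyed separately here.
void BuildFromMemorySketch(const std::string& prog_str,
                           const std::string& params_str) {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigSetModelBuffer(config, prog_str.c_str(), prog_str.size(),
                          params_str.c_str(), params_str.size());
  PD_ConfigDisableGlogInfo(config);
  PD_Predictor* predictor = PD_PredictorCreate(config);
  PD_PredictorDestroy(predictor);
}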
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
typedef struct RunParameter {
PD_Predictor* predictor;
int32_t* shapes;
size_t shape_size;
float* input_data;
int32_t out_size;
float* out_data;
int32_t thread_index;
} RunParameter;
void* run(void* thread_param) {
struct RunParameter* param = (struct RunParameter*)thread_param;
LOG(INFO) << "Thread " << param->thread_index << " start run!";
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(param->predictor);
PD_Tensor* tensor =
PD_PredictorGetInputHandle(param->predictor, input_names->data[0]);
PD_TensorReshape(tensor, param->shape_size, param->shapes);
PD_TensorCopyFromCpuFloat(tensor, param->input_data);
PD_PredictorRun(param->predictor);
PD_OneDimArrayCstr* output_names =
PD_PredictorGetOutputNames(param->predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(param->predictor, output_names->data[0]);
PD_OneDimArrayInt32* output_shape = PD_TensorGetShape(output_tensor);
param->out_size = 1;
for (size_t index = 0; index < output_shape->size; ++index) {
param->out_size = param->out_size * output_shape->data[index];
}
PD_OneDimArrayInt32Destroy(output_shape);
param->out_data =
reinterpret_cast<float*>(malloc(param->out_size * sizeof(float)));
PD_TensorCopyToCpuFloat(output_tensor, param->out_data);
PD_TensorDestroy(output_tensor);
PD_OneDimArrayCstrDestroy(output_names);
PD_TensorDestroy(tensor);
PD_OneDimArrayCstrDestroy(input_names);
LOG(INFO) << "Thread " << param->thread_index << " end run!";
return NULL;
}
void threads_run(int thread_num) {
auto model_dir = FLAGS_infer_model;
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, (model_dir + "/__model__").c_str(),
(model_dir + "/__params__").c_str());
PD_Predictor* predictor = PD_PredictorCreate(config);
pthread_t* threads =
reinterpret_cast<pthread_t*>(malloc(thread_num * sizeof(pthread_t)));
RunParameter* params = reinterpret_cast<RunParameter*>(
malloc(thread_num * sizeof(RunParameter)));
int32_t shapes[4] = {1, 3, 300, 300};
float* input =
reinterpret_cast<float*>(malloc(1 * 3 * 300 * 300 * sizeof(float)));
memset(input, 0, 1 * 3 * 300 * 300 * sizeof(float));
for (int i = 0; i < thread_num; ++i) {
params[i].predictor = PD_PredictorClone(predictor);
params[i].shapes = shapes;
params[i].shape_size = 4;
params[i].input_data = input;
params[i].out_size = 0;
params[i].out_data = NULL;
params[i].thread_index = i;
pthread_create(&(threads[i]), NULL, run, (params + i));
}
for (int i = 0; i < thread_num; ++i) {
pthread_join(threads[i], NULL);
}
ASSERT_GT(params[0].out_size, 0);
for (int i = 1; i < thread_num; ++i) {
ASSERT_EQ(params[i].out_size, params[0].out_size);
for (int j = 0; j < params[i].out_size; ++j) {
ASSERT_EQ(params[i].out_data[j], params[0].out_data[j]);
}
}
for (int i = 0; i < thread_num; ++i) {
PD_PredictorDestroy(params[i].predictor);
free(params[i].out_data);
}
free(input);
free(params);
free(threads);
PD_PredictorDestroy(predictor);
}
TEST(PD_Predictor, PD_multi_threads_run) { threads_run(10); }
} // namespace analysis
} // namespace inference
} // namespace paddle
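// Alternative sketch to the pthread-based test above (illustration only,
// assumes <thread> is available): the same pattern with std::thread. Each
// worker runs on its own PD_PredictorClone handle, so the main predictor is
// never shared across threads.
void ThreadsRunSketch(PD_Predictor* predictor, int thread_num) {
  std::vector<std::thread> workers;
  for (int i = 0; i < thread_num; ++i) {
    PD_Predictor* clone = PD_PredictorClone(predictor);
    workers.emplace_back([clone]() {
      // Fill the clone's input tensors here, as in run() above, then:
      PD_PredictorRun(clone);
      PD_PredictorDestroy(clone);
    });
  }
  for (auto& worker : workers) {
    worker.join();
  }
}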
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
void predictor_run() {
std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_Config *config = PD_ConfigCreate();
PD_ConfigDisableGpu(config);
PD_ConfigSetCpuMathLibraryNumThreads(config, 10);
PD_ConfigSwitchIrDebug(config, TRUE);
PD_ConfigSetModel(config, prog_file.c_str(), params_file.c_str());
PD_Predictor *predictor = PD_PredictorCreate(config);
PD_Tensor *tensor = PD_PredictorGetInputHandle(predictor, "data");
const int batch_size = 1;
const int channels = 3;
const int height = 318;
const int width = 318;
float *input = new float[batch_size * channels * height * width]();
int32_t shape[4] = {batch_size, channels, height, width};
PD_TensorReshape(tensor, 4, shape);
PD_TensorCopyFromCpuFloat(tensor, input);
EXPECT_TRUE(PD_PredictorRun(predictor));
delete[] input;
PD_TensorDestroy(tensor);
PD_PredictorDestroy(predictor);
}
TEST(PD_PredictorRun, predictor_run) { predictor_run(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(PD_Config, profile_mkldnn) {
std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_Config *config = PD_ConfigCreate();
PD_ConfigDisableGpu(config);
PD_ConfigSetCpuMathLibraryNumThreads(config, 10);
PD_ConfigSwitchIrDebug(config, TRUE);
PD_ConfigEnableMKLDNN(config);
bool mkldnn_enable = PD_ConfigMkldnnEnabled(config);
EXPECT_TRUE(mkldnn_enable);
PD_ConfigEnableMkldnnQuantizer(config);
bool quantizer_enable = PD_ConfigMkldnnQuantizerEnabled(config);
EXPECT_TRUE(quantizer_enable);
PD_ConfigEnableMkldnnBfloat16(config);
PD_ConfigSetMkldnnCacheCapacity(config, 0);
PD_ConfigSetModel(config, prog_file.c_str(), params_file.c_str());
PD_ConfigDestroy(config);
}
#endif
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
#ifdef PADDLE_WITH_XPU
TEST(PD_Config, use_xpu) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
  PD_Config *config = PD_ConfigCreate();
PD_ConfigSwitchIrDebug(config, TRUE);
PD_ConfigSetModelDir(config, model_dir.c_str());
PD_ConfigSetOptimCacheDir(config,
(FLAGS_infer_model + "/OptimCacheDir").c_str());
const char *model_dir_ = PD_ConfigGetModelDir(config);
LOG(INFO) << model_dir_;
PD_ConfigEnableXpu(config, 0xfffc00);
bool use_xpu = PD_ConfigUseXpu(config);
EXPECT_TRUE(use_xpu);
int32_t device_id = PD_ConfigXpuDeviceId(config);
  EXPECT_EQ(device_id, 0);
PD_ConfigSwitchIrOptim(config, TRUE);
  bool ir_optim = PD_ConfigIrOptim(config);
EXPECT_TRUE(ir_optim);
PD_ConfigEnableMemoryOptim(config);
bool memory_optim_enable = PD_ConfigMemoryOptimEnabled(config);
EXPECT_TRUE(memory_optim_enable);
PD_ConfigEnableProfile(config);
bool profiler_enable = PD_ConfigProfileEnabled(config);
EXPECT_TRUE(profiler_enable);
  PD_ConfigSetInvalid(config);
bool is_valid = PD_ConfigIsValid(config);
EXPECT_FALSE(is_valid);
PD_ConfigDestroy(config);
}
#endif
} // namespace analysis
} // namespace inference
} // namespace paddle