inference_api.cc 52.1 KB
Newer Older
F
flame 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/pybind/inference_api.h"
16

17
#include <pybind11/functional.h>
18
#include <pybind11/numpy.h>
F
flame 已提交
19
#include <pybind11/stl.h>
20

F
flame 已提交
21
#include <cstring>
22
#include <functional>
F
flame 已提交
23
#include <iostream>
24
#include <iterator>
25
#include <map>
26
#include <memory>
F
flame 已提交
27
#include <string>
28
#include <type_traits>
29
#include <unordered_set>
30
#include <utility>
F
flame 已提交
31
#include <vector>
32

F
flame 已提交
33
#include "paddle/fluid/inference/api/analysis_predictor.h"
34
#include "paddle/fluid/inference/api/helper.h"
35
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
36
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
F
flame 已提交
37
#include "paddle/fluid/inference/api/paddle_inference_api.h"
38
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
39
#include "paddle/fluid/inference/api/paddle_tensor.h"
40
#include "paddle/fluid/inference/utils/io_utils.h"
41 42 43
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/phi/api/include/tensor.h"
44
#include "paddle/phi/core/compat/convert_utils.h"
F
flame 已提交
45

46 47 48 49
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/phi/core/cuda_stream.h"
#endif

50 51 52 53
#ifdef PADDLE_WITH_ONNXRUNTIME
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#endif

F
flame 已提交
54 55
namespace py = pybind11;

56 57 58 59 60 61 62 63 64 65 66 67 68
namespace pybind11 {
namespace detail {

// Note: use same enum number of float16 in numpy.
// import numpy as np
// print np.dtype(np.float16).num  # 23
constexpr int NPY_FLOAT16_ = 23;
constexpr int NPY_UINT16_ = 4;

// Note: Since float16 is not a builtin type in C++, we register
// paddle::platform::float16 as numpy.float16.
// Ref: https://github.com/pybind/pybind11/issues/1776
template <>
69
struct npy_format_descriptor<phi::dtype::float16> {
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
  static py::dtype dtype() {
    handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_);
    return reinterpret_borrow<py::dtype>(ptr);
  }
  static std::string format() {
    // Note: "e" represents float16.
    // Details at:
    // https://docs.python.org/3/library/struct.html#format-characters.
    return "e";
  }
  static constexpr auto name = _("float16");
};

}  // namespace detail
}  // namespace pybind11

F
flame 已提交
86 87
namespace paddle {
namespace pybind {
88 89 90
// Inference-API types used throughout this translation unit.
using paddle::AnalysisPredictor;
using paddle::NativeConfig;
using paddle::NativePaddlePredictor;
using paddle::PaddleBuf;
using paddle::PaddleDataLayout;
using paddle::PaddleDType;
using paddle::PaddlePassBuilder;
using paddle::PaddlePlace;
using paddle::PaddlePredictor;
using paddle::PaddleTensor;
using paddle::PassStrategy;
using paddle::ZeroCopyTensor;
F
flame 已提交
100

101 102
namespace {
void BindPaddleDType(py::module *m);
103
void BindPaddleDataLayout(py::module *m);
104 105 106 107 108 109
void BindPaddleBuf(py::module *m);
void BindPaddleTensor(py::module *m);
void BindPaddlePlace(py::module *m);
void BindPaddlePredictor(py::module *m);
void BindNativeConfig(py::module *m);
void BindNativePredictor(py::module *m);
110
void BindLiteNNAdapterConfig(py::module *m);
Z
zhupengyang 已提交
111
void BindXpuConfig(py::module *m);
112 113
void BindAnalysisConfig(py::module *m);
void BindAnalysisPredictor(py::module *m);
114 115
void BindZeroCopyTensor(py::module *m);
void BindPaddlePassBuilder(py::module *m);
W
Wilber 已提交
116 117 118
void BindPaddleInferPredictor(py::module *m);
void BindPaddleInferTensor(py::module *m);
void BindPredictorPool(py::module *m);
F
flame 已提交
119

120
#ifdef PADDLE_WITH_DNNL
121
void BindMkldnnQuantizerConfig(py::module *m);
122
#endif
123 124

template <typename T>
125
PaddleBuf PaddleBufCreate(py::array_t<T, py::array::c_style> data) {
126
  PaddleBuf buf(data.size() * sizeof(T));
W
Wilber 已提交
127 128
  std::copy_n(static_cast<const T *>(data.data()),
              data.size(),
129 130 131 132 133
              static_cast<T *>(buf.data()));
  return buf;
}

template <typename T>
134 135
void PaddleBufReset(PaddleBuf &buf,                             // NOLINT
                    py::array_t<T, py::array::c_style> data) {  // NOLINT
136
  buf.Resize(data.size() * sizeof(T));
W
Wilber 已提交
137 138
  std::copy_n(static_cast<const T *>(data.data()),
              data.size(),
139 140 141 142 143
              static_cast<T *>(buf.data()));
}

// Construct a PaddleTensor from a numpy array. With copy == true the
// tensor owns a private copy of the data; with copy == false it wraps the
// numpy buffer directly (the caller must keep the array alive).
template <typename T>
PaddleTensor PaddleTensorCreate(
    py::array_t<T, py::array::c_style> data,
    const std::string name = "",
    const std::vector<std::vector<size_t>> &lod = {},
    bool copy = true) {
  PaddleTensor result;

  if (copy) {
    PaddleBuf owned(data.size() * sizeof(T));
    std::copy_n(static_cast<const T *>(data.data()),
                data.size(),
                static_cast<T *>(owned.data()));
    result.data = std::move(owned);
  } else {
    // Zero-copy path: share the numpy array's storage.
    result.data = PaddleBuf(data.mutable_data(), data.size() * sizeof(T));
  }

  result.dtype = inference::PaddleTensorGetDType<T>();
  result.name = name;
  result.lod = lod;
  result.shape.resize(data.ndim());
  std::copy_n(data.shape(), data.ndim(), result.shape.begin());

  return result;
}

169
// Translate a PaddleDType into the matching numpy dtype descriptor.
// Raises Unimplemented for dtypes without a numpy counterpart here.
py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
  py::dtype numpy_dtype;
  switch (dtype) {
    case PaddleDType::INT32:
      numpy_dtype = py::dtype::of<int32_t>();
      break;
    case PaddleDType::INT64:
      numpy_dtype = py::dtype::of<int64_t>();
      break;
    case PaddleDType::FLOAT64:
      numpy_dtype = py::dtype::of<double>();
      break;
    case PaddleDType::FLOAT32:
      numpy_dtype = py::dtype::of<float>();
      break;
    case PaddleDType::FLOAT16:
      // Registered above via npy_format_descriptor<phi::dtype::float16>.
      numpy_dtype = py::dtype::of<phi::dtype::float16>();
      break;
    case PaddleDType::UINT8:
      numpy_dtype = py::dtype::of<uint8_t>();
      break;
    case PaddleDType::INT8:
      numpy_dtype = py::dtype::of<int8_t>();
      break;
    case PaddleDType::BOOL:
      numpy_dtype = py::dtype::of<bool>();
      break;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
          "FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
  }

  return numpy_dtype;
}

// View a PaddleTensor's buffer as a numpy array (does not copy the data).
py::array PaddleTensorGetData(PaddleTensor &tensor) {  // NOLINT
  return py::array(PaddleDTypeToNumpyDType(tensor.dtype),
                   {tensor.shape},
                   tensor.data.data());
}

template <typename T>
211 212
void ZeroCopyTensorCreate(ZeroCopyTensor &tensor,  // NOLINT
                          py::array_t<T, py::array::c_style> data) {
213 214 215 216 217 218
  std::vector<int> shape;
  std::copy_n(data.shape(), data.ndim(), std::back_inserter(shape));
  tensor.Reshape(std::move(shape));
  tensor.copy_from_cpu(static_cast<const T *>(data.data()));
}

S
Steffy-zxf 已提交
219 220 221 222 223 224 225 226 227 228 229 230
/// \brief Experimental interface.
/// Create the Strings tensor from data: the tensor is reshaped to hold
/// one entry per input string and then filled from `data`.
/// \param tensor The tensor to be filled in place.
/// \param data The input text.
void ZeroCopyStringTensorCreate(ZeroCopyTensor &tensor,  // NOLINT
                                const paddle_infer::Strings *data) {
  const size_t batch = data->size();
  tensor.ReshapeStrings(batch);
  tensor.copy_strings_from_cpu(data);
}

W
Wilber 已提交
231
template <typename T>
232 233
void PaddleInferTensorCreate(paddle_infer::Tensor &tensor,  // NOLINT
                             py::array_t<T, py::array::c_style> data) {
W
Wilber 已提交
234 235 236 237 238 239
  std::vector<int> shape;
  std::copy_n(data.shape(), data.ndim(), std::back_inserter(shape));
  tensor.Reshape(std::move(shape));
  tensor.CopyFromCpu(static_cast<const T *>(data.data()));
}

240 241 242 243 244 245
// Map a phi allocation type onto the public paddle_infer place enum.
// Anything unrecognized deliberately falls back to CPU.
paddle_infer::PlaceType ToPaddleInferPlace(
    phi::AllocationType allocation_type) {
  switch (allocation_type) {
    case phi::AllocationType::CPU:
      return paddle_infer::PlaceType::kCPU;
    case phi::AllocationType::GPU:
      return paddle_infer::PlaceType::kGPU;
    case phi::AllocationType::XPU:
      return paddle_infer::PlaceType::kXPU;
    default:
      return paddle_infer::PlaceType::kCPU;
  }
}

void PaddleInferShareExternalData(paddle_infer::Tensor &tensor,  // NOLINT
254
                                  phi::DenseTensor input_tensor) {
255 256 257 258
  std::vector<int> shape;
  for (int i = 0; i < input_tensor.dims().size(); ++i) {
    shape.push_back(input_tensor.dims()[i]);
  }
259 260 261 262 263 264
  if (input_tensor.dtype() == phi::DataType::FLOAT64) {
    tensor.ShareExternalData(
        static_cast<double *>(input_tensor.data()),
        shape,
        ToPaddleInferPlace(input_tensor.place().GetType()));
  } else if (input_tensor.dtype() == phi::DataType::FLOAT32) {
265
    tensor.ShareExternalData(
W
Wilber 已提交
266 267
        static_cast<float *>(input_tensor.data()),
        shape,
268 269 270
        ToPaddleInferPlace(input_tensor.place().GetType()));
  } else if (input_tensor.dtype() == phi::DataType::FLOAT16) {
    tensor.ShareExternalData(
271
        static_cast<phi::dtype::float16 *>(input_tensor.data()),
W
Wilber 已提交
272
        shape,
273
        ToPaddleInferPlace(input_tensor.place().GetType()));
274 275 276 277 278 279 280 281 282 283
  } else if (input_tensor.dtype() == phi::DataType::BFLOAT16) {
    tensor.ShareExternalData(
        static_cast<bfloat16 *>(input_tensor.data()),
        shape,
        ToPaddleInferPlace(input_tensor.place().GetType()));
  } else if (input_tensor.dtype() == phi::DataType::BOOL) {
    tensor.ShareExternalData(
        static_cast<bool *>(input_tensor.data()),
        shape,
        ToPaddleInferPlace(input_tensor.place().GetType()));
284 285 286 287 288 289 290 291 292 293 294 295 296
  } else if (input_tensor.dtype() == phi::DataType::INT32) {
    tensor.ShareExternalData(
        static_cast<int32_t *>(input_tensor.data()),
        shape,
        ToPaddleInferPlace(input_tensor.place().GetType()));
  } else if (input_tensor.dtype() == phi::DataType::INT64) {
    tensor.ShareExternalData(
        static_cast<int64_t *>(input_tensor.data()),
        shape,
        ToPaddleInferPlace(input_tensor.place().GetType()));
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported data type. Now share_external_data only supports INT32, "
297
        "INT64, FLOAT64, FLOAT32, FLOAT16, BFLOAT16 and BOOL."));
298 299 300
  }
}

301 302
// Share a paddle::Tensor's storage with `tensor` without copying.
// Supported dtypes: FLOAT64, FLOAT32, FLOAT16, BFLOAT16, BOOL, INT32 and
// INT64; anything else raises Unimplemented.
void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor,  // NOLINT
                                   paddle::Tensor &&paddle_tensor) {
  std::vector<int> shape;
  for (int i = 0; i < paddle_tensor.dims().size(); ++i) {
    shape.push_back(paddle_tensor.dims()[i]);
  }

  if (paddle_tensor.dtype() == phi::DataType::FLOAT64) {
    tensor.ShareExternalData(
        static_cast<double *>(paddle_tensor.data<double>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::FLOAT32) {
    tensor.ShareExternalData(
        static_cast<float *>(paddle_tensor.data<float>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::FLOAT16) {
    tensor.ShareExternalData(
        static_cast<paddle::platform::float16 *>(
            paddle_tensor.data<paddle::platform::float16>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::BFLOAT16) {
    tensor.ShareExternalData(
        static_cast<bfloat16 *>(paddle_tensor.data<bfloat16>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::BOOL) {
    tensor.ShareExternalData(
        static_cast<bool *>(paddle_tensor.data<bool>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::INT32) {
    tensor.ShareExternalData(
        static_cast<int32_t *>(paddle_tensor.data<int32_t>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else if (paddle_tensor.dtype() == phi::DataType::INT64) {
    tensor.ShareExternalData(
        static_cast<int64_t *>(paddle_tensor.data<int64_t>()),
        shape,
        ToPaddleInferPlace(paddle_tensor.place().GetType()));
  } else {
    // Fix: the message previously omitted FLOAT64 even though the FLOAT64
    // branch above is implemented (and the sibling DenseTensor overload
    // lists it); keep both messages consistent.
    PADDLE_THROW(platform::errors::Unimplemented(
        "Unsupported data type. Now share_external_data only supports INT32, "
        "INT64, FLOAT64, FLOAT32, FLOAT16, BFLOAT16 and BOOL."));
  }
}

S
Steffy-zxf 已提交
351 352 353 354 355 356 357 358 359 360 361 362 363
/// \brief Experimental interface.
/// Create the Strings tensor from data: the tensor is reshaped to hold
/// one entry per input string and then filled from `data`.
/// \param tensor The tensor to be filled in place.
/// \param data The input text.
void PaddleInferStringTensorCreate(paddle_infer::Tensor &tensor,  // NOLINT
                                   const paddle_infer::Strings *data) {
  VLOG(3) << "Create PaddleInferTensor, dtype = Strings ";
  const size_t batch = data->size();
  tensor.ReshapeStrings(batch);
  tensor.CopyStringsFromCpu(data);
}

364 365 366 367 368 369 370 371 372
// Return the element size in bytes of a PaddleDType.
// Raises Unimplemented for dtypes without a fixed-size C representation.
size_t PaddleGetDTypeSize(PaddleDType dt) {
  size_t size{0};
  switch (dt) {
    case PaddleDType::INT32:
      size = sizeof(int32_t);
      break;
    case PaddleDType::INT64:
      size = sizeof(int64_t);
      break;
    case PaddleDType::FLOAT64:
      size = sizeof(double);
      break;
    case PaddleDType::FLOAT32:
      size = sizeof(float);
      break;
    case PaddleDType::FLOAT16:
      size = sizeof(phi::dtype::float16);
      break;
    case PaddleDType::INT8:
      size = sizeof(int8_t);
      break;
    case PaddleDType::UINT8:
      size = sizeof(uint8_t);
      break;
    case PaddleDType::BOOL:
      size = sizeof(bool);
      break;
    default:
      // Fix: message previously read "data t ype" (stray space).
      PADDLE_THROW(platform::errors::Unimplemented(
          "Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
          "FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
  }
  return size;
}

// Copy a ZeroCopyTensor's contents into a freshly allocated numpy array
// of the matching dtype and shape.
py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) {  // NOLINT
  py::dtype numpy_dtype = PaddleDTypeToNumpyDType(tensor.type());
  auto dims = tensor.shape();
  py::array::ShapeContainer shape(dims.begin(), dims.end());
  py::array result(numpy_dtype, std::move(shape));

  switch (tensor.type()) {
    case PaddleDType::INT32:
      tensor.copy_to_cpu(static_cast<int32_t *>(result.mutable_data()));
      break;
    case PaddleDType::INT64:
      tensor.copy_to_cpu(static_cast<int64_t *>(result.mutable_data()));
      break;
    case PaddleDType::FLOAT64:
      tensor.copy_to_cpu<double>(static_cast<double *>(result.mutable_data()));
      break;
    case PaddleDType::FLOAT32:
      tensor.copy_to_cpu<float>(static_cast<float *>(result.mutable_data()));
      break;
    case PaddleDType::FLOAT16:
      tensor.copy_to_cpu<phi::dtype::float16>(
          static_cast<phi::dtype::float16 *>(result.mutable_data()));
      break;
    case PaddleDType::UINT8:
      tensor.copy_to_cpu<uint8_t>(
          static_cast<uint8_t *>(result.mutable_data()));
      break;
    case PaddleDType::INT8:
      tensor.copy_to_cpu<int8_t>(static_cast<int8_t *>(result.mutable_data()));
      break;
    case PaddleDType::BOOL:
      tensor.copy_to_cpu<bool>(static_cast<bool *>(result.mutable_data()));
      break;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
          "FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
  }
  return result;
}
438

W
Wilber 已提交
439 440 441 442 443 444 445 446 447 448 449 450 451
// Copy a paddle_infer::Tensor's contents into a freshly allocated numpy
// array of the matching dtype and shape.
py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) {  // NOLINT
  py::dtype dt = PaddleDTypeToNumpyDType(tensor.type());
  auto tensor_shape = tensor.shape();
  py::array::ShapeContainer shape(tensor_shape.begin(), tensor_shape.end());
  py::array array(dt, std::move(shape));

  switch (tensor.type()) {
    case PaddleDType::INT32:
      tensor.CopyToCpu(static_cast<int32_t *>(array.mutable_data()));
      break;
    case PaddleDType::INT64:
      tensor.CopyToCpu(static_cast<int64_t *>(array.mutable_data()));
      break;
    case PaddleDType::FLOAT64:
      tensor.CopyToCpu<double>(static_cast<double *>(array.mutable_data()));
      break;
    case PaddleDType::FLOAT32:
      tensor.CopyToCpu<float>(static_cast<float *>(array.mutable_data()));
      break;
    case PaddleDType::FLOAT16:
      tensor.CopyToCpu<phi::dtype::float16>(
          static_cast<phi::dtype::float16 *>(array.mutable_data()));
      break;
    case PaddleDType::UINT8:
      tensor.CopyToCpu(static_cast<uint8_t *>(array.mutable_data()));
      break;
    case PaddleDType::INT8:
      tensor.CopyToCpu(static_cast<int8_t *>(array.mutable_data()));
      break;
    case PaddleDType::BOOL:
      tensor.CopyToCpu(static_cast<bool *>(array.mutable_data()));
      break;
    default:
      // Fix: message previously read "data t ype" (stray space).
      PADDLE_THROW(platform::errors::Unimplemented(
          "Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
          "FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
  }
  return array;
}

479 480 481 482 483
// Serialize a PaddleTensor to its stream representation and return the
// resulting buffer to Python as `bytes`.
py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
  std::stringstream buffer;
  paddle::inference::SerializePDTensorToStream(&buffer, tensor);
  return static_cast<py::bytes>(buffer.str());
}
484

485
void CopyPaddleInferTensor(paddle_infer::Tensor &dst,  // NOLINT
486 487 488 489
                           const paddle_infer::Tensor &src) {
  return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
}

490
}  // namespace
491

F
flame 已提交
492 493
// Entry point: register every inference-API class binding plus the free
// functions on module `m`.
void BindInferenceApi(py::module *m) {
  BindPaddleDType(m);
  BindPaddleDataLayout(m);
  BindPaddleBuf(m);
  BindPaddleTensor(m);
  BindPaddlePlace(m);
  BindPaddlePredictor(m);
  BindNativeConfig(m);
  BindNativePredictor(m);
  BindLiteNNAdapterConfig(m);
  BindXpuConfig(m);
  BindAnalysisConfig(m);
  BindAnalysisPredictor(m);
  BindPaddleInferPredictor(m);
  BindZeroCopyTensor(m);
  BindPaddleInferTensor(m);
  BindPaddlePassBuilder(m);
  BindPredictorPool(m);
#ifdef PADDLE_WITH_DNNL
  BindMkldnnQuantizerConfig(m);
#endif
  m->def("create_paddle_predictor",
         &paddle::CreatePaddlePredictor<AnalysisConfig>,
         py::arg("config"));
  m->def("create_paddle_predictor",
         &paddle::CreatePaddlePredictor<NativeConfig>,
         py::arg("config"));
  m->def("create_predictor",
         [](const paddle_infer::Config &config)
             -> std::unique_ptr<paddle_infer::Predictor> {
           return std::make_unique<paddle_infer::Predictor>(config);
         });
  m->def(
      "_get_phi_kernel_name",
      [](const std::string &fluid_op_name) {
        return phi::TransToPhiKernelName(fluid_op_name);
      },
      py::return_value_policy::reference);
  m->def("copy_tensor", &CopyPaddleInferTensor);
  m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
  m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
  m->def("get_version", &paddle_infer::GetVersion);
  m->def("get_trt_compile_version", &paddle_infer::GetTrtCompileVersion);
  m->def("get_trt_runtime_version", &paddle_infer::GetTrtRuntimeVersion);
  m->def("get_num_bytes_of_data_type", &paddle_infer::GetNumBytesOfDataType);
  m->def("convert_to_mixed_precision_bind",
         &paddle_infer::ConvertToMixedPrecision,
         py::arg("model_file"),
         py::arg("params_file"),
         py::arg("mixed_model_file"),
         py::arg("mixed_params_file"),
         py::arg("mixed_precision"),
         py::arg("backend"),
         py::arg("keep_io_types") = true,
         py::arg("black_list") = std::unordered_set<std::string>());
}

550
namespace {
F
flame 已提交
551 552
// Expose PaddleDType as a Python enum.
void BindPaddleDType(py::module *m) {
  py::enum_<PaddleDType>(*m, "PaddleDType")
      .value("FLOAT64", PaddleDType::FLOAT64)
      .value("FLOAT32", PaddleDType::FLOAT32)
      .value("FLOAT16", PaddleDType::FLOAT16)
      .value("INT64", PaddleDType::INT64)
      .value("INT32", PaddleDType::INT32)
      .value("UINT8", PaddleDType::UINT8)
      .value("INT8", PaddleDType::INT8)
      .value("BOOL", PaddleDType::BOOL);
}

563 564 565 566 567 568 569 570
// Expose PaddleDataLayout as a Python enum.
void BindPaddleDataLayout(py::module *m) {
  py::enum_<PaddleDataLayout>(*m, "PaddleDataLayout")
      .value("UNK", PaddleDataLayout::kUNK)
      .value("Any", PaddleDataLayout::kAny)
      .value("NHWC", PaddleDataLayout::kNHWC)
      .value("NCHW", PaddleDataLayout::kNCHW);
}

F
flame 已提交
571 572 573 574 575 576
// Expose PaddleBuf: constructors from a byte count, a float vector or a
// numpy array; reset/resize; and typed accessors over the raw buffer.
void BindPaddleBuf(py::module *m) {
  py::class_<PaddleBuf>(*m, "PaddleBuf")
      .def(py::init<size_t>())
      .def(py::init([](std::vector<float> &data) {
        auto buf = PaddleBuf(data.size() * sizeof(float));
        std::memcpy(buf.data(), static_cast<void *>(data.data()), buf.length());
        return buf;
      }))
      .def(py::init(&PaddleBufCreate<int32_t>))
      .def(py::init(&PaddleBufCreate<int64_t>))
      .def(py::init(&PaddleBufCreate<float>))
      .def("resize", &PaddleBuf::Resize)
      .def("reset",
           [](PaddleBuf &self, std::vector<float> &data) {
             self.Resize(data.size() * sizeof(float));
             std::memcpy(self.data(), data.data(), self.length());
           })
      .def("reset", &PaddleBufReset<int32_t>)
      .def("reset", &PaddleBufReset<int64_t>)
      .def("reset", &PaddleBufReset<float>)
      .def("empty", &PaddleBuf::empty)
      .def("tolist",
           // Reinterpret the buffer as `dtype` and return a Python list.
           [](PaddleBuf &self, const std::string &dtype) -> py::list {
             py::list result;
             if (dtype == "int32") {
               auto *begin = static_cast<int32_t *>(self.data());
               auto count = self.length() / sizeof(int32_t);
               result = py::cast(std::vector<int32_t>(begin, begin + count));
             } else if (dtype == "int64") {
               auto *begin = static_cast<int64_t *>(self.data());
               auto count = self.length() / sizeof(int64_t);
               result = py::cast(std::vector<int64_t>(begin, begin + count));
             } else if (dtype == "float32") {
               auto *begin = static_cast<float *>(self.data());
               auto count = self.length() / sizeof(float);
               result = py::cast(std::vector<float>(begin, begin + count));
             } else {
               PADDLE_THROW(platform::errors::Unimplemented(
                   "Unsupported data type. Now only supports INT32, INT64 and "
                   "FLOAT32."));
             }
             return result;
           })
      .def("float_data",
           [](PaddleBuf &self) -> std::vector<float> {
             auto *begin = static_cast<float *>(self.data());
             return {begin, begin + self.length() / sizeof(*begin)};
           })
      .def("int64_data",
           [](PaddleBuf &self) -> std::vector<int64_t> {
             auto *begin = static_cast<int64_t *>(self.data());
             return {begin, begin + self.length() / sizeof(*begin)};
           })
      .def("int32_data",
           [](PaddleBuf &self) -> std::vector<int32_t> {
             auto *begin = static_cast<int32_t *>(self.data());
             return {begin, begin + self.length() / sizeof(*begin)};
           })
      .def("length", &PaddleBuf::length);
}

// Expose PaddleTensor together with numpy-backed constructors for the
// int32 / int64 / float element types.
void BindPaddleTensor(py::module *m) {
  py::class_<PaddleTensor>(*m, "PaddleTensor")
      .def(py::init<>())
      .def(py::init(&PaddleTensorCreate<int32_t>),
           py::arg("data"),
           py::arg("name") = "",
           py::arg("lod") = std::vector<std::vector<size_t>>(),
           py::arg("copy") = true)
      .def(py::init(&PaddleTensorCreate<int64_t>),
           py::arg("data"),
           py::arg("name") = "",
           py::arg("lod") = std::vector<std::vector<size_t>>(),
           py::arg("copy") = true)
      .def(py::init(&PaddleTensorCreate<float>),
           py::arg("data"),
           py::arg("name") = "",
           py::arg("lod") = std::vector<std::vector<size_t>>(),
           py::arg("copy") = true)
      .def("as_ndarray", &PaddleTensorGetData)
      .def_readwrite("name", &PaddleTensor::name)
      .def_readwrite("shape", &PaddleTensor::shape)
      .def_readwrite("data", &PaddleTensor::data)
      .def_readwrite("dtype", &PaddleTensor::dtype)
      .def_readwrite("lod", &PaddleTensor::lod);
}

// Expose PaddlePlace as a Python enum.
void BindPaddlePlace(py::module *m) {
  py::enum_<PaddlePlace>(*m, "PaddlePlace")
      .value("UNK", PaddlePlace::kUNK)
      .value("CPU", PaddlePlace::kCPU)
      .value("GPU", PaddlePlace::kGPU)
      .value("XPU", PaddlePlace::kXPU)
      .value("CUSTOM", PaddlePlace::kCUSTOM);
}

// Expose the abstract PaddlePredictor plus its nested Config class.
void BindPaddlePredictor(py::module *m) {
  auto paddle_predictor = py::class_<PaddlePredictor>(*m, "PaddlePredictor");
  paddle_predictor
      .def("run",
           [](PaddlePredictor &self, const std::vector<PaddleTensor> &inputs) {
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
             // Release the GIL while Run executes (custom-device builds only).
             pybind11::gil_scoped_release release;
#endif
             std::vector<PaddleTensor> outputs;
             self.Run(inputs, &outputs);
             return outputs;
           })
      .def("get_input_tensor", &PaddlePredictor::GetInputTensor)
      .def("get_output_tensor", &PaddlePredictor::GetOutputTensor)
      .def("get_input_names", &PaddlePredictor::GetInputNames)
      .def("get_output_names", &PaddlePredictor::GetOutputNames)
      .def("zero_copy_run", &PaddlePredictor::ZeroCopyRun)
      .def("clone", [](PaddlePredictor &self) { return self.Clone(nullptr); })
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      .def("clone",
           [](PaddlePredictor &self, phi::CUDAStream &stream) {
             return self.Clone(stream.raw_stream());
           })
#endif
      .def("get_serialized_program", &PaddlePredictor::GetSerializedProgram);

  auto config = py::class_<PaddlePredictor::Config>(paddle_predictor, "Config");
  config.def(py::init<>())
      .def_readwrite("model_dir", &PaddlePredictor::Config::model_dir);
}

// Expose NativeConfig (configuration of the legacy native predictor).
void BindNativeConfig(py::module *m) {
  py::class_<NativeConfig, PaddlePredictor::Config>(*m, "NativeConfig")
      .def(py::init<>())
      .def_readwrite("use_gpu", &NativeConfig::use_gpu)
      .def_readwrite("use_xpu", &NativeConfig::use_xpu)
      .def_readwrite("device", &NativeConfig::device)
      .def_readwrite("fraction_of_gpu_memory",
                     &NativeConfig::fraction_of_gpu_memory)
      .def_readwrite("prog_file", &NativeConfig::prog_file)
      .def_readwrite("param_file", &NativeConfig::param_file)
      .def_readwrite("specify_input_name", &NativeConfig::specify_input_name)
      .def("set_cpu_math_library_num_threads",
           &NativeConfig::SetCpuMathLibraryNumThreads)
      .def("cpu_math_library_num_threads",
           &NativeConfig::cpu_math_library_num_threads);
}

// Expose NativePaddlePredictor (legacy predictor implementation).
void BindNativePredictor(py::module *m) {
  py::class_<NativePaddlePredictor, PaddlePredictor>(*m,
                                                     "NativePaddlePredictor")
      .def(py::init<const NativeConfig &>())
      .def("init", &NativePaddlePredictor::Init)
      .def("run",
           [](NativePaddlePredictor &self,
              const std::vector<PaddleTensor> &inputs) {
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
             // Release the GIL while Run executes (custom-device builds only).
             pybind11::gil_scoped_release release;
#endif
             std::vector<PaddleTensor> outputs;
             self.Run(inputs, &outputs);
             return outputs;
           })
      .def("get_input_tensor", &NativePaddlePredictor::GetInputTensor)
      .def("get_output_tensor", &NativePaddlePredictor::GetOutputTensor)
      .def("zero_copy_run", &NativePaddlePredictor::ZeroCopyRun)
      .def("clone",
           [](NativePaddlePredictor &self) { return self.Clone(nullptr); })
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      .def("clone",
           [](NativePaddlePredictor &self, phi::CUDAStream &stream) {
             return self.Clone(stream.raw_stream());
           })
#endif
      .def("scope",
           &NativePaddlePredictor::scope,
           py::return_value_policy::reference);
}

// Registers the `AnalysisConfig` Python class on module `m`, together with
// its nested `Precision` enum and the `DistConfig` class. Each .def forwards
// directly to the corresponding AnalysisConfig/DistConfig member; py::arg
// entries expose keyword arguments with the defaults shown.
void BindAnalysisConfig(py::module *m) {
  py::class_<AnalysisConfig> analysis_config(*m, "AnalysisConfig");

  // Precision enum used by GPU / TensorRT / Lite / custom-device options.
  py::enum_<AnalysisConfig::Precision>(analysis_config, "Precision")
      .value("Float32", AnalysisConfig::Precision::kFloat32)
      .value("Int8", AnalysisConfig::Precision::kInt8)
      .value("Half", AnalysisConfig::Precision::kHalf)
      .value("Bfloat16", AnalysisConfig::Precision::kBf16)
      .export_values();

  analysis_config.def(py::init<>())
      .def(py::init<const AnalysisConfig &>())
      .def(py::init<const std::string &>())
      .def(py::init<const std::string &, const std::string &>())
      .def("summary", &AnalysisConfig::Summary)
      // SetModel is overloaded in C++; the casts pick the 1-arg (model dir)
      // and 2-arg (prog file + params file) variants explicitly.
      .def("set_model",
           (void(AnalysisConfig::*)(const std::string &)) &
               AnalysisConfig::SetModel)
      .def("set_model",
           (void(AnalysisConfig::*)(const std::string &, const std::string &)) &
               AnalysisConfig::SetModel)
      .def("set_prog_file", &AnalysisConfig::SetProgFile)
      .def("set_params_file", &AnalysisConfig::SetParamsFile)
      .def("model_dir", &AnalysisConfig::model_dir)
      .def("prog_file", &AnalysisConfig::prog_file)
      .def("params_file", &AnalysisConfig::params_file)
      .def("enable_use_gpu",
           &AnalysisConfig::EnableUseGpu,
           py::arg("memory_pool_init_size_mb"),
           py::arg("device_id") = 0,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32)
      .def("exp_enable_use_cutlass", &AnalysisConfig::Exp_EnableUseCutlass)
      .def("exp_disable_mixed_precision_ops",
           &AnalysisConfig::Exp_DisableMixedPrecisionOps)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      // Only meaningful on CUDA/HIP builds: run inference on a user stream.
      .def("set_exec_stream",
           [](AnalysisConfig &self, phi::CUDAStream &stream) {
             self.SetExecStream(stream.raw_stream());
           })
#endif
      .def("enable_xpu",
           &AnalysisConfig::EnableXpu,
           py::arg("l3_size") = 16 * 1024 * 1024,
           py::arg("l3_locked") = false,
           py::arg("conv_autotune") = true,
           py::arg("conv_autotune_file") = "",
           py::arg("transformer_encoder_precision") = "int16",
           py::arg("transformer_encoder_adaptive_seqlen") = false,
           py::arg("enable_multi_stream") = false)
      .def("set_xpu_device_id",
           &AnalysisConfig::SetXpuDeviceId,
           py::arg("device_id") = 0)
      .def("set_xpu_config",
           [](AnalysisConfig &self, const paddle_infer::XpuConfig &xpu_config) {
             self.SetXpuConfig(xpu_config);
           })
      .def("xpu_config", &AnalysisConfig::xpu_config)
      .def("enable_custom_device",
           &AnalysisConfig::EnableCustomDevice,
           py::arg("device_type"),
           py::arg("device_id") = 0,
           py::arg("precision") = AnalysisConfig::Precision::kFloat32)
      .def("enable_ipu",
           &AnalysisConfig::EnableIpu,
           py::arg("ipu_device_num") = 1,
           py::arg("ipu_micro_batch_size") = 1,
           py::arg("ipu_enable_pipelining") = false,
           py::arg("ipu_batches_per_step") = 1)
      .def("set_ipu_config",
           &AnalysisConfig::SetIpuConfig,
           py::arg("ipu_enable_fp16") = false,
           py::arg("ipu_replica_num") = 1,
           py::arg("ipu_available_memory_proportion") = 1.0,
           py::arg("ipu_enable_half_partial") = false,
           py::arg("ipu_enable_model_runtime_executor") = false)
      .def("set_ipu_custom_info",
           &AnalysisConfig::SetIpuCustomInfo,
           py::arg("ipu_custom_ops_info") =
               std::vector<std::vector<std::string>>({}),
           py::arg("ipu_custom_patterns") = std::map<std::string, bool>({}))
      .def("load_ipu_config",
           &AnalysisConfig::LoadIpuConfig,
           py::arg("config_path"))
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
      .def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
      .def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
      .def("onnxruntime_enabled", &AnalysisConfig::use_onnxruntime)
      .def("use_opencl", &AnalysisConfig::use_opencl)
      .def("enable_ort_optimization", &AnalysisConfig::EnableORTOptimization)
      .def("use_gpu", &AnalysisConfig::use_gpu)
      .def("use_xpu", &AnalysisConfig::use_xpu)
      .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
      .def("xpu_device_id", &AnalysisConfig::xpu_device_id)
      .def("memory_pool_init_size_mb",
           &AnalysisConfig::memory_pool_init_size_mb)
      .def("fraction_of_gpu_memory_for_pool",
           &AnalysisConfig::fraction_of_gpu_memory_for_pool)
      .def("switch_ir_optim",
           &AnalysisConfig::SwitchIrOptim,
           py::arg("x") = true)
      .def("ir_optim", &AnalysisConfig::ir_optim)
      .def("enable_memory_optim",
           &AnalysisConfig::EnableMemoryOptim,
           py::arg("x") = true)
      .def("enable_profile", &AnalysisConfig::EnableProfile)
      .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
      .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
      .def("enable_save_optim_model",
           &AnalysisConfig::EnableSaveOptimModel,
           py::arg("save_optimized_model") = false)
      .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
      .def("switch_use_feed_fetch_ops",
           &AnalysisConfig::SwitchUseFeedFetchOps,
           py::arg("x") = true)
      .def("use_feed_fetch_ops_enabled",
           &AnalysisConfig::use_feed_fetch_ops_enabled)
      .def("switch_specify_input_names",
           &AnalysisConfig::SwitchSpecifyInputNames,
           py::arg("x") = true)
      .def("specify_input_name", &AnalysisConfig::specify_input_name)
      .def("enable_low_precision_io",
           &AnalysisConfig::EnableLowPrecisionIO,
           py::arg("x") = true)
      .def("enable_tensorrt_engine",
           &AnalysisConfig::EnableTensorRtEngine,
           py::arg("workspace_size") = 1 << 30,
           py::arg("max_batch_size") = 1,
           py::arg("min_subgraph_size") = 3,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
           py::arg("use_static") = false,
           py::arg("use_calib_mode") = true,
           py::arg("use_cuda_graph") = false)
      .def("enable_tensorrt_memory_optim",
           &AnalysisConfig::EnableTensorRTMemoryOptim,
           py::arg("engine_memory_sharing") = true,
           py::arg("sharing_identifier") = 0)
      .def("tensorrt_precision_mode", &AnalysisConfig::tensorrt_precision_mode)
      .def("set_trt_dynamic_shape_info",
           &AnalysisConfig::SetTRTDynamicShapeInfo,
           py::arg("min_input_shape") =
               std::map<std::string, std::vector<int>>({}),
           py::arg("max_input_shape") =
               std::map<std::string, std::vector<int>>({}),
           py::arg("optim_input_shape") =
               std::map<std::string, std::vector<int>>({}),
           py::arg("disable_trt_plugin_fp16") = false)
      .def("tensorrt_dynamic_shape_enabled",
           &AnalysisConfig::tensorrt_dynamic_shape_enabled)
      .def("mark_trt_engine_outputs",
           &AnalysisConfig::MarkTrtEngineOutputs,
           py::arg("output_tensor_names") = std::vector<std::string>({}))
      .def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
      .def("tensorrt_varseqlen_enabled",
           &AnalysisConfig::tensorrt_varseqlen_enabled)
      .def("collect_shape_range_info", &AnalysisConfig::CollectShapeRangeInfo)
      .def("shape_range_info_path", &AnalysisConfig::shape_range_info_path)
      .def("shape_range_info_collected",
           &AnalysisConfig::shape_range_info_collected)
      .def("enable_tuned_tensorrt_dynamic_shape",
           &AnalysisConfig::EnableTunedTensorRtDynamicShape,
           py::arg("shape_range_info_path") = "",
           py::arg("allow_build_at_runtime") = true)
      .def("tuned_tensorrt_dynamic_shape",
           &AnalysisConfig::tuned_tensorrt_dynamic_shape)
      .def("trt_allow_build_at_runtime",
           &AnalysisConfig::trt_allow_build_at_runtime)
      .def("exp_disable_tensorrt_ops", &AnalysisConfig::Exp_DisableTensorRtOPs)
      .def("enable_tensorrt_dla",
           &AnalysisConfig::EnableTensorRtDLA,
           py::arg("dla_core") = 0)
      .def("tensorrt_dla_enabled", &AnalysisConfig::tensorrt_dla_enabled)
      .def("enable_tensorrt_inspector",
           &AnalysisConfig::EnableTensorRtInspector)
      .def("tensorrt_inspector_enabled",
           &AnalysisConfig::tensorrt_inspector_enabled)
      .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
      .def("enable_dlnne",
           &AnalysisConfig::EnableDlnne,
           py::arg("min_subgraph_size") = 3,
           py::arg("max_batch_size") = 1,
           py::arg("use_static_batch") = false,
           py::arg("weight_share_mode") = "0",
           py::arg("disable_nodes_by_outputs") =
               std::unordered_set<std::string>(),
           py::arg("input_shape_dict") =
               std::map<std::string, std::vector<int64_t>>(),
           py::arg("use_calib_mode") = false,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32)
      .def("enable_lite_engine",
           &AnalysisConfig::EnableLiteEngine,
           py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
           py::arg("zero_copy") = false,
           py::arg("passes_filter") = std::vector<std::string>(),
           py::arg("ops_filter") = std::vector<std::string>())
      .def("enable_opencl", &AnalysisConfig::EnableOpenCL)
      .def("lite_engine_enabled", &AnalysisConfig::lite_engine_enabled)
      .def("switch_ir_debug",
           &AnalysisConfig::SwitchIrDebug,
           py::arg("x") = true)
      .def("enable_mkldnn", &AnalysisConfig::EnableMKLDNN)
      .def("mkldnn_enabled", &AnalysisConfig::mkldnn_enabled)
      .def("set_cpu_math_library_num_threads",
           &AnalysisConfig::SetCpuMathLibraryNumThreads)
      .def("cpu_math_library_num_threads",
           &AnalysisConfig::cpu_math_library_num_threads)
      .def("to_native_config", &AnalysisConfig::ToNativeConfig)
      .def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
#ifdef PADDLE_WITH_DNNL
      // The methods below only exist when Paddle is built with oneDNN.
      .def("quantizer_config",
           &AnalysisConfig::mkldnn_quantizer_config,
           py::return_value_policy::reference)
      .def("set_mkldnn_cache_capacity",
           &AnalysisConfig::SetMkldnnCacheCapacity,
           py::arg("capacity") = 0)
      .def("set_bfloat16_op", &AnalysisConfig::SetBfloat16Op)
      .def("enable_mkldnn_int8",
           &AnalysisConfig::EnableMkldnnInt8,
           py::arg("mkldnn_int8_enabled_op_types") =
               std::unordered_set<std::string>({}))
      .def("mkldnn_int8_enabled", &AnalysisConfig::mkldnn_int8_enabled)
      .def("disable_mkldnn_fc_passes",
           &AnalysisConfig::DisableMkldnnFcPasses,
           R"DOC(
           Disable Mkldnn FC
           Args:
                None.
           Returns:
                None.
           Examples:
               .. code-block:: python
                from paddle.inference import Config

                config = Config("")
                config.enable_mkldnn()
                config.disable_mkldnn_fc_passes()
           )DOC")
#endif
      .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp)
      .def("set_model_buffer", &AnalysisConfig::SetModelBuffer)
      .def("model_from_memory", &AnalysisConfig::model_from_memory)
      .def("delete_pass",
           [](AnalysisConfig &self, const std::string &pass) {
             self.pass_builder()->DeletePass(pass);
           })
      // Returns the builder as its PaddlePassBuilder base so Python sees a
      // uniform interface; the config keeps ownership (reference policy).
      .def(
          "pass_builder",
          [](AnalysisConfig &self) {
            return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
          },
          py::return_value_policy::reference)
      .def("nnadapter", &AnalysisConfig::NNAdapter)
      .def("set_dist_config", &AnalysisConfig::SetDistConfig)
      .def("dist_config", &AnalysisConfig::dist_config);

  // Distributed-inference configuration used via set_dist_config above.
  py::class_<DistConfig>(*m, "DistConfig")
      .def(py::init<>())
      .def("set_carrier_id", &DistConfig::SetCarrierId)
      .def("set_comm_init_config", &DistConfig::SetCommInitConfig)
      .def("set_endpoints", &DistConfig::SetEndpoints)
      .def("set_ranks", &DistConfig::SetRanks)
      .def("enable_dist_model", &DistConfig::EnableDistModel)
      .def("carrier_id", &DistConfig::carrier_id)
      .def("current_endpoint", &DistConfig::current_endpoint)
      .def("trainer_endpoints", &DistConfig::trainer_endpoints)
      .def("nranks", &DistConfig::nranks)
      .def("rank", &DistConfig::rank)
      .def("comm_init_config", &DistConfig::comm_init_config)
      .def("use_dist_model", &DistConfig::use_dist_model);
}

// Registers the `LiteNNAdapterConfig` Python class on module `m`.
// Every binding forwards directly to the corresponding C++ setter;
// enable()/disable() toggle use of the NNAdapter backend.
void BindLiteNNAdapterConfig(py::module *m) {
  py::class_<LiteNNAdapterConfig> lite_nnadapter_config(*m,
                                                        "LiteNNAdapterConfig");

  lite_nnadapter_config
      .def("set_device_names", &LiteNNAdapterConfig::SetDeviceNames)
      .def("set_context_properties", &LiteNNAdapterConfig::SetContextProperties)
      .def("set_model_cache_dir", &LiteNNAdapterConfig::SetModelCacheDir)
      .def("set_model_cache_buffers",
           &LiteNNAdapterConfig::SetModelCacheBuffers)
      .def("set_subgraph_partition_config_path",
           &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath)
      .def("set_subgraph_partition_config_buffer",
           &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer)
      .def("enable", &LiteNNAdapterConfig::Enable)
      .def("disable", &LiteNNAdapterConfig::Disable);
}

Z
zhupengyang 已提交
1035 1036 1037 1038 1039 1040 1041
void BindXpuConfig(py::module *m) {
  py::class_<XpuConfig>(*m, "XpuConfig")
      .def(py::init<>())
      .def_readwrite("device_id", &XpuConfig::device_id)
      .def_readwrite("l3_ptr", &XpuConfig::l3_ptr)
      .def_readwrite("l3_size", &XpuConfig::l3_size)
      .def_readwrite("l3_autotune_size", &XpuConfig::l3_autotune_size)
1042
      .def_readwrite("context_gm_size", &XpuConfig::context_gm_size)
1043
      .def_readwrite("context", &XpuConfig::context)
Z
zhupengyang 已提交
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068
      .def_readwrite("stream", &XpuConfig::stream)
      .def_readwrite("conv_autotune_level", &XpuConfig::conv_autotune_level)
      .def_readwrite("conv_autotune_file", &XpuConfig::conv_autotune_file)
      .def_readwrite("conv_autotune_file_writeback",
                     &XpuConfig::conv_autotune_file_writeback)
      .def_readwrite("fc_autotune_level", &XpuConfig::fc_autotune_level)
      .def_readwrite("fc_autotune_file", &XpuConfig::fc_autotune_file)
      .def_readwrite("fc_autotune_file_writeback",
                     &XpuConfig::fc_autotune_file_writeback)
      .def_readwrite("gemm_compute_precision",
                     &XpuConfig::gemm_compute_precision)
      .def_readwrite("transformer_softmax_optimize_level",
                     &XpuConfig::transformer_softmax_optimize_level)
      .def_readwrite("transformer_encoder_adaptive_seqlen",
                     &XpuConfig::transformer_encoder_adaptive_seqlen)
      .def_readwrite("quant_post_static_gelu_out_threshold",
                     &XpuConfig::quant_post_static_gelu_out_threshold)
      .def_readwrite("quant_post_dynamic_activation_method",
                     &XpuConfig::quant_post_dynamic_activation_method)
      .def_readwrite("quant_post_dynamic_weight_precision",
                     &XpuConfig::quant_post_dynamic_weight_precision)
      .def_readwrite("quant_post_dynamic_op_types",
                     &XpuConfig::quant_post_dynamic_op_types);
}

#ifdef PADDLE_WITH_DNNL
// Registers the `MkldnnQuantizerConfig` Python class (oneDNN builds only).
// set_quant_data copies the Python-provided warm-up tensors into a
// shared_ptr so the quantizer can own them beyond this call.
void BindMkldnnQuantizerConfig(py::module *m) {
  py::class_<MkldnnQuantizerConfig> quantizer_config(*m,
                                                     "MkldnnQuantizerConfig");
  quantizer_config.def(py::init<const MkldnnQuantizerConfig &>())
      .def(py::init<>())
      .def("set_quant_data",
           [](MkldnnQuantizerConfig &self,
              const std::vector<PaddleTensor> &data) {
             auto warmup_data =
                 std::make_shared<std::vector<PaddleTensor>>(data);
             self.SetWarmupData(warmup_data);
           })
      .def("set_quant_batch_size", &MkldnnQuantizerConfig::SetWarmupBatchSize)
      .def("set_enabled_op_types", &MkldnnQuantizerConfig::SetEnabledOpTypes);
}
#endif

F
flame 已提交
1088 1089 1090 1091 1092 1093 1094
void BindAnalysisPredictor(py::module *m) {
  py::class_<AnalysisPredictor, PaddlePredictor>(*m, "AnalysisPredictor")
      .def(py::init<const AnalysisConfig &>())
      .def("init", &AnalysisPredictor::Init)
      .def(
          "run",
          [](AnalysisPredictor &self, const std::vector<PaddleTensor> &inputs) {
1095 1096 1097
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
            pybind11::gil_scoped_release release;
#endif
F
flame 已提交
1098 1099 1100 1101 1102 1103
            std::vector<PaddleTensor> outputs;
            self.Run(inputs, &outputs);
            return outputs;
          })
      .def("get_input_tensor", &AnalysisPredictor::GetInputTensor)
      .def("get_output_tensor", &AnalysisPredictor::GetOutputTensor)
1104 1105 1106
      .def("get_input_names", &AnalysisPredictor::GetInputNames)
      .def("get_output_names", &AnalysisPredictor::GetOutputNames)
      .def("get_input_tensor_shape", &AnalysisPredictor::GetInputTensorShape)
F
flame 已提交
1107
      .def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
1108 1109
      .def("clear_intermediate_tensor",
           &AnalysisPredictor::ClearIntermediateTensor)
1110
      .def("try_shrink_memory", &AnalysisPredictor::TryShrinkMemory)
1111 1112 1113 1114 1115
      .def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar)
      .def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch)
      .def("prepare_argument", &AnalysisPredictor::PrepareArgument)
      .def("optimize_inference_program",
           &AnalysisPredictor::OptimizeInferenceProgram)
W
Wilber 已提交
1116 1117
      .def("analysis_argument",
           &AnalysisPredictor::analysis_argument,
1118
           py::return_value_policy::reference)
1119
      .def("clone", [](AnalysisPredictor &self) { return self.Clone(nullptr); })
1120 1121 1122
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      .def("clone",
           [](AnalysisPredictor &self, phi::CUDAStream &stream) {
1123
             return self.Clone(stream.raw_stream());
1124 1125
           })
#endif
W
Wilber 已提交
1126 1127
      .def("scope",
           &AnalysisPredictor::scope,
1128
           py::return_value_policy::reference)
W
Wilber 已提交
1129 1130
      .def("program",
           &AnalysisPredictor::program,
1131 1132 1133
           py::return_value_policy::reference)
      .def("get_serialized_program", &AnalysisPredictor::GetSerializedProgram)
      .def("mkldnn_quantize", &AnalysisPredictor::MkldnnQuantize)
W
Wilber 已提交
1134 1135
      .def(
          "SaveOptimModel", &AnalysisPredictor::SaveOptimModel, py::arg("dir"));
F
flame 已提交
1136
}
1137

W
Wilber 已提交
1138 1139 1140 1141 1142 1143 1144
void BindPaddleInferPredictor(py::module *m) {
  py::class_<paddle_infer::Predictor>(*m, "PaddleInferPredictor")
      .def(py::init<const paddle_infer::Config &>())
      .def("get_input_names", &paddle_infer::Predictor::GetInputNames)
      .def("get_output_names", &paddle_infer::Predictor::GetOutputNames)
      .def("get_input_handle", &paddle_infer::Predictor::GetInputHandle)
      .def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
1145 1146 1147
      .def(
          "run",
          [](paddle_infer::Predictor &self, py::handle py_in_tensor_list) {
1148 1149 1150
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
            pybind11::gil_scoped_release release;
#endif
1151 1152 1153 1154 1155 1156 1157
            auto in_tensor_list =
                CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
            std::vector<paddle::Tensor> outputs;
            self.Run(in_tensor_list, &outputs);
            return py::handle(ToPyObject(outputs));
          },
          py::arg("inputs"))
1158 1159 1160 1161 1162 1163 1164
      .def("run",
           [](paddle_infer::Predictor &self) {
#if defined(PADDLE_WITH_CUSTOM_DEVICE) && !defined(PADDLE_NO_PYTHON)
             pybind11::gil_scoped_release release;
#endif
             self.Run();
           })
1165 1166
      .def("clone",
           [](paddle_infer::Predictor &self) { return self.Clone(nullptr); })
1167 1168 1169
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      .def("clone",
           [](paddle_infer::Predictor &self, phi::CUDAStream &stream) {
1170
             return self.Clone(stream.raw_stream());
1171 1172
           })
#endif
1173
      .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory)
W
Wilber 已提交
1174
      .def("clear_intermediate_tensor",
1175 1176
           &paddle_infer::Predictor::ClearIntermediateTensor)
      .def("register_output_hook",
1177
           &paddle_infer::Predictor::RegisterOutputHook);
W
Wilber 已提交
1178 1179
}

1180 1181
void BindZeroCopyTensor(py::module *m) {
  py::class_<ZeroCopyTensor>(*m, "ZeroCopyTensor")
W
Wilber 已提交
1182 1183 1184 1185 1186 1187
      .def(
          "reshape",
          py::overload_cast<const std::vector<int> &>(&ZeroCopyTensor::Reshape))
      .def("reshape",
           py::overload_cast<const std::size_t &>(
               &paddle_infer::Tensor::ReshapeStrings))
1188 1189
      .def("copy_from_cpu", &ZeroCopyTensorCreate<int8_t>)
      .def("copy_from_cpu", &ZeroCopyTensorCreate<uint8_t>)
1190 1191 1192
      .def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
      .def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
      .def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
1193
      .def("copy_from_cpu", &ZeroCopyTensorCreate<phi::dtype::float16>)
Y
Yuanle Liu 已提交
1194 1195
      // NOTE(liuyuanle): double must be bound after float.
      .def("copy_from_cpu", &ZeroCopyTensorCreate<double>)
1196
      .def("copy_from_cpu", &ZeroCopyTensorCreate<bool>)
S
Steffy-zxf 已提交
1197
      .def("copy_from_cpu", &ZeroCopyStringTensorCreate)
1198 1199 1200 1201 1202 1203 1204
      .def("copy_to_cpu", &ZeroCopyTensorToNumpy)
      .def("shape", &ZeroCopyTensor::shape)
      .def("set_lod", &ZeroCopyTensor::SetLoD)
      .def("lod", &ZeroCopyTensor::lod)
      .def("type", &ZeroCopyTensor::type);
}

W
Wilber 已提交
1205 1206
void BindPaddleInferTensor(py::module *m) {
  py::class_<paddle_infer::Tensor>(*m, "PaddleInferTensor")
W
Wilber 已提交
1207 1208 1209 1210 1211 1212
      .def("reshape",
           py::overload_cast<const std::vector<int> &>(
               &paddle_infer::Tensor::Reshape))
      .def("reshape",
           py::overload_cast<const std::size_t &>(
               &paddle_infer::Tensor::ReshapeStrings))
1213 1214 1215 1216 1217
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int8_t>)
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<uint8_t>)
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
1218
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<phi::dtype::float16>)
Y
Yuanle Liu 已提交
1219 1220
      // NOTE(liuyuanle): double must be bound after float.
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<double>)
1221 1222 1223 1224 1225 1226 1227 1228 1229
      .def("_copy_from_cpu_bind", &PaddleInferTensorCreate<bool>)
      .def("_copy_from_cpu_bind", &PaddleInferStringTensorCreate)
      .def("_share_external_data_bind", &PaddleInferShareExternalData)
      .def("_share_external_data_paddle_tensor_bind",
           [](paddle_infer::Tensor &self, const py::handle &input) {
             PyObject *obj = input.ptr();
             PaddleTensorShareExternalData(self,
                                           std::move(CastPyArg2Tensor(obj, 0)));
           })
W
Wilber 已提交
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239
      .def("copy_to_cpu", &PaddleInferTensorToNumpy)
      .def("shape", &paddle_infer::Tensor::shape)
      .def("set_lod", &paddle_infer::Tensor::SetLoD)
      .def("lod", &paddle_infer::Tensor::lod)
      .def("type", &paddle_infer::Tensor::type);
}

// Registers the `PredictorPool` Python class on module `m`.
// The pool owns the predictors it hands out, hence the reference policy.
void BindPredictorPool(py::module *m) {
  py::class_<paddle_infer::services::PredictorPool>(*m, "PredictorPool")
      .def(py::init<const paddle_infer::Config &, size_t>())
      // "retrive" is a historical misspelling kept for backward
      // compatibility with existing Python callers.
      .def("retrive",
           &paddle_infer::services::PredictorPool::Retrive,
           py::return_value_policy::reference)
      // Correctly spelled alias for the binding above.
      .def("retrieve",
           &paddle_infer::services::PredictorPool::Retrive,
           py::return_value_policy::reference);
}

1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263
void BindPaddlePassBuilder(py::module *m) {
  py::class_<PaddlePassBuilder>(*m, "PaddlePassBuilder")
      .def(py::init<const std::vector<std::string> &>())
      .def("set_passes",
           [](PaddlePassBuilder &self, const std::vector<std::string> &passes) {
             self.ClearPasses();
             for (auto pass : passes) {
               self.AppendPass(std::move(pass));
             }
           })
      .def("append_pass", &PaddlePassBuilder::AppendPass)
      .def("insert_pass", &PaddlePassBuilder::InsertPass)
      .def("delete_pass",
           [](PaddlePassBuilder &self, const std::string &pass_type) {
             self.DeletePass(pass_type);
           })
      .def("append_analysis_pass", &PaddlePassBuilder::AppendAnalysisPass)
      .def("turn_on_debug", &PaddlePassBuilder::TurnOnDebug)
      .def("debug_string", &PaddlePassBuilder::DebugString)
W
Wilber 已提交
1264 1265
      .def("all_passes",
           &PaddlePassBuilder::AllPasses,
1266 1267 1268 1269 1270 1271 1272 1273
           py::return_value_policy::reference)
      .def("analysis_passes", &PaddlePassBuilder::AnalysisPasses);

  py::class_<PassStrategy, PaddlePassBuilder>(*m, "PassStrategy")
      .def(py::init<const std::vector<std::string> &>())
      .def("enable_cudnn", &PassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &PassStrategy::EnableMKLDNN)
      .def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer)
1274
      .def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16)
1275 1276 1277 1278 1279 1280 1281
      .def("use_gpu", &PassStrategy::use_gpu);

  py::class_<CpuPassStrategy, PassStrategy>(*m, "CpuPassStrategy")
      .def(py::init<>())
      .def(py::init<const CpuPassStrategy &>())
      .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN)
1282 1283
      .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16);
1284 1285 1286 1287 1288 1289

  py::class_<GpuPassStrategy, PassStrategy>(*m, "GpuPassStrategy")
      .def(py::init<>())
      .def(py::init<const GpuPassStrategy &>())
      .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN)
      .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN)
1290 1291
      .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer)
      .def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16);
1292
}
}  // namespace
}  // namespace pybind
}  // namespace paddle