From 6326c3efbec9a024364b1fe4450a48c3eaa63de2 Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 12 Aug 2021 13:21:10 +0800 Subject: [PATCH] [Inference] Inference python api support fp16 (#34676) --- .../fluid/inference/api/analysis_predictor.h | 4 ++ .../inference/api/details/zero_copy_tensor.cc | 7 +++ paddle/fluid/inference/api/paddle_tensor.h | 1 + paddle/fluid/pybind/inference_api.cc | 43 +++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index b55d08dda5..656db31d47 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -25,12 +25,16 @@ #include "paddle/fluid/inference/api/details/reset_tensor_array.h" #include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/string/printf.h" #ifdef PADDLE_WITH_TESTING #include #include #endif +namespace paddle_infer { +using float16 = paddle::platform::float16; +} /// /// \file analysis_predictor.h /// diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index b117a21dea..ff167aa7cf 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -19,9 +19,12 @@ #include "paddle/fluid/inference/api/paddle_tensor.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/float16.h" namespace paddle_infer { +using float16 = paddle::platform::float16; + void Tensor::Reshape(const std::vector &shape) { PADDLE_ENFORCE_EQ( name_.empty(), false, @@ -104,6 +107,8 @@ DataType Tensor::type() const { auto type = tensor->type(); if (type == paddle::framework::proto::VarType::FP32) { return DataType::FLOAT32; + } else if (type == paddle::framework::proto::VarType::FP16) { + return DataType::FLOAT16; } else if (type == paddle::framework::proto::VarType::INT64) { return DataType::INT64; } else if (type == paddle::framework::proto::VarType::INT32) { @@ -261,12 +266,14 @@ template PD_INFER_DECL void Tensor::CopyFromCpu(const int64_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int32_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const uint8_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int8_t *data); +template PD_INFER_DECL void Tensor::CopyFromCpu(const float16 *data); template PD_INFER_DECL void Tensor::CopyToCpu(float *data); template PD_INFER_DECL void Tensor::CopyToCpu(int64_t *data); template PD_INFER_DECL void Tensor::CopyToCpu(int32_t *data); template PD_INFER_DECL void Tensor::CopyToCpu(uint8_t *data); template PD_INFER_DECL void Tensor::CopyToCpu(int8_t *data); +template PD_INFER_DECL void Tensor::CopyToCpu(float16 *data); template PD_INFER_DECL float *Tensor::data(PlaceType *place, int *size) const; diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index fa3067b62d..1f813d52ef 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -25,6 +25,7 @@ enum DataType { INT32, UINT8, INT8, + FLOAT16, // TODO(Superjomn) support more data types if needed. }; diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index ecef0c350b..b7cf907b5d 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -34,6 +34,36 @@ namespace py = pybind11; +namespace pybind11 { +namespace detail { + +// Note: use same enum number of float16 in numpy. +// import numpy as np +// print np.dtype(np.float16).num # 23 +constexpr int NPY_FLOAT16_ = 23; +constexpr int NPY_UINT16_ = 4; + +// Note: Since float16 is not a builtin type in C++, we register +// paddle::platform::float16 as numpy.float16. +// Ref: https://github.com/pybind/pybind11/issues/1776 +template <> +struct npy_format_descriptor { + static py::dtype dtype() { + handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_); + return reinterpret_borrow(ptr); + } + static std::string format() { + // Note: "e" represents float16. + // Details at: + // https://docs.python.org/3/library/struct.html#format-characters. + return "e"; + } + static constexpr auto name = _("float16"); +}; + +} // namespace detail +} // namespace pybind11 + namespace paddle { namespace pybind { using paddle::AnalysisPredictor; @@ -126,6 +156,9 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { case PaddleDType::UINT8: dt = py::dtype::of(); break; + case PaddleDType::FLOAT16: + dt = py::dtype::of(); + break; default: PADDLE_THROW(platform::errors::Unimplemented( "Unsupported data type. Now only supports INT32, INT64, UINT8 and " @@ -196,6 +229,10 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT case PaddleDType::FLOAT32: tensor.copy_to_cpu(static_cast(array.mutable_data())); break; + case PaddleDType::FLOAT16: + tensor.copy_to_cpu( + static_cast(array.mutable_data())); + break; case PaddleDType::UINT8: tensor.copy_to_cpu(static_cast(array.mutable_data())); break; @@ -226,6 +263,10 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT case PaddleDType::FLOAT32: tensor.CopyToCpu(static_cast(array.mutable_data())); break; + case PaddleDType::FLOAT16: + tensor.CopyToCpu( + static_cast(array.mutable_data())); + break; case PaddleDType::UINT8: tensor.CopyToCpu(static_cast(array.mutable_data())); break; @@ -642,6 +683,7 @@ void BindZeroCopyTensor(py::module *m) { .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_to_cpu", &ZeroCopyTensorToNumpy) .def("shape", &ZeroCopyTensor::shape) .def("set_lod", &ZeroCopyTensor::SetLoD) @@ -655,6 +697,7 @@ void BindPaddleInferTensor(py::module *m) { .def("copy_from_cpu", &PaddleInferTensorCreate) .def("copy_from_cpu", &PaddleInferTensorCreate) .def("copy_from_cpu", &PaddleInferTensorCreate) + .def("copy_from_cpu", &PaddleInferTensorCreate) .def("copy_to_cpu", &PaddleInferTensorToNumpy) .def("shape", &paddle_infer::Tensor::shape) .def("set_lod", &paddle_infer::Tensor::SetLoD) -- GitLab