Unverified · Commit 6326c3ef authored by Wilber, committed by GitHub

[Inference] Inference python api support fp16 (#34676)

Parent 016cc56d
......@@ -25,12 +25,16 @@
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/string/printf.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
namespace paddle_infer {
using float16 = paddle::platform::float16;
}
///
/// \file analysis_predictor.h
///
......
......@@ -19,9 +19,12 @@
#include "paddle/fluid/inference/api/paddle_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle_infer {
using float16 = paddle::platform::float16;
void Tensor::Reshape(const std::vector<int> &shape) {
PADDLE_ENFORCE_EQ(
name_.empty(), false,
......@@ -104,6 +107,8 @@ DataType Tensor::type() const {
auto type = tensor->type();
if (type == paddle::framework::proto::VarType::FP32) {
return DataType::FLOAT32;
} else if (type == paddle::framework::proto::VarType::FP16) {
return DataType::FLOAT16;
} else if (type == paddle::framework::proto::VarType::INT64) {
return DataType::INT64;
} else if (type == paddle::framework::proto::VarType::INT32) {
......@@ -261,12 +266,14 @@ template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);
template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data);
template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
int *size) const;
......
......@@ -25,6 +25,7 @@ enum DataType {
INT32,
UINT8,
INT8,
FLOAT16,
// TODO(Superjomn) support more data types if needed.
};
......
......@@ -34,6 +34,36 @@
namespace py = pybind11;
namespace pybind11 {
namespace detail {
// Note: use the same enum number as numpy's float16.
//   import numpy as np
//   print(np.dtype(np.float16).num)  # 23
constexpr int NPY_FLOAT16_ = 23;
constexpr int NPY_UINT16_ = 4;
// Note: Since float16 is not a builtin type in C++, we register
// paddle::platform::float16 as numpy.float16.
// Ref: https://github.com/pybind/pybind11/issues/1776
template <>
struct npy_format_descriptor<paddle_infer::float16> {
static py::dtype dtype() {
handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_);
return reinterpret_borrow<py::dtype>(ptr);
}
static std::string format() {
// Note: "e" represents float16.
// Details at:
// https://docs.python.org/3/library/struct.html#format-characters.
return "e";
}
static constexpr auto name = _("float16");
};
} // namespace detail
} // namespace pybind11
namespace paddle {
namespace pybind {
using paddle::AnalysisPredictor;
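
The `npy_format_descriptor` registration above leans on two numpy facts recorded in the comments: the builtin type number of float16 is 23, and its buffer-protocol format character is "e". Both can be double-checked from the Python side (a standalone sketch, no Paddle required):

```python
import numpy as np

# numpy assigns each builtin dtype a fixed type number;
# NPY_FLOAT16_ above mirrors float16's number, 23.
assert np.dtype(np.float16).num == 23
assert np.dtype(np.uint16).num == 4   # NPY_UINT16_

# "e" is the struct/buffer-protocol format character for half
# precision, matching the descriptor's format().
assert np.dtype(np.float16).char == "e"
```
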
......@@ -126,6 +156,9 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
case PaddleDType::UINT8:
dt = py::dtype::of<uint8_t>();
break;
case PaddleDType::FLOAT16:
dt = py::dtype::of<paddle_infer::float16>();
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type. Now only supports INT32, INT64, UINT8 and "
......@@ -196,6 +229,10 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT
case PaddleDType::FLOAT32:
tensor.copy_to_cpu<float>(static_cast<float *>(array.mutable_data()));
break;
case PaddleDType::FLOAT16:
tensor.copy_to_cpu<paddle::platform::float16>(
static_cast<paddle::platform::float16 *>(array.mutable_data()));
break;
case PaddleDType::UINT8:
tensor.copy_to_cpu<uint8_t>(static_cast<uint8_t *>(array.mutable_data()));
break;
......@@ -226,6 +263,10 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT
case PaddleDType::FLOAT32:
tensor.CopyToCpu<float>(static_cast<float *>(array.mutable_data()));
break;
case PaddleDType::FLOAT16:
tensor.CopyToCpu<paddle::platform::float16>(
static_cast<paddle::platform::float16 *>(array.mutable_data()));
break;
case PaddleDType::UINT8:
tensor.CopyToCpu(static_cast<uint8_t *>(array.mutable_data()));
break;
......@@ -642,6 +683,7 @@ void BindZeroCopyTensor(py::module *m) {
.def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<paddle_infer::float16>)
.def("copy_to_cpu", &ZeroCopyTensorToNumpy)
.def("shape", &ZeroCopyTensor::shape)
.def("set_lod", &ZeroCopyTensor::SetLoD)
......@@ -655,6 +697,7 @@ void BindPaddleInferTensor(py::module *m) {
.def("copy_from_cpu", &PaddleInferTensorCreate<int32_t>)
.def("copy_from_cpu", &PaddleInferTensorCreate<int64_t>)
.def("copy_from_cpu", &PaddleInferTensorCreate<float>)
.def("copy_from_cpu", &PaddleInferTensorCreate<paddle_infer::float16>)
.def("copy_to_cpu", &PaddleInferTensorToNumpy)
.def("shape", &paddle_infer::Tensor::shape)
.def("set_lod", &paddle_infer::Tensor::SetLoD)
......
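
With the new enum value, the template instantiations, and the `copy_from_cpu` bindings in place, a numpy float16 array passes straight through the Python API. A minimal usage sketch (the model filenames, input shape, and FP16-ness of the output are placeholder assumptions, not part of this patch):

```python
import numpy as np
import paddle.inference as paddle_infer

# Placeholder paths; substitute a real exported inference model.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
predictor = paddle_infer.create_predictor(config)

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])

# A float16 array now dispatches to the FLOAT16 overload added above
# instead of failing with an unsupported-dtype error.
data = np.random.rand(1, 3, 224, 224).astype(np.float16)  # shape is illustrative
input_handle.reshape(data.shape)
input_handle.copy_from_cpu(data)

predictor.run()

output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
result = output_handle.copy_to_cpu()  # numpy array; np.float16 if the model emits FP16
```
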