Unverified commit dc13f7c5, authored by Yuanle Liu, committed by GitHub

[Paddle Inference] enhance paddle_infer::Tensor data type (#49388)

Parent 72597c3e
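This commit threads a new BOOL enumerator through the Paddle Inference tensor API: the public DataType enum, the copy/share template instantiations, and the pybind11 bindings. A minimal sketch of the resulting C++ usage — the predictor setup is elided, `out_name` and `FetchBoolMask` are placeholder names, and the header name is assumed to be the one shipped in the inference package:

#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "paddle_inference_api.h"

void FetchBoolMask(paddle_infer::Predictor *predictor,
                   const std::string &out_name) {
  auto out = predictor->GetOutputHandle(out_name);
  if (out->type() == paddle_infer::DataType::BOOL) {  // new in this commit
    std::vector<int> shape = out->shape();
    int numel = std::accumulate(
        shape.begin(), shape.end(), 1, std::multiplies<int>());
    // std::vector<bool> is bit-packed and has no usable data(), so use a
    // plain bool buffer as the copy target.
    std::unique_ptr<bool[]> mask(new bool[numel]);
    out->CopyToCpu(mask.get());  // dispatches to the new <bool> instantiation
  }
}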
@@ -109,7 +109,7 @@ class AnalysisPredictor : public PaddlePredictor {
   // negative sharing_identifier directly. In the future, this may affect
   // the meaning of negative predictor id.
   predictor_id_ = -trt_identifier;
-  LOG(WARNING)
+  LOG_FIRST_N(WARNING, 1)
       << "Since the engine context memory of multiple predictors "
          "is enabled in Paddle-TRT, we set the id of current predictor to "
          "negative sharing_identifier you specified.";
...
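glog's LOG_FIRST_N(WARNING, 1) logs the message only the first time this statement is reached, so the warning is now emitted once per process instead of once for every predictor that shares the engine context memory.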
@@ -176,6 +176,8 @@ DataType Tensor::type() const {
     return DataType::UINT8;
   } else if (type == paddle::framework::proto::VarType::INT8) {
     return DataType::INT8;
+  } else if (type == paddle::framework::proto::VarType::BOOL) {
+    return DataType::BOOL;
   }
   return DataType::FLOAT32;
 }
@@ -279,6 +281,11 @@ void Tensor::CopyFromCpu(const T *data) {
 template <typename T>
 struct DataTypeInfo;
 
+template <>
+struct DataTypeInfo<bool> {
+  paddle::experimental::DataType TYPE = paddle::experimental::DataType::BOOL;
+};
+
 template <>
 struct DataTypeInfo<float> {
   paddle::experimental::DataType TYPE = paddle::experimental::DataType::FLOAT32;
@@ -513,6 +520,7 @@ template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);
+template PD_INFER_DECL void Tensor::CopyFromCpu<bool>(const bool *data);
 
 template PD_INFER_DECL void Tensor::ShareExternalData<float>(
     const float *data,
@@ -544,6 +552,11 @@ template PD_INFER_DECL void Tensor::ShareExternalData<float16>(
     const std::vector<int> &shape,
     PlaceType place,
     DataLayout layout);
+template PD_INFER_DECL void Tensor::ShareExternalData<bool>(
+    const bool *data,
+    const std::vector<int> &shape,
+    PlaceType place,
+    DataLayout layout);
 
 template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data) const;
 template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data) const;
@@ -551,6 +564,7 @@ template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data) const;
 template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data) const;
 template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data) const;
 template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data) const;
+template PD_INFER_DECL void Tensor::CopyToCpu<bool>(bool *data) const;
 
 template PD_INFER_DECL void Tensor::CopyToCpuImpl<float>(float *data,
                                                          void *exec_stream,
@@ -566,6 +580,10 @@ template PD_INFER_DECL void Tensor::CopyToCpuImpl<int8_t>(
     int8_t *data, void *exec_stream, CallbackFunc cb, void *cb_params) const;
 template PD_INFER_DECL void Tensor::CopyToCpuImpl<float16>(
     float16 *data, void *exec_stream, CallbackFunc cb, void *cb_params) const;
+template PD_INFER_DECL void Tensor::CopyToCpuImpl<bool>(bool *data,
+                                                        void *exec_stream,
+                                                        CallbackFunc cb,
+                                                        void *cb_params) const;
 
 template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
     float *data, void *exec_stream) const;
@@ -579,6 +597,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync<int8_t>(
     int8_t *data, void *exec_stream) const;
 template PD_INFER_DECL void Tensor::CopyToCpuAsync<float16>(
     float16 *data, void *exec_stream) const;
+template PD_INFER_DECL void Tensor::CopyToCpuAsync<bool>(
+    bool *data, void *exec_stream) const;
 
 template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
     float *data, CallbackFunc cb, void *cb_params) const;
@@ -592,6 +612,9 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync<int8_t>(
     int8_t *data, CallbackFunc cb, void *cb_params) const;
 template PD_INFER_DECL void Tensor::CopyToCpuAsync<float16>(
     float16 *data, CallbackFunc cb, void *cb_params) const;
+template PD_INFER_DECL void Tensor::CopyToCpuAsync<bool>(bool *data,
+                                                         CallbackFunc cb,
+                                                         void *cb_params) const;
 
 template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
                                                   int *size) const;
@@ -605,6 +628,8 @@ template PD_INFER_DECL int8_t *Tensor::data<int8_t>(PlaceType *place,
                                                     int *size) const;
 template PD_INFER_DECL float16 *Tensor::data<float16>(PlaceType *place,
                                                       int *size) const;
+template PD_INFER_DECL bool *Tensor::data<bool>(PlaceType *place,
+                                                int *size) const;
 
 template PD_INFER_DECL float *Tensor::mutable_data<float>(PlaceType place);
 template PD_INFER_DECL int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
@@ -612,6 +637,7 @@ template PD_INFER_DECL int32_t *Tensor::mutable_data<int32_t>(PlaceType place);
 template PD_INFER_DECL uint8_t *Tensor::mutable_data<uint8_t>(PlaceType place);
 template PD_INFER_DECL int8_t *Tensor::mutable_data<int8_t>(PlaceType place);
 template PD_INFER_DECL float16 *Tensor::mutable_data<float16>(PlaceType place);
+template PD_INFER_DECL bool *Tensor::mutable_data<bool>(PlaceType place);
 
 Tensor::Tensor(void *scope, const void *device_contexts)
     : scope_{scope}, device_contexs_(device_contexts) {}
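All of these one-line additions follow from the same constraint: the Tensor copy templates are defined in the .cc file, so every supported element type must be explicitly instantiated (and exported via PD_INFER_DECL) for its symbols to exist in the shipped library; adding bool therefore means one new instantiation per template. A tiny generic illustration of explicit instantiation — CopyBuffer and the file names are invented, not Paddle code:

// copy.h -- users of the library see only the declaration.
template <typename T>
void CopyBuffer(const T *src, T *dst, int n);

// copy.cc -- the definition lives here, so the library must name each T.
template <typename T>
void CopyBuffer(const T *src, T *dst, int n) {
  for (int i = 0; i < n; ++i) dst[i] = src[i];
}
template void CopyBuffer<float>(const float *src, float *dst, int n);
template void CopyBuffer<bool>(const bool *src, bool *dst, int n);  // new type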
@@ -895,6 +921,8 @@ template void InternalUtils::CopyFromCpuWithIoStream<int8_t>(
     paddle_infer::Tensor *t, const int8_t *data, cudaStream_t stream);
 template void InternalUtils::CopyFromCpuWithIoStream<float16>(
     paddle_infer::Tensor *t, const float16 *data, cudaStream_t stream);
+template void InternalUtils::CopyFromCpuWithIoStream<bool>(
+    paddle_infer::Tensor *t, const bool *data, cudaStream_t stream);
 
 template void InternalUtils::CopyToCpuWithIoStream<float>(
     paddle_infer::Tensor *t, float *data, cudaStream_t stream);
@@ -908,6 +936,8 @@ template void InternalUtils::CopyToCpuWithIoStream<int8_t>(
     paddle_infer::Tensor *t, int8_t *data, cudaStream_t stream);
 template void InternalUtils::CopyToCpuWithIoStream<float16>(
     paddle_infer::Tensor *t, float16 *data, cudaStream_t stream);
+template void InternalUtils::CopyToCpuWithIoStream<bool>(
+    paddle_infer::Tensor *t, bool *data, cudaStream_t stream);
 
 }  // namespace experimental
...
@@ -161,7 +161,7 @@ struct PD_INFER_DECL AnalysisConfig {
   explicit AnalysisConfig(const std::string& prog_file,
                           const std::string& params_file);
   ///
-  /// \brief Precision of inference in TensorRT.
+  /// \brief Precision of inference.
   ///
   enum class Precision {
     kFloat32 = 0,  ///< fp32
...
@@ -52,13 +52,14 @@ class InternalUtils;
 /// \brief Paddle data type.
 enum DataType {
-  FLOAT32,
   INT64,
   INT32,
   UINT8,
   INT8,
+  FLOAT32,
   FLOAT16,
-  // TODO(Superjomn) support more data types if needed.
+  BOOL,
+  // TODO(Inference): support more data types if needed.
 };
 
 enum class PlaceType { kUNK = -1, kCPU, kGPU, kXPU, kNPU, kIPU, kCUSTOM };
...
@@ -175,16 +175,22 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
     case PaddleDType::FLOAT32:
       dt = py::dtype::of<float>();
       break;
+    case PaddleDType::FLOAT16:
+      dt = py::dtype::of<paddle_infer::float16>();
+      break;
     case PaddleDType::UINT8:
       dt = py::dtype::of<uint8_t>();
       break;
-    case PaddleDType::FLOAT16:
-      dt = py::dtype::of<paddle_infer::float16>();
-      break;
+    case PaddleDType::INT8:
+      dt = py::dtype::of<int8_t>();
+      break;
+    case PaddleDType::BOOL:
+      dt = py::dtype::of<bool>();
+      break;
     default:
       PADDLE_THROW(platform::errors::Unimplemented(
-          "Unsupported data type. Now only supports INT32, INT64, UINT8 and "
-          "FLOAT32."));
+          "Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
+          "FLOAT16, INT8, UINT8 and BOOL."));
   }
   return dt;
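py::dtype::of<bool>() resolves to NumPy's one-byte bool_ dtype, which matches sizeof(bool) on mainstream ABIs, so BOOL tensors map onto NumPy arrays without element conversion.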
@@ -282,10 +288,22 @@ size_t PaddleGetDTypeSize(PaddleDType dt) {
     case PaddleDType::FLOAT32:
       size = sizeof(float);
       break;
+    case PaddleDType::FLOAT16:
+      size = sizeof(paddle_infer::float16);
+      break;
+    case PaddleDType::INT8:
+      size = sizeof(int8_t);
+      break;
+    case PaddleDType::UINT8:
+      size = sizeof(uint8_t);
+      break;
+    case PaddleDType::BOOL:
+      size = sizeof(bool);
+      break;
     default:
       PADDLE_THROW(platform::errors::Unimplemented(
-          "Unsupported data type. Now only supports INT32, INT64 and "
-          "FLOAT32."));
+          "Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
+          "FLOAT16, INT8, UINT8 and BOOL."));
   }
   return size;
 }
@@ -316,10 +334,13 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) {  // NOLINT
     case PaddleDType::INT8:
       tensor.copy_to_cpu<int8_t>(static_cast<int8_t *>(array.mutable_data()));
       break;
+    case PaddleDType::BOOL:
+      tensor.copy_to_cpu<bool>(static_cast<bool *>(array.mutable_data()));
+      break;
     default:
       PADDLE_THROW(platform::errors::Unimplemented(
-          "Unsupported data type. Now only supports INT32, INT64, UINT8 and "
-          "FLOAT32."));
+          "Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
+          "FLOAT16, INT8, UINT8 and BOOL."));
   }
   return array;
 }
@@ -350,10 +371,13 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) {  // NOLINT
     case PaddleDType::INT8:
       tensor.CopyToCpu(static_cast<int8_t *>(array.mutable_data()));
       break;
+    case PaddleDType::BOOL:
+      tensor.CopyToCpu(static_cast<bool *>(array.mutable_data()));
+      break;
    default:
       PADDLE_THROW(platform::errors::Unimplemented(
-          "Unsupported data type. Now only supports INT32, INT64 and "
-          "FLOAT32."));
+          "Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
+          "FLOAT16, INT8, UINT8 and BOOL."));
   }
   return array;
 }
@@ -433,8 +457,12 @@ namespace {
 void BindPaddleDType(py::module *m) {
   py::enum_<PaddleDType>(*m, "PaddleDType")
       .value("FLOAT32", PaddleDType::FLOAT32)
+      .value("FLOAT16", PaddleDType::FLOAT16)
       .value("INT64", PaddleDType::INT64)
-      .value("INT32", PaddleDType::INT32);
+      .value("INT32", PaddleDType::INT32)
+      .value("UINT8", PaddleDType::UINT8)
+      .value("INT8", PaddleDType::INT8)
+      .value("BOOL", PaddleDType::BOOL);
 }
 
 void BindPaddleDataLayout(py::module *m) {
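py::enum_ simply registers each enumerator on the Python class, so after this change Python scripts can reference PaddleDType.BOOL (and the previously unbound FLOAT16, UINT8, and INT8). A self-contained pybind11 sketch of the same pattern, with invented module and enum names:

#include <pybind11/pybind11.h>
namespace py = pybind11;

enum class Fruit { kApple = 0, kPear = 1 };

PYBIND11_MODULE(demo, m) {
  py::enum_<Fruit>(m, "Fruit")
      .value("APPLE", Fruit::kApple)
      .value("PEAR", Fruit::kPear);
  // In Python: demo.Fruit.PEAR, int(demo.Fruit.PEAR) == 1
}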
@@ -538,7 +566,8 @@ void BindPaddlePlace(py::module *m) {
       .value("CPU", PaddlePlace::kCPU)
       .value("GPU", PaddlePlace::kGPU)
       .value("XPU", PaddlePlace::kXPU)
-      .value("NPU", PaddlePlace::kNPU);
+      .value("NPU", PaddlePlace::kNPU)
+      .value("CUSTOM", PaddlePlace::kCUSTOM);
 }
 
 void BindPaddlePredictor(py::module *m) {
@@ -990,10 +1019,13 @@ void BindZeroCopyTensor(py::module *m) {
       .def("reshape",
            py::overload_cast<const std::size_t &>(
                &paddle_infer::Tensor::ReshapeStrings))
+      .def("copy_from_cpu", &ZeroCopyTensorCreate<int8_t>)
+      .def("copy_from_cpu", &ZeroCopyTensorCreate<uint8_t>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
       .def("copy_from_cpu", &ZeroCopyTensorCreate<paddle_infer::float16>)
+      .def("copy_from_cpu", &ZeroCopyTensorCreate<bool>)
       .def("copy_from_cpu", &ZeroCopyStringTensorCreate)
       .def("copy_to_cpu", &ZeroCopyTensorToNumpy)
       .def("shape", &ZeroCopyTensor::shape)
@@ -1010,11 +1042,14 @@ void BindPaddleInferTensor(py::module *m) {
       .def("reshape",
            py::overload_cast<const std::size_t &>(
                &paddle_infer::Tensor::ReshapeStrings))
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int8_t>)
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<uint8_t>)
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
       .def("copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
       .def("copy_from_cpu_bind",
            &PaddleInferTensorCreate<paddle_infer::float16>)
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<bool>)
       .def("copy_from_cpu_bind", &PaddleInferStringTensorCreate)
       .def("share_external_data_bind", &PaddleInferShareExternalData)
       .def("copy_to_cpu", &PaddleInferTensorToNumpy)
...
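One detail worth noting about the stacked copy_from_cpu/copy_from_cpu_bind definitions: pybind11 treats repeated .def calls with the same name as overloads and tries them in registration order, first without implicit conversions and then with conversions allowed, so the dtype of the incoming NumPy array selects the matching TensorCreate<T> specialization; without the <bool> overload, a bool array would previously have been force-cast to one of the numeric overloads. A minimal sketch of that dispatch behavior, with invented names:

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;

PYBIND11_MODULE(dispatch_demo, m) {
  // Registration order matters: pybind11 tries these in sequence.
  m.def("kind", [](py::array_t<int32_t> a) { return "int32"; });
  m.def("kind", [](py::array_t<float> a) { return "float32"; });
  m.def("kind", [](py::array_t<bool> a) { return "bool"; });
  // >>> dispatch_demo.kind(np.zeros(3, np.bool_))  ->  'bool'
  // Without the bool overload, the same call would be force-cast to int32
  // in pybind11's second, conversion-allowing resolution pass.
}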