From a765eb2694f2f02220730bcf88ffe04ec0b5649b Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Wed, 22 Mar 2023 12:36:21 +0800 Subject: [PATCH] inference support double data type (#51786) --- .../inference/api/details/zero_copy_tensor.cc | 33 +++++- .../inference/api/paddle_infer_contrib.cc | 20 +++- paddle/fluid/inference/api/paddle_tensor.h | 1 + .../paddle_infer_api_copy_tensor_tester.cc | 7 ++ paddle/fluid/pybind/inference_api.cc | 48 ++++++-- .../unittests/ir/test_inference_datatype.py | 103 ++++++++++++++++++ 6 files changed, 194 insertions(+), 18 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index c957876bf7c..0a0a27bb6a6 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -172,7 +172,9 @@ DataType Tensor::type() const { #endif EAGER_GET_TENSOR(phi::DenseTensor); auto type = paddle::framework::TransToProtoVarType(tensor->dtype()); - if (type == paddle::framework::proto::VarType::FP32) { + if (type == paddle::framework::proto::VarType::FP64) { + return DataType::FLOAT64; + } else if (type == paddle::framework::proto::VarType::FP32) { return DataType::FLOAT32; } else if (type == paddle::framework::proto::VarType::FP16) { return DataType::FLOAT16; @@ -292,8 +294,8 @@ template struct DataTypeInfo; template <> -struct DataTypeInfo { - phi::DataType TYPE = phi::DataType::BOOL; +struct DataTypeInfo { + phi::DataType TYPE = phi::DataType::FLOAT64; }; template <> @@ -326,6 +328,11 @@ struct DataTypeInfo { phi::DataType TYPE = phi::DataType::INT32; }; +template <> +struct DataTypeInfo { + phi::DataType TYPE = phi::DataType::BOOL; +}; + phi::DataLayout LayoutConvert(DataLayout layout) { PADDLE_ENFORCE_EQ( layout, @@ -524,6 +531,7 @@ void Tensor::CopyToCpuAsync(T *data, CallbackFunc cb, void *cb_params) const { CopyToCpuImpl(data, nullptr, cb, cb_params); } +template PD_INFER_DECL void Tensor::CopyFromCpu(const double *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const float *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int64_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int32_t *data); @@ -532,6 +540,11 @@ template PD_INFER_DECL void Tensor::CopyFromCpu(const int8_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const float16 *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const bool *data); +template PD_INFER_DECL void Tensor::ShareExternalData( + const double *data, + const std::vector &shape, + PlaceType place, + DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( const float *data, const std::vector &shape, @@ -568,6 +581,7 @@ template PD_INFER_DECL void Tensor::ShareExternalData( PlaceType place, DataLayout layout); +template PD_INFER_DECL void Tensor::CopyToCpu(double *data) const; template PD_INFER_DECL void Tensor::CopyToCpu(float *data) const; template PD_INFER_DECL void Tensor::CopyToCpu(int64_t *data) const; template PD_INFER_DECL void Tensor::CopyToCpu(int32_t *data) const; @@ -576,6 +590,8 @@ template PD_INFER_DECL void Tensor::CopyToCpu(int8_t *data) const; template PD_INFER_DECL void Tensor::CopyToCpu(float16 *data) const; template PD_INFER_DECL void Tensor::CopyToCpu(bool *data) const; +template PD_INFER_DECL void Tensor::CopyToCpuImpl( + double *data, void *exec_stream, CallbackFunc cb, void *cb_params) const; template PD_INFER_DECL void Tensor::CopyToCpuImpl(float *data, void *exec_stream, CallbackFunc cb, @@ -595,6 +611,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuImpl(bool *data, CallbackFunc cb, void *cb_params) const; +template PD_INFER_DECL void Tensor::CopyToCpuAsync( + double *data, void *exec_stream) const; template PD_INFER_DECL void Tensor::CopyToCpuAsync( float *data, void *exec_stream) const; template PD_INFER_DECL void Tensor::CopyToCpuAsync( @@ -610,6 +628,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync( template PD_INFER_DECL void Tensor::CopyToCpuAsync( bool *data, void *exec_stream) const; +template PD_INFER_DECL void Tensor::CopyToCpuAsync( + double *data, CallbackFunc cb, void *cb_params) const; template PD_INFER_DECL void Tensor::CopyToCpuAsync( float *data, CallbackFunc cb, void *cb_params) const; template PD_INFER_DECL void Tensor::CopyToCpuAsync( @@ -626,6 +646,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync(bool *data, CallbackFunc cb, void *cb_params) const; +template PD_INFER_DECL double *Tensor::data(PlaceType *place, + int *size) const; template PD_INFER_DECL float *Tensor::data(PlaceType *place, int *size) const; template PD_INFER_DECL int64_t *Tensor::data(PlaceType *place, @@ -641,6 +663,7 @@ template PD_INFER_DECL float16 *Tensor::data(PlaceType *place, template PD_INFER_DECL bool *Tensor::data(PlaceType *place, int *size) const; +template PD_INFER_DECL double *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL float *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL int64_t *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL int32_t *Tensor::mutable_data(PlaceType place); @@ -922,6 +945,8 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, } } +template void InternalUtils::CopyFromCpuWithIoStream( + paddle_infer::Tensor *t, const double *data, cudaStream_t stream); template void InternalUtils::CopyFromCpuWithIoStream( paddle_infer::Tensor *t, const float *data, cudaStream_t stream); template void InternalUtils::CopyFromCpuWithIoStream( @@ -937,6 +962,8 @@ template void InternalUtils::CopyFromCpuWithIoStream( template void InternalUtils::CopyFromCpuWithIoStream( paddle_infer::Tensor *t, const bool *data, cudaStream_t stream); +template void InternalUtils::CopyToCpuWithIoStream( + paddle_infer::Tensor *t, double *data, cudaStream_t stream); template void InternalUtils::CopyToCpuWithIoStream( paddle_infer::Tensor *t, float *data, cudaStream_t stream); template void InternalUtils::CopyToCpuWithIoStream( diff --git a/paddle/fluid/inference/api/paddle_infer_contrib.cc b/paddle/fluid/inference/api/paddle_infer_contrib.cc index b33e07b236b..11786b05c30 100644 --- a/paddle/fluid/inference/api/paddle_infer_contrib.cc +++ b/paddle/fluid/inference/api/paddle_infer_contrib.cc @@ -71,6 +71,12 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst, cb, cb_params); break; + case PaddleDType::FLOAT64: + src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), + exec_stream, + cb, + cb_params); + break; case PaddleDType::FLOAT32: src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), exec_stream, @@ -104,8 +110,8 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst, break; default: PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16 and " - "FLOAT32 is supported in Tensor. Others not implements")); + "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and " + "FLOAT64 is supported in Tensor. Others not implements")); } // gpu => gpu or cpu => gpu } else { @@ -130,6 +136,12 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst, static_cast(src.data(&src_place, &data_size)); data_len = data_size * sizeof(int64_t); break; + case PaddleDType::FLOAT64: + dst_data = + static_cast(dst.mutable_data(PlaceType::kGPU)); + src_data = static_cast(src.data(&src_place, &data_size)); + data_len = data_size * sizeof(double); + break; case PaddleDType::FLOAT32: dst_data = static_cast(dst.mutable_data(PlaceType::kGPU)); src_data = static_cast(src.data(&src_place, &data_size)); @@ -162,8 +174,8 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst, break; default: PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16 and " - "FLOAT32 is supported in Tensor. Others not implements")); + "Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and " + "FLOAT64 is supported in Tensor. Others not implements")); } paddle::platform::DeviceContextPool& pool = diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index 993a04baab9..7c61d4098b4 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -56,6 +56,7 @@ enum DataType { INT32, UINT8, INT8, + FLOAT64, FLOAT32, FLOAT16, BOOL, diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc index 85b778769c6..74ce8260564 100644 --- a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc +++ b/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc @@ -205,6 +205,13 @@ static void test_copy_tensor(PlaceType src_place, PlaceType dst_place) { } } +TEST(CopyTensor, float64) { + test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); + test_copy_tensor(PlaceType::kCPU, PlaceType::kGPU); + test_copy_tensor(PlaceType::kGPU, PlaceType::kCPU); + test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); +} + TEST(CopyTensor, float32) { test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); test_copy_tensor(PlaceType::kCPU, PlaceType::kGPU); diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 94be85af508..634013c4fac 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -175,6 +175,9 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { case PaddleDType::INT64: dt = py::dtype::of(); break; + case PaddleDType::FLOAT64: + dt = py::dtype::of(); + break; case PaddleDType::FLOAT32: dt = py::dtype::of(); break; @@ -192,8 +195,8 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { break; default: PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data type. Now only supports INT32, INT64, FLOAT32, " - "FLOAT16, INT8, UINT8 and BOOL.")); + "Unsupported data type. Now only supports INT32, INT64, FLOAT64, " + "FLOAT32, FLOAT16, INT8, UINT8 and BOOL.")); } return dt; @@ -252,7 +255,12 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT for (int i = 0; i < input_tensor.dims().size(); ++i) { shape.push_back(input_tensor.dims()[i]); } - if (input_tensor.dtype() == phi::DataType::FLOAT32) { + if (input_tensor.dtype() == phi::DataType::FLOAT64) { + tensor.ShareExternalData( + static_cast(input_tensor.data()), + shape, + ToPaddleInferPlace(input_tensor.place().GetType())); + } else if (input_tensor.dtype() == phi::DataType::FLOAT32) { tensor.ShareExternalData( static_cast(input_tensor.data()), shape, @@ -275,7 +283,7 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT } else { PADDLE_THROW(platform::errors::Unimplemented( "Unsupported data type. Now share_external_data only supports INT32, " - "INT64, FLOAT32 and FLOAT16.")); + "INT64, FLOAT64, FLOAT32 and FLOAT16.")); } } @@ -285,7 +293,13 @@ void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT for (int i = 0; i < paddle_tensor.dims().size(); ++i) { shape.push_back(paddle_tensor.dims()[i]); } - if (paddle_tensor.dtype() == phi::DataType::FLOAT32) { + + if (paddle_tensor.dtype() == phi::DataType::FLOAT64) { + tensor.ShareExternalData( + static_cast(paddle_tensor.data()), + shape, + ToPaddleInferPlace(paddle_tensor.place().GetType())); + } else if (paddle_tensor.dtype() == phi::DataType::FLOAT32) { tensor.ShareExternalData( static_cast(paddle_tensor.data()), shape, @@ -335,6 +349,9 @@ size_t PaddleGetDTypeSize(PaddleDType dt) { case PaddleDType::INT64: size = sizeof(int64_t); break; + case PaddleDType::FLOAT64: + size = sizeof(double); + break; case PaddleDType::FLOAT32: size = sizeof(float); break; @@ -352,8 +369,8 @@ size_t PaddleGetDTypeSize(PaddleDType dt) { break; default: PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data t ype. Now only supports INT32, INT64, FLOAT32, " - "FLOAT16, INT8, UINT8 and BOOL.")); + "Unsupported data t ype. Now only supports INT32, INT64, FLOAT64, " + "FLOAT32, FLOAT16, INT8, UINT8 and BOOL.")); } return size; } @@ -371,6 +388,9 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT case PaddleDType::INT64: tensor.copy_to_cpu(static_cast(array.mutable_data())); break; + case PaddleDType::FLOAT64: + tensor.copy_to_cpu(static_cast(array.mutable_data())); + break; case PaddleDType::FLOAT32: tensor.copy_to_cpu(static_cast(array.mutable_data())); break; @@ -389,8 +409,8 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT break; default: PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data type. Now only supports INT32, INT64, FLOAT32, " - "FLOAT16, INT8, UINT8 and BOOL.")); + "Unsupported data type. Now only supports INT32, INT64, FLOAT64, " + "FLOAT32, FLOAT16, INT8, UINT8 and BOOL.")); } return array; } @@ -408,6 +428,9 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT case PaddleDType::INT64: tensor.CopyToCpu(static_cast(array.mutable_data())); break; + case PaddleDType::FLOAT64: + tensor.CopyToCpu(static_cast(array.mutable_data())); + break; case PaddleDType::FLOAT32: tensor.CopyToCpu(static_cast(array.mutable_data())); break; @@ -426,8 +449,8 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT break; default: PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data t ype. Now only supports INT32, INT64, FLOAT32, " - "FLOAT16, INT8, UINT8 and BOOL.")); + "Unsupported data t ype. Now only supports INT32, INT64, FLOAT64, " + "FLOAT32, FLOAT16, INT8, UINT8 and BOOL.")); } return array; } @@ -506,6 +529,7 @@ void BindInferenceApi(py::module *m) { namespace { void BindPaddleDType(py::module *m) { py::enum_(*m, "PaddleDType") + .value("FLOAT64", PaddleDType::FLOAT64) .value("FLOAT32", PaddleDType::FLOAT32) .value("FLOAT16", PaddleDType::FLOAT16) .value("INT64", PaddleDType::INT64) @@ -1075,6 +1099,7 @@ void BindZeroCopyTensor(py::module *m) { .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) @@ -1098,6 +1123,7 @@ void BindPaddleInferTensor(py::module *m) { .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) + .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) diff --git a/python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py b/python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py new file mode 100644 index 00000000000..df855149749 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py @@ -0,0 +1,103 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import unittest + +import numpy as np + +import paddle +from paddle.inference import Config, DataType, create_predictor + +paddle.set_default_dtype('float64') + + +class TestNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.fc1 = paddle.nn.Linear(4, 4) + self.fc2 = paddle.nn.Linear(4, 4) + + def forward(self, x): + out = self.fc1(x) + out = self.fc2(out) + out = paddle.nn.functional.relu(out) + return out + + +@unittest.skipIf( + not paddle.is_compiled_with_cuda(), 'should compile with cuda.' +) +class TestDoubleOnGPU(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + net = TestNet() + model = paddle.jit.to_static( + net, + input_spec=[ + paddle.static.InputSpec(shape=[None, 4], dtype='float64') + ], + ) + paddle.jit.save( + model, + os.path.join( + self.temp_dir.name, 'test_inference_datatype_model/inference' + ), + ) + + def tearDown(self): + self.temp_dir.cleanup() + + def init_predictor(self): + config = Config( + os.path.join( + self.temp_dir.name, + 'test_inference_datatype_model/inference.pdmodel', + ), + os.path.join( + self.temp_dir.name, + 'test_inference_datatype_model/inference.pdiparams', + ), + ) + config.enable_use_gpu(256, 0) + config.enable_memory_optim() + # NOTE(liuyuanle): Because double computing is not supported in our pass implementation, + # we need to turn off IR optimization. + config.switch_ir_optim(False) + predictor = create_predictor(config) + return predictor + + def test_output(self): + predictor = self.init_predictor() + + input = np.ones((3, 4)).astype(np.float64) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_handle(input_names[0]) + input_tensor.reshape(input.shape) + input_tensor.copy_from_cpu(input.copy()) + assert input_tensor.type() == DataType.FLOAT64 + + predictor.run() + + output_names = predictor.get_output_names() + output_tensor = predictor.get_output_handle(output_names[0]) + assert output_tensor.type() == DataType.FLOAT64 + + output_data = output_tensor.copy_to_cpu() + + +if __name__ == '__main__': + unittest.main() -- GitLab