Unverified · commit a765eb26 · authored by Yuanle Liu · committed by GitHub

inference support double data type (#51786)

Parent 586d9018
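For orientation before the diff hunks: the change instantiates the C++ inference Tensor API for double (CopyFromCpu&lt;double&gt;, CopyToCpu&lt;double&gt;, ShareExternalData&lt;double&gt;, a FLOAT64 enum value) and exposes the type through the Python bindings. Below is a minimal, hedged C++ sketch of how a caller might drive the new instantiations; the model paths, shapes, and GPU settings are assumptions for illustration only and mirror the Python test added at the end of this diff.

// Hedged usage sketch (not part of the commit): exercising the new double
// instantiations through the public paddle_infer C++ API.
#include <string>
#include <vector>
#include "paddle_inference_api.h"

void RunDoubleInference() {
  // Assumed model files exported with float64 parameters.
  paddle_infer::Config config("inference.pdmodel", "inference.pdiparams");
  config.EnableUseGpu(256, 0);
  // Double is not handled by the IR passes (see the NOTE in the Python test
  // below), so IR optimization is switched off.
  config.SwitchIrOptim(false);
  auto predictor = paddle_infer::CreatePredictor(config);

  std::vector<double> input(3 * 4, 1.0);
  auto input_names = predictor->GetInputNames();
  auto in = predictor->GetInputHandle(input_names[0]);
  in->Reshape({3, 4});
  in->CopyFromCpu(input.data());  // uses the new Tensor::CopyFromCpu<double>

  predictor->Run();

  auto output_names = predictor->GetOutputNames();
  auto out = predictor->GetOutputHandle(output_names[0]);
  int numel = 1;
  for (int d : out->shape()) numel *= d;
  std::vector<double> output(numel);
  out->CopyToCpu(output.data());  // uses the new Tensor::CopyToCpu<double>
}

The Python test at the end of the diff performs the same float64 round trip through the pybind layer.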
......@@ -172,7 +172,9 @@ DataType Tensor::type() const {
#endif
EAGER_GET_TENSOR(phi::DenseTensor);
auto type = paddle::framework::TransToProtoVarType(tensor->dtype());
if (type == paddle::framework::proto::VarType::FP32) {
if (type == paddle::framework::proto::VarType::FP64) {
return DataType::FLOAT64;
} else if (type == paddle::framework::proto::VarType::FP32) {
return DataType::FLOAT32;
} else if (type == paddle::framework::proto::VarType::FP16) {
return DataType::FLOAT16;
......@@ -292,8 +294,8 @@ template <typename T>
struct DataTypeInfo;
template <>
struct DataTypeInfo<bool> {
phi::DataType TYPE = phi::DataType::BOOL;
struct DataTypeInfo<double> {
phi::DataType TYPE = phi::DataType::FLOAT64;
};
template <>
......@@ -326,6 +328,11 @@ struct DataTypeInfo<int32_t> {
phi::DataType TYPE = phi::DataType::INT32;
};
template <>
struct DataTypeInfo<bool> {
phi::DataType TYPE = phi::DataType::BOOL;
};
phi::DataLayout LayoutConvert(DataLayout layout) {
PADDLE_ENFORCE_EQ(
layout,
......@@ -524,6 +531,7 @@ void Tensor::CopyToCpuAsync(T *data, CallbackFunc cb, void *cb_params) const {
CopyToCpuImpl<T>(data, nullptr, cb, cb_params);
}
template PD_INFER_DECL void Tensor::CopyFromCpu<double>(const double *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
......@@ -532,6 +540,11 @@ template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<bool>(const bool *data);
template PD_INFER_DECL void Tensor::ShareExternalData<double>(
const double *data,
const std::vector<int> &shape,
PlaceType place,
DataLayout layout);
template PD_INFER_DECL void Tensor::ShareExternalData<float>(
const float *data,
const std::vector<int> &shape,
......@@ -568,6 +581,7 @@ template PD_INFER_DECL void Tensor::ShareExternalData<bool>(
PlaceType place,
DataLayout layout);
template PD_INFER_DECL void Tensor::CopyToCpu<double>(double *data) const;
template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data) const;
template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data) const;
template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data) const;
......@@ -576,6 +590,8 @@ template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data) const;
template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data) const;
template PD_INFER_DECL void Tensor::CopyToCpu<bool>(bool *data) const;
template PD_INFER_DECL void Tensor::CopyToCpuImpl<double>(
double *data, void *exec_stream, CallbackFunc cb, void *cb_params) const;
template PD_INFER_DECL void Tensor::CopyToCpuImpl<float>(float *data,
void *exec_stream,
CallbackFunc cb,
......@@ -595,6 +611,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuImpl<bool>(bool *data,
CallbackFunc cb,
void *cb_params) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<double>(
double *data, void *exec_stream) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
float *data, void *exec_stream) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<int64_t>(
......@@ -610,6 +628,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync<float16>(
template PD_INFER_DECL void Tensor::CopyToCpuAsync<bool>(
bool *data, void *exec_stream) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<double>(
double *data, CallbackFunc cb, void *cb_params) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
float *data, CallbackFunc cb, void *cb_params) const;
template PD_INFER_DECL void Tensor::CopyToCpuAsync<int64_t>(
......@@ -626,6 +646,8 @@ template PD_INFER_DECL void Tensor::CopyToCpuAsync<bool>(bool *data,
CallbackFunc cb,
void *cb_params) const;
template PD_INFER_DECL double *Tensor::data<double>(PlaceType *place,
int *size) const;
template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
int *size) const;
template PD_INFER_DECL int64_t *Tensor::data<int64_t>(PlaceType *place,
......@@ -641,6 +663,7 @@ template PD_INFER_DECL float16 *Tensor::data<float16>(PlaceType *place,
template PD_INFER_DECL bool *Tensor::data<bool>(PlaceType *place,
int *size) const;
template PD_INFER_DECL double *Tensor::mutable_data<double>(PlaceType place);
template PD_INFER_DECL float *Tensor::mutable_data<float>(PlaceType place);
template PD_INFER_DECL int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
template PD_INFER_DECL int32_t *Tensor::mutable_data<int32_t>(PlaceType place);
......@@ -922,6 +945,8 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t,
}
}
template void InternalUtils::CopyFromCpuWithIoStream<double>(
paddle_infer::Tensor *t, const double *data, cudaStream_t stream);
template void InternalUtils::CopyFromCpuWithIoStream<float>(
paddle_infer::Tensor *t, const float *data, cudaStream_t stream);
template void InternalUtils::CopyFromCpuWithIoStream<int64_t>(
......@@ -937,6 +962,8 @@ template void InternalUtils::CopyFromCpuWithIoStream<float16>(
template void InternalUtils::CopyFromCpuWithIoStream<bool>(
paddle_infer::Tensor *t, const bool *data, cudaStream_t stream);
template void InternalUtils::CopyToCpuWithIoStream<double>(
paddle_infer::Tensor *t, double *data, cudaStream_t stream);
template void InternalUtils::CopyToCpuWithIoStream<float>(
paddle_infer::Tensor *t, float *data, cudaStream_t stream);
template void InternalUtils::CopyToCpuWithIoStream<int64_t>(
......
......@@ -71,6 +71,12 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst,
cb,
cb_params);
break;
case PaddleDType::FLOAT64:
src.CopyToCpuImpl(dst.mutable_data<double>(PlaceType::kCPU),
exec_stream,
cb,
cb_params);
break;
case PaddleDType::FLOAT32:
src.CopyToCpuImpl(dst.mutable_data<float>(PlaceType::kCPU),
exec_stream,
......@@ -104,8 +110,8 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst,
break;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16 and "
"FLOAT32 is supported in Tensor. Others not implements"));
"Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and "
"FLOAT64 is supported in Tensor. Others not implements"));
}
// gpu => gpu or cpu => gpu
} else {
......@@ -130,6 +136,12 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst,
static_cast<void*>(src.data<int64_t>(&src_place, &data_size));
data_len = data_size * sizeof(int64_t);
break;
case PaddleDType::FLOAT64:
dst_data =
static_cast<void*>(dst.mutable_data<double>(PlaceType::kGPU));
src_data = static_cast<void*>(src.data<double>(&src_place, &data_size));
data_len = data_size * sizeof(double);
break;
case PaddleDType::FLOAT32:
dst_data = static_cast<void*>(dst.mutable_data<float>(PlaceType::kGPU));
src_data = static_cast<void*>(src.data<float>(&src_place, &data_size));
......@@ -162,8 +174,8 @@ void TensorUtils::CopyTensorImpl(Tensor* p_dst,
break;
default:
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16 and "
"FLOAT32 is supported in Tensor. Others not implements"));
"Only INT32, INT64, UINT8, INT8, BOOL, FLOAT16, FLOAT32 and "
"FLOAT64 is supported in Tensor. Others not implements"));
}
paddle::platform::DeviceContextPool& pool =
......
......@@ -56,6 +56,7 @@ enum DataType {
INT32,
UINT8,
INT8,
FLOAT64,
FLOAT32,
FLOAT16,
BOOL,
......
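Because FLOAT64 is inserted into the public paddle_infer::DataType enum above, downstream code that switches over DataType needs a matching case. A hedged sketch (not part of the commit) of the kind of caller-side element-size helper that has to grow a FLOAT64 branch, in the spirit of the PaddleGetDTypeSize change in the pybind hunks further down:

// Hedged sketch, not from the commit: a helper that sizes a host buffer
// before Tensor::CopyToCpu must now cover FLOAT64 as well.
#include <cstddef>
#include <cstdint>
#include "paddle_inference_api.h"

size_t ElementSize(paddle_infer::DataType dt) {
  switch (dt) {
    case paddle_infer::DataType::FLOAT64: return sizeof(double);  // new in this commit
    case paddle_infer::DataType::FLOAT32: return sizeof(float);
    case paddle_infer::DataType::FLOAT16: return 2;               // half precision
    case paddle_infer::DataType::INT64:   return sizeof(int64_t);
    case paddle_infer::DataType::INT32:   return sizeof(int32_t);
    case paddle_infer::DataType::UINT8:   return sizeof(uint8_t);
    case paddle_infer::DataType::INT8:    return sizeof(int8_t);
    case paddle_infer::DataType::BOOL:    return sizeof(bool);
  }
  return 0;  // unreachable for the enumerators handled above
}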
......@@ -205,6 +205,13 @@ static void test_copy_tensor(PlaceType src_place, PlaceType dst_place) {
}
}
TEST(CopyTensor, float64) {
test_copy_tensor<double>(PlaceType::kCPU, PlaceType::kCPU);
test_copy_tensor<double>(PlaceType::kCPU, PlaceType::kGPU);
test_copy_tensor<double>(PlaceType::kGPU, PlaceType::kCPU);
test_copy_tensor<double>(PlaceType::kGPU, PlaceType::kGPU);
}
TEST(CopyTensor, float32) {
test_copy_tensor<float>(PlaceType::kCPU, PlaceType::kCPU);
test_copy_tensor<float>(PlaceType::kCPU, PlaceType::kGPU);
......
......@@ -175,6 +175,9 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
case PaddleDType::INT64:
dt = py::dtype::of<int64_t>();
break;
case PaddleDType::FLOAT64:
dt = py::dtype::of<double>();
break;
case PaddleDType::FLOAT32:
dt = py::dtype::of<float>();
break;
......@@ -192,8 +195,8 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
"FLOAT16, INT8, UINT8 and BOOL."));
"Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
"FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
}
return dt;
......@@ -252,7 +255,12 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
for (int i = 0; i < input_tensor.dims().size(); ++i) {
shape.push_back(input_tensor.dims()[i]);
}
if (input_tensor.dtype() == phi::DataType::FLOAT32) {
if (input_tensor.dtype() == phi::DataType::FLOAT64) {
tensor.ShareExternalData(
static_cast<double *>(input_tensor.data()),
shape,
ToPaddleInferPlace(input_tensor.place().GetType()));
} else if (input_tensor.dtype() == phi::DataType::FLOAT32) {
tensor.ShareExternalData(
static_cast<float *>(input_tensor.data()),
shape,
......@@ -275,7 +283,7 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type. Now share_external_data only supports INT32, "
"INT64, FLOAT32 and FLOAT16."));
"INT64, FLOAT64, FLOAT32 and FLOAT16."));
}
}
......@@ -285,7 +293,13 @@ void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
for (int i = 0; i < paddle_tensor.dims().size(); ++i) {
shape.push_back(paddle_tensor.dims()[i]);
}
if (paddle_tensor.dtype() == phi::DataType::FLOAT32) {
if (paddle_tensor.dtype() == phi::DataType::FLOAT64) {
tensor.ShareExternalData(
static_cast<double *>(paddle_tensor.data<double>()),
shape,
ToPaddleInferPlace(paddle_tensor.place().GetType()));
} else if (paddle_tensor.dtype() == phi::DataType::FLOAT32) {
tensor.ShareExternalData(
static_cast<float *>(paddle_tensor.data<float>()),
shape,
......@@ -335,6 +349,9 @@ size_t PaddleGetDTypeSize(PaddleDType dt) {
case PaddleDType::INT64:
size = sizeof(int64_t);
break;
case PaddleDType::FLOAT64:
size = sizeof(double);
break;
case PaddleDType::FLOAT32:
size = sizeof(float);
break;
......@@ -352,8 +369,8 @@ size_t PaddleGetDTypeSize(PaddleDType dt) {
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data t ype. Now only supports INT32, INT64, FLOAT32, "
"FLOAT16, INT8, UINT8 and BOOL."));
"Unsupported data t ype. Now only supports INT32, INT64, FLOAT64, "
"FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
}
return size;
}
......@@ -371,6 +388,9 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT
case PaddleDType::INT64:
tensor.copy_to_cpu(static_cast<int64_t *>(array.mutable_data()));
break;
case PaddleDType::FLOAT64:
tensor.copy_to_cpu<double>(static_cast<double *>(array.mutable_data()));
break;
case PaddleDType::FLOAT32:
tensor.copy_to_cpu<float>(static_cast<float *>(array.mutable_data()));
break;
......@@ -389,8 +409,8 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data type. Now only supports INT32, INT64, FLOAT32, "
"FLOAT16, INT8, UINT8 and BOOL."));
"Unsupported data type. Now only supports INT32, INT64, FLOAT64, "
"FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
}
return array;
}
......@@ -408,6 +428,9 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT
case PaddleDType::INT64:
tensor.CopyToCpu(static_cast<int64_t *>(array.mutable_data()));
break;
case PaddleDType::FLOAT64:
tensor.CopyToCpu<double>(static_cast<double *>(array.mutable_data()));
break;
case PaddleDType::FLOAT32:
tensor.CopyToCpu<float>(static_cast<float *>(array.mutable_data()));
break;
......@@ -426,8 +449,8 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported data t ype. Now only supports INT32, INT64, FLOAT32, "
"FLOAT16, INT8, UINT8 and BOOL."));
"Unsupported data t ype. Now only supports INT32, INT64, FLOAT64, "
"FLOAT32, FLOAT16, INT8, UINT8 and BOOL."));
}
return array;
}
......@@ -506,6 +529,7 @@ void BindInferenceApi(py::module *m) {
namespace {
void BindPaddleDType(py::module *m) {
py::enum_<PaddleDType>(*m, "PaddleDType")
.value("FLOAT64", PaddleDType::FLOAT64)
.value("FLOAT32", PaddleDType::FLOAT32)
.value("FLOAT16", PaddleDType::FLOAT16)
.value("INT64", PaddleDType::INT64)
......@@ -1075,6 +1099,7 @@ void BindZeroCopyTensor(py::module *m) {
.def("copy_from_cpu", &ZeroCopyTensorCreate<uint8_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<double>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<paddle_infer::float16>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<bool>)
......@@ -1098,6 +1123,7 @@ void BindPaddleInferTensor(py::module *m) {
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<uint8_t>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<double>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
.def("_copy_from_cpu_bind",
&PaddleInferTensorCreate<paddle_infer::float16>)
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest

import numpy as np

import paddle
from paddle.inference import Config, DataType, create_predictor

paddle.set_default_dtype('float64')


class TestNet(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc1 = paddle.nn.Linear(4, 4)
        self.fc2 = paddle.nn.Linear(4, 4)

    def forward(self, x):
        out = self.fc1(x)
        out = self.fc2(out)
        out = paddle.nn.functional.relu(out)
        return out


@unittest.skipIf(
    not paddle.is_compiled_with_cuda(), 'should compile with cuda.'
)
class TestDoubleOnGPU(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()
        net = TestNet()
        model = paddle.jit.to_static(
            net,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 4], dtype='float64')
            ],
        )
        paddle.jit.save(
            model,
            os.path.join(
                self.temp_dir.name, 'test_inference_datatype_model/inference'
            ),
        )

    def tearDown(self):
        self.temp_dir.cleanup()

    def init_predictor(self):
        config = Config(
            os.path.join(
                self.temp_dir.name,
                'test_inference_datatype_model/inference.pdmodel',
            ),
            os.path.join(
                self.temp_dir.name,
                'test_inference_datatype_model/inference.pdiparams',
            ),
        )
        config.enable_use_gpu(256, 0)
        config.enable_memory_optim()
        # NOTE(liuyuanle): Because double computing is not supported in our pass implementation,
        # we need to turn off IR optimization.
        config.switch_ir_optim(False)
        predictor = create_predictor(config)
        return predictor

    def test_output(self):
        predictor = self.init_predictor()

        input = np.ones((3, 4)).astype(np.float64)

        input_names = predictor.get_input_names()
        input_tensor = predictor.get_input_handle(input_names[0])
        input_tensor.reshape(input.shape)
        input_tensor.copy_from_cpu(input.copy())
        assert input_tensor.type() == DataType.FLOAT64

        predictor.run()

        output_names = predictor.get_output_names()
        output_tensor = predictor.get_output_handle(output_names[0])
        assert output_tensor.type() == DataType.FLOAT64
        output_data = output_tensor.copy_to_cpu()


if __name__ == '__main__':
    unittest.main()