Unverified commit b5404f09, authored by W Wilber, committed by GitHub

[cherry-pick] Inference add type check in copy_from_cpu (#36552)

Parent 023eb3f9
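The patch below routes the Python-side `copy_from_cpu` through a type-checked wrapper (the pybind binding is renamed to `copy_from_cpu_bind`) and exposes TensorRT compile/runtime version queries. A minimal sketch of the user-visible behaviour after this change; the model path and config setup here are illustrative, not part of the patch:

import numpy as np
from paddle.inference import Config, create_predictor

config = Config("./sample_model_dir")  # illustrative model path
predictor = create_predictor(config)

input_name = predictor.get_input_names()[0]
input_handle = predictor.get_input_handle(input_name)

# A numpy ndarray is copied as before.
input_handle.copy_from_cpu(np.ones((1, 6, 64, 64), dtype=np.float32))

# Any other type (e.g. a plain list) now raises TypeError in the Python wrapper.
input_handle.copy_from_cpu([[1.0, 2.0, 3.0]])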
@@ -36,6 +36,7 @@
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/fluid/inference/utils/singleton.h"
@@ -56,6 +57,7 @@
 #if PADDLE_WITH_TENSORRT
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
 #endif
@@ -1469,6 +1471,22 @@ int GetNumBytesOfDataType(DataType dtype) {
 std::string GetVersion() { return paddle::get_version(); }
+std::tuple<int, int, int> GetTrtCompileVersion() {
+#ifdef PADDLE_WITH_TENSORRT
+  return paddle::inference::tensorrt::GetTrtCompileVersion();
+#else
+  return std::tuple<int, int, int>{0, 0, 0};
+#endif
+}
+std::tuple<int, int, int> GetTrtRuntimeVersion() {
+#ifdef PADDLE_WITH_TENSORRT
+  return paddle::inference::tensorrt::GetTrtRuntimeVersion();
+#else
+  return std::tuple<int, int, int>{0, 0, 0};
+#endif
+}
 std::string UpdateDllFlag(const char *name, const char *value) {
   return paddle::UpdateDllFlag(name, value);
 }
...
@@ -359,6 +359,15 @@ TEST(AnalysisPredictor, set_xpu_device_id) {
 namespace paddle_infer {
 TEST(Predictor, Run) {
+  auto trt_compile_ver = GetTrtCompileVersion();
+  auto trt_runtime_ver = GetTrtRuntimeVersion();
+  LOG(INFO) << "trt compile version: " << std::get<0>(trt_compile_ver) << "."
+            << std::get<1>(trt_compile_ver) << "."
+            << std::get<2>(trt_compile_ver);
+  LOG(INFO) << "trt runtime version: " << std::get<0>(trt_runtime_ver) << "."
+            << std::get<1>(trt_runtime_ver) << "."
+            << std::get<2>(trt_runtime_ver);
   Config config;
   config.SetModel(FLAGS_dirname);
...
@@ -169,6 +169,8 @@ PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
 PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
 PD_INFER_DECL std::string GetVersion();
+PD_INFER_DECL std::tuple<int, int, int> GetTrtCompileVersion();
+PD_INFER_DECL std::tuple<int, int, int> GetTrtRuntimeVersion();
 PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
 namespace services {
...
@@ -196,6 +196,19 @@ void TensorRTEngine::FreezeNetwork() {
 #if IS_TRT_VERSION_GE(6000)
     LOG(INFO) << "Run Paddle-TRT Dynamic Shape mode.";
     for (auto &input : min_input_shape_) {
+#if IS_TRT_VERSION_LT(7000)
+      // TRT 6 requires every min/max/opt dim to be > 0; skip inputs whose
+      // dynamic-shape profile is not fully specified.
+      if (!(std::all_of(input.second.begin(), input.second.end(),
+                        [](int x) { return x > 0; }) &&
+            std::all_of(max_input_shape_[input.first].begin(),
+                        max_input_shape_[input.first].end(),
+                        [](int x) { return x > 0; }) &&
+            std::all_of(optim_input_shape_[input.first].begin(),
+                        optim_input_shape_[input.first].end(),
+                        [](int x) { return x > 0; }))) {
+        continue;
+      }
+#endif
       VLOG(4) << "TRT dynamic_shape set " << input.first
               << " min: " << Vec2Str(input.second)
               << ", max: " << Vec2Str(max_input_shape_[input.first])
...
@@ -73,8 +73,24 @@ static nvinfer1::IPluginRegistry* GetPluginRegistry() {
 static int GetInferLibVersion() {
   return static_cast<int>(dy::getInferLibVersion());
 }
+#else
+static int GetInferLibVersion() { return 0; }
 #endif
+static std::tuple<int, int, int> GetTrtRuntimeVersion() {
+  int ver = GetInferLibVersion();
+  int major = ver / 1000;
+  ver -= major * 1000;
+  int minor = ver / 100;
+  int patch = ver - minor * 100;
+  return std::tuple<int, int, int>{major, minor, patch};
+}
+static std::tuple<int, int, int> GetTrtCompileVersion() {
+  return std::tuple<int, int, int>{NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR,
+                                   NV_TENSORRT_PATCH};
+}
 // A logger for create TensorRT infer builder.
 class NaiveLogger : public nvinfer1::ILogger {
  public:
...
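GetTrtRuntimeVersion() decodes the integer returned by dy::getInferLibVersion(), which for the TensorRT releases targeted here is packed as major * 1000 + minor * 100 + patch (so 7103 means 7.1.3), while GetTrtCompileVersion() reports the headers the library was built against. A small sketch of the same decoding, assuming that packing scheme:

def decode_trt_version(ver):
    # Assumes TensorRT's major*1000 + minor*100 + patch packing.
    major = ver // 1000
    minor = (ver % 1000) // 100
    patch = ver % 100
    return (major, minor, patch)

assert decode_trt_version(7103) == (7, 1, 3)
assert decode_trt_version(0) == (0, 0, 0)  # a build without TensorRT reports 0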
@@ -330,6 +330,8 @@ void BindInferenceApi(py::module *m) {
   m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
   m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
   m->def("get_version", &paddle_infer::GetVersion);
+  m->def("get_trt_compile_version", &paddle_infer::GetTrtCompileVersion);
+  m->def("get_trt_runtime_version", &paddle_infer::GetTrtRuntimeVersion);
   m->def("get_num_bytes_of_data_type", &paddle_infer::GetNumBytesOfDataType);
 }
@@ -739,10 +741,11 @@ void BindZeroCopyTensor(py::module *m) {
 void BindPaddleInferTensor(py::module *m) {
   py::class_<paddle_infer::Tensor>(*m, "PaddleInferTensor")
       .def("reshape", &paddle_infer::Tensor::Reshape)
-      .def("copy_from_cpu", &PaddleInferTensorCreate<int32_t>)
-      .def("copy_from_cpu", &PaddleInferTensorCreate<int64_t>)
-      .def("copy_from_cpu", &PaddleInferTensorCreate<float>)
-      .def("copy_from_cpu", &PaddleInferTensorCreate<paddle_infer::float16>)
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
+      .def("copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
+      .def("copy_from_cpu_bind",
+           &PaddleInferTensorCreate<paddle_infer::float16>)
       .def("copy_to_cpu", &PaddleInferTensorToNumpy)
       .def("shape", &paddle_infer::Tensor::shape)
       .def("set_lod", &paddle_infer::Tensor::SetLoD)
...
@@ -14,4 +14,4 @@
 from .wrapper import Config, DataType, PlaceType, PrecisionType, Tensor, Predictor
-from ..core import create_predictor, get_version, get_num_bytes_of_data_type, PredictorPool
+from ..core import create_predictor, get_version, get_num_bytes_of_data_type, PredictorPool, get_trt_compile_version, get_trt_runtime_version
@@ -15,9 +15,24 @@
 from ..core import AnalysisConfig, PaddleDType, PaddlePlace
 from ..core import PaddleInferPredictor, PaddleInferTensor
+import numpy as np
 DataType = PaddleDType
 PlaceType = PaddlePlace
 PrecisionType = AnalysisConfig.Precision
 Config = AnalysisConfig
 Tensor = PaddleInferTensor
 Predictor = PaddleInferPredictor
+def tensor_copy_from_cpu(self, data):
+    '''
+    Type-checked wrapper for Tensor.copy_from_cpu: only numpy.ndarray input is supported.
+    '''
+    if not isinstance(data, np.ndarray):
+        raise TypeError(
+            "In copy_from_cpu, we only support numpy ndarray data type.")
+    self.copy_from_cpu_bind(data)
+Tensor.copy_from_cpu = tensor_copy_from_cpu
@@ -14,10 +14,14 @@
 import os, shutil
 import unittest
+import paddle
+paddle.enable_static()
 import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.core import PaddleTensor
 from paddle.fluid.core import PaddleDType
+from paddle.inference import Config, Predictor, create_predictor
+from paddle.inference import get_trt_compile_version, get_trt_runtime_version
 class TestInferenceApi(unittest.TestCase):
@@ -54,5 +58,60 @@ class TestInferenceApi(unittest.TestCase):
                          tensor_float.ravel().tolist())
+def get_sample_model():
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    main_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(main_program, startup_program):
+        data = fluid.data(name="data", shape=[-1, 6, 64, 64], dtype="float32")
+        conv_out = fluid.layers.conv2d(
+            input=data,
+            num_filters=3,
+            filter_size=3,
+            groups=1,
+            padding=0,
+            bias_attr=False,
+            act=None)
+    exe.run(startup_program)
+    serialized_program = paddle.static.serialize_program(
+        data, conv_out, program=main_program)
+    serialized_params = paddle.static.serialize_persistables(
+        data, conv_out, executor=exe, program=main_program)
+    return serialized_program, serialized_params
+class TestInferenceBaseAPI(unittest.TestCase):
+    def get_config(self, model, params):
+        config = Config()
+        config.set_model_buffer(model, len(model), params, len(params))
+        config.enable_use_gpu(100, 0)
+        return config
+    def test_apis(self):
+        print('trt compile version:', get_trt_compile_version())
+        print('trt runtime version:', get_trt_runtime_version())
+        program, params = get_sample_model()
+        config = self.get_config(program, params)
+        predictor = create_predictor(config)
+        in_names = predictor.get_input_names()
+        in_handle = predictor.get_input_handle(in_names[0])
+        in_data = np.ones((1, 6, 32, 32)).astype(np.float32)
+        in_handle.copy_from_cpu(in_data)
+        predictor.run()
+    def test_wrong_input(self):
+        with self.assertRaises(TypeError):
+            program, params = get_sample_model()
+            config = self.get_config(program, params)
+            predictor = create_predictor(config)
+            in_names = predictor.get_input_names()
+            in_handle = predictor.get_input_handle(in_names[0])
+            in_data = np.ones((1, 6, 64, 64)).astype(np.float32)
+            in_handle.copy_from_cpu(list(in_data))
+            predictor.run()
 if __name__ == '__main__':
     unittest.main()
@@ -20,6 +20,8 @@ from ..fluid.inference import Tensor  # noqa: F401
 from ..fluid.inference import Predictor  # noqa: F401
 from ..fluid.inference import create_predictor  # noqa: F401
 from ..fluid.inference import get_version  # noqa: F401
+from ..fluid.inference import get_trt_compile_version  # noqa: F401
+from ..fluid.inference import get_trt_runtime_version  # noqa: F401
 from ..fluid.inference import get_num_bytes_of_data_type  # noqa: F401
 from ..fluid.inference import PredictorPool  # noqa: F401
@@ -32,6 +34,8 @@ __all__ = [  # noqa
     'Predictor',
     'create_predictor',
     'get_version',
+    'get_trt_compile_version',
+    'get_trt_runtime_version',
     'get_num_bytes_of_data_type',
     'PredictorPool'
 ]
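With these exports, the TensorRT version helpers become part of the public paddle.inference namespace. A short usage sketch; the version numbers in the comments are illustrative, and a Paddle build without TensorRT returns (0, 0, 0) for both:

import paddle.inference as paddle_infer

compile_ver = paddle_infer.get_trt_compile_version()  # e.g. (7, 1, 3)
runtime_ver = paddle_infer.get_trt_runtime_version()  # e.g. (7, 1, 3)
print("TRT compile version:", ".".join(map(str, compile_ver)))
print("TRT runtime version:", ".".join(map(str, runtime_ver)))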