From 14a92c8c32e5589d09d398b57cfd5a081c169e52 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Thu, 9 Feb 2023 20:58:11 +0800 Subject: [PATCH] [trt][inference]support int64 shapetensor as engine input (#50170) * update * support int64 shape tensor as engine input * add inference_predictor ut --- .../fluid/inference/api/analysis_predictor.cc | 48 ++- .../operators/tensorrt/tensorrt_engine_op.h | 29 +- .../unittests/ir/inference/CMakeLists.txt | 1 + .../inference/test_trt_inference_predictor.py | 399 ++++++++++++++++++ 4 files changed, 456 insertions(+), 21 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 296607b6ec6..eb916cbbb53 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -62,6 +62,7 @@ #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" +#include "paddle/phi/kernels/funcs/data_type_transform.h" #include "paddle/utils/string/split.h" #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) @@ -1890,16 +1891,16 @@ bool AnalysisPredictor::ExpRunWithExternalStream(const gpuStream_t stream) { void AnalysisPredictor::CollectShapeRangeInfo() { // if use gpu, sync first. + paddle::platform::DeviceContextPool &pool = + paddle::platform::DeviceContextPool::Instance(); if (config_.use_gpu()) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - paddle::platform::DeviceContextPool &pool = - paddle::platform::DeviceContextPool::Instance(); - auto gpu_place = place_; - auto *dev_ctx = static_cast(pool.Get(gpu_place)); + auto *dev_ctx = pool.Get(place_); + auto stream = static_cast(dev_ctx)->stream(); #ifdef PADDLE_WITH_HIP - hipStreamSynchronize(dev_ctx->stream()); + hipStreamSynchronize(stream); #else - cudaStreamSynchronize(dev_ctx->stream()); + cudaStreamSynchronize(stream); #endif #endif } @@ -1911,6 +1912,7 @@ void AnalysisPredictor::CollectShapeRangeInfo() { continue; } auto tensor = var->Get(); + if (!tensor.initialized()) continue; framework::DDim dim = tensor.dims(); std::vector shape(dim.size()); for (size_t i = 0; i < shape.size(); ++i) shape[i] = dim[i]; @@ -1922,22 +1924,40 @@ void AnalysisPredictor::CollectShapeRangeInfo() { // This is a simple method to identify all shape tensors with some // mistakes, but it doesn't matter. auto is_shape_tensor = tensor.numel() <= 7 && tensor.numel() >= 1; - if (tensor.dtype() == paddle::experimental::DataType::INT32 && + if ((tensor.dtype() == phi::DataType::INT32 || + tensor.dtype() == phi::DataType::INT64) && is_shape_tensor) { std::vector int32_host(tensor.numel()); - if (tensor.place() == platform::CPUPlace()) { + + if (platform::is_cpu_place(tensor.place())) { + auto &int32_tensor = tensor; + if (tensor.dtype() == phi::DataType::INT64) { + auto *cpu_ctx = pool.Get(platform::CPUPlace()); + int32_tensor = phi::funcs::TransDataType( + reinterpret_cast(*cpu_ctx), + tensor, + DataType::INT32); + } paddle::memory::Copy(platform::CPUPlace(), int32_host.data(), platform::CPUPlace(), - tensor.data(), - tensor.numel() * sizeof(int)); - } else if (tensor.place() == platform::CUDAPlace()) { + int32_tensor.data(), + int32_tensor.numel() * sizeof(int)); + } else if (platform::is_gpu_place(tensor.place())) { #if defined(PADDLE_WITH_CUDA) + auto *dev_ctx = pool.Get(tensor.place()); + auto &int32_tensor = tensor; + if (tensor.dtype() == phi::DataType::INT64) { + int32_tensor = phi::funcs::TransDataType( + reinterpret_cast(*dev_ctx), + tensor, + DataType::INT32); + } paddle::memory::Copy(platform::CPUPlace(), int32_host.data(), - platform::CUDAPlace(), - tensor.data(), - tensor.numel() * sizeof(int), + int32_tensor.place(), + int32_tensor.data(), + int32_tensor.numel() * sizeof(int), nullptr); #endif } diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 87323534bba..9c46cb5c3c9 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -544,6 +544,7 @@ class TensorRTEngineOp : public framework::OperatorBase { "index=%d >= total inputs and outputs=%d", bind_index, num_bindings)); + auto type = framework::TransToProtoVarType(t.dtype()); if (!engine->with_dynamic_shape()) { // check if the input shapes are consistent with model. if (HasAttr(x + "_shape")) { @@ -586,12 +587,27 @@ class TensorRTEngineOp : public framework::OperatorBase { if (engine->engine()->isShapeBinding(bind_index) && engine->engine()->bindingIsInput(bind_index)) { std::vector shape_v(t.numel()); - paddle::memory::Copy(platform::CPUPlace(), - shape_v.data(), - platform::CUDAPlace(), - t.data(), - t.numel() * sizeof(int), - nullptr); + if (type == framework::proto::VarType::INT32) { + paddle::memory::Copy(platform::CPUPlace(), + shape_v.data(), + t.place(), + t.data(), + t.numel() * sizeof(int), + nullptr); + } else if (type == framework::proto::VarType::INT64) { + auto int32_tensor = scope.FindVar(x + "_cast_to_INT32") + ->GetMutable(); + *int32_tensor = phi::Cast( + reinterpret_cast(dev_ctx), + t, + phi::DataType::INT32); + paddle::memory::Copy(platform::CPUPlace(), + shape_v.data(), + int32_tensor->place(), + int32_tensor->data(), + int32_tensor->numel() * sizeof(int), + nullptr); + } trt_context->setInputShapeBinding(bind_index, shape_v.data()); } #endif @@ -608,7 +624,6 @@ class TensorRTEngineOp : public framework::OperatorBase { "The TRT Engine OP's input type should equal " "to the input data type")); - auto type = framework::TransToProtoVarType(t.dtype()); if (type == framework::proto::VarType::FP32) { buffers[bind_index] = static_cast(t.data()); } else if (type == framework::proto::VarType::INT64) { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index fa8357f7611..96a0cfb3fb0 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -135,6 +135,7 @@ if(WITH_GPU AND TENSORRT_FOUND) #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120) set_tests_properties(test_trt_inspector PROPERTIES TIMEOUT 60) + set_tests_properties(test_trt_inference_predictor PROPERTIES TIMEOUT 60) if(WITH_NV_JETSON) set_tests_properties( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py new file mode 100644 index 00000000000..561502b6250 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_inference_predictor.py @@ -0,0 +1,399 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import sys +import tempfile +import unittest + +import numpy as np +import yaml + +import paddle +import paddle.nn as nn + +try: + import paddle.inference as paddle_infer +except Exception as e: + sys.stderr.write("Cannot import paddle, maybe paddle is not installed.\n") + +paddle.set_device('cpu') +paddle.disable_signal_handler() + + +def str2bool(v): + if v.lower() == 'true': + return True + else: + return False + + +def getdtype(dtype="float32"): + if dtype == "float32" or dtype == "float": + return np.float32 + if dtype == "float16": + return np.float16 + if dtype == "float64": + return np.float64 + if dtype == "int32": + return np.int32 + if dtype == "int64": + return np.int64 + + +class BackendPaddle: + def __init__(self): + super(BackendPaddle, self).__init__() + self.h2d_time = [] + self.compute_time = [] + self.d2h_time = [] + + def version(self): + return paddle.version.full_version + + def name(self): + return "paddle" + + def load(self, config_arg, inputs=None, outpus=None): + self.args = config_arg + if os.path.exists(self.args.model_dir): + model_file = os.path.join( + self.args.model_dir + "/" + self.args.paddle_model_file + ) + model_params = os.path.join( + self.args.model_dir + "/" + self.args.paddle_params_file + ) + config = paddle_infer.Config(model_file, model_params) + else: + raise ValueError( + f"The model dir {self.args.model_dir} does not exists!" + ) + + # enable memory optim + if not self.args.enable_tune: + config.enable_memory_optim() + + config.set_cpu_math_library_num_threads(self.args.cpu_threads) + config.switch_ir_optim(True) + # debug + if self.args.enable_debug: + config.switch_ir_debug() + precision_mode = paddle_infer.PrecisionType.Float32 + if self.args.precision == 'fp16': + precision_mode = paddle_infer.PrecisionType.Half + elif self.args.precision == 'int8': + precision_mode = paddle_infer.PrecisionType.Int8 + + if self.args.enable_mkldnn and not self.args.enable_gpu: + config.disable_gpu() + config.enable_mkldnn() + if self.args.precision == 'int8': + config.enable_mkldnn_int8( + {"conv2d", "depthwise_conv2d", "transpose2", "pool2d"} + ) + if not self.args.enable_mkldnn and not self.args.enable_gpu: + config.disable_gpu() + # config.enable_mkldnn() + if self.args.enable_profile: + config.enable_profile() + shape_range_file = os.path.join( + self.args.model_dir, self.args.shape_range_file + ) + if self.args.enable_tune: + config.collect_shape_range_info(shape_range_file) + if self.args.enable_gpu: + config.enable_use_gpu(256, self.args.gpu_id) + if self.args.enable_trt: + max_batch_size = self.args.batch_size + if ( + self.args.yaml_config["input_shape"]["0"]["shape"][ + self.args.test_num + ][0] + != -1 + ): + max_batch_size = self.args.yaml_config["input_shape"]["0"][ + "shape" + ][self.args.test_num][0] + config.enable_tensorrt_engine( + workspace_size=1 << 33, + precision_mode=precision_mode, + max_batch_size=max_batch_size, + min_subgraph_size=self.args.subgraph_size, + use_static=False, + use_calib_mode=False + if self.args.precision == 'int8' + else False, + ) + if self.args.enable_dynamic_shape: + if os.path.exists(shape_range_file): + config.enable_tuned_tensorrt_dynamic_shape( + shape_range_file, True + ) + config.disable_glog_info() + config.exp_disable_tensorrt_ops(["range"]) + + self.predictor = paddle_infer.create_predictor(config) + + input_shape = self.args.yaml_config["input_shape"] + if len(input_shape) <= 0: + raise Exception("input shape is empty.") + + if "input_data" in self.args.yaml_config: + input_file = self.args.yaml_config["input_data"]["data"][ + self.args.test_num + ] + self.numpy_input = np.load(input_file, allow_pickle=True) + + return self + + def set_input(self): + # set input tensor + input_names = self.predictor.get_input_names() + for i, name in enumerate(input_names): + input_tensor = self.predictor.get_input_handle(name) + if "input_data" not in self.args.yaml_config: + if ( + self.args.yaml_config["input_shape"][str(i)]["shape"][ + self.args.test_num + ][0] + == -1 + ): + input_shape = [ + self.args.batch_size + ] + self.args.yaml_config["input_shape"][str(i)]["shape"][ + self.args.test_num + ][ + 1: + ] + dtype = self.args.yaml_config["input_shape"][str(i)][ + "dtype" + ][self.args.test_num] + else: + input_shape = self.args.yaml_config["input_shape"][str(i)][ + "shape" + ][self.args.test_num] + dtype = self.args.yaml_config["input_shape"][str(i)][ + "dtype" + ][self.args.test_num] + if hasattr(self.args, "test_data"): + fake_input = self.args.test_data[i].astype(getdtype(dtype)) + else: + fake_input = np.ones(input_shape, dtype=getdtype(dtype)) + input_tensor.copy_from_cpu(fake_input) + else: + real_input = np.expand_dims(self.numpy_input[i], 0).repeat( + self.args.batch_size, axis=0 + ) + input_tensor.copy_from_cpu(real_input) + + def set_output(self): + results = [] + # get out data from output tensor + output_names = self.predictor.get_output_names() + for i, name in enumerate(output_names): + output_tensor = self.predictor.get_output_handle(name) + output_data = output_tensor.copy_to_cpu() + if self.args.return_result or self.args.save_result: + results.append(output_data) + if self.args.return_result or self.args.save_result: + return results + + def reset(self): + self.h2d_time.clear() + self.d2h_time.clear() + self.compute_time.clear() + + def warmup(self): + pass + + def predict(self, feed=None): + self.set_input() + self.predictor.run() + output = self.set_output() + if self.args.return_result or self.args.save_result: + return output + + def predict_nocopy(self, feed=None): + self.predictor.run() + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('--batch_size', type=int, default=1) + parser.add_argument('--cpu_threads', type=int, default=1) + parser.add_argument('--inter_op_threads', type=int, default=1) + parser.add_argument( + '--precision', type=str, choices=["fp32", "fp16", "int8"] + ) + parser.add_argument( + '--backend_type', + type=str, + choices=["paddle", "onnxruntime", "openvino", "tensorrt"], + default="paddle", + ) + parser.add_argument('--gpu_id', type=int, default=0) + parser.add_argument('--subgraph_size', type=int, default=1) + parser.add_argument('--model_dir', type=str) + parser.add_argument( + '--paddle_model_file', type=str, default="model.pdmodel" + ) + parser.add_argument( + '--paddle_params_file', type=str, default="model.pdiparams" + ) + parser.add_argument('--enable_mkldnn', type=str2bool, default=False) + parser.add_argument('--enable_gpu', type=str2bool, default=True) + parser.add_argument('--enable_trt', type=str2bool, default=True) + parser.add_argument('--enable_dynamic_shape', type=str2bool, default=True) + parser.add_argument('--enable_tune', type=str2bool, default=False) + parser.add_argument('--enable_profile', type=str2bool, default=False) + parser.add_argument('--enable_benchmark', type=str2bool, default=True) + parser.add_argument('--save_result', type=str2bool, default=False) + parser.add_argument('--return_result', type=str2bool, default=False) + parser.add_argument('--enable_debug', type=str2bool, default=False) + parser.add_argument( + '--config_file', type=str, required=False, default="config/model.yaml" + ) + parser.add_argument( + '--shape_range_file', type=str, default="shape_range.pbtxt" + ) + args, unknown = parser.parse_known_args() + return args + + +def run_infer(model_path): + conf = parse_args() + + yaml_config = yaml.safe_load( + ''' + input_shape: + '0': + dtype: [float32] + shape: + - [-1, 3, 32, 32] + ''' + ) + + conf.yaml_config = yaml_config + conf.test_num = 0 + conf.model_dir = model_path + + conf.enable_tune = True + # collect shape use CPU + conf.enable_gpu = False + backend = BackendPaddle() + backend.load(conf) + backend.predict() + + # collect shape use GPU + conf.enable_gpu = True + backend = BackendPaddle() + backend.load(conf) + backend.predict() + + # run inference predictor + conf.enable_tune = False + backend = BackendPaddle() + backend.load(conf) + backend.predict() + + +class ConvBNLayer(paddle.nn.Layer): + def __init__( + self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + ): + super().__init__() + + self._conv = paddle.nn.Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + bias_attr=False, + ) + + self._batch_norm = paddle.nn.BatchNorm(num_filters, act=act) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class Test(nn.Layer): + def __init__(self): + super(Test, self).__init__() + self.conv = ConvBNLayer( + num_channels=3, num_filters=64, filter_size=3, stride=2, act='relu' + ) + self.pool2d_max = paddle.nn.MaxPool2D( + kernel_size=3, stride=1, padding=1 + ) + self.pool2d_avg = paddle.nn.AdaptiveAvgPool2D(output_size=1) + + def forward(self, x): + x = self.conv(x) + x = self.pool2d_avg(x) + + x = paddle.reshape( + x, + shape=[ + paddle.to_tensor([-1], dtype=paddle.int64), + paddle.to_tensor([8], dtype=paddle.int64), + ], + ) + return x + + +class TestInferencePredictor(unittest.TestCase): + def setUp(self): + # enable dygraph mode + paddle.disable_static() + self.temp_dir = tempfile.TemporaryDirectory() + self.path = os.path.join(self.temp_dir.name, './inference/model') + self.path = "./inference/model" + + def tearDown(self): + self.temp_dir.cleanup() + + def SaveInferenceModel(self): + paddle.disable_static() + net = Test() + net.eval() + + net(paddle.rand(shape=[1, 3, 32, 32], dtype='float32')) + input_spec = [ + paddle.static.InputSpec( + shape=[-1, 3, 32, 32], dtype=paddle.float32, name='input' + ) + ] + + static_model = paddle.jit.to_static(net, input_spec=input_spec) + paddle.jit.save(static_model, self.path) + + def testInferencePredictor(self): + self.SaveInferenceModel() + run_infer(os.path.dirname(self.path)) + + +if __name__ == '__main__': + unittest.main() -- GitLab