Unverified · Commit 17188e8d authored by Wilber, committed by GitHub

trt convert ut add dynamic_shape and int8, etc. (#35061)

Parent a95db6a7
@@ -535,6 +535,7 @@ void GraphToProgram(const Graph &graph, ProgramDesc *program,
       block = program_pb.add_blocks();
       block->set_idx(idx);
+      block->set_parent_idx(kRootBlockIndex);
       GraphToBlock(*graph.GetSubGraph(idx), block, sort_kind);
     }
   } else {
......
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
+#include <string>
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/graph_printer.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/analysis/dot.h"
 
 namespace paddle {
@@ -44,6 +47,31 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
           "Can not open file %s for printing the graph.", graph_viz_path));
   std::ostream& sout = *fout;
 
+  // Serialize only the model file.
+  std::string program_path;
+  std::size_t found1 = graph_viz_path.find("_ir_");
+  std::size_t found2 = graph_viz_path.find(".dot");
+  if (found1 != std::string::npos && found2 != std::string::npos) {
+    ProgramDesc program_desc;
+    GraphToProgram(*graph, &program_desc);
+    // TODO(wilber): GraphToProgram seems to have bugs.
+    for (size_t i = 0; i < program_desc.Size(); ++i) {
+      for (size_t j = 0; j < program_desc.Block(i).OpSize(); ++j) {
+        if (program_desc.Block(i).Op(j)->Type() == "tensorrt_engine") {
+          program_desc.Block(i).Op(j)->RemoveAttr("sub_block");
+        }
+      }
+    }
+    std::string program_bytes = program_desc.Proto()->SerializeAsString();
+    // Rename, e.g. "17_ir_fc_fuse_pass.dot" -> "fc_fuse_pass.pdmodel".
+    program_path =
+        graph_viz_path.substr(found1 + 4, found2 - found1 - 4) + ".pdmodel";
+    std::ofstream file(program_path.c_str(), std::ios::binary);
+    file.write(program_bytes.c_str(), program_bytes.size());
+    file.close();
+    VLOG(3) << "serialize program to " << program_path;
+  }
+
   std::unordered_map<const ir::Node*, std::string> node2dot;
   Dot dot;
......
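For reference, a dumped .pdmodel can be inspected offline with the same paddle.static helpers the test harness below relies on. A minimal sketch, assuming a hypothetical dump named fc_fuse_pass.pdmodel in the working directory:

import paddle

paddle.enable_static()

# Hypothetical file produced by the graph_viz_pass serialization above.
model_bytes = paddle.static.load_from_file('fc_fuse_pass.pdmodel')
program = paddle.static.deserialize_program(model_bytes)

# Count ops in the main block, e.g. to see whether a tensorrt_engine op was fused.
main_block = program.desc.block(0)
op_types = [main_block.op(i).type() for i in range(main_block.op_size())]
print('ops after this pass:', op_types)
print('tensorrt_engine count:', op_types.count('tensorrt_engine'))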
@@ -15,6 +15,7 @@
 import numpy as np
 import unittest
 import abc
+import os
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.initializer import NumpyArrayInitializer
@@ -22,14 +23,13 @@ import paddle.fluid.core as core
 from paddle import compat as cpt
 import paddle.inference as paddle_infer
 from typing import Optional, List, Callable, Dict, Any, Set
-from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model
+from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
 
 class AutoScanTest(unittest.TestCase):
     def __init__(self, methodName='runTest'):
         paddle.enable_static()
         super(AutoScanTest, self).__init__(methodName)
-        self.threshold = 1e-5
 
     @abc.abstractmethod
     def sample_program_configs(self) -> List[ProgramConfig]:
@@ -56,11 +56,31 @@ class AutoScanTest(unittest.TestCase):
             input_tensor.copy_from_cpu(feed_data[name])
         predictor.run()
         result = {}
-        for out_name in prog_config.outputs:
-            result[out_name] = predictor.get_output_handle(
-                out_name).copy_to_cpu()
+        for out_name, o_name in zip(prog_config.outputs,
+                                    predictor.get_output_names()):
+            result[out_name] = predictor.get_output_handle(o_name).copy_to_cpu()
         return result
+    def assert_op_size(self, trt_engine_num, paddle_op_num):
+        cur_path = os.path.dirname(__file__)
+        last_passed_program = os.path.join(
+            cur_path, 'transpose_flatten_concat_fuse_pass.pdmodel')
+        model_bytes = paddle.static.load_from_file(last_passed_program)
+        pg = paddle.static.deserialize_program(model_bytes)
+        main_block = pg.desc.block(0)
+        op_size = main_block.op_size()
+        op_types = [
+            main_block.op(i).type() == 'tensorrt_engine' for i in range(op_size)
+        ]
+        trt_engine_size = sum(op_types)
+        paddle_op_size = op_size - trt_engine_size
+        self.assertTrue(trt_engine_size == trt_engine_num,
+                        'Expected trt_engine_num is {}, but got {}!'.format(
+                            trt_engine_num, trt_engine_size))
+        self.assertTrue(paddle_op_size == paddle_op_num,
+                        'Expected paddle_op_num is {}, but got {}!'.format(
+                            paddle_op_num, paddle_op_size))
     def assert_tensors_near(self,
                             threshold: float,
                             tensors: List[Dict[str, np.array]]):
@@ -73,9 +93,15 @@ class AutoScanTest(unittest.TestCase):
                     first[key], arr, atol=threshold),
                 "Output has diff between GPU and TensorRT. ")
 
-    def run_test(self):
+    def run_test(self,
+                 trt_engine_num: int,
+                 paddle_op_num: int,
+                 threshold=1e-5,
+                 quant=False):
         for prog_config in self.sample_program_configs():
             model, params = create_fake_model(prog_config)
+            if quant:
+                model, params = create_quant_model(model, params)
             for batch_size in self.batch_size_set:
                 feed_data = {}
                 for name, tensor_config in prog_config.inputs.items():
@@ -88,5 +114,5 @@ class AutoScanTest(unittest.TestCase):
                     results.append(
                         self.run_test_config(model, params, prog_config,
                                              pred_config, feed_data))
-                self.assert_tensors_near(
-                    threshold=self.threshold, tensors=results)
+                self.assert_tensors_near(threshold=threshold, tensors=results)
+                self.assert_op_size(trt_engine_num, paddle_op_num)
@@ -21,6 +21,11 @@ from paddle import compat as cpt
 from paddle.fluid.initializer import NumpyArrayInitializer
 from paddle.fluid.framework import convert_np_dtype_to_dtype_
+from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
+from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
+from paddle.fluid.framework import IrGraph, IrNode, Operator
+from paddle.fluid.executor import global_scope
 
 class TensorConfig:
     '''
@@ -160,3 +165,181 @@ def create_fake_model(program_config):
     executor.run(util_program)
     params = scope.find_var("out_var_0").get_bytes()
     return model, params
+def create_quant_model(model,
+                       params,
+                       activation_quantize_type='moving_average_abs_max',
+                       weight_quantize_type='channel_wise_abs_max',
+                       save=False):
+    place = paddle.CUDAPlace(0)
+    scope = global_scope()
+    exe = paddle.static.Executor(place)
+    [inference_program, feed_target_names,
+     fetch_targets] = paddle.static.load_inference_model(
+         path_prefix=None,
+         executor=exe,
+         model_filename=model,
+         params_filename=params)
+    graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
+
+    transform_pass = QuantizationTransformPass(
+        scope=scope,
+        place=place,
+        activation_quantize_type=activation_quantize_type,
+        weight_quantize_type=weight_quantize_type)
+    transform_pass.apply(graph)
+
+    out_scale_op_list = [
+        "conv2d",
+        "depthwise_conv2d",
+        "mul",
+        "matmul",
+        "relu",
+        "leaky_relu",
+        "relu6",
+        "sigmoid",
+        "tanh",
+        "prelu",
+        "swish",
+        "softmax",
+        "batch_norm",
+        "layer_norm",
+        "elementwise_add",
+        "pool2d",
+        "reshape2",
+        "transpose2",
+        "concat",
+        "elementwise_mul",
+        "scale",
+        "slice",
+        "hard_swish",
+        "hard_sigmoid",
+        "conv2d_transpose",
+        "gru",
+        "bilinear_interp",
+        "nearest_interp",
+        "trilinear_interp",
+        "flatten",
+        "flatten2",
+        "transpose",
+        "pad2d",
+        "reshape",
+    ]
+    op_real_in_out_name = {
+        "conv2d": [["Input", "Filter"], ["Output"]],
+        "depthwise_conv2d": [["Input", "Filter"], ["Output"]],
+        "conv2d_transpose": [["Input", "Filter"], ["Output"]],
+        "mul": [["X", "Y"], ["Out"]],
+        "matmul": [["X", "Y"], ["Out"]],
+        "pool2d": [["X"], ["Out"]],
+        "elementwise_add": [["X", "Y"], ["Out"]],
+        "concat": [["X"], ["Out"]],
+        "softmax": [["X"], ["Out"]],
+        "argmax": [["X"], ["Out"]],
+        "transpose": [["X"], ["Out"]],
+        "equal": [["X", "Y"], ["Out"]],
+        "gather": [["X"], ["Out"]],
+        "greater_equal": [["X", "Y"], ["Out"]],
+        "greater_than": [["X", "Y"], ["Out"]],
+        "less_equal": [["X", "Y"], ["Out"]],
+        "less_than": [["X", "Y"], ["Out"]],
+        "mean": [["X"], ["Out"]],
+        "not_equal": [["X", "Y"], ["Out"]],
+        "reshape": [["X"], ["Out"]],
+        "reshape2": [["X"], ["Out"]],
+        "transpose2": [["X"], ["Out"]],
+        "bilinear_interp": [["X"], ["Out"]],
+        "nearest_interp": [["X"], ["Out"]],
+        "trilinear_interp": [["X"], ["Out"]],
+        "slice": [["Input"], ["Out"]],
+        "squeeze": [["X"], ["Out"]],
+        "elementwise_sub": [["X", "Y"], ["Out"]],
+        "relu": [["X"], ["Out"]],
+        "relu6": [["X"], ["Out"]],
+        "leaky_relu": [["X"], ["Out"]],
+        "prelu": [["X"], ["Out"]],
+        "tanh": [["X"], ["Out"]],
+        "swish": [["X"], ["Out"]],
+        "dropout": [["X"], ["Out"]],
+        "batch_norm": [["X"], ["Y"]],
+        "layer_norm": [["X"], ["Y"]],
+        "sigmoid": [["X"], ["Out"]],
+        "elementwise_mul": [["X", "Y"], ["Out"]],
+        "scale": [["X"], ["Out"]],
+        "hard_swish": [["X"], ["Out"]],
+        "hard_sigmoid": [["X"], ["Out"]],
+        "gru": [["Input", "Weight"], ["Hidden"]],
+        "lstm": [["Input", "Weight"], ["Hidden"]],
+        "pad2d": [["X"], ["Out"]],
+        "flatten": [["X"], ["Out"]],
+        "flatten2": [["X"], ["Out"]],
+    }
+    def _get_op_output_var_names(op):
+        """Get the output variable names of the given op."""
+        assert isinstance(op, (IrNode, Operator)), \
+            "The input op should be IrNode or Operator."
+        var_names = []
+        op_name = op.name() if isinstance(op, IrNode) \
+            else op.type
+        if op_name not in op_real_in_out_name:
+            return []
+
+        name_list = op_real_in_out_name[op_name][1]
+        for name in name_list:
+            var_name = op.output(name)
+            if isinstance(var_name, list):
+                var_names.extend(var_name)
+            else:
+                var_names.append(var_name)
+        return var_names
+    op_nodes = graph.all_op_nodes()
+    for op_node in op_nodes:
+        if op_node.name() in out_scale_op_list:
+            var_names = _get_op_output_var_names(op_node)
+            for var_name in var_names:
+                in_node = graph._find_node_by_name(op_node.outputs, var_name)
+                if in_node.dtype() not in \
+                        [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32]:
+                    continue
+
+                op_node.op()._set_attr("out_threshold", 3.0)
+
+    # Freeze graph for inference, but the weight of fc/conv is still float type.
+    freeze_pass = QuantizationFreezePass(
+        scope=scope, place=place, weight_quantize_type=weight_quantize_type)
+    freeze_pass.apply(graph)
+
+    main_program = graph.to_program()
+
+    # Modify fake_quantize_moving_average_abs_max(InScale) and
+    # fake_channel_wise_dequantize_max_abs(Scales).
+    op_nodes = graph.all_op_nodes()
+    for op_node in op_nodes:
+        if op_node.name() == 'fake_quantize_moving_average_abs_max':
+            var_name = op_node.input("InScale")[0]
+            tensor = scope.var(var_name).get_tensor()
+            tensor.set(np.array([1], dtype=np.float32), place)
+        elif op_node.name() == 'fake_channel_wise_dequantize_max_abs':
+            var_name = op_node.input("Scales")[0]
+            tensor = scope.var(var_name).get_tensor()
+            tensor.set(np.ones(tensor.shape(), dtype=np.float32), place)
+
+    if save:
+        fluid.io.save_inference_model(
+            'test_inference_model',
+            feed_target_names,
+            fetch_targets,
+            exe,
+            main_program=main_program)
+
+    feed_vars = [
+        main_program.global_block().var(name) for name in feed_target_names
+    ]
+    serialized_program = paddle.static.serialize_program(
+        feed_vars, fetch_targets, program=main_program)
+    serialized_params = paddle.static.serialize_persistables(
+        feed_vars, fetch_targets, executor=exe, program=main_program)
+    return serialized_program, serialized_params
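Taken together with create_fake_model, the quantization helper slots in as a post-processing step, exactly as run_test(quant=True) does above. A minimal sketch, assuming a CUDA device (the helper hard-codes paddle.CUDAPlace(0)) and a hypothetical prog_config built by some test case:

import paddle
from program_config import create_fake_model, create_quant_model

paddle.enable_static()

# prog_config is a hypothetical ProgramConfig assembled by a test case.
model, params = create_fake_model(prog_config)

# Insert fake quant/dequant ops with dummy scales, mirroring run_test(quant=True).
quant_model, quant_params = create_quant_model(model, params)

# The returned bytes can be written out for manual inspection.
with open('quant.pdmodel', 'wb') as f:
    f.write(quant_model)
with open('quant.pdiparams', 'wb') as f:
    f.write(quant_params)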
@@ -15,6 +15,7 @@
 from trt_layer_auto_scan_test import TrtLayerAutoScanTest
 from program_config import TensorConfig
 import numpy as np
+import paddle.inference as paddle_infer
 
 class TrtConvertConv2dTest(TrtLayerAutoScanTest):
@@ -59,8 +60,33 @@ class TrtConvertConv2dTest(TrtLayerAutoScanTest):
         self.program_inputs = {"input_data": input_data}
         self.program_outputs = ["relu_output_data"]
-    def test_check_output(self):
-        self.run_test()
+    def test_check_fp32_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        # The fused tensorrt_engine op num is 1, and the paddle op num is 2 (feed and fetch).
+        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
+
+    def test_check_fp16_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
+
+    def test_dynamic_shape_fp32_check_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
+        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-5)
+
+    def test_dynamic_shape_fp16_check_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
+        self.dynamic_shape.max_input_shape = {"input_data": [4, 3, 64, 64]}
+        self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 64, 64]}
+        self.run_test(trt_engine_num=1, paddle_op_num=2, threshold=1e-2)
+
+    def test_trt_int8_check_output(self):
+        self.trt_param.precision = paddle_infer.PrecisionType.Int8
+        self.run_test(
+            trt_engine_num=1, paddle_op_num=2, quant=True, threshold=1e-1)
 if __name__ == "__main__":
......
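To exercise just one of the new precision cases during debugging, the standard unittest machinery can select an individual method. A small sketch, assuming a TensorRT-enabled build and that the class above is importable:

import unittest

suite = unittest.TestSuite()
suite.addTest(TrtConvertConv2dTest('test_trt_int8_check_output'))
unittest.TextTestRunner(verbosity=2).run(suite)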
@@ -16,6 +16,7 @@ import numpy as np
 import unittest
 import itertools
 import abc
+import logging
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -26,6 +27,9 @@ from typing import *
 from program_config import TensorConfig, OpConfig, ProgramConfig
 from auto_scan_test import AutoScanTest
 
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(filename)s - %(message)s")
+
 class TrtLayerAutoScanTest(AutoScanTest):
     class TensorRTParam:
@@ -42,6 +46,18 @@ class TrtLayerAutoScanTest(AutoScanTest):
             self.use_static = use_static
             self.use_calib_mode = use_calib_mode
+    class DynamicShapeParam:
+        '''
+        Prepare TensorRT subgraph engine dynamic shape parameters.
+        '''
+
+        def __init__(self, min_input_shape, max_input_shape, opt_input_shape,
+                     disable_trt_plugin_fp16):
+            self.min_input_shape = min_input_shape
+            self.max_input_shape = max_input_shape
+            self.opt_input_shape = opt_input_shape
+            self.disable_trt_plugin_fp16 = disable_trt_plugin_fp16
     def __init__(self, methodName='runTest'):
         super(TrtLayerAutoScanTest, self).__init__(methodName)
         self.trt_param = self.TensorRTParam(
@@ -51,6 +67,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
             precision=paddle_infer.PrecisionType.Float32,
             use_static=False,
             use_calib_mode=False)
+        self.dynamic_shape = self.DynamicShapeParam({}, {}, {}, False)
 
     def update_program_input_and_weight_with_attr(self, op_attr_list):
         raise NotImplementedError
@@ -96,6 +113,7 @@ class TrtLayerAutoScanTest(AutoScanTest):
         config = paddle_infer.Config()
         config.enable_use_gpu(100, 0)
         if use_trt:
+            config.switch_ir_debug()
             config.enable_tensorrt_engine(
                 max_batch_size=self.trt_param.max_batch_size,
                 workspace_size=self.trt_param.workspace_size,
@@ -103,13 +121,22 @@ class TrtLayerAutoScanTest(AutoScanTest):
                 precision_mode=precision_mode,
                 use_static=self.trt_param.use_static,
                 use_calib_mode=self.trt_param.use_calib_mode)
+            if (len(self.dynamic_shape.min_input_shape) != 0 and
+                    self.dynamic_shape.min_input_shape.keys() ==
+                    self.dynamic_shape.max_input_shape.keys() and
+                    self.dynamic_shape.min_input_shape.keys() ==
+                    self.dynamic_shape.opt_input_shape.keys()):
+                config.set_trt_dynamic_shape_info(
+                    self.dynamic_shape.min_input_shape,
+                    self.dynamic_shape.max_input_shape,
+                    self.dynamic_shape.opt_input_shape,
+                    self.dynamic_shape.disable_trt_plugin_fp16)
         return config
     @abc.abstractmethod
     def sample_predictor_configs(self):
+        logging.info('--------- gpu inference ---------')
         yield self.create_program_config(use_trt=False)
+        logging.info('--------- trt inference ---------')
         yield self.create_program_config(
             use_trt=True, precision_mode=self.trt_param.precision)
+        if self.trt_param.precision == paddle_infer.PrecisionType.Float32:
+            yield self.create_program_config(
+                use_trt=True, precision_mode=paddle_infer.PrecisionType.Half)
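Outside the harness, the same dynamic-shape setup reduces to a few Config calls. A minimal sketch, assuming hypothetical conv2d.pdmodel/conv2d.pdiparams files and the shape ranges the conv2d test above registers:

import paddle.inference as paddle_infer

config = paddle_infer.Config('conv2d.pdmodel', 'conv2d.pdiparams')
config.enable_use_gpu(100, 0)
config.enable_tensorrt_engine(
    max_batch_size=4,
    workspace_size=1 << 30,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)
# min / max / opt shape dicts must share the same input names.
config.set_trt_dynamic_shape_info(
    {"input_data": [1, 3, 32, 32]},
    {"input_data": [4, 3, 64, 64]},
    {"input_data": [1, 3, 64, 64]},
    False)  # disable_trt_plugin_fp16
predictor = paddle_infer.create_predictor(config)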
@@ -757,7 +757,7 @@ def load_inference_model(path_prefix, executor, **kwargs):
                 "params_filename cannot be None when path_prefix is None.")
         load_dirname = ''
         program_bytes = model_filename
-        params_filename = params_filename
+        params_bytes = params_filename
     # load from file
     else:
         # check and norm path_prefix
@@ -795,12 +795,12 @@ def load_inference_model(path_prefix, executor, **kwargs):
             program_bytes = load_from_file(model_path)
             load_dirname = os.path.dirname(params_path)
             params_filename = os.path.basename(params_path)
+            # load params data
+            params_path = os.path.join(load_dirname, params_filename)
+            params_bytes = load_from_file(params_path)
 
     # deserialize bytes to program
     program = deserialize_program(program_bytes)
-    # load params data
-    params_path = os.path.join(load_dirname, params_filename)
-    params_bytes = load_from_file(params_path)
 
     # deserialize bytes to params
     deserialize_persistables(program, params_bytes, executor)
......
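The rename above (params_filename to params_bytes) is what makes the in-memory branch usable; create_quant_model in this change depends on it. A minimal sketch of that branch, assuming model_bytes and params_bytes already hold a serialized program and persistables (e.g. the values returned by create_fake_model):

import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# path_prefix=None selects the in-memory branch patched above:
# model_filename/params_filename carry raw bytes instead of file names.
program, feed_names, fetch_targets = paddle.static.load_inference_model(
    path_prefix=None,
    executor=exe,
    model_filename=model_bytes,
    params_filename=params_bytes)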