Unverified commit 6c4621f1 authored by J Jason, committed by GitHub

Refine autoscan pass (#37363)

Parent e87545ce
@@ -72,6 +72,7 @@ set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60)
set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60)
set_tests_properties(test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30)
set_tests_properties(test_emb_eltwise_layernorm_fuse_pass PROPERTIES TIMEOUT 120)
+set_tests_properties(test_fc_fuse_pass PROPERTIES TIMEOUT 120)
if (WITH_MKLDNN)
set_tests_properties(test_mkldnn_prelu_op PROPERTIES TIMEOUT 300)
......
@@ -31,7 +31,8 @@ from typing import Optional, List, Callable, Dict, Any, Set
from program_config import TensorConfig, OpConfig, ProgramConfig, create_fake_model, create_quant_model
import hypothesis
-from hypothesis import given, settings, seed, example, assume
+from hypothesis import given, settings, seed, reproduce_failure
import hypothesis.strategies as st
logging.basicConfig(level=logging.INFO, format="%(message)s")
@@ -78,6 +79,11 @@ class AutoScanTest(unittest.TestCase):
abs_dir = os.path.abspath(os.path.dirname(__file__))
self.cache_dir = os.path.join(abs_dir,
str(self.__module__) + '_cache_dir')
+self.available_passes_in_framework = set()
+self.num_ran_programs = 0
+self.num_invalid_programs = 0
+self.num_skipped_tests = 0
+self.num_predictor_kinds = 0
@abc.abstractmethod
def sample_program_configs(self):
@@ -99,9 +105,8 @@ class AutoScanTest(unittest.TestCase):
note: str):
self.skip_cases.append((teller, reason, note))
-@abc.abstractmethod
def is_program_valid(self, program_config: ProgramConfig) -> bool:
-raise NotImplementedError
+return True
def run_test_config(self, model, params, prog_config, pred_config,
feed_data) -> Dict[str, np.ndarray]:
@@ -110,6 +115,8 @@ class AutoScanTest(unittest.TestCase):
'''
pred_config.set_model_buffer(model, len(model), params, len(params))
predictor = paddle_infer.create_predictor(pred_config)
+self.available_passes_in_framework = self.available_passes_in_framework | set(
+pred_config.pass_builder().all_passes())
for name, _ in prog_config.inputs.items():
input_tensor = predictor.get_input_handle(name)
@@ -277,39 +284,118 @@ class PassAutoScanTest(AutoScanTest):
def check_op_version(self):
status = True
for pass_name in self.passes:
+if pass_name not in self.available_passes_in_framework:
+continue
if not PassVersionChecker.IsCompatible(pass_name):
self.fail_log('{} version check failed.'.format(pass_name))
status = False
return status
-def assert_op_size(self, fusion_before_num, fusion_after_num, origin_model):
+def add_skip_pass_case(self):
+return
+def assert_op_list(self, op_list_after_fusion):
if not self.passes:
raise ValueError(
-'In PassAutoScan you should give a valid pass name.')
+"In PassAutoScan you should give a valid pass name.")
last_passed_program = os.path.join(self.cache_dir,
-self.passes[-1] + '.pdmodel')
+self.passes[-1] + ".pdmodel")
if not os.path.exists(last_passed_program):
raise ValueError(
"Cannot find file {}, please make sure that your pass name is correct".
format(last_passed_program))
model_bytes = paddle.static.load_from_file(last_passed_program)
pg = paddle.static.deserialize_program(model_bytes)
main_block = pg.desc.block(0)
-after_op_size = main_block.op_size()
-pg = paddle.static.deserialize_program(origin_model)
-main_block = pg.desc.block(0)
-before_op_size = main_block.op_size()
-self.assertTrue(before_op_size == fusion_before_num,
-'before fusion op size is {}, but got {}!'.format(
-before_op_size, fusion_before_num))
-self.assertTrue(after_op_size == fusion_after_num,
-'after fusion op size is {}, but got {}!'.format(
-after_op_size, fusion_after_num))
+after_op_list = list()
+for i in range(main_block.op_size()):
+if main_block.op(i).type() in ["feed", "fetch"]:
+continue
+after_op_list.append(main_block.op(i).type())
+self.assertTrue(
+op_list_after_fusion == after_op_list,
+"Expected operator list after fusion is {}, but now it's {}".format(
+op_list_after_fusion, after_op_list), )
-def run_test(self, quant=False, *args, **kwargs):
+def run_and_statis(
+self,
+quant=False,
+max_examples=100,
+reproduce=None,
+min_success_num=25,
+max_duration=180,
+passes=None, ):
+if os.getenv('HYPOTHESIS_TEST_PROFILE', 'ci') == "dev":
+max_examples *= 10
+min_success_num *= 10
+# in the dev/CE phase there is no time limit
+max_duration = -1
+start_time = time.time()
+settings.register_profile(
+"ci",
+max_examples=max_examples,
+suppress_health_check=hypothesis.HealthCheck.all(),
+deadline=None,
+print_blob=True,
+derandomize=True,
+report_multiple_bugs=False, )
+settings.load_profile("ci")
+assert passes is not None, "Parameter `passes` must be defined in run_and_statis."
+self.passes = passes
+self.add_skip_pass_case()
+def program_generator(draw):
+return self.sample_program_config(draw)
+def run_test(prog_config):
+return self.run_test(quant=quant, prog_configs=[prog_config])
+generator = st.composite(program_generator)
+loop_func = given(generator())(run_test)
+if reproduce is not None:
+loop_func = reproduce(loop_func)
+logging.info("Start running tests of {}".format(type(self)))
+loop_func()
+logging.info(
+"===================Statistical Information===================")
+logging.info("Number of Generated Programs: {}".format(
+self.num_ran_programs + self.num_invalid_programs))
+logging.info("Number of Invalid Programs: {}".format(
+self.num_invalid_programs))
+logging.info("Number of Ran Programs: {}".format(self.num_ran_programs))
+logging.info("Number of Skipped Tests: {}".format(
+self.num_skipped_tests))
+successful_ran_programs = int(self.num_ran_programs -
+self.num_skipped_tests /
+self.num_predictor_kinds)
+logging.info(
+"Number of successfully ran programs: approximately {}".
+format(successful_ran_programs))
+if successful_ran_programs < min_success_num:
+logging.warning(
+"satisfied_programs = ran_programs - num_skipped_tests / num_predictor_kinds"
+)
+logging.error(
+"At least {} programs need to run successfully, but only about {} did.".
+format(min_success_num, successful_ran_programs))
+assert False
+used_time = time.time() - start_time
+if max_duration > 0 and used_time > max_duration:
+logging.error(
+"The duration exceeds {} seconds; if this is necessary, set a larger value for the `max_duration` parameter.".
+format(max_duration))
+assert False
+def run_test(self, quant=False, prog_configs=None):
status = True
-for prog_config in self.sample_program_configs(*args, **kwargs):
+for prog_config in prog_configs:
# if the program is invalid, skip that case
if not self.is_program_valid(prog_config):
+self.num_invalid_programs += 1
continue
+self.num_ran_programs += 1
model, params = create_fake_model(prog_config)
if quant:
model, params = create_quant_model(model, params)
@@ -330,13 +416,16 @@ class PassAutoScanTest(AutoScanTest):
feed_data))
self.success_log('RUN_CPU_BASELINE done')
-for pred_config, nodes_num, (
+self.num_predictor_kinds = 0
+for pred_config, op_list, (
atol, rtol) in self.sample_predictor_configs(prog_config):
+self.num_predictor_kinds += 1
# skip info
skip_flag = False
for skip_info in self.skip_cases:
if skip_info[0](prog_config, pred_config):
skip_flag = True
+self.num_skipped_tests += 1
if skip_info[1] == SkipReasons.PASS_ACCURACY_ERROR:
self.skip_log("[PASS_ACCURACY_ERROR] " + skip_info[
2] + ' ' + ' vs ' + self.inference_config_str(
@@ -357,7 +446,7 @@ class PassAutoScanTest(AutoScanTest):
self.assert_tensors_near(atol, rtol, results[-1],
results[0])
if not skip_flag:
-self.assert_op_size(nodes_num[0], nodes_num[1], model)
+self.assert_op_list(op_list)
except Exception as e:
self.fail_log(
......
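The heart of the new `run_and_statis` above is the bridge from a draw-style generator to Hypothesis's `@given` loop. Below is a minimal standalone sketch of that wiring; `make_config` and `check` are toy stand-ins for `sample_program_config` and `run_test` (illustrative names, not part of the patch):

# Editor's sketch: the st.composite + given() composition used by run_and_statis.
import hypothesis
from hypothesis import given, settings
import hypothesis.strategies as st

settings.register_profile(
    "ci",
    max_examples=10,
    suppress_health_check=hypothesis.HealthCheck.all(),
    deadline=None,
    derandomize=True)
settings.load_profile("ci")

def make_config(draw):
    # stands in for self.sample_program_config(draw)
    return {"batch_size": draw(st.integers(min_value=1, max_value=4))}

def check(config):
    # stands in for self.run_test(quant=quant, prog_configs=[config])
    assert config["batch_size"] >= 1

generator = st.composite(make_config)  # turn the draw-function into a strategy
loop_func = given(generator())(check)  # bind the strategy to the check function
loop_func()                            # Hypothesis generates and runs the cases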
@@ -34,17 +34,24 @@ class TensorConfig:
def __init__(self,
lod: Optional[List[List[int]]]=None,
-data_gen: Optional[Callable[..., np.array]]=None):
+data_gen: Optional[Callable[..., np.array]]=None,
+shape: Optional[List[int]]=None):
'''
shape: The shape of the tensor.
dtype: The data type of the tensor.
data: The value of WeightVar. for input, it should be None
'''
self.lod = lod
-self.data_gen = data_gen
-self.data = data_gen()
-self.dtype = data_gen().dtype
-self.shape = data_gen().shape
+if data_gen is not None:
+self.data_gen = data_gen
+self.data = data_gen()
+self.dtype = data_gen().dtype
+self.shape = data_gen().shape
+else:
+assert shape is not None, "When data_gen is not provided, shape must not be None"
+self.data = np.random.normal(0.0, 1.0, shape).astype(np.float32)
+self.shape = shape
+self.dtype = self.data.dtype
def __repr__(self):
return str({'shape': self.shape, 'lod': self.lod, 'dtype': self.dtype})
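With the change above, a TensorConfig can be declared either from a `data_gen` callable, as before, or from a bare shape, in which case float32 data is drawn from `np.random.normal(0.0, 1.0, shape)`. A short sketch of the two call styles (assuming `program_config` is importable, as in the tests in this commit):

# Editor's sketch: two equivalent ways to declare a weight tensor.
import numpy as np
from program_config import TensorConfig

# 1. Explicit generator: data/dtype/shape all come from the callable.
w1 = TensorConfig(data_gen=lambda: np.random.random([64, 32]).astype(np.float32))

# 2. Shape only: random normal float32 data is generated automatically.
w2 = TensorConfig(shape=[64, 32])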
@@ -57,11 +64,15 @@ class OpConfig:
type: str,
inputs: Dict[str, List[str]],
outputs: Dict[str, List[str]],
-attrs: Dict[str, Any]):
+attrs: Dict[str, Any]=None,
+**kwargs):
self.type = type
self.inputs = inputs
self.outputs = outputs
self.attrs = attrs
+if self.attrs is None:
+self.attrs = dict()
+self.attrs.update(kwargs)
def __repr__(self):
log_str = self.type
......
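OpConfig now treats `attrs` as optional and folds any extra keyword arguments into it, so the two spellings below configure the same op (a sketch; the attribute values are illustrative):

# Editor's sketch: attrs dict vs. **kwargs after this change.
from program_config import OpConfig

# Old style: attributes packed into an explicit attrs dict.
mul_a = OpConfig(
    type="mul",
    inputs={"X": ["mul_x"], "Y": ["mul_y"]},
    outputs={"Out": ["mul_out"]},
    attrs={"x_num_col_dims": 1, "y_num_col_dims": 1})

# New style: the same attributes passed as keyword arguments,
# as test_fc_fuse_pass does further down in this commit.
mul_b = OpConfig(
    "mul",
    inputs={"X": ["mul_x"], "Y": ["mul_y"]},
    outputs={"Out": ["mul_out"]},
    x_num_col_dims=1,
    y_num_col_dims=1)

assert mul_a.attrs == mul_b.attrs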
@@ -71,7 +71,19 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
return True
-def sample_program_configs(self, *args, **kwargs):
+def sample_program_config(self, draw):
+is_sparse = draw(st.booleans())
+is_distributed = draw(st.booleans())
+padding_idx = draw(st.integers())
+axis = draw(st.integers(min_value=-4, max_value=4))
+op_type = draw(st.sampled_from(['lookup_table', 'lookup_table_v2']))
+epsilon = draw(st.floats(min_value=0, max_value=0.001))
+# begin_norm_axis has to be 2
+begin_norm_axis = 2
+batch_size = draw(st.integers(min_value=1, max_value=4))
+input_dim = draw(st.sampled_from([32, 64]))
+weight_size = draw(st.sampled_from([[64, 64], [64, 32]]))
def generate_input(attrs):
if attrs[0]['op_type'] == 'lookup_table':
return np.random.randint(
@@ -101,19 +113,19 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
np.float32)
attrs = [{
-'is_sparse': kwargs['is_sparse'],
-'is_distributed': kwargs['is_distributed'],
-'padding_idx': kwargs['padding_idx'],
-'op_type': kwargs['op_type']
+'is_sparse': is_sparse,
+'is_distributed': is_distributed,
+'padding_idx': padding_idx,
+'op_type': op_type
}, {
-'axis': kwargs['axis']
+'axis': axis
}, {
-'begin_norm_axis': kwargs['begin_norm_axis'],
-'epsilon': kwargs['epsilon']
+'begin_norm_axis': begin_norm_axis,
+'epsilon': epsilon
}, {
-'batch_size': kwargs['batch_size'],
-'input_dim': kwargs['input_dim'],
-'weight_size': kwargs['weight_size']
+'batch_size': batch_size,
+'input_dim': input_dim,
+'weight_size': weight_size
}]
emb_op1 = OpConfig(
@@ -203,13 +215,12 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
},
outputs=["layer_norm_output1"])
-yield program_config
+return program_config
def sample_predictor_configs(self, program_config):
# only used in gpu passes and trt passes.
-config = self.create_inference_config(
-passes=['embedding_eltwise_layernorm_fuse_pass'], use_gpu=True)
-yield config, (10, 5), (1e-5, 1e-5)
+config = self.create_inference_config(use_gpu=True)
+yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5)
# trt static_shape
config = self.create_trt_inference_config()
config.enable_tensorrt_engine(
@@ -219,7 +230,7 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
precision_mode=paddle_infer.PrecisionType.Float32,
use_static=False,
use_calib_mode=False)
-yield config, (10, 5), (1e-5, 1e-5)
+yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5)
# trt dynamic_shape
config = self.create_trt_inference_config()
config.enable_tensorrt_engine(
@@ -257,7 +268,7 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
"input_data2": [2, 128],
"input_data3": [2, 128]
})
-yield config, (10, 5), (1e-5, 1e-5)
+yield config, ['fused_embedding_eltwise_layernorm'], (1e-5, 1e-5)
def add_skip_pass_case(self):
def teller1(program_config, predictor_config):
@@ -272,26 +283,13 @@ class TestEmbeddingEltwiseLayerNormFusePass(PassAutoScanTest):
self.add_skip_case(teller1, SkipReasons.PASS_ACCURACY_ERROR,
"The pass output has diff in a specific case.")
-@given(
-is_sparse=st.booleans(),
-is_distributed=st.booleans(),
-padding_idx=st.integers(),
-axis=st.integers(
-min_value=-4, max_value=4),
-op_type=st.sampled_from(['lookup_table', 'lookup_table_v2']),
-epsilon=st.floats(
-min_value=0, max_value=0.001),
-begin_norm_axis=st.integers(
-min_value=-4, max_value=4),
-batch_size=st.integers(
-min_value=1, max_value=4),
-input_dim=st.sampled_from([32, 64]),
-weight_size=st.sampled_from([[64, 64], [64, 32]]))
-def test(self, *args, **kwargs):
-assume(kwargs['begin_norm_axis'] == 2)
-self.add_skip_pass_case()
-self.run_test(quant=False, *args, **kwargs)
+def test(self):
+# TODO: this fuse needs fixing; currently no program can run successfully
+self.run_and_statis(
+quant=False,
+max_examples=50,
+passes=["embedding_eltwise_layernorm_fuse_pass"],
+min_success_num=0)
if __name__ == "__main__":
......
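Note the changed contract of `sample_predictor_configs` above: each yield now carries the expected post-fusion op-type list (here `['fused_embedding_eltwise_layernorm']`) instead of the old (before, after) node counts, and `assert_op_list` compares it against the cached `.pdmodel` with feed/fetch filtered out. A standalone sketch of that comparison, with a plain list standing in for the deserialized block:

# Editor's sketch: the check performed by assert_op_list; `block` stands in
# for the op types of block 0 of the deserialized program.
def ops_after_fusion(block):
    # feed/fetch are bookkeeping ops, excluded from the comparison
    return [t for t in block if t not in ("feed", "fetch")]

block = ["feed", "feed", "feed", "fused_embedding_eltwise_layernorm", "fetch"]
assert ops_after_fusion(block) == ["fused_embedding_eltwise_layernorm"]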
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,42 +12,159 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import print_function
-import unittest
+from auto_scan_test import PassAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig, OpConfig
import numpy as np
-from inference_pass_test import InferencePassTest
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-from paddle.fluid.core import AnalysisConfig
-from paddle.fluid.core import PassVersionChecker
-class FcFusePassTest(InferencePassTest):
-def setUp(self):
-with fluid.program_guard(self.main_program, self.startup_program):
-data = fluid.data(
-name="data", shape=[-1, 128, 768], dtype="float32")
-data_y = fluid.data(name="y", shape=[-1, 128, 768], dtype="float32")
-fc_out1 = fluid.layers.fc(input=data,
-size=3072,
-num_flatten_dims=2,
-act="relu")
-fc_out2 = fluid.layers.fc(input=fc_out1,
-size=768,
-num_flatten_dims=2)
-self.feeds = {"data": np.random.random((4, 128, 768)).astype("float32")}
-self.fetch_list = [fc_out2]
-def test_check_output(self):
-use_gpu = [False]
-if core.is_compiled_with_cuda():
-use_gpu.append(True)
-for i in range(len(use_gpu)):
-self.check_output_with_option(use_gpu[i])
-self.assertTrue(PassVersionChecker.IsCompatible('fc_fuse_pass'))
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+import hypothesis
+from hypothesis import given, settings, seed, example, assume, reproduce_failure
+import hypothesis.strategies as st
+class TestFcFusePass(PassAutoScanTest):
+"""
+x_var    y_var (persistable)
+    \      /
+      mul        bias_var (persistable)
+       |            /
+  mul_out_var      /
+        \         /
+     elementwise_add
+"""
+def sample_predictor_configs(self, program_config):
+# for cpu
+config = self.create_inference_config(use_gpu=False)
+yield config, ["fc"], (1e-5, 1e-5)
+# for gpu
+config = self.create_inference_config(use_gpu=True)
+yield config, ["fc"], (1e-5, 1e-5)
+def add_skip_pass_case(self):
+# Here we add some skip rules to avoid known bugs
+def teller1(program_config, predictor_config):
+# shape of bias should be [1, mul_y_shape[-1]] or [mul_y_shape[-1]]
+y_shape = list(program_config.weights["mul_y"].shape)
+bias_shape = program_config.weights["bias"].shape
+if (bias_shape != [y_shape[-1], ] and
+bias_shape != [1, y_shape[-1]]):
+return True
+return False
+def teller2(program_config, predictor_config):
+# TODO: the fuse has a bug when axis != -1
+if program_config.ops[1].attrs["axis"] != -1:
+return True
+return False
+self.add_skip_case(
+teller1,
+SkipReasons.PASS_ACCURACY_ERROR,
+"The pass output has diff when the shape of bias is not [out_size] or [1, out_size].",
+)
+self.add_skip_case(
+teller2,
+SkipReasons.PASS_ACCURACY_ERROR,
+"The pass output has diff when the axis of elementwise_add is not -1.",
+)
+def is_program_valid(self, prog_config):
+add_x_rank = prog_config.ops[0].attrs["x_num_col_dims"] + 1
+add_y_rank = len(prog_config.weights["bias"].shape)
+axis = prog_config.ops[1].attrs["axis"]
+if add_x_rank == add_y_rank:
+# when the ranks of the two add inputs match, axis must be -1 or 0
+if axis != -1 and axis != 0:
+return False
+return True
+def sample_program_config(self, draw):
+# 1. Generate shape of input:X of mul
+x_shape = draw(
+st.lists(
+st.integers(
+min_value=1, max_value=4), min_size=2, max_size=4))
+# 2. Generate attr:x_num_col_dims/y_num_col_dims of mul
+x_num_col_dims = draw(
+st.integers(
+min_value=1, max_value=len(x_shape) - 1))
+y_num_col_dims = 1
+# 3. Generate legal shape of input:Y of mul
+y_shape = draw(
+st.lists(
+st.integers(
+min_value=1, max_value=8), min_size=2, max_size=2))
+y_shape[0] = int(np.prod(x_shape[x_num_col_dims:]))
+# 4. Generate legal attr:axis of elementwise_add
+mul_out_shape = x_shape[:x_num_col_dims] + y_shape[1:]
+axis = draw(st.integers(min_value=-1, max_value=x_num_col_dims))
+# 5. Generate legal shape of input:Y of elementwise_add
+if axis >= 0:
+max_bias_rank = x_num_col_dims + 1 - axis
+bias_rank = draw(st.integers(min_value=1, max_value=max_bias_rank))
+bias_shape = mul_out_shape[axis:axis + bias_rank]
+else:
+max_bias_rank = 1
+bias_rank = draw(
+st.integers(
+min_value=1, max_value=len(mul_out_shape)))
+bias_shape = mul_out_shape[-1 * bias_rank:]
+# 6. Randomly choose whether to broadcast the bias of elementwise_add, e.g. [3, 4] -> [1, 4]
+if draw(st.booleans()):
+broadcast_dims = draw(st.integers(min_value=1, max_value=bias_rank))
+for i in range(0, broadcast_dims):
+bias_shape[i] = 1
+# 7. Randomly choose whether to append a relu operator
+has_relu = draw(st.booleans())
+# Now we have all the parameters needed to compose a program:
+# shapes of input/weight tensors: x_shape, y_shape, bias_shape...
+# operator attributes: x_num_col_dims, y_num_col_dims, axis...
+# and a random boolean (has_relu) deciding whether the program includes a relu op.
+# The composed program may still be invalid or trigger bugs at runtime:
+# use `is_program_valid` to filter out invalid programs before running,
+# and `add_skip_pass_case` to ignore programs that are known to trigger bugs.
+mul_op = OpConfig(
+"mul",
+inputs={"X": ["mul_x"],
+"Y": ["mul_y"]},
+outputs={"Out": ["mul_out"]},
+x_num_col_dims=x_num_col_dims,
+y_num_col_dims=y_num_col_dims, )
+add_op = OpConfig(
+"elementwise_add",
+inputs={"X": ["mul_out"],
+"Y": ["bias"]},
+outputs={"Out": ["add_out"]},
+axis=axis, )
+ops = [mul_op, add_op]
+if has_relu:
+relu_op = OpConfig(
+"relu",
+inputs={"X": ["add_out"]},
+outputs={"Out": ["relu_out"]})
+ops.append(relu_op)
+program_config = ProgramConfig(
+ops=ops,
+weights={
+"mul_y": TensorConfig(shape=y_shape),
+"bias": TensorConfig(shape=bias_shape),
+},
+inputs={"mul_x": TensorConfig(shape=x_shape), },
+outputs=ops[-1].outputs["Out"], )
+return program_config
+def test(self):
+self.run_and_statis(
+quant=False, max_examples=300, passes=["fc_fuse_pass"])
if __name__ == "__main__":
......
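To make the shape bookkeeping in `sample_program_config` concrete, here is one worked draw (the numbers are illustrative, not taken from the patch):

# Editor's sketch: one concrete draw, following the numbered steps above.
import numpy as np

x_shape = [2, 3, 4]       # input X of mul, rank 3
x_num_col_dims = 1        # mul flattens X to [2, 3*4] = [2, 12]
y_shape = [int(np.prod(x_shape[x_num_col_dims:])), 5]   # [12, 5]

# mul keeps the leading x dims and appends the trailing y dim
mul_out_shape = x_shape[:x_num_col_dims] + y_shape[1:]  # [2, 5]

# with axis = -1 the bias aligns with the trailing dims of mul_out
axis = -1
bias_rank = 1
bias_shape = mul_out_shape[-bias_rank:]                 # [5]

# fc_fuse_pass only tolerates bias shaped [out_size] or [1, out_size];
# teller1 above skips the other broadcastable shapes.
assert bias_shape in ([5], [1, 5])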