diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py
index 8960cbcdd2f574a647229894c44c2b6ea188b7d4..b1851f4c78ddf984b06cf67f628099d5b60c771e 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py
@@ -65,7 +65,9 @@ class ModelHyperParams(object):
     # number of head used in multi-head attention.
     n_head = 8
     # number of sub-layers to be stacked in the encoder and decoder.
-    n_layer = 6
+    # NOTE(zcd): the original number of layers is 6. To make this unit test
+    # faster, we reduce the number of layers to 4.
+    n_layer = 4
     # dropout rate used by all dropout layers.
     dropout = 0.1