diff --git a/dygraph/transformer/train.py b/dygraph/transformer/train.py index 658c082ac52abff467a8aa7160446c19cd3a8198..42a34d32545b8b73d3083ce733eb97b82ecaefc4 100644 --- a/dygraph/transformer/train.py +++ b/dygraph/transformer/train.py @@ -110,7 +110,7 @@ class ModelHyperParams(object): # to process after each sub-layer postprocess_cmd = "da" # dropout + residual connection # random seed used in dropout for CE. - dropout_seed = None + dropout_seed = 0 # the flag indicating whether to share embedding and softmax weights. # vocabularies in source and target should be same for weight sharing. weight_sharing = False