diff --git a/PaddleNLP/language_representations_kit/BERT/reader/pretraining.py b/PaddleNLP/language_representations_kit/BERT/reader/pretraining.py index c21a43d33caedd9a01c02dacbedd01a16e1eec9f..f43400eb2da68e1ea2292ba27572f17fb3d985b0 100644 --- a/PaddleNLP/language_representations_kit/BERT/reader/pretraining.py +++ b/PaddleNLP/language_representations_kit/BERT/reader/pretraining.py @@ -159,7 +159,7 @@ class DataReader(object): src_ids = origin_src_ids[:origin_sep_index + 1] + pair_src_ids[ pair_sep_index + 1:] - if len(src_ids) >= self.max_seq_len: + if len(src_ids) > self.max_seq_len: miss_num += 1 continue sent_ids = [0] * len(origin_src_ids[:origin_sep_index + 1]) + [