diff --git a/BERT/reader/pretraining.py b/BERT/reader/pretraining.py index 6f99d6e36d559d72e993f4e08f942c2168c3dda2..c21a43d33caedd9a01c02dacbedd01a16e1eec9f 100644 --- a/BERT/reader/pretraining.py +++ b/BERT/reader/pretraining.py @@ -78,7 +78,7 @@ class DataReader(object): def parse_line(self, line, max_seq_len=512): """ parse one line to token_ids, sentence_ids, pos_ids, label """ - line = line.strip().split(";") + line = line.strip().decode().split(";") assert len(line) == 4, "One sample must have 4 fields!" (token_ids, sent_ids, pos_ids, label) = line token_ids = [int(token) for token in token_ids.split(" ")]