diff --git a/ERNIE/reader/pretraining.py b/ERNIE/reader/pretraining.py index c1233ad014ac9e8300bf33dc344de1e8c5a69c40..ced210727d01d287c79037b88f42324b0dbc0c28 100644 --- a/ERNIE/reader/pretraining.py +++ b/ERNIE/reader/pretraining.py @@ -75,7 +75,7 @@ class ErnieDataReader(object): def parse_line(self, line, max_seq_len=512): """ parse one line to token_ids, sentence_ids, pos_ids, label """ - line = line.strip().split(";") + line = line.strip().decode().split(";") assert len(line) == 5, "One sample must have 5 fields!" (token_ids, sent_ids, pos_ids, seg_labels, label) = line token_ids = [int(token) for token in token_ids.split(" ")]