diff --git a/demo/sequence-labeling/predict.py b/demo/sequence-labeling/predict.py index 6465eb0c2f2871ff2996cf2a7d651b67bbb2551b..28044cd5a2e2c02dc757bdc17753cfdfc4d6fd27 100644 --- a/demo/sequence-labeling/predict.py +++ b/demo/sequence-labeling/predict.py @@ -107,12 +107,8 @@ if __name__ == '__main__': labels = np_labels.reshape([-1]).astype(np.int32).tolist() label_str = "" - count = 0 - for label_val in labels: - label_str += inv_label_map[label_val] - count += 1 - if count == np_lens: - break + for i in range(1, np_lens-1): + label_str += inv_label_map[labels[i]] print("%s\tpredict=%s" % (test_examples[index], label_str)) diff --git a/paddlehub/reader/nlp_reader.py b/paddlehub/reader/nlp_reader.py index 1751d4329d03b092197a8d380f540d6625f0a435..75f79e6caa9c02bec630e9d5617d48c32d53a6b5 100644 --- a/paddlehub/reader/nlp_reader.py +++ b/paddlehub/reader/nlp_reader.py @@ -302,8 +302,7 @@ class SequenceLabelReader(BaseReader): return return_list def _reseg_token_label(self, tokens, labels, tokenizer): - if len(tokens) != len(labels): - raise ValueError("The length of tokens must be same with labels") + assert len(tokens) == len(labels) ret_tokens = [] ret_labels = [] for token, label in zip(tokens, labels): @@ -319,8 +318,7 @@ class SequenceLabelReader(BaseReader): sub_label = "I-" + label[2:] ret_labels.extend([sub_label] * (len(sub_token) - 1)) - if len(ret_tokens) != len(labels): - raise ValueError("The length of ret_tokens can't match with labels") + assert len(ret_tokens) == len(ret_labels) return ret_tokens, ret_labels def _convert_example_to_record(self, example, max_seq_length, tokenizer):