未验证 提交 b363eaed 编写于 作者: S Steffy-zxf 提交者: GitHub

Merge pull request #60 from Austendeng/dengluodan

Remove predicted labels for the "CLS" and "SEP" tokens
...@@ -107,12 +107,8 @@ if __name__ == '__main__': ...@@ -107,12 +107,8 @@ if __name__ == '__main__':
labels = np_labels.reshape([-1]).astype(np.int32).tolist() labels = np_labels.reshape([-1]).astype(np.int32).tolist()
label_str = "" label_str = ""
count = 0 for i in range(1, np_lens-1):
for label_val in labels: label_str += inv_label_map[labels[i]]
label_str += inv_label_map[label_val]
count += 1
if count == np_lens:
break
print("%s\tpredict=%s" % (test_examples[index], label_str)) print("%s\tpredict=%s" % (test_examples[index], label_str))
......
...@@ -302,8 +302,7 @@ class SequenceLabelReader(BaseReader): ...@@ -302,8 +302,7 @@ class SequenceLabelReader(BaseReader):
return return_list return return_list
def _reseg_token_label(self, tokens, labels, tokenizer): def _reseg_token_label(self, tokens, labels, tokenizer):
if len(tokens) != len(labels): assert len(tokens) == len(labels)
raise ValueError("The length of tokens must be same with labels")
ret_tokens = [] ret_tokens = []
ret_labels = [] ret_labels = []
for token, label in zip(tokens, labels): for token, label in zip(tokens, labels):
...@@ -319,8 +318,7 @@ class SequenceLabelReader(BaseReader): ...@@ -319,8 +318,7 @@ class SequenceLabelReader(BaseReader):
sub_label = "I-" + label[2:] sub_label = "I-" + label[2:]
ret_labels.extend([sub_label] * (len(sub_token) - 1)) ret_labels.extend([sub_label] * (len(sub_token) - 1))
if len(ret_tokens) != len(labels): assert len(ret_tokens) == len(ret_labels)
raise ValueError("The length of ret_tokens can't match with labels")
return ret_tokens, ret_labels return ret_tokens, ret_labels
def _convert_example_to_record(self, example, max_seq_length, tokenizer): def _convert_example_to_record(self, example, max_seq_length, tokenizer):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册