def BMM(user_dict, sentence):
    """Segment *sentence* with backward maximum matching (BMM).

    The sentence is scanned from its end towards its beginning. In each
    round the longest candidate (at most as long as the longest dictionary
    word) that ends at the current position is tried first; the candidate
    is then shortened from the left until it is found in the dictionary or
    only one character remains. A single character is always accepted, so
    every round consumes at least one character.

    Progress is traced on standard output: every candidate that is tried,
    the words found so far after each round, and the final result.

    Args:
        user_dict: Iterable of dictionary words (hashable strings). It may
            be empty, in which case the sentence is split into single
            characters.
        sentence: The string to segment.

    Returns:
        The list of segments in their original left-to-right order. An
        empty sentence yields an empty list.
    """
    print('***开始切分BMM***')
    segment_words = []

    # Build the lookup set once: membership tests are O(1) instead of a
    # linear scan of the dictionary for every candidate.
    vocabulary = set(user_dict)

    # An empty dictionary (or one that only holds empty strings) would give
    # a window of 0, which either crashes max() or never advances the scan.
    # A window of 1 degrades gracefully to character-by-character output.
    max_len = max((len(word) for word in vocabulary), default=0)
    max_len = max(max_len, 1)

    start = len(sentence)  # exclusive end of the part still to be segmented
    k = 0  # round counter, used only for the trace output
    while start != 0:
        k += 1
        # Leftmost start of the window, clamped to the sentence beginning.
        window_begin = max(start - max_len, 0)

        # Try the longest candidate first, dropping one leading character
        # per attempt. The last candidate is a single character, which is
        # always accepted, so the loop is guaranteed to break.
        for index in range(window_begin, start):
            candidate = sentence[index:start]
            print(candidate)
            if candidate in vocabulary or len(candidate) == 1:
                # Scanning runs right to left, so prepend to keep order.
                segment_words.insert(0, candidate)
                start = index
                break

        print('当前已经切分:', segment_words)
        print('---第', k, '轮结束---')

    print('***切分完毕***')
    print('分词结果:', segment_words)
    return segment_words


if __name__ == '__main__':
    # Small demonstration run on a fixed dictionary and sentence.
    user_dict = ['时间', '就', '是', '⽣命']
    sentence = '时间就是⽣命'
    BMM(user_dict, sentence)