提交 bafb724c 编写于 作者: weixin_43283383's avatar weixin_43283383

#33 fix performance issue

上级 54fd9705
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
</parent> </parent>
<properties> <properties>
<elasticsearch.version>1.1.1</elasticsearch.version> <elasticsearch.version>1.0.0</elasticsearch.version>
</properties> </properties>
<repositories> <repositories>
......
...@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter { ...@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//处理词段队列 //处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]); Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
for(Hit hit : tmpArray){ for(Hit hit : tmpArray){
hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit); hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){ if(hit.isMatch()){
//输出当前的词 //输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD); Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
...@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter { ...@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//********************************* //*********************************
//再对当前指针位置的字符进行单字匹配 //再对当前指针位置的字符进行单字匹配
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1); Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词 if(singleCharHit.isMatch()){//首字成词
//输出当前的词 //输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD); Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
......
...@@ -119,7 +119,7 @@ public class Dictionary { ...@@ -119,7 +119,7 @@ public class Dictionary {
for(String word : words){ for(String word : words){
if (word != null) { if (word != null) {
//批量加载词条到主内存词典中 //批量加载词条到主内存词典中
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray()); singleton._MainDict.fillSegment(word.trim().toCharArray());
} }
} }
} }
...@@ -133,7 +133,7 @@ public class Dictionary { ...@@ -133,7 +133,7 @@ public class Dictionary {
for(String word : words){ for(String word : words){
if (word != null) { if (word != null) {
//批量屏蔽词条 //批量屏蔽词条
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray()); singleton._MainDict.disableSegment(word.trim().toCharArray());
} }
} }
} }
...@@ -152,7 +152,7 @@ public class Dictionary { ...@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInMainDict(char[] charArray , int begin, int length){ public Hit matchInMainDict(char[] charArray , int begin, int length){
return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); return singleton._MainDict.match(charArray, begin, length);
} }
/** /**
...@@ -160,7 +160,7 @@ public class Dictionary { ...@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); return singleton._QuantifierDict.match(charArray, begin, length);
} }
...@@ -179,7 +179,7 @@ public class Dictionary { ...@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean * @return boolean
*/ */
public boolean isStopWord(char[] charArray , int begin, int length){ public boolean isStopWord(char[] charArray , int begin, int length){
return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch(); return singleton._StopWords.match(charArray, begin, length).isMatch();
} }
/** /**
...@@ -205,7 +205,7 @@ public class Dictionary { ...@@ -205,7 +205,7 @@ public class Dictionary {
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
...@@ -255,7 +255,7 @@ public class Dictionary { ...@@ -255,7 +255,7 @@ public class Dictionary {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 //加载扩展词典数据到主内存词典中
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
...@@ -298,7 +298,7 @@ public class Dictionary { ...@@ -298,7 +298,7 @@ public class Dictionary {
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); _StopWords.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
...@@ -342,7 +342,7 @@ public class Dictionary { ...@@ -342,7 +342,7 @@ public class Dictionary {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展停止词典数据到内存中 //加载扩展停止词典数据到内存中
_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); _StopWords.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
...@@ -383,7 +383,7 @@ public class Dictionary { ...@@ -383,7 +383,7 @@ public class Dictionary {
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _QuantifierDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
...@@ -440,7 +440,6 @@ public class Dictionary { ...@@ -440,7 +440,6 @@ public class Dictionary {
} }
private void loadSuffixDict(){ private void loadSuffixDict(){
_SuffixDict = new DictSegment((char)0); _SuffixDict = new DictSegment((char)0);
......
...@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer { ...@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
if(nextLexeme != null){ if(nextLexeme != null){
//将Lexeme转成Attributes //将Lexeme转成Attributes
//设置词元文本 //设置词元文本
termAtt.append(nextLexeme.getLexemeText().toLowerCase()); termAtt.append(nextLexeme.getLexemeText());
//设置词元长度 //设置词元长度
termAtt.setLength(nextLexeme.getLength()); termAtt.setLength(nextLexeme.getLength());
//设置词元位移 //设置词元位移
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册