Commit 43c8bc9f, authored by weixin_43283383

Merge pull request #10 from wyhw/ik_lucene4

elasticsearch ik 0.20.x => 0.90.x
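
The jump from elasticsearch 0.20.x to 0.90.x crosses the Lucene 3.x to 4.x boundary, and that API break drives most of the edits below: in Lucene 4, Analyzer.tokenStream() is final and analyzers override createComponents() instead, consumers must follow the reset()/incrementToken()/end()/close() lifecycle, and this plugin additionally moves its Dictionary from static methods to a singleton accessor. A minimal sketch of the new Analyzer shape (not taken from this commit):

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.wltea.analyzer.lucene.IKTokenizer;

public class Lucene4StyleAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Lucene 4 creates the components once per thread and reuses them
        // via reset(Reader); the single-argument TokenStreamComponents
        // exposes the tokenizer itself as the token stream
        Tokenizer tokenizer = new IKTokenizer(reader, true);
        return new TokenStreamComponents(tokenizer);
    }
}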
pom.xml
@@ -31,7 +31,7 @@
     </parent>
 
     <properties>
-        <elasticsearch.version>0.20.2</elasticsearch.version>
+        <elasticsearch.version>0.90.0</elasticsearch.version>
     </properties>
 
     <repositories>
@@ -132,4 +132,4 @@
         </plugin>
     </plugins>
 </build>
 </project>
\ No newline at end of file
IkAnalyzer.java
@@ -2,19 +2,32 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.wltea.analyzer.lucene.IKTokenizer;
+//import org.wltea.lucene.IKTokenizer;
 
 import java.io.Reader;
 
 public class IkAnalyzer extends Analyzer {
+//    private boolean isMaxWordLength = false;
 
-    @Override public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new IKTokenizer(reader,true);
-    }
+//    @Override public TokenStream tokenStream(String fieldName, Reader reader) {
+//        return new IKTokenizer(reader,true);
+//    }
 
     public IkAnalyzer() {
         super();
     }
+
+    @Override
+    protected TokenStreamComponents createComponents(String s, Reader reader) {
+        Tokenizer tokenizer = new IKTokenizer(reader, true);
+        return new TokenStreamComponents(tokenizer, null);
+    }
+
+//    public boolean isMaxWordLength() {
+//        return isMaxWordLength;
+//    }
 }
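
One detail worth flagging in the hunk above: new TokenStreamComponents(tokenizer, null) passes null as the result stream. In stock Lucene 4 the two-argument constructor stores its second argument as the sink returned by Analyzer.tokenStream(), so a null there yields a null token stream; the single-argument form, which the IKAnalyzer rewrite later in this diff uses, is the conventional way to expose the tokenizer directly. A hedged correction of the method above:

    @Override
    protected TokenStreamComponents createComponents(String s, Reader reader) {
        Tokenizer tokenizer = new IKTokenizer(reader, true);
        // the tokenizer itself becomes the sink that tokenStream() returns
        return new TokenStreamComponents(tokenizer);
    }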
AnalyzeContext.java
@@ -24,11 +24,16 @@
  */
 package org.wltea.analyzer.core;
 
+import org.wltea.analyzer.dic.Dictionary;
+
 import java.io.IOException;
 import java.io.Reader;
-import java.util.*;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
 
-import org.wltea.analyzer.cfg.Configuration;
-import org.wltea.analyzer.dic.Dictionary;
 
 /**
  *
@@ -68,12 +73,12 @@ class AnalyzeContext {
     private Map<Integer , LexemePath> pathMap;
     // final segmentation result set
     private LinkedList<Lexeme> results;
+    private boolean useSmart;
 
     // segmenter configuration
-    private boolean useSmart;
+    // private Configuration cfg;
 
     public AnalyzeContext(boolean useSmart){
         this.useSmart = useSmart;
         this.segmentBuff = new char[BUFF_SIZE];
         this.charTypes = new int[BUFF_SIZE];
         this.buffLocker = new HashSet<String>();
@@ -313,7 +318,7 @@ class AnalyzeContext {
         while(result != null){
             // merge numerals and classifiers
             this.compound(result);
-            if(Dictionary.isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
+            if(Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
                 // stopword: move on to the next lexeme in the list
                 result = this.results.pollFirst();
             }else{
@@ -344,6 +349,7 @@ class AnalyzeContext {
      * compound lexemes
      */
     private void compound(Lexeme result){
+
         if(!this.useSmart){
             return ;
         }
...
CJKSegmenter.java
@@ -25,12 +25,12 @@
  */
 package org.wltea.analyzer.core;
 
+import org.wltea.analyzer.dic.Dictionary;
+import org.wltea.analyzer.dic.Hit;
+
 import java.util.LinkedList;
 import java.util.List;
 
-import org.wltea.analyzer.dic.Dictionary;
-import org.wltea.analyzer.dic.Hit;
 
 /**
  * Chinese and Japanese/Korean (CJK) sub-segmenter
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
             // process the queue of pending hits
             Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     // emit the current word
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
             //*********************************
             // then try a single-character match at the current cursor
-            Hit singleCharHit = Dictionary.matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){// a single char forms a word
                 // emit the current word
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
...
CN_QuantifierSegmenter.java
@@ -24,14 +24,14 @@
  */
 package org.wltea.analyzer.core;
 
+import org.wltea.analyzer.dic.Dictionary;
+import org.wltea.analyzer.dic.Hit;
+
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Set;
 
-import org.wltea.analyzer.dic.Dictionary;
-import org.wltea.analyzer.dic.Hit;
 
 /**
  *
  * Chinese numeral/classifier sub-segmenter
@@ -155,7 +155,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
             // process the queue of pending hits
             Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     // emit the current word
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_COUNT);
@@ -174,7 +174,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
             //*********************************
             // try a single-character match at the current cursor
-            Hit singleCharHit = Dictionary.matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){// a single char forms a classifier
                 // emit the current word
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_COUNT);
...
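
The recurring change in the hunks above swaps static Dictionary calls for calls through Dictionary.getSingleton(). The Dictionary class itself is outside this diff, so the following is only a sketch of the shape those call sites imply; every name beyond getSingleton() and the match/isStopWord methods seen above is an assumption:

package org.wltea.analyzer.dic;

public class Dictionary {

    private static volatile Dictionary singleton;

    private Dictionary() {
        // load the main, quantifier and stopword dictionaries here
    }

    // call sites now share one loaded instance instead of static state
    public static Dictionary getSingleton() {
        if (singleton == null) {
            synchronized (Dictionary.class) {
                if (singleton == null) {
                    singleton = new Dictionary();
                }
            }
        }
        return singleton;
    }

    public boolean isStopWord(char[] buff, int begin, int length) {
        // in the real class: a trie lookup against the stopword dictionary
        return false;
    }
}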
IKArbitrator.java
@@ -38,7 +38,7 @@ class IKArbitrator {
     /**
      * ambiguity resolution
-     * @param orgLexemes
+     * // @param orgLexemes
      * @param useSmart
      */
     void process(AnalyzeContext context , boolean useSmart){
@@ -87,7 +87,6 @@ class IKArbitrator {
      * ambiguity detection
      * @param lexemeCell head of the ambiguous-path linked list
      * @param fullTextLength text length of the ambiguous path
-     * @param option candidate result path
      * @return
      */
     private LexemePath judge(QuickSortSet.Cell lexemeCell , int fullTextLength){
@@ -120,7 +119,7 @@ class IKArbitrator {
     /**
      * walk forward, adding lexemes to build an unambiguous lexeme combination
-     * @param LexemePath path
+     * // @param LexemePath path
      * @return
      */
     private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell , LexemePath option){
@@ -140,7 +139,7 @@ class IKArbitrator {
     /**
      * roll the lexeme chain back until it can accept the given lexeme
-     * @param lexeme
+     * // @param lexeme
      * @param l
      */
     private void backPath(Lexeme l , LexemePath option){
...
IKSegmenter.java
@@ -23,14 +23,15 @@
  */
 package org.wltea.analyzer.core;
 
-import org.elasticsearch.common.logging.ESLogger;
-import org.elasticsearch.common.logging.Loggers;
-
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.wltea.analyzer.cfg.Configuration;
+//import org.wltea.analyzer.cfg.DefaultConfig;
+import org.wltea.analyzer.dic.Dictionary;
 
 /**
  * IK segmenter main class
  *
@@ -39,16 +40,18 @@ public final class IKSegmenter {
     // source text Reader
     private Reader input;
+    // segmenter configuration
+    private Configuration cfg;
     // segmentation context
     private AnalyzeContext context;
     // list of sub-segmenters
     private List<ISegmenter> segmenters;
     // ambiguity arbitrator
     private IKArbitrator arbitrator;
-    private ESLogger logger=null;
-    private final boolean useSmart;
+    private boolean useSmart = false;
 
     /**
      * IK segmenter constructor
      * @param input
      * @param useSmart true enables the smart segmentation strategy
@@ -57,16 +60,31 @@ public final class IKSegmenter {
      * smart segmentation: merges numerals and classifiers, resolves ambiguity in the results
      */
     public IKSegmenter(Reader input , boolean useSmart){
-        logger = Loggers.getLogger("ik-analyzer");
         this.input = input;
+        // this.cfg = DefaultConfig.getInstance();
         this.useSmart=useSmart;
        this.init();
     }
+
+    /**
+     * IK segmenter constructor
+     * @param input
+     * @param cfg build the segmenter with a custom Configuration
+     *
+     */
+    public IKSegmenter(Reader input , Configuration cfg){
+        this.input = input;
+        this.cfg = cfg;
+        this.init();
+    }
 
     /**
      * initialization
     */
     private void init(){
+        // initialize the dictionary singleton
+        // Dictionary.initial(this.cfg);
+        // Dictionary.getSingleton();
         // initialize the segmentation context
         this.context = new AnalyzeContext(useSmart);
         // load the sub-segmenters
...
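
For reference, IKSegmenter can also be driven directly, outside Lucene, through the same pull-style next() API that SWMCQueryBuilder further down relies on. A minimal sketch, assuming the dictionaries load from the classpath:

import java.io.IOException;
import java.io.StringReader;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

public class IKSegmenterDemo {
    public static void main(String[] args) throws IOException {
        // smart mode: numerals and classifiers merged, ambiguity resolved
        IKSegmenter seg = new IKSegmenter(new StringReader("中华人民共和国"), true);
        Lexeme lex;
        while ((lex = seg.next()) != null) {  // next() returns null when exhausted
            System.out.println(lex.getLexemeText()
                    + " [" + lex.getBeginPosition() + "," + lex.getEndPosition() + ")");
        }
    }
}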
LetterSegmenter.java
@@ -120,7 +120,7 @@ class LetterSegmenter implements ISegmenter {
     /**
      * handle mixed alphanumeric output
      * e.g. windos2000 | linliangyi2005@gmail.com
-     * @param input
+     * // @param input
      * @param context
      * @return
      */
...
DictSegment.java
@@ -326,13 +326,5 @@ class DictSegment implements Comparable<DictSegment>{
         // compare the char stored on the current node
         return this.nodeChar.compareTo(o.nodeChar);
     }
-
-    public int getDicNum(){
-        if(charMap!=null)
-        {
-            return charMap.size();
-        }
-        return 0;
-    }
 }
Hit.java
@@ -58,7 +58,9 @@ public class Hit {
     public boolean isMatch() {
         return (this.hitState & MATCH) > 0;
     }
-
+    /**
+     *
+     */
     public void setMatch() {
         this.hitState = this.hitState | MATCH;
     }
@@ -69,7 +71,9 @@ public class Hit {
     public boolean isPrefix() {
         return (this.hitState & PREFIX) > 0;
     }
-
+    /**
+     *
+     */
     public void setPrefix() {
         this.hitState = this.hitState | PREFIX;
     }
@@ -79,7 +83,9 @@ public class Hit {
     public boolean isUnmatch() {
         return this.hitState == UNMATCH ;
     }
-
+    /**
+     *
+     */
     public void setUnmatch() {
         this.hitState = UNMATCH;
     }
...
IKAnalyzer.java
 /**
+ * IK Chinese Analyzer, version 5.0.1
+ * IK Analyzer release 5.0.1
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * source code provided by Lin Liangyi (linliangyi2005@gmail.com)
+ * copyright 2012, Oolong Studio
+ * provided by Linliangyi and copyright 2012 by Oolong studio
  *
  */
 package org.wltea.analyzer.lucene;
 
+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.wltea.analyzer.dic.Dictionary;
 
-import java.io.Reader;
-
-public final class IKAnalyzer extends Analyzer {
+/**
+ * IK analyzer: Lucene Analyzer implementation,
+ * compatible with Lucene 4.0
+ */
+public final class IKAnalyzer extends Analyzer{
 
-    private boolean isMaxWordLength = false;
-    private boolean useSmart=false;
+    private boolean useSmart;
+
+    public boolean useSmart() {
+        return useSmart;
+    }
+
+    public void setUseSmart(boolean useSmart) {
+        this.useSmart = useSmart;
+    }
 
+    /**
+     * IK analyzer, Lucene Analyzer interface implementation
+     *
+     * defaults to fine-grained segmentation
+     */
     public IKAnalyzer(){
         this(false);
     }
 
-    public IKAnalyzer(boolean isMaxWordLength){
+    /**
+     * IK analyzer, Lucene Analyzer interface implementation
+     *
+     * @param useSmart when true, the analyzer uses smart segmentation
+     */
+    public IKAnalyzer(boolean useSmart){
         super();
-        this.setMaxWordLength(isMaxWordLength);
+        this.useSmart = useSmart;
     }
 
     public IKAnalyzer(Settings indexSetting,Settings settings1) {
         super();
         Dictionary.getInstance().Init(indexSetting);
         if(settings1.get("use_smart", "true").equals("true")){
             useSmart = true;
         }
     }
 
+    /**
+     * override Analyzer to build the token stream components
+     */
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new IKTokenizer(reader , useSmart);
-    }
-
-    public void setMaxWordLength(boolean isMaxWordLength) {
-        this.isMaxWordLength = isMaxWordLength;
-    }
-
-    public boolean isMaxWordLength() {
-        return isMaxWordLength;
-    }
+    protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
+        Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart());
+        return new TokenStreamComponents(_IKTokenizer);
+    }
 }
IKTokenizer.java
 /**
  * IK Chinese Analyzer, version 5.0.1
  * IK Analyzer release 5.0.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,94 +20,95 @@
  * source code provided by Lin Liangyi (linliangyi2005@gmail.com)
  * copyright 2012, Oolong Studio
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
  *
  */
 package org.wltea.analyzer.lucene;
 
-import java.io.IOException;
-import java.io.Reader;
-
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;
 
+import java.io.IOException;
+import java.io.Reader;
+
 /**
  * IK tokenizer: Lucene Tokenizer adapter,
  * compatible with Lucene 4.0
  */
 public final class IKTokenizer extends Tokenizer {
 
     // the underlying IK segmenter
     private IKSegmenter _IKImplement;
 
     // term text attribute
     private final CharTermAttribute termAtt;
     // term offset attribute
     private final OffsetAttribute offsetAtt;
     // term type attribute (see the type constants in org.wltea.analyzer.core.Lexeme)
     private final TypeAttribute typeAtt;
     // end position of the last lexeme
     private int endPosition;
 
     /**
      * Lucene 4.0 Tokenizer adapter constructor
      * @param in
      * @param useSmart
      */
     public IKTokenizer(Reader in , boolean useSmart){
         super(in);
         offsetAtt = addAttribute(OffsetAttribute.class);
         termAtt = addAttribute(CharTermAttribute.class);
         typeAtt = addAttribute(TypeAttribute.class);
         _IKImplement = new IKSegmenter(input , useSmart);
     }
 
     /* (non-Javadoc)
      * @see org.apache.lucene.analysis.TokenStream#incrementToken()
      */
     @Override
     public boolean incrementToken() throws IOException {
         // clear all term attributes
         clearAttributes();
         Lexeme nextLexeme = _IKImplement.next();
         if(nextLexeme != null){
             // convert the Lexeme into Attributes
             // set the term text
             termAtt.append(nextLexeme.getLexemeText());
             // set the term length
             termAtt.setLength(nextLexeme.getLength());
             // set the term offsets
             offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
             // record the end position of this lexeme
             endPosition = nextLexeme.getEndPosition();
             // record the lexeme type
             typeAtt.setType(nextLexeme.getLexemeTypeString());
             // return true to signal that another token is available
             return true;
         }
         // return false to signal that token output is finished
         return false;
     }
 
     /*
      * (non-Javadoc)
      * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
      */
     @Override
     public void reset() throws IOException {
         super.reset();
         _IKImplement.reset(input);
     }
 
     @Override
     public final void end() {
         // set final offset
         int finalOffset = correctOffset(this.endPosition);
         offsetAtt.setOffset(finalOffset, finalOffset);
     }
 }
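
The reset() and end() overrides above exist to satisfy the Lucene 4 consumer contract, which drives every TokenStream through a fixed lifecycle: reset(), then incrementToken() until it returns false, then end() and close(). A minimal consumption sketch (not part of this commit, and assuming the IK dictionaries are available):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TokenStreamContractDemo {
    public static void main(String[] args) throws IOException {
        IKAnalyzer analyzer = new IKAnalyzer(true);
        TokenStream ts = analyzer.tokenStream("text", new StringReader("中华人民共和国"));
        CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.getAttribute(OffsetAttribute.class);
        ts.reset();                 // must be called before the first incrementToken()
        while (ts.incrementToken()) {
            System.out.println(term.toString()
                    + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
        }
        ts.end();                   // writes the final offset, see end() above
        ts.close();
    }
}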
SWMCQueryBuilder.java
(The whole file had been commented out with leading // markers; this commit re-enables it. The resulting file:)
/**
 * IK Chinese Analyzer, version 5.0
 * IK Analyzer release 5.0
 * (Apache License 2.0 header, identical to the files above;
 *  source code provided by Lin Liangyi, copyright 2012 by Oolong studio)
 */
package org.wltea.analyzer.query;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Single Word Multi Char Query Builder
 * dedicated to the IK segmentation algorithm
 * @author linliangyi
 *
 */
public class SWMCQueryBuilder {

    /**
     * build a SWMCQuery
     * @param fieldName
     * @param keywords
     * @param quickMode
     * @return Lucene Query
     */
    public static Query create(String fieldName ,String keywords , boolean quickMode){
        if(fieldName == null || keywords == null){
            throw new IllegalArgumentException("parameters fieldName and keywords must not be null.");
        }
        // 1. analyze the keywords
        List<Lexeme> lexemes = doAnalyze(keywords);
        // 2. build the SWMCQuery from the analysis result
        Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
        return _SWMCQuery;
    }

    /**
     * analyze the text and return the lexeme list
     * @param keywords
     * @return
     */
    private static List<Lexeme> doAnalyze(String keywords){
        List<Lexeme> lexemes = new ArrayList<Lexeme>();
        IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true);
        try{
            Lexeme l = null;
            while( (l = ikSeg.next()) != null){
                lexemes.add(l);
            }
        }catch(IOException e){
            e.printStackTrace();
        }
        return lexemes;
    }


    /**
     * build the SWMC query from the analysis result
     * @param fieldName
     * // @param pathOption
     * @param quickMode
     * @return
     */
    private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){
        // the SWMC query expression
        StringBuffer keywordBuffer = new StringBuffer();
        // the condensed SWMC query expression
        StringBuffer keywordBuffer_Short = new StringBuffer();
        // length of the last lexeme
        int lastLexemeLength = 0;
        // end position of the last lexeme
        int lastLexemeEnd = -1;

        int shortCount = 0;
        int totalCount = 0;
        for(Lexeme l : lexemes){
            totalCount += l.getLength();
            // condensed expression
            if(l.getLength() > 1){
                keywordBuffer_Short.append(' ').append(l.getLexemeText());
                shortCount += l.getLength();
            }

            if(lastLexemeLength == 0){
                keywordBuffer.append(l.getLexemeText());
            }else if(lastLexemeLength == 1 && l.getLength() == 1
                    && lastLexemeEnd == l.getBeginPosition()){// adjacent single-char lexemes are merged
                keywordBuffer.append(l.getLexemeText());
            }else{
                keywordBuffer.append(' ').append(l.getLexemeText());

            }
            lastLexemeLength = l.getLength();
            lastLexemeEnd = l.getEndPosition();
        }

        // use the Lucene QueryParser to build the SWMC Query
        QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, new StandardAnalyzer(Version.LUCENE_40));
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        qp.setAutoGeneratePhraseQueries(true);

        if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
            try {
                //System.out.println(keywordBuffer.toString());
                Query q = qp.parse(keywordBuffer_Short.toString());
                return q;
            } catch (ParseException e) {
                e.printStackTrace();
            }

        }else{
            if(keywordBuffer.length() > 0){
                try {
                    //System.out.println(keywordBuffer.toString());
                    Query q = qp.parse(keywordBuffer.toString());
                    return q;
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }
}
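
A short usage sketch for the re-enabled builder; the field name and keywords are illustrative:

import org.apache.lucene.search.Query;
import org.wltea.analyzer.query.SWMCQueryBuilder;

public class SWMCQueryDemo {
    public static void main(String[] args) {
        // quickMode=true prefers the condensed expression when multi-char
        // lexemes cover more than half of the analyzed keywords
        Query query = SWMCQueryBuilder.create("text", "中文分词工具包", true);
        System.out.println(query);
    }
}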
LuceneIndexAndSearchDemo.java
(Likewise previously commented out in full and re-enabled by this commit. The resulting file:)
/**
 * IK Chinese Analyzer, version 5.0
 * IK Analyzer release 5.0
 * (Apache License 2.0 header, identical to the files above;
 *  source code provided by Lin Liangyi, copyright 2012 by Oolong studio)
 */
package org.wltea.analyzer.sample;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Demo of indexing and searching with IKAnalyzer in Lucene
 * 2012-3-2
 *
 * written against the Lucene 4.0 API
 *
 */
public class LuceneIndexAndSearchDemo {


    /**
     * simulation:
     * create a single-document index and search it
     * @param args
     */
    public static void main(String[] args){
        // Lucene Document field name
        String fieldName = "text";
        // content to index
        String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";

        // instantiate the IKAnalyzer
        Analyzer analyzer = new IKAnalyzer(true);

        Directory directory = null;
        IndexWriter iwriter = null;
        IndexReader ireader = null;
        IndexSearcher isearcher = null;
        try {
            // create an in-memory index
            directory = new RAMDirectory();

            // configure the IndexWriterConfig
            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40 , analyzer);
            iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
            iwriter = new IndexWriter(directory , iwConfig);
            // write the index
            Document doc = new Document();
            doc.add(new StringField("ID", "10000", Field.Store.YES));
            doc.add(new TextField(fieldName, text, Field.Store.YES));
            iwriter.addDocument(doc);
            iwriter.close();


            // search **********************************
            // instantiate the searcher
            ireader = DirectoryReader.open(directory);
            isearcher = new IndexSearcher(ireader);

            String keyword = "中文分词工具包";
            // build the Query with the QueryParser
            QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
            qp.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = qp.parse(keyword);
            System.out.println("Query = " + query);

            // fetch the 5 highest-scoring documents
            TopDocs topDocs = isearcher.search(query , 5);
            System.out.println("hits: " + topDocs.totalHits);
            // print the results
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (int i = 0; i < topDocs.totalHits; i++){
                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                System.out.println("content: " + targetDoc.toString());
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally{
            if(ireader != null){
                try {
                    ireader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if(directory != null){
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}