/**
 * IK 中文分词  版本 5.0.1
 * IK Analyzer release 5.0.1
 * 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * 源代码由林良益(linliangyi2005@gmail.com)提供
 * 版权声明 2012,乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
 *
 */
package org.wltea.analyzer.lucene;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.settings.Settings;
import org.wltea.analyzer.dic.Dictionary;

W
wangweihua 已提交
34 35 36 37 38
/**
 * IK分词器,Lucene Analyzer接口实现
 * 兼容Lucene 4.0版本
 */
public final class IKAnalyzer extends Analyzer{
weixin_43283383's avatar
weixin_43283383 已提交
39
	
W
wangweihua 已提交
40 41 42 43 44
	private boolean useSmart;
	
	public boolean useSmart() {
		return useSmart;
	}
weixin_43283383's avatar
weixin_43283383 已提交
45

W
wangweihua 已提交
46 47 48 49 50 51 52 53 54 55
	public void setUseSmart(boolean useSmart) {
		this.useSmart = useSmart;
	}

	/**
	 * IK分词器Lucene  Analyzer接口实现类
	 * 
	 * 默认细粒度切分算法
	 */
	public IKAnalyzer(){
weixin_43283383's avatar
weixin_43283383 已提交
56 57 58
		this(false);
	}
	
W
wangweihua 已提交
59 60 61 62 63 64
	/**
	 * IK分词器Lucene Analyzer接口实现类
	 * 
	 * @param useSmart 当为true时,分词器进行智能切分
	 */
	public IKAnalyzer(boolean useSmart){
weixin_43283383's avatar
weixin_43283383 已提交
65
		super();
W
wangweihua 已提交
66
		this.useSmart = useSmart;
weixin_43283383's avatar
weixin_43283383 已提交
67 68
	}

weixin_43283383's avatar
weixin_43283383 已提交
69
    Settings settings;
70

weixin_43283383's avatar
weixin_43283383 已提交
71 72 73
    public IKAnalyzer(Settings indexSetting,Settings settings) {
        super();
        this.settings=settings;
weixin_43283383's avatar
weixin_43283383 已提交
74 75
    }

W
wangweihua 已提交
76 77 78 79 80
	/**
	 * 重载Analyzer接口,构造分词组件
	 */
	@Override
	protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
weixin_43283383's avatar
weixin_43283383 已提交
81
		Tokenizer _IKTokenizer = new IKTokenizer(in , settings);
W
wangweihua 已提交
82
		return new TokenStreamComponents(_IKTokenizer);
weixin_43283383's avatar
weixin_43283383 已提交
83 84 85
	}

}