提交 ca2bfe57 编写于 作者: weixin_43283383

merge code

......@@ -7,9 +7,13 @@ import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
public class Monitor implements Runnable {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
private static CloseableHttpClient httpclient = HttpClients.createDefault();
/*
* 上次更改时间
......@@ -19,12 +23,12 @@ public class Monitor implements Runnable {
* 资源属性
*/
private String eTags;
/*
* 请求地址
*/
private String location;
private String location;
public Monitor(String location) {
this.location = location;
this.last_modified = null;
......@@ -38,16 +42,16 @@ public class Monitor implements Runnable {
* ④如果有变化,重新加载词典
* ⑤休眠1min,返回第①步
*/
public void run() {
//超时设置
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000)
.setConnectTimeout(10*1000).setSocketTimeout(15*1000).build();
HttpHead head = new HttpHead(location);
head.setConfig(rc);
//设置请求头
if (last_modified != null) {
head.setHeader("If-Modified-Since", last_modified);
......@@ -55,17 +59,17 @@ public class Monitor implements Runnable {
if (eTags != null) {
head.setHeader("If-None-Match", eTags);
}
CloseableHttpResponse response = null;
try {
response = httpclient.execute(head);
//返回200 才做操作
if(response.getStatusLine().getStatusCode()==200){
if (!response.getLastHeader("Last-Modified").getValue().equalsIgnoreCase(last_modified)
||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) {
||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) {
// 远程词库有更新,需要重新加载词典,并修改last_modified,eTags
Dictionary.getSingleton().reLoadMainDict();
......@@ -87,9 +91,9 @@ public class Monitor implements Runnable {
response.close();
}
} catch (IOException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}
}
}
}
\ No newline at end of file
......@@ -5,8 +5,8 @@ import org.elasticsearch.common.logging.Loggers;
public class Sleep {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer");
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
public enum Type{MSEC,SEC,MIN,HOUR};
public static void sleep(Type type,int num){
try {
......@@ -15,22 +15,22 @@ public class Sleep {
Thread.sleep(num);
return;
case SEC:
Thread.sleep(num*1000L);
Thread.sleep(num*1000);
return;
case MIN:
Thread.sleep(num*60*1000L);
Thread.sleep(num*60*1000);
return;
case HOUR:
Thread.sleep(num*60*60*1000L);
Thread.sleep(num*60*60*1000);
return;
default:
logger.error("输入类型错误,应为MSEC,SEC,MIN,HOUR之一");
System.err.println("输入类型错误,应为MSEC,SEC,MIN,HOUR之一");
return;
}
} catch (InterruptedException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}
}
}
\ No newline at end of file
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
......@@ -20,7 +20,7 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.query;
......@@ -34,6 +34,8 @@ import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
......@@ -45,6 +47,8 @@ import org.wltea.analyzer.core.Lexeme;
*/
public class SWMCQueryBuilder {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
/**
* 生成SWMCQuery
* @param fieldName
......@@ -62,7 +66,7 @@ public class SWMCQueryBuilder {
Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
return _SWMCQuery;
}
/**
* 分词切分,并返回结果链表
* @param keywords
......@@ -78,16 +82,16 @@ public class SWMCQueryBuilder {
lexemes.add(l);
}
}catch(IOException e){
e.printStackTrace();
logger.error(e.getMessage(), e);
}
return lexemes;
}
/**
* 根据分词结果生成SWMC搜索
* @param fieldName
// * @param pathOption
// * @param pathOption
* @param quickMode
* @return
*/
......@@ -100,7 +104,7 @@ public class SWMCQueryBuilder {
int lastLexemeLength = 0;
//记录最后词元结束位置
int lastLexemeEnd = -1;
int shortCount = 0;
int totalCount = 0;
for(Lexeme l : lexemes){
......@@ -110,15 +114,15 @@ public class SWMCQueryBuilder {
keywordBuffer_Short.append(' ').append(l.getLexemeText());
shortCount += l.getLength();
}
if(lastLexemeLength == 0){
keywordBuffer.append(l.getLexemeText());
keywordBuffer.append(l.getLexemeText());
}else if(lastLexemeLength == 1 && l.getLength() == 1
&& lastLexemeEnd == l.getBeginPosition()){//单字位置相邻,长度为一,合并)
keywordBuffer.append(l.getLexemeText());
}else{
keywordBuffer.append(' ').append(l.getLexemeText());
}
lastLexemeLength = l.getLength();
lastLexemeEnd = l.getEndPosition();
......@@ -128,16 +132,16 @@ public class SWMCQueryBuilder {
QueryParser qp = new QueryParser(fieldName, new StandardAnalyzer());
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
qp.setAutoGeneratePhraseQueries(true);
if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
try {
//System.out.println(keywordBuffer.toString());
Query q = qp.parse(keywordBuffer_Short.toString());
return q;
} catch (ParseException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}else{
if(keywordBuffer.length() > 0){
try {
......@@ -145,10 +149,10 @@ public class SWMCQueryBuilder {
Query q = qp.parse(keywordBuffer.toString());
return q;
} catch (ParseException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}
}
return null;
}
}
}
\ No newline at end of file
/**
* IK 中文分词 版本 5.0.1
* IK Analyzer release 5.0.1
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
......@@ -20,8 +20,8 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*
*
*/
package org.wltea.analyzer.sample;
......@@ -44,47 +44,47 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
*/
public class IKAnalzyerDemo {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer");
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
public static void main(String[] args){
//构建IK分词器,使用smart分词模式
Analyzer analyzer = new IKAnalyzer(true);
//获取Lucene的TokenStream对象
TokenStream ts = null;
TokenStream ts = null;
try {
ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
// ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too"));
//获取词元位置属性
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
//获取词元文本属性
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
//获取词元类型属性
TypeAttribute type = ts.addAttribute(TypeAttribute.class);
//重置TokenStream(重置StringReader)
ts.reset();
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
//获取词元文本属性
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
//获取词元类型属性
TypeAttribute type = ts.addAttribute(TypeAttribute.class);
//重置TokenStream(重置StringReader)
ts.reset();
//迭代获取分词结果
while (ts.incrementToken()) {
logger.info(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
}
//关闭TokenStream(关闭StringReader)
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
} catch (IOException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
} finally {
//释放TokenStream的所有资源
if(ts != null){
try {
ts.close();
} catch (IOException e) {
e.printStackTrace();
}
try {
ts.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
}
}
}
}
}
\ No newline at end of file
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
......@@ -20,8 +20,8 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*
*
*/
package org.wltea.analyzer.sample;
......@@ -58,14 +58,14 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* 使用IKAnalyzer进行Lucene索引和查询的演示
* 2012-3-2
*
*
* 以下是结合Lucene4.0 API的写法
*
*/
public class LuceneIndexAndSearchDemo {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer");
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
/**
* 模拟:
* 创建一个单条记录的索引,并对其进行搜索
......@@ -74,20 +74,20 @@ public class LuceneIndexAndSearchDemo {
public static void main(String[] args){
//Lucene Document的域名
String fieldName = "text";
//检索内容
//检索内容
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
//实例化IKAnalyzer分词器
Analyzer analyzer = new IKAnalyzer(true);
Directory directory = null;
IndexWriter iwriter = null;
IndexReader ireader = null;
IndexSearcher isearcher = null;
try {
//建立内存索引对象
directory = new RAMDirectory();
directory = new RAMDirectory();
//配置IndexWriterConfig
IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
......@@ -98,53 +98,53 @@ public class LuceneIndexAndSearchDemo {
doc.add(new TextField(fieldName, text, Field.Store.YES));
iwriter.addDocument(doc);
iwriter.close();
//搜索过程**********************************
//实例化搜索器
//实例化搜索器
ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader);
String keyword = "中文分词工具包";
isearcher = new IndexSearcher(ireader);
String keyword = "中文分词工具包";
//使用QueryParser查询分析器构造Query对象
QueryParser qp = new QueryParser(fieldName, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
Query query = qp.parse(keyword);
logger.info("Query = " + query);
System.out.println("Query = " + query);
//搜索相似度最高的5条记录
TopDocs topDocs = isearcher.search(query , 5);
logger.info("命中:" + topDocs.totalHits);
System.out.println("命中:" + topDocs.totalHits);
//输出结果
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < topDocs.totalHits; i++){
Document targetDoc = isearcher.doc(scoreDocs[i].doc);
logger.info("内容:" + targetDoc.toString());
}
System.out.println("内容:" + targetDoc.toString());
}
} catch (CorruptIndexException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
} catch (LockObtainFailedException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
} catch (IOException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
} catch (ParseException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
} finally{
if(ireader != null){
try {
ireader.close();
} catch (IOException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}
if(directory != null){
try {
directory.close();
} catch (IOException e) {
e.printStackTrace();
logger.error(e.getMessage(), e);
}
}
}
}
}
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册