Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
iiopsd
elasticsearch-analysis-ik
提交
2dfe7637
E
elasticsearch-analysis-ik
项目概览
iiopsd
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
4
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2dfe7637
编写于
4月 10, 2016
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'DevFactory-release/use-logger-to-log-exceptions-fix-1'
上级
3bd14902
ca2bfe57
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
378 addition
and
369 deletion
+378
-369
src/main/java/org/wltea/analyzer/dic/Dictionary.java
src/main/java/org/wltea/analyzer/dic/Dictionary.java
+274
-273
src/main/java/org/wltea/analyzer/dic/Monitor.java
src/main/java/org/wltea/analyzer/dic/Monitor.java
+18
-14
src/main/java/org/wltea/analyzer/help/Sleep.java
src/main/java/org/wltea/analyzer/help/Sleep.java
+10
-10
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
+20
-16
src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java
src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java
+26
-26
src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java
...a/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java
+30
-30
未找到文件。
src/main/java/org/wltea/analyzer/dic/Dictionary.java
浏览文件 @
2dfe7637
此差异已折叠。
点击以展开。
src/main/java/org/wltea/analyzer/dic/Monitor.java
浏览文件 @
2dfe7637
...
...
@@ -7,9 +7,13 @@ import org.apache.http.client.methods.CloseableHttpResponse;
import
org.apache.http.client.methods.HttpHead
;
import
org.apache.http.impl.client.CloseableHttpClient
;
import
org.apache.http.impl.client.HttpClients
;
import
org.elasticsearch.common.logging.ESLogger
;
import
org.elasticsearch.common.logging.Loggers
;
public
class
Monitor
implements
Runnable
{
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
private
static
CloseableHttpClient
httpclient
=
HttpClients
.
createDefault
();
/*
* 上次更改时间
...
...
@@ -19,12 +23,12 @@ public class Monitor implements Runnable {
* 资源属性
*/
private
String
eTags
;
/*
* 请求地址
*/
private
String
location
;
private
String
location
;
public
Monitor
(
String
location
)
{
this
.
location
=
location
;
this
.
last_modified
=
null
;
...
...
@@ -38,16 +42,16 @@ public class Monitor implements Runnable {
* ④如果有变化,重新加载词典
* ⑤休眠1min,返回第①步
*/
public
void
run
()
{
//超时设置
RequestConfig
rc
=
RequestConfig
.
custom
().
setConnectionRequestTimeout
(
10
*
1000
)
.
setConnectTimeout
(
10
*
1000
).
setSocketTimeout
(
15
*
1000
).
build
();
HttpHead
head
=
new
HttpHead
(
location
);
head
.
setConfig
(
rc
);
//设置请求头
if
(
last_modified
!=
null
)
{
head
.
setHeader
(
"If-Modified-Since"
,
last_modified
);
...
...
@@ -55,17 +59,17 @@ public class Monitor implements Runnable {
if
(
eTags
!=
null
)
{
head
.
setHeader
(
"If-None-Match"
,
eTags
);
}
CloseableHttpResponse
response
=
null
;
try
{
response
=
httpclient
.
execute
(
head
);
//返回200 才做操作
if
(
response
.
getStatusLine
().
getStatusCode
()==
200
){
if
(!
response
.
getLastHeader
(
"Last-Modified"
).
getValue
().
equalsIgnoreCase
(
last_modified
)
||!
response
.
getLastHeader
(
"ETag"
).
getValue
().
equalsIgnoreCase
(
eTags
))
{
||!
response
.
getLastHeader
(
"ETag"
).
getValue
().
equalsIgnoreCase
(
eTags
))
{
// 远程词库有更新,需要重新加载词典,并修改last_modified,eTags
Dictionary
.
getSingleton
().
reLoadMainDict
();
...
...
@@ -87,9 +91,9 @@ public class Monitor implements Runnable {
response
.
close
();
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
}
}
\ No newline at end of file
src/main/java/org/wltea/analyzer/help/Sleep.java
浏览文件 @
2dfe7637
...
...
@@ -5,8 +5,8 @@ import org.elasticsearch.common.logging.Loggers;
public
class
Sleep
{
public
static
final
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
public
enum
Type
{
MSEC
,
SEC
,
MIN
,
HOUR
};
public
static
void
sleep
(
Type
type
,
int
num
){
try
{
...
...
@@ -15,22 +15,22 @@ public class Sleep {
Thread
.
sleep
(
num
);
return
;
case
SEC:
Thread
.
sleep
(
num
*
1000
L
);
Thread
.
sleep
(
num
*
1000
);
return
;
case
MIN:
Thread
.
sleep
(
num
*
60
*
1000
L
);
Thread
.
sleep
(
num
*
60
*
1000
);
return
;
case
HOUR:
Thread
.
sleep
(
num
*
60
*
60
*
1000
L
);
Thread
.
sleep
(
num
*
60
*
60
*
1000
);
return
;
default
:
logger
.
error
(
"输入类型错误,应为MSEC,SEC,MIN,HOUR之一"
);
System
.
err
.
println
(
"输入类型错误,应为MSEC,SEC,MIN,HOUR之一"
);
return
;
}
}
catch
(
InterruptedException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
}
\ No newline at end of file
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
浏览文件 @
2dfe7637
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
...
...
@@ -20,7 +20,7 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package
org.wltea.analyzer.query
;
...
...
@@ -34,6 +34,8 @@ import org.apache.lucene.queryparser.classic.ParseException;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.util.Version
;
import
org.elasticsearch.common.logging.ESLogger
;
import
org.elasticsearch.common.logging.Loggers
;
import
org.wltea.analyzer.core.IKSegmenter
;
import
org.wltea.analyzer.core.Lexeme
;
...
...
@@ -45,6 +47,8 @@ import org.wltea.analyzer.core.Lexeme;
*/
public
class
SWMCQueryBuilder
{
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
/**
* 生成SWMCQuery
* @param fieldName
...
...
@@ -62,7 +66,7 @@ public class SWMCQueryBuilder {
Query
_SWMCQuery
=
getSWMCQuery
(
fieldName
,
lexemes
,
quickMode
);
return
_SWMCQuery
;
}
/**
* 分词切分,并返回结链表
* @param keywords
...
...
@@ -78,16 +82,16 @@ public class SWMCQueryBuilder {
lexemes
.
add
(
l
);
}
}
catch
(
IOException
e
){
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
return
lexemes
;
}
/**
* 根据分词结果生成SWMC搜索
* @param fieldName
// * @param pathOption
// * @param pathOption
* @param quickMode
* @return
*/
...
...
@@ -100,7 +104,7 @@ public class SWMCQueryBuilder {
int
lastLexemeLength
=
0
;
//记录最后词元结束位置
int
lastLexemeEnd
=
-
1
;
int
shortCount
=
0
;
int
totalCount
=
0
;
for
(
Lexeme
l
:
lexemes
){
...
...
@@ -110,15 +114,15 @@ public class SWMCQueryBuilder {
keywordBuffer_Short
.
append
(
' '
).
append
(
l
.
getLexemeText
());
shortCount
+=
l
.
getLength
();
}
if
(
lastLexemeLength
==
0
){
keywordBuffer
.
append
(
l
.
getLexemeText
());
keywordBuffer
.
append
(
l
.
getLexemeText
());
}
else
if
(
lastLexemeLength
==
1
&&
l
.
getLength
()
==
1
&&
lastLexemeEnd
==
l
.
getBeginPosition
()){
//单字位置相邻,长度为一,合并)
keywordBuffer
.
append
(
l
.
getLexemeText
());
}
else
{
keywordBuffer
.
append
(
' '
).
append
(
l
.
getLexemeText
());
}
lastLexemeLength
=
l
.
getLength
();
lastLexemeEnd
=
l
.
getEndPosition
();
...
...
@@ -128,16 +132,16 @@ public class SWMCQueryBuilder {
QueryParser
qp
=
new
QueryParser
(
fieldName
,
new
StandardAnalyzer
());
qp
.
setDefaultOperator
(
QueryParser
.
AND_OPERATOR
);
qp
.
setAutoGeneratePhraseQueries
(
true
);
if
(
quickMode
&&
(
shortCount
*
1.0f
/
totalCount
)
>
0.5f
){
try
{
//System.out.println(keywordBuffer.toString());
Query
q
=
qp
.
parse
(
keywordBuffer_Short
.
toString
());
return
q
;
}
catch
(
ParseException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
else
{
if
(
keywordBuffer
.
length
()
>
0
){
try
{
...
...
@@ -145,10 +149,10 @@ public class SWMCQueryBuilder {
Query
q
=
qp
.
parse
(
keywordBuffer
.
toString
());
return
q
;
}
catch
(
ParseException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
return
null
;
}
}
}
\ No newline at end of file
src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java
浏览文件 @
2dfe7637
/**
* IK 中文分词 版本 5.0.1
* IK Analyzer release 5.0.1
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
...
...
@@ -20,8 +20,8 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*
*
*/
package
org.wltea.analyzer.sample
;
...
...
@@ -44,47 +44,47 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
*/
public
class
IKAnalzyerDemo
{
public
static
final
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
public
static
void
main
(
String
[]
args
){
//构建IK分词器,使用smart分词模式
Analyzer
analyzer
=
new
IKAnalyzer
(
true
);
//获取Lucene的TokenStream对象
TokenStream
ts
=
null
;
TokenStream
ts
=
null
;
try
{
ts
=
analyzer
.
tokenStream
(
"myfield"
,
new
StringReader
(
"WORLD ,.. html DATA</html>HELLO"
));
// ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too"));
//获取词元位置属性
OffsetAttribute
offset
=
ts
.
addAttribute
(
OffsetAttribute
.
class
);
//获取词元文本属性
CharTermAttribute
term
=
ts
.
addAttribute
(
CharTermAttribute
.
class
);
//获取词元文本属性
TypeAttribute
type
=
ts
.
addAttribute
(
TypeAttribute
.
class
);
//重置TokenStream(重置StringReader)
ts
.
reset
();
OffsetAttribute
offset
=
ts
.
addAttribute
(
OffsetAttribute
.
class
);
//获取词元文本属性
CharTermAttribute
term
=
ts
.
addAttribute
(
CharTermAttribute
.
class
);
//获取词元文本属性
TypeAttribute
type
=
ts
.
addAttribute
(
TypeAttribute
.
class
);
//重置TokenStream(重置StringReader)
ts
.
reset
();
//迭代获取分词结果
while
(
ts
.
incrementToken
())
{
logger
.
info
(
offset
.
startOffset
()
+
" - "
+
offset
.
endOffset
()
+
" : "
+
term
.
toString
()
+
" | "
+
type
.
type
());
System
.
out
.
println
(
offset
.
startOffset
()
+
" - "
+
offset
.
endOffset
()
+
" : "
+
term
.
toString
()
+
" | "
+
type
.
type
());
}
//关闭TokenStream(关闭StringReader)
ts
.
end
();
// Perform end-of-stream operations, e.g. set the final offset.
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
finally
{
//释放TokenStream的所有资源
if
(
ts
!=
null
){
try
{
ts
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
}
try
{
ts
.
close
();
}
catch
(
IOException
e
)
{
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
}
}
}
}
\ No newline at end of file
src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java
浏览文件 @
2dfe7637
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
...
...
@@ -20,8 +20,8 @@
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*
*
*/
package
org.wltea.analyzer.sample
;
...
...
@@ -58,14 +58,14 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* 使用IKAnalyzer进行Lucene索引和查询的演示
* 2012-3-2
*
*
* 以下是结合Lucene4.0 API的写法
*
*/
public
class
LuceneIndexAndSearchDemo
{
public
static
final
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
/**
* 模拟:
* 创建一个单条记录的索引,并对其进行搜索
...
...
@@ -74,20 +74,20 @@ public class LuceneIndexAndSearchDemo {
public
static
void
main
(
String
[]
args
){
//Lucene Document的域名
String
fieldName
=
"text"
;
//检索内容
//检索内容
String
text
=
"IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。"
;
//实例化IKAnalyzer分词器
Analyzer
analyzer
=
new
IKAnalyzer
(
true
);
Directory
directory
=
null
;
IndexWriter
iwriter
=
null
;
IndexReader
ireader
=
null
;
IndexSearcher
isearcher
=
null
;
try
{
//建立内存索引对象
directory
=
new
RAMDirectory
();
directory
=
new
RAMDirectory
();
//配置IndexWriterConfig
IndexWriterConfig
iwConfig
=
new
IndexWriterConfig
(
analyzer
);
iwConfig
.
setOpenMode
(
OpenMode
.
CREATE_OR_APPEND
);
...
...
@@ -98,53 +98,53 @@ public class LuceneIndexAndSearchDemo {
doc
.
add
(
new
TextField
(
fieldName
,
text
,
Field
.
Store
.
YES
));
iwriter
.
addDocument
(
doc
);
iwriter
.
close
();
//搜索过程**********************************
//实例化搜索器
//实例化搜索器
ireader
=
DirectoryReader
.
open
(
directory
);
isearcher
=
new
IndexSearcher
(
ireader
);
String
keyword
=
"中文分词工具包"
;
isearcher
=
new
IndexSearcher
(
ireader
);
String
keyword
=
"中文分词工具包"
;
//使用QueryParser查询分析器构造Query对象
QueryParser
qp
=
new
QueryParser
(
fieldName
,
analyzer
);
qp
.
setDefaultOperator
(
QueryParser
.
AND_OPERATOR
);
Query
query
=
qp
.
parse
(
keyword
);
logger
.
info
(
"Query = "
+
query
);
System
.
out
.
println
(
"Query = "
+
query
);
//搜索相似度最高的5条记录
TopDocs
topDocs
=
isearcher
.
search
(
query
,
5
);
logger
.
info
(
"命中:"
+
topDocs
.
totalHits
);
System
.
out
.
println
(
"命中:"
+
topDocs
.
totalHits
);
//输出结果
ScoreDoc
[]
scoreDocs
=
topDocs
.
scoreDocs
;
for
(
int
i
=
0
;
i
<
topDocs
.
totalHits
;
i
++){
Document
targetDoc
=
isearcher
.
doc
(
scoreDocs
[
i
].
doc
);
logger
.
info
(
"内容:"
+
targetDoc
.
toString
());
}
System
.
out
.
println
(
"内容:"
+
targetDoc
.
toString
());
}
}
catch
(
CorruptIndexException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
catch
(
LockObtainFailedException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
catch
(
ParseException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
finally
{
if
(
ireader
!=
null
){
try
{
ireader
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
if
(
directory
!=
null
){
try
{
directory
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
}
}
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录