Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
南宫伊儿
elasticsearch-analysis-ik
提交
bafb724c
E
elasticsearch-analysis-ik
项目概览
南宫伊儿
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
bafb724c
编写于
7月 04, 2014
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
#33 fix performance issue
上级
54fd9705
变更
4
隐藏空白更改
内联
并排
Showing 4 changed files with 14 additions and 15 deletions
+14
-15
pom.xml
pom.xml
+1
-1
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
+2
-2
src/main/java/org/wltea/analyzer/dic/Dictionary.java
src/main/java/org/wltea/analyzer/dic/Dictionary.java
+10
-11
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
+1
-1
未找到文件。
pom.xml
浏览文件 @
bafb724c
...
@@ -31,7 +31,7 @@
...
@@ -31,7 +31,7 @@
</parent>
</parent>
<properties>
<properties>
<elasticsearch.version>
1.1.1
</elasticsearch.version>
<elasticsearch.version>
1.0.0
</elasticsearch.version>
</properties>
</properties>
<repositories>
<repositories>
...
...
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
浏览文件 @
bafb724c
...
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
...
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//处理词段队列
//处理词段队列
Hit
[]
tmpArray
=
this
.
tmpHits
.
toArray
(
new
Hit
[
this
.
tmpHits
.
size
()]);
Hit
[]
tmpArray
=
this
.
tmpHits
.
toArray
(
new
Hit
[
this
.
tmpHits
.
size
()]);
for
(
Hit
hit
:
tmpArray
){
for
(
Hit
hit
:
tmpArray
){
hit
=
Dictionary
.
getSingleton
().
matchWithHit
(
String
.
valueOf
(
context
.
getSegmentBuff
()).
toLowerCase
().
toCharArray
(),
context
.
getCursor
()
,
hit
);
hit
=
Dictionary
.
getSingleton
().
matchWithHit
(
context
.
getSegmentBuff
(),
context
.
getCursor
()
,
hit
);
if
(
hit
.
isMatch
()){
if
(
hit
.
isMatch
()){
//输出当前的词
//输出当前的词
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
hit
.
getBegin
()
,
context
.
getCursor
()
-
hit
.
getBegin
()
+
1
,
Lexeme
.
TYPE_CNWORD
);
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
hit
.
getBegin
()
,
context
.
getCursor
()
-
hit
.
getBegin
()
+
1
,
Lexeme
.
TYPE_CNWORD
);
...
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
...
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//*********************************
//*********************************
//再对当前指针位置的字符进行单字匹配
//再对当前指针位置的字符进行单字匹配
Hit
singleCharHit
=
Dictionary
.
getSingleton
().
matchInMainDict
(
String
.
valueOf
(
context
.
getSegmentBuff
()).
toLowerCase
().
toCharArray
(),
context
.
getCursor
(),
1
);
Hit
singleCharHit
=
Dictionary
.
getSingleton
().
matchInMainDict
(
context
.
getSegmentBuff
(),
context
.
getCursor
(),
1
);
if
(
singleCharHit
.
isMatch
()){
//首字成词
if
(
singleCharHit
.
isMatch
()){
//首字成词
//输出当前的词
//输出当前的词
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
context
.
getCursor
()
,
1
,
Lexeme
.
TYPE_CNWORD
);
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
context
.
getCursor
()
,
1
,
Lexeme
.
TYPE_CNWORD
);
...
...
src/main/java/org/wltea/analyzer/dic/Dictionary.java
浏览文件 @
bafb724c
...
@@ -119,7 +119,7 @@ public class Dictionary {
...
@@ -119,7 +119,7 @@ public class Dictionary {
for
(
String
word
:
words
){
for
(
String
word
:
words
){
if
(
word
!=
null
)
{
if
(
word
!=
null
)
{
//批量加载词条到主内存词典中
//批量加载词条到主内存词典中
singleton
.
_MainDict
.
fillSegment
(
word
.
trim
().
toLowerCase
().
toCharArray
());
singleton
.
_MainDict
.
fillSegment
(
word
.
trim
().
toCharArray
());
}
}
}
}
}
}
...
@@ -133,7 +133,7 @@ public class Dictionary {
...
@@ -133,7 +133,7 @@ public class Dictionary {
for
(
String
word
:
words
){
for
(
String
word
:
words
){
if
(
word
!=
null
)
{
if
(
word
!=
null
)
{
//批量屏蔽词条
//批量屏蔽词条
singleton
.
_MainDict
.
disableSegment
(
word
.
trim
().
toLowerCase
().
toCharArray
());
singleton
.
_MainDict
.
disableSegment
(
word
.
trim
().
toCharArray
());
}
}
}
}
}
}
...
@@ -152,7 +152,7 @@ public class Dictionary {
...
@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
* @return Hit 匹配结果描述
*/
*/
public
Hit
matchInMainDict
(
char
[]
charArray
,
int
begin
,
int
length
){
public
Hit
matchInMainDict
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_MainDict
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
()
,
begin
,
length
);
return
singleton
.
_MainDict
.
match
(
charArray
,
begin
,
length
);
}
}
/**
/**
...
@@ -160,7 +160,7 @@ public class Dictionary {
...
@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
* @return Hit 匹配结果描述
*/
*/
public
Hit
matchInQuantifierDict
(
char
[]
charArray
,
int
begin
,
int
length
){
public
Hit
matchInQuantifierDict
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_QuantifierDict
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
()
,
begin
,
length
);
return
singleton
.
_QuantifierDict
.
match
(
charArray
,
begin
,
length
);
}
}
...
@@ -179,7 +179,7 @@ public class Dictionary {
...
@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean
* @return boolean
*/
*/
public
boolean
isStopWord
(
char
[]
charArray
,
int
begin
,
int
length
){
public
boolean
isStopWord
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_StopWords
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
()
,
begin
,
length
).
isMatch
();
return
singleton
.
_StopWords
.
match
(
charArray
,
begin
,
length
).
isMatch
();
}
}
/**
/**
...
@@ -205,7 +205,7 @@ public class Dictionary {
...
@@ -205,7 +205,7 @@ public class Dictionary {
do
{
do
{
theWord
=
br
.
readLine
();
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_MainDict
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_MainDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
}
while
(
theWord
!=
null
);
}
while
(
theWord
!=
null
);
...
@@ -255,7 +255,7 @@ public class Dictionary {
...
@@ -255,7 +255,7 @@ public class Dictionary {
theWord
=
br
.
readLine
();
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
//加载扩展词典数据到主内存词典中
//加载扩展词典数据到主内存词典中
_MainDict
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_MainDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
}
while
(
theWord
!=
null
);
}
while
(
theWord
!=
null
);
...
@@ -298,7 +298,7 @@ public class Dictionary {
...
@@ -298,7 +298,7 @@ public class Dictionary {
do
{
do
{
theWord
=
br
.
readLine
();
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_StopWords
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_StopWords
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
}
while
(
theWord
!=
null
);
}
while
(
theWord
!=
null
);
...
@@ -342,7 +342,7 @@ public class Dictionary {
...
@@ -342,7 +342,7 @@ public class Dictionary {
theWord
=
br
.
readLine
();
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
//加载扩展停止词典数据到内存中
//加载扩展停止词典数据到内存中
_StopWords
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_StopWords
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
}
while
(
theWord
!=
null
);
}
while
(
theWord
!=
null
);
...
@@ -383,7 +383,7 @@ public class Dictionary {
...
@@ -383,7 +383,7 @@ public class Dictionary {
do
{
do
{
theWord
=
br
.
readLine
();
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_QuantifierDict
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_QuantifierDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
}
while
(
theWord
!=
null
);
}
while
(
theWord
!=
null
);
...
@@ -440,7 +440,6 @@ public class Dictionary {
...
@@ -440,7 +440,6 @@ public class Dictionary {
}
}
private
void
loadSuffixDict
(){
private
void
loadSuffixDict
(){
_SuffixDict
=
new
DictSegment
((
char
)
0
);
_SuffixDict
=
new
DictSegment
((
char
)
0
);
...
...
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
浏览文件 @
bafb724c
...
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
...
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
if
(
nextLexeme
!=
null
){
if
(
nextLexeme
!=
null
){
//将Lexeme转成Attributes
//将Lexeme转成Attributes
//设置词元文本
//设置词元文本
termAtt
.
append
(
nextLexeme
.
getLexemeText
()
.
toLowerCase
()
);
termAtt
.
append
(
nextLexeme
.
getLexemeText
());
//设置词元长度
//设置词元长度
termAtt
.
setLength
(
nextLexeme
.
getLength
());
termAtt
.
setLength
(
nextLexeme
.
getLength
());
//设置词元位移
//设置词元位移
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录