Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
myguguang
elasticsearch-analysis-ik
提交
e2fb31a5
E
elasticsearch-analysis-ik
项目概览
myguguang
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
e2fb31a5
编写于
12月 13, 2013
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
bug fix: prefix blank char caused ArrayIndexOutOfBoundsException
上级
35700686
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
27 additions
and
31 deletions
+27
-31
pom.xml
pom.xml
+2
-2
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
+5
-5
src/main/java/org/wltea/analyzer/core/IKSegmenter.java
src/main/java/org/wltea/analyzer/core/IKSegmenter.java
+5
-5
src/main/java/org/wltea/analyzer/dic/DictSegment.java
src/main/java/org/wltea/analyzer/dic/DictSegment.java
+2
-2
src/main/java/org/wltea/analyzer/dic/Dictionary.java
src/main/java/org/wltea/analyzer/dic/Dictionary.java
+3
-3
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
+4
-4
src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
...ava/org/wltea/analyzer/query/IKQueryExpressionParser.java
+6
-10
未找到文件。
pom.xml
浏览文件 @
e2fb31a5
...
...
@@ -6,7 +6,7 @@
<modelVersion>
4.0.0
</modelVersion>
<groupId>
org.elasticsearch
</groupId>
<artifactId>
elasticsearch-analysis-ik
</artifactId>
<version>
1.2.
4
</version>
<version>
1.2.
5
</version>
<packaging>
jar
</packaging>
<description>
IK Analyzer for ElasticSearch
</description>
<inceptionYear>
2009
</inceptionYear>
...
...
@@ -31,7 +31,7 @@
</parent>
<properties>
<elasticsearch.version>
0.90.
6
</elasticsearch.version>
<elasticsearch.version>
0.90.
2
</elasticsearch.version>
</properties>
<repositories>
...
...
src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
浏览文件 @
e2fb31a5
...
...
@@ -25,12 +25,12 @@
*/
package
org.wltea.analyzer.core
;
import
java.util.LinkedList
;
import
java.util.List
;
import
org.wltea.analyzer.dic.Dictionary
;
import
org.wltea.analyzer.dic.Hit
;
import
java.util.LinkedList
;
import
java.util.List
;
/**
* 中文-日韩文子分词器
...
...
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//处理词段队列
Hit
[]
tmpArray
=
this
.
tmpHits
.
toArray
(
new
Hit
[
this
.
tmpHits
.
size
()]);
for
(
Hit
hit
:
tmpArray
){
hit
=
Dictionary
.
getSingleton
().
matchWithHit
(
context
.
getSegmentBuff
(),
context
.
getCursor
()
,
hit
);
hit
=
Dictionary
.
getSingleton
().
matchWithHit
(
String
.
valueOf
(
context
.
getSegmentBuff
()).
toLowerCase
().
toCharArray
(),
context
.
getCursor
()
,
hit
);
if
(
hit
.
isMatch
()){
//输出当前的词
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
hit
.
getBegin
()
,
context
.
getCursor
()
-
hit
.
getBegin
()
+
1
,
Lexeme
.
TYPE_CNWORD
);
...
...
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//*********************************
//再对当前指针位置的字符进行单字匹配
Hit
singleCharHit
=
Dictionary
.
getSingleton
().
matchInMainDict
(
context
.
getSegmentBuff
(),
context
.
getCursor
(),
1
);
Hit
singleCharHit
=
Dictionary
.
getSingleton
().
matchInMainDict
(
String
.
valueOf
(
context
.
getSegmentBuff
()).
toLowerCase
().
toCharArray
(),
context
.
getCursor
(),
1
);
if
(
singleCharHit
.
isMatch
()){
//首字成词
//输出当前的词
Lexeme
newLexeme
=
new
Lexeme
(
context
.
getBufferOffset
()
,
context
.
getCursor
()
,
1
,
Lexeme
.
TYPE_CNWORD
);
...
...
src/main/java/org/wltea/analyzer/core/IKSegmenter.java
浏览文件 @
e2fb31a5
...
...
@@ -23,16 +23,16 @@
*/
package
org.wltea.analyzer.core
;
import
java.io.IOException
;
import
java.io.Reader
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.elasticsearch.common.settings.Settings
;
import
org.elasticsearch.env.Environment
;
import
org.wltea.analyzer.cfg.Configuration
;
import
org.wltea.analyzer.dic.Dictionary
;
import
java.io.IOException
;
import
java.io.Reader
;
import
java.util.ArrayList
;
import
java.util.List
;
/**
* IK分词器主类
*
...
...
src/main/java/org/wltea/analyzer/dic/DictSegment.java
浏览文件 @
e2fb31a5
...
...
@@ -114,8 +114,8 @@ class DictSegment implements Comparable<DictSegment>{
}
//设置hit的当前处理位置
searchHit
.
setEnd
(
begin
);
Character
keyChar
=
new
Character
(
charArray
[
begin
]);
Character
keyChar
=
new
Character
(
charArray
[
begin
]);
DictSegment
ds
=
null
;
//引用实例变量为本地变量,避免查询时遇到更新的同步问题
...
...
src/main/java/org/wltea/analyzer/dic/Dictionary.java
浏览文件 @
e2fb31a5
...
...
@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public
Hit
matchInMainDict
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_MainDict
.
match
(
String
.
valueOf
(
charArray
).
trim
().
toLowerCase
().
toCharArray
(),
begin
,
length
);
return
singleton
.
_MainDict
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
(),
begin
,
length
);
}
/**
...
...
@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public
Hit
matchInQuantifierDict
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_QuantifierDict
.
match
(
String
.
valueOf
(
charArray
).
trim
().
toLowerCase
().
toCharArray
(),
begin
,
length
);
return
singleton
.
_QuantifierDict
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
(),
begin
,
length
);
}
...
...
@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean
*/
public
boolean
isStopWord
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_StopWords
.
match
(
String
.
valueOf
(
charArray
).
trim
().
toLowerCase
().
toCharArray
(),
begin
,
length
).
isMatch
();
return
singleton
.
_StopWords
.
match
(
String
.
valueOf
(
charArray
).
toLowerCase
().
toCharArray
(),
begin
,
length
).
isMatch
();
}
/**
...
...
src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
浏览文件 @
e2fb31a5
...
...
@@ -26,9 +26,6 @@
*/
package
org.wltea.analyzer.lucene
;
import
java.io.IOException
;
import
java.io.Reader
;
import
org.apache.lucene.analysis.Tokenizer
;
import
org.apache.lucene.analysis.tokenattributes.CharTermAttribute
;
import
org.apache.lucene.analysis.tokenattributes.OffsetAttribute
;
...
...
@@ -38,6 +35,9 @@ import org.elasticsearch.env.Environment;
import
org.wltea.analyzer.core.IKSegmenter
;
import
org.wltea.analyzer.core.Lexeme
;
import
java.io.IOException
;
import
java.io.Reader
;
/**
* IK分词器 Lucene Tokenizer适配器类
* 兼容Lucene 4.0版本
...
...
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
if
(
nextLexeme
!=
null
){
//将Lexeme转成Attributes
//设置词元文本
termAtt
.
append
(
nextLexeme
.
getLexemeText
());
termAtt
.
append
(
nextLexeme
.
getLexemeText
()
.
toLowerCase
()
);
//设置词元长度
termAtt
.
setLength
(
nextLexeme
.
getLength
());
//设置词元位移
...
...
src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
浏览文件 @
e2fb31a5
...
...
@@ -24,20 +24,16 @@
*/
package
org.wltea.analyzer.query
;
import
org.apache.lucene.index.Term
;
import
org.apache.lucene.search.*
;
import
org.apache.lucene.search.BooleanClause.Occur
;
import
org.apache.lucene.util.BytesRef
;
import
java.util.ArrayList
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Stack
;
import
org.apache.lucene.index.Term
;
import
org.apache.lucene.search.BooleanClause
;
import
org.apache.lucene.search.BooleanQuery
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.search.TermQuery
;
import
org.apache.lucene.search.TermRangeQuery
;
import
org.apache.lucene.search.BooleanClause.Occur
;
import
org.apache.lucene.util.BytesRef
;
/**
* IK简易查询表达式解析
* 结合SWMCQuery算法
...
...
@@ -66,7 +62,7 @@ public class IKQueryExpressionParser {
*/
public
Query
parseExp
(
String
expression
,
boolean
quickMode
){
Query
lucenceQuery
=
null
;
if
(
expression
!=
null
&&
!
""
.
equals
(
expression
.
trim
()
)){
if
(
expression
!=
null
&&
!
""
.
equals
(
expression
)){
try
{
//文法解析
this
.
splitElements
(
expression
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录