Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
iiopsd
elasticsearch-analysis-ik
提交
7e86d739
E
elasticsearch-analysis-ik
项目概览
iiopsd
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
4
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7e86d739
编写于
7月 25, 2016
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove unused classes
上级
341b5863
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
0 addition
and
874 deletion
+0
-874
src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
...ava/org/wltea/analyzer/query/IKQueryExpressionParser.java
+0
-716
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
+0
-158
未找到文件。
src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
已删除
100644 → 0
浏览文件 @
341b5863
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*/
package
org.wltea.analyzer.query
;
import
org.apache.lucene.index.Term
;
import
org.apache.lucene.search.*
;
import
org.apache.lucene.search.BooleanClause.Occur
;
import
org.apache.lucene.util.BytesRef
;
import
org.elasticsearch.common.logging.ESLogger
;
import
org.elasticsearch.common.logging.Loggers
;
import
java.util.ArrayList
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Stack
;
/**
* IK简易查询表达式解析
* 结合SWMCQuery算法
*
* 表达式例子 :
* (id='1231231' && title:'monkey') || (content:'你好吗' || ulr='www.ik.com') - name:'helloword'
* @author linliangyi
*
*/
public
class
IKQueryExpressionParser
{
public
static
final
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
//public static final String LUCENE_SPECIAL_CHAR = "&&||-()':={}[],";
private
List
<
Element
>
elements
=
new
ArrayList
<
Element
>();
private
Stack
<
Query
>
querys
=
new
Stack
<
Query
>();
private
Stack
<
Element
>
operates
=
new
Stack
<
Element
>();
/**
* 解析查询表达式,生成Lucene Query对象
*
* @param expression
* @param quickMode
* @return Lucene query
*/
public
Query
parseExp
(
String
expression
,
boolean
quickMode
){
Query
lucenceQuery
=
null
;
if
(
expression
!=
null
&&
!
""
.
equals
(
expression
)){
try
{
//文法解析
this
.
splitElements
(
expression
);
//语法解析
this
.
parseSyntax
(
quickMode
);
if
(
this
.
querys
.
size
()
==
1
){
lucenceQuery
=
this
.
querys
.
pop
();
}
else
{
throw
new
IllegalStateException
(
"表达式异常: 缺少逻辑操作符 或 括号缺失"
);
}
}
finally
{
elements
.
clear
();
querys
.
clear
();
operates
.
clear
();
}
}
return
lucenceQuery
;
}
/**
* 表达式文法解析
* @param expression
*/
private
void
splitElements
(
String
expression
){
if
(
expression
==
null
){
return
;
}
Element
curretElement
=
null
;
char
[]
expChars
=
expression
.
toCharArray
();
for
(
int
i
=
0
;
i
<
expChars
.
length
;
i
++){
switch
(
expChars
[
i
]){
case
'&'
:
if
(
curretElement
==
null
){
curretElement
=
new
Element
();
curretElement
.
type
=
'&'
;
curretElement
.
append
(
expChars
[
i
]);
}
else
if
(
curretElement
.
type
==
'&'
){
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
}
else
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
}
else
{
this
.
elements
.
add
(
curretElement
);
curretElement
=
new
Element
();
curretElement
.
type
=
'&'
;
curretElement
.
append
(
expChars
[
i
]);
}
break
;
case
'|'
:
if
(
curretElement
==
null
){
curretElement
=
new
Element
();
curretElement
.
type
=
'|'
;
curretElement
.
append
(
expChars
[
i
]);
}
else
if
(
curretElement
.
type
==
'|'
){
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
}
else
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
}
else
{
this
.
elements
.
add
(
curretElement
);
curretElement
=
new
Element
();
curretElement
.
type
=
'|'
;
curretElement
.
append
(
expChars
[
i
]);
}
break
;
case
'-'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'-'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
'('
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'('
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
')'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
')'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
':'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
':'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
'='
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'='
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
' '
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
}
else
{
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
}
}
break
;
case
'\''
:
if
(
curretElement
==
null
){
curretElement
=
new
Element
();
curretElement
.
type
=
'\''
;
}
else
if
(
curretElement
.
type
==
'\''
){
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
}
else
{
this
.
elements
.
add
(
curretElement
);
curretElement
=
new
Element
();
curretElement
.
type
=
'\''
;
}
break
;
case
'['
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'['
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
']'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
']'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
'{'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'{'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
'}'
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
'}'
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
case
','
:
if
(
curretElement
!=
null
){
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
continue
;
}
else
{
this
.
elements
.
add
(
curretElement
);
}
}
curretElement
=
new
Element
();
curretElement
.
type
=
','
;
curretElement
.
append
(
expChars
[
i
]);
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
break
;
default
:
if
(
curretElement
==
null
){
curretElement
=
new
Element
();
curretElement
.
type
=
'F'
;
curretElement
.
append
(
expChars
[
i
]);
}
else
if
(
curretElement
.
type
==
'F'
){
curretElement
.
append
(
expChars
[
i
]);
}
else
if
(
curretElement
.
type
==
'\''
){
curretElement
.
append
(
expChars
[
i
]);
}
else
{
this
.
elements
.
add
(
curretElement
);
curretElement
=
new
Element
();
curretElement
.
type
=
'F'
;
curretElement
.
append
(
expChars
[
i
]);
}
}
}
if
(
curretElement
!=
null
){
this
.
elements
.
add
(
curretElement
);
curretElement
=
null
;
}
}
/**
* 语法解析
*
*/
private
void
parseSyntax
(
boolean
quickMode
){
for
(
int
i
=
0
;
i
<
this
.
elements
.
size
()
;
i
++){
Element
e
=
this
.
elements
.
get
(
i
);
if
(
'F'
==
e
.
type
){
Element
e2
=
this
.
elements
.
get
(
i
+
1
);
if
(
'='
!=
e2
.
type
&&
':'
!=
e2
.
type
){
throw
new
IllegalStateException
(
"表达式异常: = 或 : 号丢失"
);
}
Element
e3
=
this
.
elements
.
get
(
i
+
2
);
//处理 = 和 : 运算
if
(
'\''
==
e3
.
type
){
i
+=
2
;
if
(
'='
==
e2
.
type
){
TermQuery
tQuery
=
new
TermQuery
(
new
Term
(
e
.
toString
()
,
e3
.
toString
()));
this
.
querys
.
push
(
tQuery
);
}
else
if
(
':'
==
e2
.
type
){
String
keyword
=
e3
.
toString
();
//SWMCQuery Here
Query
_SWMCQuery
=
SWMCQueryBuilder
.
create
(
e
.
toString
(),
keyword
,
quickMode
);
this
.
querys
.
push
(
_SWMCQuery
);
}
}
else
if
(
'['
==
e3
.
type
||
'{'
==
e3
.
type
){
i
+=
2
;
//处理 [] 和 {}
LinkedList
<
Element
>
eQueue
=
new
LinkedList
<
Element
>();
eQueue
.
add
(
e3
);
for
(
i
++
;
i
<
this
.
elements
.
size
()
;
i
++){
Element
eN
=
this
.
elements
.
get
(
i
);
eQueue
.
add
(
eN
);
if
(
']'
==
eN
.
type
||
'}'
==
eN
.
type
){
break
;
}
}
//翻译RangeQuery
Query
rangeQuery
=
this
.
toTermRangeQuery
(
e
,
eQueue
);
this
.
querys
.
push
(
rangeQuery
);
}
else
{
throw
new
IllegalStateException
(
"表达式异常:匹配值丢失"
);
}
}
else
if
(
'('
==
e
.
type
){
this
.
operates
.
push
(
e
);
}
else
if
(
')'
==
e
.
type
){
boolean
doPop
=
true
;
while
(
doPop
&&
!
this
.
operates
.
empty
()){
Element
op
=
this
.
operates
.
pop
();
if
(
'('
==
op
.
type
){
doPop
=
false
;
}
else
{
Query
q
=
toBooleanQuery
(
op
);
this
.
querys
.
push
(
q
);
}
}
}
else
{
if
(
this
.
operates
.
isEmpty
()){
this
.
operates
.
push
(
e
);
}
else
{
boolean
doPeek
=
true
;
while
(
doPeek
&&
!
this
.
operates
.
isEmpty
()){
Element
eleOnTop
=
this
.
operates
.
peek
();
if
(
'('
==
eleOnTop
.
type
){
doPeek
=
false
;
this
.
operates
.
push
(
e
);
}
else
if
(
compare
(
e
,
eleOnTop
)
==
1
){
this
.
operates
.
push
(
e
);
doPeek
=
false
;
}
else
if
(
compare
(
e
,
eleOnTop
)
==
0
){
Query
q
=
toBooleanQuery
(
eleOnTop
);
this
.
operates
.
pop
();
this
.
querys
.
push
(
q
);
}
else
{
Query
q
=
toBooleanQuery
(
eleOnTop
);
this
.
operates
.
pop
();
this
.
querys
.
push
(
q
);
}
}
if
(
doPeek
&&
this
.
operates
.
empty
()){
this
.
operates
.
push
(
e
);
}
}
}
}
while
(!
this
.
operates
.
isEmpty
()){
Element
eleOnTop
=
this
.
operates
.
pop
();
Query
q
=
toBooleanQuery
(
eleOnTop
);
this
.
querys
.
push
(
q
);
}
}
/**
* 根据逻辑操作符,生成BooleanQuery
* @param op
* @return
*/
private
Query
toBooleanQuery
(
Element
op
){
if
(
this
.
querys
.
size
()
==
0
){
return
null
;
}
BooleanQuery
resultQuery
=
new
BooleanQuery
();
if
(
this
.
querys
.
size
()
==
1
){
return
this
.
querys
.
get
(
0
);
}
Query
q2
=
this
.
querys
.
pop
();
Query
q1
=
this
.
querys
.
pop
();
if
(
'&'
==
op
.
type
){
if
(
q1
!=
null
){
if
(
q1
instanceof
BooleanQuery
){
BooleanClause
[]
clauses
=
((
BooleanQuery
)
q1
).
getClauses
();
if
(
clauses
.
length
>
0
&&
clauses
[
0
].
getOccur
()
==
Occur
.
MUST
){
for
(
BooleanClause
c
:
clauses
){
resultQuery
.
add
(
c
);
}
}
else
{
resultQuery
.
add
(
q1
,
Occur
.
MUST
);
}
}
else
{
//q1 instanceof TermQuery
//q1 instanceof TermRangeQuery
//q1 instanceof PhraseQuery
//others
resultQuery
.
add
(
q1
,
Occur
.
MUST
);
}
}
if
(
q2
!=
null
){
if
(
q2
instanceof
BooleanQuery
){
BooleanClause
[]
clauses
=
((
BooleanQuery
)
q2
).
getClauses
();
if
(
clauses
.
length
>
0
&&
clauses
[
0
].
getOccur
()
==
Occur
.
MUST
){
for
(
BooleanClause
c
:
clauses
){
resultQuery
.
add
(
c
);
}
}
else
{
resultQuery
.
add
(
q2
,
Occur
.
MUST
);
}
}
else
{
//q1 instanceof TermQuery
//q1 instanceof TermRangeQuery
//q1 instanceof PhraseQuery
//others
resultQuery
.
add
(
q2
,
Occur
.
MUST
);
}
}
}
else
if
(
'|'
==
op
.
type
){
if
(
q1
!=
null
){
if
(
q1
instanceof
BooleanQuery
){
BooleanClause
[]
clauses
=
((
BooleanQuery
)
q1
).
getClauses
();
if
(
clauses
.
length
>
0
&&
clauses
[
0
].
getOccur
()
==
Occur
.
SHOULD
){
for
(
BooleanClause
c
:
clauses
){
resultQuery
.
add
(
c
);
}
}
else
{
resultQuery
.
add
(
q1
,
Occur
.
SHOULD
);
}
}
else
{
//q1 instanceof TermQuery
//q1 instanceof TermRangeQuery
//q1 instanceof PhraseQuery
//others
resultQuery
.
add
(
q1
,
Occur
.
SHOULD
);
}
}
if
(
q2
!=
null
){
if
(
q2
instanceof
BooleanQuery
){
BooleanClause
[]
clauses
=
((
BooleanQuery
)
q2
).
getClauses
();
if
(
clauses
.
length
>
0
&&
clauses
[
0
].
getOccur
()
==
Occur
.
SHOULD
){
for
(
BooleanClause
c
:
clauses
){
resultQuery
.
add
(
c
);
}
}
else
{
resultQuery
.
add
(
q2
,
Occur
.
SHOULD
);
}
}
else
{
//q2 instanceof TermQuery
//q2 instanceof TermRangeQuery
//q2 instanceof PhraseQuery
//others
resultQuery
.
add
(
q2
,
Occur
.
SHOULD
);
}
}
}
else
if
(
'-'
==
op
.
type
){
if
(
q1
==
null
||
q2
==
null
){
throw
new
IllegalStateException
(
"表达式异常:SubQuery 个数不匹配"
);
}
if
(
q1
instanceof
BooleanQuery
){
BooleanClause
[]
clauses
=
((
BooleanQuery
)
q1
).
getClauses
();
if
(
clauses
.
length
>
0
){
for
(
BooleanClause
c
:
clauses
){
resultQuery
.
add
(
c
);
}
}
else
{
resultQuery
.
add
(
q1
,
Occur
.
MUST
);
}
}
else
{
//q1 instanceof TermQuery
//q1 instanceof TermRangeQuery
//q1 instanceof PhraseQuery
//others
resultQuery
.
add
(
q1
,
Occur
.
MUST
);
}
resultQuery
.
add
(
q2
,
Occur
.
MUST_NOT
);
}
return
resultQuery
;
}
/**
* 组装TermRangeQuery
* @param elements
* @return
*/
private
TermRangeQuery
toTermRangeQuery
(
Element
fieldNameEle
,
LinkedList
<
Element
>
elements
){
boolean
includeFirst
=
false
;
boolean
includeLast
=
false
;
String
firstValue
=
null
;
String
lastValue
=
null
;
//检查第一个元素是否是[或者{
Element
first
=
elements
.
getFirst
();
if
(
'['
==
first
.
type
){
includeFirst
=
true
;
}
else
if
(
'{'
==
first
.
type
){
includeFirst
=
false
;
}
else
{
throw
new
IllegalStateException
(
"表达式异常"
);
}
//检查最后一个元素是否是]或者}
Element
last
=
elements
.
getLast
();
if
(
']'
==
last
.
type
){
includeLast
=
true
;
}
else
if
(
'}'
==
last
.
type
){
includeLast
=
false
;
}
else
{
throw
new
IllegalStateException
(
"表达式异常, RangeQuery缺少结束括号"
);
}
if
(
elements
.
size
()
<
4
||
elements
.
size
()
>
5
){
throw
new
IllegalStateException
(
"表达式异常, RangeQuery 错误"
);
}
//读出中间部分
Element
e2
=
elements
.
get
(
1
);
if
(
'\''
==
e2
.
type
){
firstValue
=
e2
.
toString
();
//
Element
e3
=
elements
.
get
(
2
);
if
(
','
!=
e3
.
type
){
throw
new
IllegalStateException
(
"表达式异常, RangeQuery缺少逗号分隔"
);
}
//
Element
e4
=
elements
.
get
(
3
);
if
(
'\''
==
e4
.
type
){
lastValue
=
e4
.
toString
();
}
else
if
(
e4
!=
last
){
throw
new
IllegalStateException
(
"表达式异常,RangeQuery格式错误"
);
}
}
else
if
(
','
==
e2
.
type
){
firstValue
=
null
;
//
Element
e3
=
elements
.
get
(
2
);
if
(
'\''
==
e3
.
type
){
lastValue
=
e3
.
toString
();
}
else
{
throw
new
IllegalStateException
(
"表达式异常,RangeQuery格式错误"
);
}
}
else
{
throw
new
IllegalStateException
(
"表达式异常, RangeQuery格式错误"
);
}
return
new
TermRangeQuery
(
fieldNameEle
.
toString
()
,
new
BytesRef
(
firstValue
)
,
new
BytesRef
(
lastValue
)
,
includeFirst
,
includeLast
);
}
/**
* 比较操作符优先级
* @param e1
* @param e2
* @return
*/
private
int
compare
(
Element
e1
,
Element
e2
){
if
(
'&'
==
e1
.
type
){
if
(
'&'
==
e2
.
type
){
return
0
;
}
else
{
return
1
;
}
}
else
if
(
'|'
==
e1
.
type
){
if
(
'&'
==
e2
.
type
){
return
-
1
;
}
else
if
(
'|'
==
e2
.
type
){
return
0
;
}
else
{
return
1
;
}
}
else
{
if
(
'-'
==
e2
.
type
){
return
0
;
}
else
{
return
-
1
;
}
}
}
/**
* 表达式元素(操作符、FieldName、FieldValue)
* @author linliangyi
* May 20, 2010
*/
private
class
Element
{
char
type
=
0
;
StringBuffer
eleTextBuff
;
public
Element
(){
eleTextBuff
=
new
StringBuffer
();
}
public
void
append
(
char
c
){
this
.
eleTextBuff
.
append
(
c
);
}
public
String
toString
(){
return
this
.
eleTextBuff
.
toString
();
}
}
public
static
void
main
(
String
[]
args
){
IKQueryExpressionParser
parser
=
new
IKQueryExpressionParser
();
//String ikQueryExp = "newsTitle:'的两款《魔兽世界》插件Bigfoot和月光宝盒'";
String
ikQueryExp
=
"(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'魔兽中国') || (content:'KSHT-KSH-A001-18' || ulr='www.ik.com') - name:'林良益'"
;
Query
result
=
parser
.
parseExp
(
ikQueryExp
,
true
);
logger
.
info
(
result
.
toString
());
}
}
src/main/java/org/wltea/analyzer/query/SWMCQueryBuilder.java
已删除
100644 → 0
浏览文件 @
341b5863
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012,乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*/
package
org.wltea.analyzer.query
;
import
java.io.IOException
;
import
java.io.StringReader
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.util.Version
;
import
org.elasticsearch.common.logging.ESLogger
;
import
org.elasticsearch.common.logging.Loggers
;
import
org.wltea.analyzer.core.IKSegmenter
;
import
org.wltea.analyzer.core.Lexeme
;
/**
* Single Word Multi Char Query Builder
* IK分词算法专用
* @author linliangyi
*
*/
public
class
SWMCQueryBuilder
{
public
static
ESLogger
logger
=
Loggers
.
getLogger
(
"ik-analyzer"
);
/**
* 生成SWMCQuery
* @param fieldName
* @param keywords
* @param quickMode
* @return Lucene Query
*/
public
static
Query
create
(
String
fieldName
,
String
keywords
,
boolean
quickMode
){
if
(
fieldName
==
null
||
keywords
==
null
){
throw
new
IllegalArgumentException
(
"参数 fieldName 、 keywords 不能为null."
);
}
//1.对keywords进行分词处理
List
<
Lexeme
>
lexemes
=
doAnalyze
(
keywords
);
//2.根据分词结果,生成SWMCQuery
Query
_SWMCQuery
=
getSWMCQuery
(
fieldName
,
lexemes
,
quickMode
);
return
_SWMCQuery
;
}
/**
* 分词切分,并返回结链表
* @param keywords
* @return
*/
private
static
List
<
Lexeme
>
doAnalyze
(
String
keywords
){
List
<
Lexeme
>
lexemes
=
new
ArrayList
<
Lexeme
>();
IKSegmenter
ikSeg
=
new
IKSegmenter
(
new
StringReader
(
keywords
),
true
);
try
{
Lexeme
l
=
null
;
while
(
(
l
=
ikSeg
.
next
())
!=
null
){
lexemes
.
add
(
l
);
}
}
catch
(
IOException
e
){
logger
.
error
(
e
.
getMessage
(),
e
);
}
return
lexemes
;
}
/**
* 根据分词结果生成SWMC搜索
* @param fieldName
// * @param pathOption
* @param quickMode
* @return
*/
private
static
Query
getSWMCQuery
(
String
fieldName
,
List
<
Lexeme
>
lexemes
,
boolean
quickMode
){
//构造SWMC的查询表达式
StringBuffer
keywordBuffer
=
new
StringBuffer
();
//精简的SWMC的查询表达式
StringBuffer
keywordBuffer_Short
=
new
StringBuffer
();
//记录最后词元长度
int
lastLexemeLength
=
0
;
//记录最后词元结束位置
int
lastLexemeEnd
=
-
1
;
int
shortCount
=
0
;
int
totalCount
=
0
;
for
(
Lexeme
l
:
lexemes
){
totalCount
+=
l
.
getLength
();
//精简表达式
if
(
l
.
getLength
()
>
1
){
keywordBuffer_Short
.
append
(
' '
).
append
(
l
.
getLexemeText
());
shortCount
+=
l
.
getLength
();
}
if
(
lastLexemeLength
==
0
){
keywordBuffer
.
append
(
l
.
getLexemeText
());
}
else
if
(
lastLexemeLength
==
1
&&
l
.
getLength
()
==
1
&&
lastLexemeEnd
==
l
.
getBeginPosition
()){
//单字位置相邻,长度为一,合并)
keywordBuffer
.
append
(
l
.
getLexemeText
());
}
else
{
keywordBuffer
.
append
(
' '
).
append
(
l
.
getLexemeText
());
}
lastLexemeLength
=
l
.
getLength
();
lastLexemeEnd
=
l
.
getEndPosition
();
}
//借助lucene queryparser 生成SWMC Query
QueryParser
qp
=
new
QueryParser
(
fieldName
,
new
StandardAnalyzer
());
qp
.
setDefaultOperator
(
QueryParser
.
AND_OPERATOR
);
qp
.
setAutoGeneratePhraseQueries
(
true
);
if
(
quickMode
&&
(
shortCount
*
1.0f
/
totalCount
)
>
0.5f
){
try
{
//System.out.println(keywordBuffer.toString());
Query
q
=
qp
.
parse
(
keywordBuffer_Short
.
toString
());
return
q
;
}
catch
(
ParseException
e
)
{
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
else
{
if
(
keywordBuffer
.
length
()
>
0
){
try
{
//System.out.println(keywordBuffer.toString());
Query
q
=
qp
.
parse
(
keywordBuffer
.
toString
());
return
q
;
}
catch
(
ParseException
e
)
{
logger
.
error
(
e
.
getMessage
(),
e
);
}
}
}
return
null
;
}
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录