Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
myguguang
elasticsearch-analysis-ik
提交
07ba4ece
E
elasticsearch-analysis-ik
项目概览
myguguang
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
07ba4ece
编写于
5月 31, 2013
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix dict loading
上级
6fc30fe6
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
177 additions
and
56 deletions
+177
-56
pom.xml
pom.xml
+1
-1
src/main/java/org/wltea/analyzer/cfg/Configuration.java
src/main/java/org/wltea/analyzer/cfg/Configuration.java
+1
-1
src/main/java/org/wltea/analyzer/dic/DictSegment.java
src/main/java/org/wltea/analyzer/dic/DictSegment.java
+3
-3
src/main/java/org/wltea/analyzer/dic/Dictionary.java
src/main/java/org/wltea/analyzer/dic/Dictionary.java
+172
-51
未找到文件。
pom.xml
浏览文件 @
07ba4ece
...
...
@@ -6,7 +6,7 @@
<modelVersion>
4.0.0
</modelVersion>
<groupId>
org.elasticsearch
</groupId>
<artifactId>
elasticsearch-analysis-ik
</artifactId>
<version>
1.2.
0
</version>
<version>
1.2.
1
</version>
<packaging>
jar
</packaging>
<description>
IK Analyzer for ElasticSearch
</description>
<inceptionYear>
2009
</inceptionYear>
...
...
src/main/java/org/wltea/analyzer/cfg/Configuration.java
浏览文件 @
07ba4ece
...
...
@@ -37,7 +37,7 @@ public class Configuration {
try
{
input
=
new
FileInputStream
(
fileConfig
);
}
catch
(
FileNotFoundException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"ik-analyzer"
,
e
);
}
if
(
input
!=
null
){
try
{
...
...
src/main/java/org/wltea/analyzer/dic/DictSegment.java
浏览文件 @
07ba4ece
...
...
@@ -26,8 +26,8 @@
package
org.wltea.analyzer.dic
;
import
java.util.Arrays
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.concurrent.ConcurrentHashMap
;
/**
* 词典树分段,表示词典树的一个分枝
...
...
@@ -35,7 +35,7 @@ import java.util.Map;
class
DictSegment
implements
Comparable
<
DictSegment
>{
//公用字典表,存储汉字
private
static
final
Map
<
Character
,
Character
>
charMap
=
new
HashMap
<
Character
,
Character
>(
16
,
0.95f
);
private
static
final
Map
<
Character
,
Character
>
charMap
=
new
Concurrent
HashMap
<
Character
,
Character
>(
16
,
0.95f
);
//数组大小上限
private
static
final
int
ARRAY_LENGTH_LIMIT
=
3
;
...
...
@@ -298,7 +298,7 @@ class DictSegment implements Comparable<DictSegment>{
if
(
this
.
childrenMap
==
null
){
synchronized
(
this
){
if
(
this
.
childrenMap
==
null
){
this
.
childrenMap
=
new
HashMap
<
Character
,
DictSegment
>(
ARRAY_LENGTH_LIMIT
*
2
,
0.8f
);
this
.
childrenMap
=
new
ConcurrentHashMap
<
Character
,
DictSegment
>(
ARRAY_LENGTH_LIMIT
*
2
,
0.8f
);
}
}
}
...
...
src/main/java/org/wltea/analyzer/dic/Dictionary.java
浏览文件 @
07ba4ece
...
...
@@ -25,16 +25,16 @@
*/
package
org.wltea.analyzer.dic
;
import
java.io.*
;
import
java.util.Collection
;
import
java.util.List
;
import
org.elasticsearch.common.logging.ESLogger
;
import
org.elasticsearch.common.logging.Loggers
;
import
org.elasticsearch.common.settings.Settings
;
import
org.elasticsearch.env.Environment
;
import
org.wltea.analyzer.cfg.Configuration
;
import
java.io.*
;
import
java.util.Collection
;
import
java.util.List
;
/**
* 词典管理类,单子模式
*/
...
...
@@ -45,20 +45,19 @@ public class Dictionary {
* 词典单子实例
*/
private
static
Dictionary
singleton
;
/*
* 主词典对象
*/
private
DictSegment
_MainDict
;
/*
* 停止词词典
*/
private
DictSegment
_StopWordDict
;
/*
* 量词词典
*/
private
DictSegment
_QuantifierDict
;
private
DictSegment
_MainDict
;
private
DictSegment
_SurnameDict
;
private
DictSegment
_QuantifierDict
;
private
DictSegment
_SuffixDict
;
private
DictSegment
_PrepDict
;
private
DictSegment
_StopWords
;
/**
* 配置对象
...
...
@@ -95,10 +94,10 @@ public class Dictionary {
environment
=
new
Environment
(
indexSettings
);
configuration
=
new
Configuration
(
indexSettings
);
loadMainDict
();
//
loadSurnameDict();
loadSurnameDict
();
loadQuantifierDict
();
//
loadSuffixDict();
//
loadPrepDict();
loadSuffixDict
();
loadPrepDict
();
loadStopWordDict
();
dictInited
=
true
;
}
...
...
@@ -218,7 +217,7 @@ public class Dictionary {
* @return boolean
*/
public
boolean
isStopWord
(
char
[]
charArray
,
int
begin
,
int
length
){
return
singleton
.
_StopWord
Dict
.
match
(
charArray
,
begin
,
length
).
isMatch
();
return
singleton
.
_StopWord
s
.
match
(
charArray
,
begin
,
length
).
isMatch
();
}
/**
...
...
@@ -247,18 +246,17 @@ public class Dictionary {
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
ioe
)
{
System
.
err
.
println
(
"Main Dictionary loading exception."
);
ioe
.
printStackTrace
();
}
finally
{
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
){
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
//加载扩展词典
...
...
@@ -275,8 +273,14 @@ public class Dictionary {
InputStream
is
=
null
;
for
(
String
extDictName
:
extDictFiles
){
//读取扩展词典文件
System
.
out
.
println
(
"加载扩展词典:"
+
extDictName
);
is
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
extDictName
);
logger
.
info
(
"加载扩展词典:"
+
extDictName
);
File
file
=
new
File
(
environment
.
configFile
(),
extDictName
);
try
{
is
=
new
FileInputStream
(
file
);
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
//如果找不到扩展的字典,则忽略
if
(
is
==
null
){
continue
;
...
...
@@ -288,24 +292,21 @@ public class Dictionary {
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
//加载扩展词典数据到主内存词典中
//System.out.println(theWord);
_MainDict
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
ioe
)
{
System
.
err
.
println
(
"Extension Dictionary loading exception."
);
ioe
.
printStackTrace
();
}
finally
{
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
){
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
}
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
}
}
...
...
@@ -316,15 +317,21 @@ public class Dictionary {
*/
private
void
loadStopWordDict
(){
//建立一个主词典实例
_StopWordDict
=
new
DictSegment
((
char
)
0
);
_StopWords
=
new
DictSegment
((
char
)
0
);
//加载扩展停止词典
List
<
String
>
extStopWordDictFiles
=
configuration
.
getExtStopWordDictionarys
();
if
(
extStopWordDictFiles
!=
null
){
InputStream
is
=
null
;
for
(
String
extStopWordDictName
:
extStopWordDictFiles
){
System
.
out
.
println
(
"加载扩展停止词典:"
+
extStopWordDictName
);
// logger.info("加载扩展停止词典:" + extStopWordDictName);
//读取扩展词典文件
is
=
this
.
getClass
().
getClassLoader
().
getResourceAsStream
(
extStopWordDictName
);
File
file
=
new
File
(
environment
.
configFile
(),
extStopWordDictName
);
try
{
is
=
new
FileInputStream
(
file
);
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
//如果找不到扩展的字典,则忽略
if
(
is
==
null
){
continue
;
...
...
@@ -335,15 +342,13 @@ public class Dictionary {
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
//System.out.println(theWord);
//加载扩展停止词典数据到内存中
_StopWordDict
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
_StopWords
.
fillSegment
(
theWord
.
trim
().
toLowerCase
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
ioe
)
{
System
.
err
.
println
(
"Extension Stop word Dictionary loading exception."
);
ioe
.
printStackTrace
();
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
...
...
@@ -352,7 +357,7 @@ public class Dictionary {
is
=
null
;
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
}
...
...
@@ -371,7 +376,7 @@ public class Dictionary {
try
{
is
=
new
FileInputStream
(
file
);
}
catch
(
FileNotFoundException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"ik-analyzer"
,
e
);
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
...
...
@@ -384,8 +389,7 @@ public class Dictionary {
}
while
(
theWord
!=
null
);
}
catch
(
IOException
ioe
)
{
System
.
err
.
println
(
"Quantifier Dictionary loading exception."
);
ioe
.
printStackTrace
();
logger
.
error
(
"Quantifier Dictionary loading exception."
);
}
finally
{
try
{
...
...
@@ -394,12 +398,129 @@ public class Dictionary {
is
=
null
;
}
}
catch
(
IOException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
}
/**
 * Loads the surname dictionary into {@code _SurnameDict}.
 *
 * <p>The file is resolved as {@code Dictionary.PATH_DIC_SURNAME} relative to
 * {@code environment.configFile()} and decoded as UTF-8, one entry per line.
 * Every line is trimmed, and lines that are empty after trimming are skipped.
 * I/O errors raised while reading or closing the file are logged and do not
 * propagate.
 *
 * @throws RuntimeException if the dictionary file cannot be opened; the
 *         underlying {@link FileNotFoundException} is attached as the cause
 */
private void loadSurnameDict() {
    _SurnameDict = new DictSegment((char) 0);
    File file = new File(environment.configFile(), Dictionary.PATH_DIC_SURNAME);

    InputStream is;
    try {
        is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        // Attach the cause: FileInputStream reports directories and unreadable
        // files with the same exception type, so the cause is the only place
        // the real reason for the failure is recorded.
        throw new RuntimeException("Surname Dictionary not found!!!", e);
    }

    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord;
        while ((theWord = br.readLine()) != null) {
            String word = theWord.trim();
            if (word.length() > 0) {
                _SurnameDict.fillSegment(word.toCharArray());
            }
        }
    } catch (IOException e) {
        logger.error("ik-analyzer", e);
    } finally {
        try {
            is.close();
        } catch (IOException e) {
            logger.error("ik-analyzer", e);
        }
    }
}
/**
 * Loads the suffix dictionary into {@code _SuffixDict}.
 *
 * <p>The file is resolved as {@code Dictionary.PATH_DIC_SUFFIX} relative to
 * {@code environment.configFile()} and decoded as UTF-8, one entry per line.
 * Every line is trimmed, and lines that are empty after trimming are skipped.
 * I/O errors raised while reading or closing the file are logged and do not
 * propagate.
 *
 * @throws RuntimeException if the dictionary file cannot be opened; the
 *         underlying {@link FileNotFoundException} is attached as the cause
 */
private void loadSuffixDict() {
    _SuffixDict = new DictSegment((char) 0);
    File file = new File(environment.configFile(), Dictionary.PATH_DIC_SUFFIX);

    InputStream is;
    try {
        is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        // Attach the cause: FileInputStream reports directories and unreadable
        // files with the same exception type, so the cause is the only place
        // the real reason for the failure is recorded.
        throw new RuntimeException("Suffix Dictionary not found!!!", e);
    }

    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord;
        while ((theWord = br.readLine()) != null) {
            String word = theWord.trim();
            if (word.length() > 0) {
                _SuffixDict.fillSegment(word.toCharArray());
            }
        }
    } catch (IOException e) {
        logger.error("ik-analyzer", e);
    } finally {
        try {
            is.close();
        } catch (IOException e) {
            logger.error("ik-analyzer", e);
        }
    }
}
/**
 * Loads the preposition dictionary into {@code _PrepDict}.
 *
 * <p>The file is resolved as {@code Dictionary.PATH_DIC_PREP} relative to
 * {@code environment.configFile()} and decoded as UTF-8, one entry per line.
 * Every line is trimmed, and lines that are empty after trimming are skipped.
 * I/O errors raised while reading or closing the file are logged and do not
 * propagate.
 *
 * @throws RuntimeException if the dictionary file cannot be opened; the
 *         underlying {@link FileNotFoundException} is attached as the cause
 */
private void loadPrepDict() {
    _PrepDict = new DictSegment((char) 0);
    File file = new File(environment.configFile(), Dictionary.PATH_DIC_PREP);

    InputStream is;
    try {
        is = new FileInputStream(file);
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer", e);
        // Attach the cause: FileInputStream reports directories and unreadable
        // files with the same exception type, so the cause is the only place
        // the real reason for the failure is recorded.
        throw new RuntimeException("Preposition Dictionary not found!!!", e);
    }

    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord;
        while ((theWord = br.readLine()) != null) {
            String word = theWord.trim();
            if (word.length() > 0) {
                _PrepDict.fillSegment(word.toCharArray());
            }
        }
    } catch (IOException e) {
        logger.error("ik-analyzer", e);
    } finally {
        try {
            is.close();
        } catch (IOException e) {
            logger.error("ik-analyzer", e);
        }
    }
}
/**
 * Returns the value currently held in the static {@code Dictionary.singleton}
 * field. This accessor only reads the field; it does not create or
 * initialise the instance.
 *
 * @return the shared {@code Dictionary} reference stored in {@code singleton}
 */
public static Dictionary getInstance() {
    final Dictionary instance = Dictionary.singleton;
    return instance;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录