Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Happy_dahai
elasticsearch-analysis-ik
提交
21a859a4
E
elasticsearch-analysis-ik
项目概览
Happy_dahai
/
elasticsearch-analysis-ik
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
elasticsearch-analysis-ik
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
21a859a4
编写于
4月 09, 2018
作者:
weixin_43283383
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik
上级
816b8ddd
7028b9ea
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
30 additions
and
252 deletions
+30
-252
src/main/java/org/wltea/analyzer/dic/Dictionary.java
src/main/java/org/wltea/analyzer/dic/Dictionary.java
+30
-252
未找到文件。
src/main/java/org/wltea/analyzer/dic/Dictionary.java
浏览文件 @
21a859a4
...
...
@@ -26,7 +26,6 @@
package
org.wltea.analyzer.dic
;
import
java.io.BufferedReader
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.FileNotFoundException
;
import
java.io.IOException
;
...
...
@@ -201,6 +200,28 @@ public class Dictionary {
return
files
;
}
/**
 * Reads a dictionary file line by line and adds every non-blank entry to the
 * given dictionary segment.
 *
 * <p>The file is decoded as UTF-8. A leading U+FEFF (byte order mark) on the
 * first line is removed before the entry is processed. Each line is trimmed;
 * lines that are empty after trimming are skipped.
 *
 * @param dict     segment that receives the entries via {@code fillSegment}
 * @param file     path of the dictionary file to read
 * @param critical when {@code true}, a missing file is rethrown as a
 *                 {@link RuntimeException}; otherwise it is only logged
 * @param name     human-readable dictionary name used in log and error messages
 * @throws RuntimeException if the file does not exist and {@code critical} is set
 */
private void loadDictFile(DictSegment dict, Path file, boolean critical, String name) {
    try (InputStream stream = new FileInputStream(file.toFile())) {
        // The reader wraps the managed stream, so closing the stream is sufficient.
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"), 512);

        String line = reader.readLine();
        // Only the very first line can carry the byte order mark.
        if (line != null && line.startsWith("\uFEFF")) {
            line = line.substring(1);
        }

        while (line != null) {
            String entry = line.trim();
            if (!entry.isEmpty()) {
                dict.fillSegment(entry.toCharArray());
            }
            line = reader.readLine();
        }
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer: " + name + " not found", e);
        if (critical) {
            throw new RuntimeException("ik-analyzer: " + name + " not found!!!", e);
        }
    } catch (IOException e) {
        // Read failures are logged only, regardless of the critical flag.
        logger.error("ik-analyzer: " + name + " loading failed", e);
    }
}
public
List
<
String
>
getExtDictionarys
()
{
List
<
String
>
extDictFiles
=
new
ArrayList
<
String
>(
2
);
String
extDictCfg
=
getProperty
(
EXT_DICT
);
...
...
@@ -371,37 +392,7 @@ public class Dictionary {
// 读取主词典文件
Path
file
=
PathUtils
.
get
(
getDictRoot
(),
Dictionary
.
PATH_DIC_MAIN
);
InputStream
is
=
null
;
try
{
is
=
new
FileInputStream
(
file
.
toFile
());
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
e
.
getMessage
(),
e
);
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
String
theWord
=
null
;
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_MainDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
loadDictFile
(
_MainDict
,
file
,
false
,
"Main Dict"
);
// 加载扩展词典
this
.
loadExtDict
();
// 加载远程自定义词库
...
...
@@ -415,44 +406,11 @@ public class Dictionary {
// 加载扩展词典配置
List
<
String
>
extDictFiles
=
getExtDictionarys
();
if
(
extDictFiles
!=
null
)
{
InputStream
is
=
null
;
for
(
String
extDictName
:
extDictFiles
)
{
// 读取扩展词典文件
logger
.
info
(
"[Dict Loading] "
+
extDictName
);
Path
file
=
PathUtils
.
get
(
extDictName
);
try
{
is
=
new
FileInputStream
(
file
.
toFile
());
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
// 如果找不到扩展的字典,则忽略
if
(
is
==
null
)
{
continue
;
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
String
theWord
=
null
;
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
// 加载扩展词典数据到主内存词典中
_MainDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
loadDictFile
(
_MainDict
,
file
,
false
,
"Extra Dict"
);
}
}
}
...
...
@@ -533,80 +491,17 @@ public class Dictionary {
// 读取主词典文件
Path
file
=
PathUtils
.
get
(
getDictRoot
(),
Dictionary
.
PATH_DIC_STOP
);
InputStream
is
=
null
;
try
{
is
=
new
FileInputStream
(
file
.
toFile
());
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
e
.
getMessage
(),
e
);
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
String
theWord
=
null
;
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_StopWords
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
loadDictFile
(
_StopWords
,
file
,
false
,
"Main Stopwords"
);
// 加载扩展停止词典
List
<
String
>
extStopWordDictFiles
=
getExtStopWordDictionarys
();
if
(
extStopWordDictFiles
!=
null
)
{
is
=
null
;
for
(
String
extStopWordDictName
:
extStopWordDictFiles
)
{
logger
.
info
(
"[Dict Loading] "
+
extStopWordDictName
);
// 读取扩展词典文件
file
=
PathUtils
.
get
(
extStopWordDictName
);
try
{
is
=
new
FileInputStream
(
file
.
toFile
());
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
// 如果找不到扩展的字典,则忽略
if
(
is
==
null
)
{
continue
;
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
String
theWord
=
null
;
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
// 加载扩展停止词典数据到内存中
_StopWords
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
loadDictFile
(
_StopWords
,
file
,
false
,
"Extra Stopwords"
);
}
}
...
...
@@ -639,142 +534,25 @@ public class Dictionary {
_QuantifierDict
=
new
DictSegment
((
char
)
0
);
// 读取量词词典文件
Path
file
=
PathUtils
.
get
(
getDictRoot
(),
Dictionary
.
PATH_DIC_QUANTIFIER
);
InputStream
is
=
null
;
try
{
is
=
new
FileInputStream
(
file
.
toFile
());
}
catch
(
FileNotFoundException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
try
{
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
,
"UTF-8"
),
512
);
String
theWord
=
null
;
do
{
theWord
=
br
.
readLine
();
if
(
theWord
!=
null
&&
!
""
.
equals
(
theWord
.
trim
()))
{
_QuantifierDict
.
fillSegment
(
theWord
.
trim
().
toCharArray
());
}
}
while
(
theWord
!=
null
);
}
catch
(
IOException
ioe
)
{
logger
.
error
(
"Quantifier Dictionary loading exception."
);
}
finally
{
try
{
if
(
is
!=
null
)
{
is
.
close
();
is
=
null
;
}
}
catch
(
IOException
e
)
{
logger
.
error
(
"ik-analyzer"
,
e
);
}
}
loadDictFile
(
_QuantifierDict
,
file
,
false
,
"Quantifier"
);
}
/**
 * Rebuilds the surname dictionary from the file located at
 * {@code getDictRoot()/PATH_DIC_SURNAME}.
 *
 * <p>All reading is delegated to {@code loadDictFile}. The previous body read
 * the file inline and then called {@code loadDictFile} on the same file, so
 * every entry was read and inserted twice; the inline copy also skipped the
 * byte-order-mark handling the helper performs.
 *
 * @throws RuntimeException if the surname dictionary file cannot be found
 *                          (raised by {@code loadDictFile} because the load
 *                          is marked critical)
 */
private void loadSurnameDict() {
    // Start from an empty root segment so a reload discards earlier entries.
    _SurnameDict = new DictSegment((char) 0);
    Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
    loadDictFile(_SurnameDict, file, true, "Surname");
}
/**
 * Rebuilds the suffix dictionary from the file located at
 * {@code getDictRoot()/PATH_DIC_SUFFIX}.
 *
 * <p>All reading is delegated to {@code loadDictFile}. The previous body read
 * the file inline (closing the stream without a null guard) and then called
 * {@code loadDictFile} on the same file, so every entry was read and inserted
 * twice.
 *
 * @throws RuntimeException if the suffix dictionary file cannot be found
 *                          (raised by {@code loadDictFile} because the load
 *                          is marked critical)
 */
private void loadSuffixDict() {
    // Start from an empty root segment so a reload discards earlier entries.
    _SuffixDict = new DictSegment((char) 0);
    Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
    loadDictFile(_SuffixDict, file, true, "Suffix");
}
/**
 * Rebuilds the preposition dictionary from the file located at
 * {@code getDictRoot()/PATH_DIC_PREP}.
 *
 * <p>All reading is delegated to {@code loadDictFile}. The previous body read
 * the file inline (closing the stream without a null guard) and then called
 * {@code loadDictFile} on the same file, so every entry was read and inserted
 * twice.
 *
 * @throws RuntimeException if the preposition dictionary file cannot be found
 *                          (raised by {@code loadDictFile} because the load
 *                          is marked critical)
 */
private void loadPrepDict() {
    // Start from an empty root segment so a reload discards earlier entries.
    _PrepDict = new DictSegment((char) 0);
    Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
    loadDictFile(_PrepDict, file, true, "Preposition");
}
public
void
reLoadMainDict
()
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录