提交 21a859a4 编写于 作者: weixin_43283383's avatar weixin_43283383

Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik

...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
...@@ -201,6 +200,28 @@ public class Dictionary { ...@@ -201,6 +200,28 @@ public class Dictionary {
return files; return files;
} }
/**
 * Reads a dictionary file line by line and feeds every non-blank entry into
 * the given dictionary segment.
 *
 * <p>The file is decoded as UTF-8 through a reader with a 512-character
 * buffer. A leading byte-order mark on the first line is removed, each line
 * is trimmed, and lines that are empty after trimming are skipped. The
 * underlying stream is closed when the method returns or throws.
 *
 * @param dict     segment that receives each entry via {@code fillSegment}
 * @param file     path of the dictionary file to read
 * @param critical when {@code true}, a missing file is rethrown as a
 *                 {@link RuntimeException}; otherwise it is only logged
 * @param name     human-readable dictionary name used in log and error messages
 * @throws RuntimeException if the file cannot be opened and {@code critical} is set
 */
private void loadDictFile(DictSegment dict, Path file, boolean critical, String name) {
    try (InputStream in = new FileInputStream(file.toFile())) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), 512);
        String line = reader.readLine();
        // Only the very first line can carry a byte-order mark.
        if (line != null && line.startsWith("\uFEFF")) {
            line = line.substring(1);
        }
        while (line != null) {
            String entry = line.trim();
            if (!entry.isEmpty()) {
                dict.fillSegment(entry.toCharArray());
            }
            line = reader.readLine();
        }
    } catch (FileNotFoundException e) {
        logger.error("ik-analyzer: " + name + " not found", e);
        if (critical) {
            throw new RuntimeException("ik-analyzer: " + name + " not found!!!", e);
        }
    } catch (IOException e) {
        // Read failures are logged only, even for critical dictionaries.
        logger.error("ik-analyzer: " + name + " loading failed", e);
    }
}
public List<String> getExtDictionarys() { public List<String> getExtDictionarys() {
List<String> extDictFiles = new ArrayList<String>(2); List<String> extDictFiles = new ArrayList<String>(2);
String extDictCfg = getProperty(EXT_DICT); String extDictCfg = getProperty(EXT_DICT);
...@@ -371,37 +392,7 @@ public class Dictionary { ...@@ -371,37 +392,7 @@ public class Dictionary {
// 读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
loadDictFile(_MainDict, file, false, "Main Dict");
InputStream is = null;
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error(e.getMessage(), e);
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_MainDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
// 加载扩展词典 // 加载扩展词典
this.loadExtDict(); this.loadExtDict();
// 加载远程自定义词库 // 加载远程自定义词库
...@@ -415,44 +406,11 @@ public class Dictionary { ...@@ -415,44 +406,11 @@ public class Dictionary {
// 加载扩展词典配置 // 加载扩展词典配置
List<String> extDictFiles = getExtDictionarys(); List<String> extDictFiles = getExtDictionarys();
if (extDictFiles != null) { if (extDictFiles != null) {
InputStream is = null;
for (String extDictName : extDictFiles) { for (String extDictName : extDictFiles) {
// 读取扩展词典文件 // 读取扩展词典文件
logger.info("[Dict Loading] " + extDictName); logger.info("[Dict Loading] " + extDictName);
Path file = PathUtils.get(extDictName); Path file = PathUtils.get(extDictName);
try { loadDictFile(_MainDict, file, false, "Extra Dict");
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
// 如果找不到扩展的字典,则忽略
if (is == null) {
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
// 加载扩展词典数据到主内存词典中
_MainDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
} }
} }
...@@ -533,80 +491,17 @@ public class Dictionary { ...@@ -533,80 +491,17 @@ public class Dictionary {
// 读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
loadDictFile(_StopWords, file, false, "Main Stopwords");
InputStream is = null;
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error(e.getMessage(), e);
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
// 加载扩展停止词典 // 加载扩展停止词典
List<String> extStopWordDictFiles = getExtStopWordDictionarys(); List<String> extStopWordDictFiles = getExtStopWordDictionarys();
if (extStopWordDictFiles != null) { if (extStopWordDictFiles != null) {
is = null;
for (String extStopWordDictName : extStopWordDictFiles) { for (String extStopWordDictName : extStopWordDictFiles) {
logger.info("[Dict Loading] " + extStopWordDictName); logger.info("[Dict Loading] " + extStopWordDictName);
// 读取扩展词典文件 // 读取扩展词典文件
file = PathUtils.get(extStopWordDictName); file = PathUtils.get(extStopWordDictName);
try { loadDictFile(_StopWords, file, false, "Extra Stopwords");
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
// 如果找不到扩展的字典,则忽略
if (is == null) {
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
// 加载扩展停止词典数据到内存中
_StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
} }
...@@ -639,142 +534,25 @@ public class Dictionary { ...@@ -639,142 +534,25 @@ public class Dictionary {
_QuantifierDict = new DictSegment((char) 0); _QuantifierDict = new DictSegment((char) 0);
// 读取量词词典文件 // 读取量词词典文件
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
InputStream is = null; loadDictFile(_QuantifierDict, file, false, "Quantifier");
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_QuantifierDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException ioe) {
logger.error("Quantifier Dictionary loading exception.");
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
private void loadSurnameDict() { private void loadSurnameDict() {
_SurnameDict = new DictSegment((char) 0); _SurnameDict = new DictSegment((char) 0);
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
InputStream is = null; loadDictFile(_SurnameDict, file, true, "Surname");
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
if (is == null) {
throw new RuntimeException("Surname Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SurnameDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
if (is != null) {
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
private void loadSuffixDict() { private void loadSuffixDict() {
_SuffixDict = new DictSegment((char) 0); _SuffixDict = new DictSegment((char) 0);
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
InputStream is = null; loadDictFile(_SuffixDict, file, true, "Suffix");
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
if (is == null) {
throw new RuntimeException("Suffix Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SuffixDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
is.close();
is = null;
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
private void loadPrepDict() { private void loadPrepDict() {
_PrepDict = new DictSegment((char) 0); _PrepDict = new DictSegment((char) 0);
Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
InputStream is = null; loadDictFile(_PrepDict, file, true, "Preposition");
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer", e);
}
if (is == null) {
throw new RuntimeException("Preposition Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_PrepDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer", e);
} finally {
try {
is.close();
is = null;
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
public void reLoadMainDict() { public void reLoadMainDict() {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册