Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik

21a859a4 · weixin_43283383 · 816b8ddd · 7028b9ea · 21a859a4
隐藏空白更改
内联并排

Showing with 30 addition and 252 deletion

src/main/java/org/wltea/analyzer/dic/Dictionary.java src/main/java/org/wltea/analyzer/dic/Dictionary.java +30 -252

未找到文件。
--- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java
+++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
@@ -26,7 +26,6 @@
 package org.wltea.analyzer.dic;

 import java.io.BufferedReader;
-import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -201,6 +200,28 @@ public class Dictionary {
 		return files;
 	}

+	private void loadDictFile(DictSegment dict, Path file, boolean critical, String name) {
+		try (InputStream is = new FileInputStream(file.toFile())) {
+			BufferedReader br = new BufferedReader(
+					new InputStreamReader(is, "UTF-8"), 512);
+			String word = br.readLine();
+			if (word != null) {
+				if (word.startsWith("\uFEFF"))
+					word = word.substring(1);
+				for (; word != null; word = br.readLine()) {
+					word = word.trim();
+					if (word.isEmpty()) continue;
+					dict.fillSegment(word.toCharArray());
+				}
+			}
+		} catch (FileNotFoundException e) {
+			logger.error("ik-analyzer: " + name + " not found", e);
+			if (critical) throw new RuntimeException("ik-analyzer: " + name + " not found!!!", e);
+		} catch (IOException e) {
+			logger.error("ik-analyzer: " + name + " loading failed", e);
+		}
+	}
+
 	public List<String> getExtDictionarys() {
 		List<String> extDictFiles = new ArrayList<String>(2);
 		String extDictCfg = getProperty(EXT_DICT);
@@ -371,37 +392,7 @@ public class Dictionary {

 		// 读取主词典文件
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
-
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error(e.getMessage(), e);
-		}
-
-		try {
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord = null;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-					_MainDict.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-
-		} catch (IOException e) {
-			logger.error("ik-analyzer", e);
-
-		} finally {
-			try {
-				if (is != null) {
-					is.close();
-					is = null;
-				}
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_MainDict, file, false, "Main Dict");
 		// 加载扩展词典
 		this.loadExtDict();
 		// 加载远程自定义词库
@@ -415,44 +406,11 @@ public class Dictionary {
 		// 加载扩展词典配置
 		List<String> extDictFiles = getExtDictionarys();
 		if (extDictFiles != null) {
-			InputStream is = null;
 			for (String extDictName : extDictFiles) {
 				// 读取扩展词典文件
 				logger.info("[Dict Loading] " + extDictName);
 				Path file = PathUtils.get(extDictName);
-				try {
-					is = new FileInputStream(file.toFile());
-				} catch (FileNotFoundException e) {
-					logger.error("ik-analyzer", e);
-				}
-
-				// 如果找不到扩展的字典，则忽略
-				if (is == null) {
-					continue;
-				}
-				try {
-					BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-					String theWord = null;
-					do {
-						theWord = br.readLine();
-						if (theWord != null && !"".equals(theWord.trim())) {
-							// 加载扩展词典数据到主内存词典中
-							_MainDict.fillSegment(theWord.trim().toCharArray());
-						}
-					} while (theWord != null);
-
-				} catch (IOException e) {
-					logger.error("ik-analyzer", e);
-				} finally {
-					try {
-						if (is != null) {
-							is.close();
-							is = null;
-						}
-					} catch (IOException e) {
-						logger.error("ik-analyzer", e);
-					}
-				}
+				loadDictFile(_MainDict, file, false, "Extra Dict");
 			}
 		}
 	}
@@ -533,80 +491,17 @@ public class Dictionary {

 		// 读取主词典文件
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
-
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error(e.getMessage(), e);
-		}
-
-		try {
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord = null;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-					_StopWords.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-
-		} catch (IOException e) {
-			logger.error("ik-analyzer", e);
-
-		} finally {
-			try {
-				if (is != null) {
-					is.close();
-					is = null;
-				}
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_StopWords, file, false, "Main Stopwords");

 		// 加载扩展停止词典
 		List<String> extStopWordDictFiles = getExtStopWordDictionarys();
 		if (extStopWordDictFiles != null) {
-			is = null;
 			for (String extStopWordDictName : extStopWordDictFiles) {
 				logger.info("[Dict Loading] " + extStopWordDictName);

 				// 读取扩展词典文件
 				file = PathUtils.get(extStopWordDictName);
-				try {
-					is = new FileInputStream(file.toFile());
-				} catch (FileNotFoundException e) {
-					logger.error("ik-analyzer", e);
-				}
-				// 如果找不到扩展的字典，则忽略
-				if (is == null) {
-					continue;
-				}
-				try {
-					BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-					String theWord = null;
-					do {
-						theWord = br.readLine();
-						if (theWord != null && !"".equals(theWord.trim())) {
-							// 加载扩展停止词典数据到内存中
-							_StopWords.fillSegment(theWord.trim().toCharArray());
-						}
-					} while (theWord != null);
-
-				} catch (IOException e) {
-					logger.error("ik-analyzer", e);
-
-				} finally {
-					try {
-						if (is != null) {
-							is.close();
-							is = null;
-						}
-					} catch (IOException e) {
-						logger.error("ik-analyzer", e);
-					}
-				}
+				loadDictFile(_StopWords, file, false, "Extra Stopwords");
 			}
 		}

@@ -639,142 +534,25 @@ public class Dictionary {
 		_QuantifierDict = new DictSegment((char) 0);
 		// 读取量词词典文件
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error("ik-analyzer", e);
-		}
-		try {
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord = null;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-					_QuantifierDict.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-
-		} catch (IOException ioe) {
-			logger.error("Quantifier Dictionary loading exception.");
-
-		} finally {
-			try {
-				if (is != null) {
-					is.close();
-					is = null;
-				}
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_QuantifierDict, file, false, "Quantifier");
 	}

 	private void loadSurnameDict() {
-
 		_SurnameDict = new DictSegment((char) 0);
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error("ik-analyzer", e);
-		}
-		if (is == null) {
-			throw new RuntimeException("Surname Dictionary not found!!!");
-		}
-		try {
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-					_SurnameDict.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-		} catch (IOException e) {
-			logger.error("ik-analyzer", e);
-		} finally {
-			try {
-				if (is != null) {
-					is.close();
-					is = null;
-				}
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_SurnameDict, file, true, "Surname");
 	}

 	private void loadSuffixDict() {
-
 		_SuffixDict = new DictSegment((char) 0);
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error("ik-analyzer", e);
-		}
-		if (is == null) {
-			throw new RuntimeException("Suffix Dictionary not found!!!");
-		}
-		try {
-
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-					_SuffixDict.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-		} catch (IOException e) {
-			logger.error("ik-analyzer", e);
-		} finally {
-			try {
-				is.close();
-				is = null;
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_SuffixDict, file, true, "Suffix");
 	}

 	private void loadPrepDict() {
-
 		_PrepDict = new DictSegment((char) 0);
 		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
-		InputStream is = null;
-		try {
-			is = new FileInputStream(file.toFile());
-		} catch (FileNotFoundException e) {
-			logger.error("ik-analyzer", e);
-		}
-		if (is == null) {
-			throw new RuntimeException("Preposition Dictionary not found!!!");
-		}
-		try {
-
-			BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
-			String theWord;
-			do {
-				theWord = br.readLine();
-				if (theWord != null && !"".equals(theWord.trim())) {
-
-					_PrepDict.fillSegment(theWord.trim().toCharArray());
-				}
-			} while (theWord != null);
-		} catch (IOException e) {
-			logger.error("ik-analyzer", e);
-		} finally {
-			try {
-				is.close();
-				is = null;
-			} catch (IOException e) {
-				logger.error("ik-analyzer", e);
-			}
-		}
+		loadDictFile(_PrepDict, file, true, "Preposition");
 	}

 	public void reLoadMainDict() {