Dictionary.java 17.7 KB
Newer Older
weixin_43283383's avatar
weixin_43283383 已提交
1 2 3
/**
 * IK 中文分词  版本 5.0
 * IK Analyzer release 5.0
4
 *
weixin_43283383's avatar
weixin_43283383 已提交
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * 源代码由林良益(linliangyi2005@gmail.com)提供
 * 版权声明 2012,乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
23 24
 *
 *
weixin_43283383's avatar
weixin_43283383 已提交
25 26 27
 */
package org.wltea.analyzer.dic;

A
arron 已提交
28 29 30 31 32 33
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
34 35 36
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.Files;
import java.nio.file.FileVisitResult;
37
import java.nio.file.Path;
38
import java.nio.file.SimpleFileVisitor;
R
Rueian 已提交
39 40
import java.security.AccessController;
import java.security.PrivilegedAction;
41
import java.util.*;
A
arron 已提交
42 43 44 45
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
46 47
import org.apache.http.Header;
import org.apache.http.HttpEntity;
goBD's avatar
goBD 已提交
48 49 50 51 52 53
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
R
Rueian 已提交
54
import org.elasticsearch.SpecialPermission;
55
import org.elasticsearch.common.io.PathUtils;
56
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
weixin_43283383's avatar
weixin_43283383 已提交
57
import org.wltea.analyzer.cfg.Configuration;
58
import org.apache.logging.log4j.Logger;
59
import org.wltea.analyzer.help.ESPluginLoggerFactory;
60

weixin_43283383's avatar
weixin_43283383 已提交
61 62 63 64 65 66 67 68 69 70 71

/**
 * 词典管理类,单子模式
 */
public class Dictionary {

	/*
	 * 词典单子实例
	 */
	private static Dictionary singleton;

72
	private DictSegment _MainDict;
weixin_43283383's avatar
weixin_43283383 已提交
73

74
	private DictSegment _QuantifierDict;
weixin_43283383's avatar
weixin_43283383 已提交
75

76
	private DictSegment _StopWords;
weixin_43283383's avatar
weixin_43283383 已提交
77 78 79 80 81

	/**
	 * 配置对象
	 */
	private Configuration configuration;
82

83
	private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName());
84 85 86

	private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
87 88 89 90 91 92
	private static final String PATH_DIC_MAIN = "main.dic";
	private static final String PATH_DIC_SURNAME = "surname.dic";
	private static final String PATH_DIC_QUANTIFIER = "quantifier.dic";
	private static final String PATH_DIC_SUFFIX = "suffix.dic";
	private static final String PATH_DIC_PREP = "preposition.dic";
	private static final String PATH_DIC_STOP = "stopword.dic";
93

94 95 96 97 98
	private final static  String FILE_NAME = "IKAnalyzer.cfg.xml";
	private final static  String EXT_DICT = "ext_dict";
	private final static  String REMOTE_EXT_DICT = "remote_ext_dict";
	private final static  String EXT_STOP = "ext_stopwords";
	private final static  String REMOTE_EXT_STOP = "remote_ext_stopwords";
99

100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
	private Path conf_dir;
	private Properties props;

	/**
	 * Creates a dictionary bound to {@code cfg} and loads the XML configuration file.
	 *
	 * The file is first looked up in the plugin-named directory under
	 * {@code cfg.getEnvironment().configFile()}; if it is not there, the directory
	 * returned by {@code cfg.getConfigInPluginDir()} is tried. When neither location
	 * has the file, or the file cannot be parsed, the error is logged and the
	 * properties stay empty.
	 *
	 * @param cfg plugin configuration
	 */
	private Dictionary(Configuration cfg) {
		this.configuration = cfg;
		this.props = new Properties();
		this.conf_dir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME);
		Path configFile = conf_dir.resolve(FILE_NAME);

		InputStream input = null;
		try {
			logger.info("try load config from {}", configFile);
			input = new FileInputStream(configFile.toFile());
		} catch (FileNotFoundException e) {
			conf_dir = cfg.getConfigInPluginDir();
			configFile = conf_dir.resolve(FILE_NAME);
			try {
				logger.info("try load config from {}", configFile);
				input = new FileInputStream(configFile.toFile());
			} catch (FileNotFoundException ex) {
				// We should report origin exception
				logger.error("ik-analyzer", e);
			}
		}
		if (input != null) {
			// try-with-resources guarantees the stream is released even when
			// parsing fails part-way through.
			try (InputStream in = input) {
				props.loadFromXML(in);
			} catch (IOException e) {
				logger.error("ik-analyzer", e);
			}
		}
	}
weixin_43283383's avatar
weixin_43283383 已提交
132

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
133
	private String getProperty(String key){
134 135 136 137 138
		if(props!=null){
			return props.getProperty(key);
		}
		return null;
	}
weixin_43283383's avatar
weixin_43283383 已提交
139
	/**
140 141 142
	 * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
	 * 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
143 144
	 * @return Dictionary
	 */
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
145
	public static synchronized void initial(Configuration cfg) {
146 147 148
		if (singleton == null) {
			synchronized (Dictionary.class) {
				if (singleton == null) {
149 150

					singleton = new Dictionary(cfg);
151 152 153 154 155 156 157
					singleton.loadMainDict();
					singleton.loadSurnameDict();
					singleton.loadQuantifierDict();
					singleton.loadSuffixDict();
					singleton.loadPrepDict();
					singleton.loadStopWordDict();

158 159 160 161 162 163 164 165 166
					if(cfg.isEnableRemoteDict()){
						// 建立监控线程
						for (String location : singleton.getRemoteExtDictionarys()) {
							// 10 秒是初始延迟可以修改的 60是间隔时间 单位秒
							pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
						}
						for (String location : singleton.getRemoteExtStopWordDictionarys()) {
							pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
						}
167 168
					}

weixin_43283383's avatar
weixin_43283383 已提交
169 170 171 172
				}
			}
		}
	}
173

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
174
	private void walkFileTree(List<String> files, Path path) {
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
		if (Files.isRegularFile(path)) {
			files.add(path.toString());
		} else if (Files.isDirectory(path)) try {
			Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
				@Override
				public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
					files.add(file.toString());
					return FileVisitResult.CONTINUE;
				}
				@Override
				public FileVisitResult visitFileFailed(Path file, IOException e) {
					logger.error("[Ext Loading] listing files", e);
					return FileVisitResult.CONTINUE;
				}
			});
		} catch (IOException e) {
			logger.error("[Ext Loading] listing files", e);
		} else {
			logger.warn("[Ext Loading] file not found: " + path);
		}
	}

F
Figroc Chen 已提交
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
	/**
	 * Reads a UTF-8 dictionary file, one entry per line, into {@code dict}.
	 * A leading byte-order mark on the first line is dropped, every line is trimmed,
	 * and blank lines are skipped.
	 *
	 * @param dict     segment tree receiving the entries
	 * @param file     dictionary file to read
	 * @param critical when true, a missing file raises a RuntimeException instead of
	 *                 only being logged
	 * @param name     human-readable dictionary name used in log and error messages
	 */
	private void loadDictFile(DictSegment dict, Path file, boolean critical, String name) {
		try (InputStream is = new FileInputStream(file.toFile())) {
			BufferedReader reader = new BufferedReader(
					new InputStreamReader(is, "UTF-8"), 512);
			String line = reader.readLine();
			// Only the very first line can carry a BOM.
			if (line != null && line.startsWith("\uFEFF")) {
				line = line.substring(1);
			}
			while (line != null) {
				String entry = line.trim();
				if (!entry.isEmpty()) {
					dict.fillSegment(entry.toCharArray());
				}
				line = reader.readLine();
			}
		} catch (FileNotFoundException e) {
			logger.error("ik-analyzer: " + name + " not found", e);
			if (critical) {
				throw new RuntimeException("ik-analyzer: " + name + " not found!!!", e);
			}
		} catch (IOException e) {
			logger.error("ik-analyzer: " + name + " loading failed", e);
		}
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
219
	private List<String> getExtDictionarys() {
220 221 222 223 224 225 226
		List<String> extDictFiles = new ArrayList<String>(2);
		String extDictCfg = getProperty(EXT_DICT);
		if (extDictCfg != null) {

			String[] filePaths = extDictCfg.split(";");
			for (String filePath : filePaths) {
				if (filePath != null && !"".equals(filePath.trim())) {
227 228
					Path file = PathUtils.get(getDictRoot(), filePath.trim());
					walkFileTree(extDictFiles, file);
229 230 231 232 233 234 235

				}
			}
		}
		return extDictFiles;
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
236
	private List<String> getRemoteExtDictionarys() {
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
		List<String> remoteExtDictFiles = new ArrayList<String>(2);
		String remoteExtDictCfg = getProperty(REMOTE_EXT_DICT);
		if (remoteExtDictCfg != null) {

			String[] filePaths = remoteExtDictCfg.split(";");
			for (String filePath : filePaths) {
				if (filePath != null && !"".equals(filePath.trim())) {
					remoteExtDictFiles.add(filePath);

				}
			}
		}
		return remoteExtDictFiles;
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
252
	private List<String> getExtStopWordDictionarys() {
253 254 255 256 257 258 259
		List<String> extStopWordDictFiles = new ArrayList<String>(2);
		String extStopWordDictCfg = getProperty(EXT_STOP);
		if (extStopWordDictCfg != null) {

			String[] filePaths = extStopWordDictCfg.split(";");
			for (String filePath : filePaths) {
				if (filePath != null && !"".equals(filePath.trim())) {
260 261
					Path file = PathUtils.get(getDictRoot(), filePath.trim());
					walkFileTree(extStopWordDictFiles, file);
262 263 264 265 266 267 268

				}
			}
		}
		return extStopWordDictFiles;
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
269
	private List<String> getRemoteExtStopWordDictionarys() {
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
		List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
		String remoteExtStopWordDictCfg = getProperty(REMOTE_EXT_STOP);
		if (remoteExtStopWordDictCfg != null) {

			String[] filePaths = remoteExtStopWordDictCfg.split(";");
			for (String filePath : filePaths) {
				if (filePath != null && !"".equals(filePath.trim())) {
					remoteExtStopWordDictFiles.add(filePath);

				}
			}
		}
		return remoteExtStopWordDictFiles;
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
285
	private String getDictRoot() {
286 287 288 289
		return conf_dir.toAbsolutePath().toString();
	}


weixin_43283383's avatar
weixin_43283383 已提交
290 291
	/**
	 * 获取词典单子实例
292
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
293 294
	 * @return Dictionary 单例对象
	 */
295 296
	public static Dictionary getSingleton() {
		if (singleton == null) {
weixin_43283383's avatar
weixin_43283383 已提交
297 298 299 300
			throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
		}
		return singleton;
	}
301

302

weixin_43283383's avatar
weixin_43283383 已提交
303 304
	/**
	 * 批量加载新词条
305 306 307
	 * 
	 * @param words
	 *            Collection<String>词条列表
weixin_43283383's avatar
weixin_43283383 已提交
308
	 */
309 310 311
	public void addWords(Collection<String> words) {
		if (words != null) {
			for (String word : words) {
weixin_43283383's avatar
weixin_43283383 已提交
312
				if (word != null) {
313
					// 批量加载词条到主内存词典中
weixin_43283383's avatar
weixin_43283383 已提交
314
					singleton._MainDict.fillSegment(word.trim().toCharArray());
weixin_43283383's avatar
weixin_43283383 已提交
315 316 317 318
				}
			}
		}
	}
319

weixin_43283383's avatar
weixin_43283383 已提交
320 321 322
	/**
	 * 批量移除(屏蔽)词条
	 */
323 324 325
	public void disableWords(Collection<String> words) {
		if (words != null) {
			for (String word : words) {
weixin_43283383's avatar
weixin_43283383 已提交
326
				if (word != null) {
327
					// 批量屏蔽词条
weixin_43283383's avatar
weixin_43283383 已提交
328
					singleton._MainDict.disableSegment(word.trim().toCharArray());
weixin_43283383's avatar
weixin_43283383 已提交
329 330 331 332
				}
			}
		}
	}
333

weixin_43283383's avatar
weixin_43283383 已提交
334 335
	/**
	 * 检索匹配主词典
336
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
337 338
	 * @return Hit 匹配结果描述
	 */
339
	public Hit matchInMainDict(char[] charArray) {
weixin_43283383's avatar
weixin_43283383 已提交
340 341
		return singleton._MainDict.match(charArray);
	}
342

weixin_43283383's avatar
weixin_43283383 已提交
343 344
	/**
	 * 检索匹配主词典
345
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
346 347
	 * @return Hit 匹配结果描述
	 */
348
	public Hit matchInMainDict(char[] charArray, int begin, int length) {
349
		return singleton._MainDict.match(charArray, begin, length);
weixin_43283383's avatar
weixin_43283383 已提交
350
	}
351

weixin_43283383's avatar
weixin_43283383 已提交
352 353
	/**
	 * 检索匹配量词词典
354
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
355 356
	 * @return Hit 匹配结果描述
	 */
357
	public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
weixin_43283383's avatar
weixin_43283383 已提交
358
		return singleton._QuantifierDict.match(charArray, begin, length);
weixin_43283383's avatar
weixin_43283383 已提交
359
	}
360

weixin_43283383's avatar
weixin_43283383 已提交
361 362
	/**
	 * 从已匹配的Hit中直接取出DictSegment,继续向下匹配
363
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
364 365
	 * @return Hit
	 */
366
	public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
weixin_43283383's avatar
weixin_43283383 已提交
367
		DictSegment ds = matchedHit.getMatchedDictSegment();
368
		return ds.match(charArray, currentIndex, 1, matchedHit);
weixin_43283383's avatar
weixin_43283383 已提交
369
	}
370

weixin_43283383's avatar
weixin_43283383 已提交
371 372
	/**
	 * 判断是否是停止词
373
	 * 
weixin_43283383's avatar
weixin_43283383 已提交
374 375
	 * @return boolean
	 */
376
	public boolean isStopWord(char[] charArray, int begin, int length) {
weixin_43283383's avatar
weixin_43283383 已提交
377
		return singleton._StopWords.match(charArray, begin, length).isMatch();
378 379
	}

weixin_43283383's avatar
weixin_43283383 已提交
380 381 382
	/**
	 * 加载主词典及扩展词典
	 */
383 384 385
	private void loadMainDict() {
		// 建立一个主词典实例
		_MainDict = new DictSegment((char) 0);
weixin_43283383's avatar
weixin_43283383 已提交
386

387
		// 读取主词典文件
388
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
F
Figroc Chen 已提交
389
		loadDictFile(_MainDict, file, false, "Main Dict");
390
		// 加载扩展词典
weixin_43283383's avatar
weixin_43283383 已提交
391
		this.loadExtDict();
392
		// 加载远程自定义词库
goBD's avatar
goBD 已提交
393
		this.loadRemoteExtDict();
394 395
	}

weixin_43283383's avatar
weixin_43283383 已提交
396 397 398
	/**
	 * 加载用户配置的扩展词典到主词库表
	 */
399 400
	private void loadExtDict() {
		// 加载扩展词典配置
401
		List<String> extDictFiles = getExtDictionarys();
402 403 404
		if (extDictFiles != null) {
			for (String extDictName : extDictFiles) {
				// 读取扩展词典文件
405
				logger.info("[Dict Loading] " + extDictName);
406
				Path file = PathUtils.get(extDictName);
F
Figroc Chen 已提交
407
				loadDictFile(_MainDict, file, false, "Extra Dict");
weixin_43283383's avatar
weixin_43283383 已提交
408
			}
409
		}
weixin_43283383's avatar
weixin_43283383 已提交
410
	}
411

goBD's avatar
goBD 已提交
412 413 414
	/**
	 * 加载远程扩展词典到主词库表
	 */
415
	private void loadRemoteExtDict() {
416
		List<String> remoteExtDictFiles = getRemoteExtDictionarys();
417
		for (String location : remoteExtDictFiles) {
weixin_43283383's avatar
weixin_43283383 已提交
418
			logger.info("[Dict Loading] " + location);
goBD's avatar
goBD 已提交
419
			List<String> lists = getRemoteWords(location);
420 421 422
			// 如果找不到扩展的字典,则忽略
			if (lists == null) {
				logger.error("[Dict Loading] " + location + "加载失败");
goBD's avatar
goBD 已提交
423 424
				continue;
			}
425
			for (String theWord : lists) {
goBD's avatar
goBD 已提交
426
				if (theWord != null && !"".equals(theWord.trim())) {
427
					// 加载扩展词典数据到主内存词典中
goBD's avatar
goBD 已提交
428 429 430 431 432
					logger.info(theWord);
					_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
				}
			}
		}
433

goBD's avatar
goBD 已提交
434
	}
435

R
Rueian 已提交
436 437 438 439 440 441 442
	/**
	 * Downloads the word list at {@code location}. SpecialPermission.check() is
	 * called first, then the download runs inside AccessController.doPrivileged.
	 *
	 * @param location location of the remote dictionary
	 * @return the list returned by getRemoteWordsUnprivileged
	 */
	private static List<String> getRemoteWords(String location) {
		SpecialPermission.check();
		PrivilegedAction<List<String>> download = () -> getRemoteWordsUnprivileged(location);
		return AccessController.doPrivileged(download);
	}

goBD's avatar
goBD 已提交
443 444 445
	/**
	 * 从远程服务器上下载自定义词条
	 */
R
Rueian 已提交
446
	private static List<String> getRemoteWordsUnprivileged(String location) {
447

goBD's avatar
goBD 已提交
448
		List<String> buffer = new ArrayList<String>();
449 450
		RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
				.setSocketTimeout(60 * 1000).build();
goBD's avatar
goBD 已提交
451 452 453 454 455 456 457
		CloseableHttpClient httpclient = HttpClients.createDefault();
		CloseableHttpResponse response;
		BufferedReader in;
		HttpGet get = new HttpGet(location);
		get.setConfig(rc);
		try {
			response = httpclient.execute(get);
458
			if (response.getStatusLine().getStatusCode() == 200) {
459

goBD's avatar
goBD 已提交
460
				String charset = "UTF-8";
461
				// 获取编码,默认为utf-8
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
				HttpEntity entity = response.getEntity();
				if(entity!=null){
					Header contentType = entity.getContentType();
					if(contentType!=null&&contentType.getValue()!=null){
						String typeValue = contentType.getValue();
						if(typeValue!=null&&typeValue.contains("charset=")){
							charset = typeValue.substring(typeValue.lastIndexOf("=") + 1);
						}
					}

					if (entity.getContentLength() > 0) {
						in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
						String line;
						while ((line = in.readLine()) != null) {
							buffer.add(line);
						}
						in.close();
						response.close();
						return buffer;
					}
			}
goBD's avatar
goBD 已提交
483 484
			}
			response.close();
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
485
		} catch (IllegalStateException | IOException e) {
486
			logger.error("getRemoteWords {} error", e, location);
goBD's avatar
goBD 已提交
487 488 489
		}
		return buffer;
	}
490

weixin_43283383's avatar
weixin_43283383 已提交
491 492 493
	/**
	 * 加载用户扩展的停止词词典
	 */
494 495 496
	private void loadStopWordDict() {
		// 建立主词典实例
		_StopWords = new DictSegment((char) 0);
weixin_43283383's avatar
weixin_43283383 已提交
497

498
		// 读取主词典文件
499
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
F
Figroc Chen 已提交
500
		loadDictFile(_StopWords, file, false, "Main Stopwords");
weixin_43283383's avatar
weixin_43283383 已提交
501

502
		// 加载扩展停止词典
503
		List<String> extStopWordDictFiles = getExtStopWordDictionarys();
504 505
		if (extStopWordDictFiles != null) {
			for (String extStopWordDictName : extStopWordDictFiles) {
506 507
				logger.info("[Dict Loading] " + extStopWordDictName);

508
				// 读取扩展词典文件
509
				file = PathUtils.get(extStopWordDictName);
F
Figroc Chen 已提交
510
				loadDictFile(_StopWords, file, false, "Extra Stopwords");
weixin_43283383's avatar
weixin_43283383 已提交
511
			}
goBD's avatar
goBD 已提交
512
		}
513

514
		// 加载远程停用词典
515
		List<String> remoteExtStopWordDictFiles = getRemoteExtStopWordDictionarys();
516
		for (String location : remoteExtStopWordDictFiles) {
weixin_43283383's avatar
weixin_43283383 已提交
517
			logger.info("[Dict Loading] " + location);
goBD's avatar
goBD 已提交
518
			List<String> lists = getRemoteWords(location);
519 520 521
			// 如果找不到扩展的字典,则忽略
			if (lists == null) {
				logger.error("[Dict Loading] " + location + "加载失败");
goBD's avatar
goBD 已提交
522 523
				continue;
			}
524
			for (String theWord : lists) {
goBD's avatar
goBD 已提交
525
				if (theWord != null && !"".equals(theWord.trim())) {
526
					// 加载远程词典数据到主内存中
goBD's avatar
goBD 已提交
527 528 529 530 531
					logger.info(theWord);
					_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
				}
			}
		}
532

weixin_43283383's avatar
weixin_43283383 已提交
533
	}
534

weixin_43283383's avatar
weixin_43283383 已提交
535 536 537
	/**
	 * 加载量词词典
	 */
538 539 540 541
	private void loadQuantifierDict() {
		// 建立一个量词典实例
		_QuantifierDict = new DictSegment((char) 0);
		// 读取量词词典文件
542
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
F
Figroc Chen 已提交
543
		loadDictFile(_QuantifierDict, file, false, "Quantifier");
weixin_43283383's avatar
weixin_43283383 已提交
544 545
	}

546
	private void loadSurnameDict() {
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
547
		DictSegment _SurnameDict = new DictSegment((char) 0);
548
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
F
Figroc Chen 已提交
549
		loadDictFile(_SurnameDict, file, true, "Surname");
550 551
	}

552
	private void loadSuffixDict() {
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
553
		DictSegment _SuffixDict = new DictSegment((char) 0);
554
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
F
Figroc Chen 已提交
555
		loadDictFile(_SuffixDict, file, true, "Suffix");
556 557
	}

558
	private void loadPrepDict() {
weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
559
		DictSegment _PrepDict = new DictSegment((char) 0);
560
		Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
F
Figroc Chen 已提交
561
		loadDictFile(_PrepDict, file, true, "Preposition");
562 563
	}

weixin_43283383's avatar
fix NPE  
weixin_43283383 已提交
564
	void reLoadMainDict() {
565
		logger.info("重新加载词典...");
R
rockybean 已提交
566
		// 新开一个实例加载词典,减少加载过程对当前词典使用的影响
567
		Dictionary tmpDict = new Dictionary(configuration);
568
		tmpDict.configuration = getSingleton().configuration;
R
rockybean 已提交
569 570 571 572 573
		tmpDict.loadMainDict();
		tmpDict.loadStopWordDict();
		_MainDict = tmpDict._MainDict;
		_StopWords = tmpDict._StopWords;
		logger.info("重新加载词典完毕...");
574 575
	}

576
}