fix hit.java with the correct encoding

5f5dcf06 · weixin_43283383 · 29b64548 · 5f5dcf06 · 29b64548 · 29b64548
15 changed files
--- a/src/main/java/org/wltea/analyzer/dic/Hit.java
+++ b/src/main/java/org/wltea/analyzer/dic/Hit.java
 /**
 * 
- * IK 中文分词  版本 5.0
+ * IK 中文分词  版本 5.0
 * IK Analyzer release 5.0
 * 
 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -18,42 +18,42 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
- * 源代码由林良益(linliangyi2005@gmail.com)提供
- * 版权声明 2012，乌龙茶工作室
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012，乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
 * 
 */
 package org.wltea.analyzer.dic;

 /**
- * 表示一次词典匹配的命中
+ * 表示一次词典匹配的命中
 */
 public class Hit {
-	//Hit不匹配
+	//Hit不匹配
 	private static final int UNMATCH = 0x00000000;
-	//Hit完全匹配
+	//Hit完全匹配
 	private static final int MATCH = 0x00000001;
-	//Hit前缀匹配
+	//Hit前缀匹配
 	private static final int PREFIX = 0x00000010;
 	
 	
-	//该HIT当前状态，默认未匹配
+	//该HIT当前状态，默认未匹配
 	private int hitState = UNMATCH;
 	
-	//记录词典匹配过程中，当前匹配到的词典分支节点
+	//记录词典匹配过程中，当前匹配到的词典分支节点
 	private DictSegment matchedDictSegment; 
 	/*
-	 * 词段开始位置
+	 * 词段开始位置
 	 */
 	private int begin;
 	/*
-	 * 词段的结束位置
+	 * 词段的结束位置
 	 */
 	private int end;
 	
 	
 	/**
-	 * 判断是否完全匹配
+	 * 判断是否完全匹配
 	 */
 	public boolean isMatch() {
 		return (this.hitState & MATCH) > 0;
@@ -64,7 +64,7 @@ public class Hit {
 	}

 	/**
-	 * 判断是否是词的前缀
+	 * 判断是否是词的前缀
 	 */
 	public boolean isPrefix() {
 		return (this.hitState & PREFIX) > 0;
@@ -74,7 +74,7 @@ public class Hit {
 		this.hitState = this.hitState | PREFIX;
 	}
 	/**
-	 * 判断是否是不匹配
+	 * 判断是否是不匹配
 	 */
 	public boolean isUnmatch() {
 		return this.hitState == UNMATCH ;

--- a/src/test/java/CharacterTest.java
+++ b/src/test/java/CharacterTest.java
-/*
- * Licensed to Elastic Search and Shay Banon under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. Elastic Search licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * 
- */
-
-import org.wltea.analyzer.help.CharacterHelper;
-
-/**
- * @author Administrator
- *
- */
-public class CharacterTest {
-
-	public void testSBC2DBCChar(){
-		char a = '‘';
-
-
-		System.out.println((int)a);
-		System.out.println(CharacterHelper.regularize(a));
-		System.out.println((int)CharacterHelper.regularize(a));
-		
-		String sss  = "智灵通乳酸钙冲剂(5g\14袋)-1244466518522.txt";
-		System.out.println(sss.replaceAll("[\\\\]", "每"));
-	}
-}
--- a/src/test/java/DictionaryTester.java
+++ b/src/test/java/DictionaryTester.java
-///**
-// *
-// */
-//
-//import org.wltea.analyzer.dic.DictSegment;
-//import org.wltea.analyzer.dic.Dictionary;
-//import org.wltea.analyzer.dic.Hit;
-//
-//import java.io.BufferedReader;
-//import java.io.FileOutputStream;
-//import java.io.IOException;
-//import java.io.InputStream;
-//import java.io.InputStreamReader;
-//import java.util.ArrayList;
-//import java.util.Date;
-//import java.util.HashMap;
-//import java.util.List;
-//import java.util.Map;
-//import java.util.Set;
-//import java.util.TreeSet;
-//
-///**
-// * 主词典统计分析工具类
-// * @author 林良益
-// *
-// */
-//public class DictionaryTester {
-//
-//	public void testMainDicEncoding(){
-//		int count = 0;
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_MAIN);
-//		try {
-//
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is,"UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					theWord = theWord.trim();
-//                    /*Test Logging*/
-//                    System.out.println(theWord);
-//				}
-//				count++;
-//			} while (theWord != null && count < 20);
-//
-//		} catch (IOException ioe) {
-//			System.err.println("主词典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//	}
-//
-//	public void testMainDictMemoryConsume(){
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_MAIN);
-//        System.out.println(new Date() + " before load dictionary");
-//        DictSegment _root_ = new DictSegment((char)0);
-//        try {
-//			Thread.sleep(20000);
-//		} catch (InterruptedException e1) {
-//
-//			e1.printStackTrace();
-//		}
-//        System.out.println(new Date() + " loading dictionary");
-//		try {
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					_root_.fillSegment(theWord.toCharArray());
-//				}
-//			} while (theWord != null);
-//			System.out.println(new Date() + " after load dictionary");
-//
-//
-//		} catch (IOException ioe) {
-//			System.err.println("主词典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//
-//        try {
-//			Thread.sleep(20000);
-//		} catch (InterruptedException e1) {
-//
-//			e1.printStackTrace();
-//		}
-//	}
-//
-//	public void testCountWordHeader(){
-//		FileOutputStream fos = null;
-//		Map<String , Integer> wordMap = new HashMap<String ,Integer>();
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_MAIN);
-//
-//		try {
-//			fos = new FileOutputStream("D:/testCountWordHeader.txt");
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					theWord = theWord.trim();
-//					String key = theWord.substring(0,1);
-//					Integer c = wordMap.get(key);
-//					if(c == null){
-//						wordMap.put(key, new Integer(1));
-//					}else{
-//						wordMap.put(key, ++c);
-//					}
-//				}
-//			} while (theWord != null);
-//
-//			int countOnlyOne = 0;
-//			int countMorethan64 = 0;
-//			Set<String> it = wordMap.keySet();
-//			for(String key : it){
-//				Integer c = wordMap.get(key);
-//				if(c == 1){
-//					countOnlyOne ++;
-//				}
-//				if(c > 64){
-//					countMorethan64 ++;
-//				}
-//
-//				fos.write((key + " : " + c + "\r\n").getBytes());
-//			}
-//			fos.write(("Total : " + wordMap.size() + "\r\n").getBytes());
-//			fos.write(("OnlyOneCount : " + countOnlyOne + "\r\n").getBytes());
-//			fos.write(("MoreThen64Count : " + countMorethan64 + "\r\n").getBytes());
-//			fos.flush();
-//
-//		} catch (IOException ioe) {
-//			System.err.println("主词典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//			try {
-//				if(fos != null){
-//					fos.close();
-//					fos = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//	}
-//
-//	public void testSurNameDicEncoding(){
-//		int count = 0;
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_SURNAME);
-//		try {
-//
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					theWord = theWord.trim();
-//                    /*Test Logging*/
-//                    System.out.println(theWord);
-//				}
-//				count++;
-//			} while (theWord != null && count < 20);
-//
-//		} catch (IOException ioe) {
-//			System.err.println("姓氏典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//	}
-//
-//	public void testSuffixDicEncoding(){
-//		int count = 0;
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_SUFFIX);
-//		try {
-//
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					theWord = theWord.trim();
-//                    /*Test Logging*/
-//                    System.out.println(theWord);
-//				}
-//				count++;
-//			} while (theWord != null && count < 20);
-//
-//		} catch (IOException ioe) {
-//			System.err.println("后缀典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//	}
-//
-//	public void testStopDicEncoding(){
-//		int count = 0;
-//
-//        InputStream is = DictionaryTester.class.getResourceAsStream("/mydict.dic");
-//		try {
-//
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					theWord = theWord.trim();
-//                    /*Test Logging*/
-//                    System.out.println(theWord);
-//				}
-//				count++;
-//			} while (theWord != null);
-//
-//		} catch (IOException ioe) {
-//			System.err.println("停止词典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//	}
-//
-//
-//	public void testDictSegmentSearch(){
-//        InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_QUANTIFIER);
-//        System.out.println(new Date() + " before load dictionary");
-//
-//        DictSegment _root_ = new DictSegment((char)0);
-//        List<String> allWords = new ArrayList<String>();
-//
-//        System.out.println(new Date() + " loading dictionary");
-//		try {
-//			String theWord = null;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					allWords.add(theWord.trim());
-//					_root_.fillSegment(theWord.trim().toCharArray());
-//				}
-//			} while (theWord != null);
-//			System.out.println(new Date() + " after load dictionary");
-//
-//
-//		} catch (IOException ioe) {
-//			System.err.println("主词典库载入异常.");
-//			ioe.printStackTrace();
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//
-//        try {
-//			Thread.sleep(3000);
-//		} catch (InterruptedException e1) {
-//
-//			e1.printStackTrace();
-//		}
-//
-//		System.out.println(new Date() + " begin march");
-//		long begintime = System.currentTimeMillis();
-//		Hit hit = null;
-//		int umCount = 0;
-//		int mCount = 0;
-//		for(String word : allWords){
-//			hit = _root_.match(word.toCharArray());
-//			if(hit.isUnmatch()){
-//				System.out.println(word);
-//				umCount++;
-//			}else{
-//				mCount++;
-//				System.out.println(mCount + " : " + word);
-//			}
-//		}
-//		System.out.println(new Date() + " finish march , cost " + (System.currentTimeMillis() - begintime ) + " millseconds");
-//		System.out.println("Match words : " + mCount + " Unmatch words : " + umCount);
-//	}
-//
-//	public void testDictionarySearch(){
-//	     InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_MAIN);
-//	     List<String> allWords = new ArrayList<String>();
-//
-//	     try {
-//				String theWord = null;
-//				BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//				do {
-//					theWord = br.readLine();
-//					if (theWord != null) {
-//						allWords.add(theWord.trim());
-//					}
-//				} while (theWord != null);
-//
-//			} catch (IOException ioe) {
-//				ioe.printStackTrace();
-//
-//			}finally{
-//				try {
-//					if(is != null){
-//	                    is.close();
-//	                    is = null;
-//					}
-//				} catch (IOException e) {
-//					e.printStackTrace();
-//				}
-//			}
-//
-//			Dictionary.getInstance();
-//	        try {
-//				Thread.sleep(3000);
-//			} catch (InterruptedException e1) {
-//
-//				e1.printStackTrace();
-//			}
-//
-//			System.out.println(new Date() + " begin march");
-//			long begintime = System.currentTimeMillis();
-//			Hit hit = null;
-//			int umCount = 0;
-//			int mCount = 0;
-//			for(String word : allWords){
-//				hit = Dictionary.matchInMainDict(word.toCharArray(), 0, word.length());
-//				if(hit.isUnmatch()){
-//					System.out.println(word);
-//					umCount++;
-//				}else{
-//					mCount++;
-//				}
-//			}
-//			System.out.println(new Date() + " finish march , cost " + (System.currentTimeMillis() - begintime ) + " millseconds");
-//			System.out.println("Match words : " + mCount + " Unmatch words : " + umCount);
-//	}
-//
-//	/**
-//	 * 量词排序
-//	 */
-//	public void testSortCount(){
-//		InputStream is = DictionaryTester.class.getResourceAsStream(Dictionary.PATH_DIC_QUANTIFIER);
-//		TreeSet<String> allWords = new TreeSet<String>();
-//
-//		try {
-//			String theWord;
-//			BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-//			do {
-//				theWord = br.readLine();
-//				if (theWord != null) {
-//					allWords.add(theWord.trim());
-//					System.out.println(theWord.trim());
-//				}
-//			} while (theWord != null);
-//
-//		} catch (IOException ioe) {
-//			ioe.printStackTrace();
-//
-//		}finally{
-//			try {
-//				if(is != null){
-//                    is.close();
-//                    is = null;
-//				}
-//			} catch (IOException e) {
-//				e.printStackTrace();
-//			}
-//		}
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//	}
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//
-//}
--- a/src/test/java/IKAnalyzerDemo.java
+++ b/src/test/java/IKAnalyzerDemo.java
-///**
-// *
-// */
-//
-//import java.io.IOException;
-//
-//import org.apache.lucene.analysis.Analyzer;
-//import org.apache.lucene.document.Document;
-//import org.apache.lucene.document.Field;
-//import org.apache.lucene.index.CorruptIndexException;
-//import org.apache.lucene.index.IndexWriter;
-//import org.apache.lucene.index.Term;
-//import org.apache.lucene.search.IndexSearcher;
-//import org.apache.lucene.search.Query;
-//import org.apache.lucene.search.ScoreDoc;
-//import org.apache.lucene.search.TopDocs;
-//import org.apache.lucene.store.Directory;
-//import org.apache.lucene.store.LockObtainFailedException;
-//import org.apache.lucene.store.RAMDirectory;
-//import org.wltea.analyzer.lucene.IKAnalyzer;
-//import org.wltea.analyzer.lucene.IKQueryParser;
-//import org.wltea.analyzer.lucene.IKSimilarity;
-//
-///**
-// * @author linly
-// *
-// */
-//public class IKAnalyzerDemo {
-//
-//	public static void main(String[] args){
-//
-//		String fieldName = "text";
-//
-//		String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
-//
-//
-//		Analyzer analyzer = new IKAnalyzer();
-//
-//
-//		Directory directory = null;
-//		IndexWriter iwriter = null;
-//		IndexSearcher isearcher = null;
-//		try {
-//
-//			directory = new RAMDirectory();
-//			iwriter = new IndexWriter(directory, analyzer, true , IndexWriter.MaxFieldLength.LIMITED);
-//			Document doc = new Document();
-//			doc.add(new Field("ID", "1111", Field.Store.YES, Field.Index.NOT_ANALYZED));
-//			doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
-//			iwriter.addDocument(doc);
-//
-//			iwriter.close();
-//
-//
-//			isearcher = new IndexSearcher(directory);
-//
-//			isearcher.setSimilarity(new IKSimilarity());
-//
-//			String keyword = "中文分词工具包";
-//
-//
-//			Query query = IKQueryParser.parse(fieldName, keyword);
-//
-//
-//			TopDocs topDocs = isearcher.search(query , 5);
-//			System.out.println("命中：" + topDocs.totalHits);
-//
-//			ScoreDoc[] scoreDocs = topDocs.scoreDocs;
-//			for (int i = 0; i < topDocs.totalHits; i++){
-//				Document targetDoc = isearcher.doc(scoreDocs[i].doc);
-//				System.out.println("内容：" + targetDoc.toString());
-//			}
-//
-//		} catch (CorruptIndexException e) {
-//			e.printStackTrace();
-//		} catch (LockObtainFailedException e) {
-//			e.printStackTrace();
-//		} catch (IOException e) {
-//			e.printStackTrace();
-//		} finally{
-//			if(isearcher != null){
-//				try {
-//					isearcher.close();
-//				} catch (IOException e) {
-//					e.printStackTrace();
-//				}
-//			}
-//			if(directory != null){
-//				try {
-//					directory.close();
-//				} catch (IOException e) {
-//					e.printStackTrace();
-//				}
-//			}
-//		}
-//	}
-//}
\ No newline at end of file
--- a/src/test/java/IKTokenerTest.java
+++ b/src/test/java/IKTokenerTest.java
-/**
- *
- */
-
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.junit.Test;
-import org.wltea.analyzer.lucene.IKTokenizer;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-
-/**
- * @author 林良益
- *
- */
-public class IKTokenerTest  {
-
-    @Test
-	public void testLucene3Tokenizer(){
-		String t = "IK分词器Lucene Analyzer接口实现类 民生银行";
-		IKTokenizer tokenizer = new IKTokenizer(new StringReader(t) , false);
-		try {
-			while(tokenizer.incrementToken()){
-				TermAttribute termAtt = tokenizer.getAttribute(TermAttribute.class);
-				System.out.println(termAtt);
-			}
-		} catch (IOException e) {
-
-			e.printStackTrace();
-		}
-
-
-	}
-
-
-
-}
--- a/src/test/java/SegmentorTester.java
+++ b/src/test/java/SegmentorTester.java
--- a/src/test/java/extended/ik_dict/IKAnalyzer.cfg.xml
+++ b/src/test/java/extended/ik_dict/IKAnalyzer.cfg.xml
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
-<properties>  
-	<comment>IK Analyzer 扩展配置</comment>
-	<!--用户可以在这里配置自己的扩展字典-->	 
-	<entry key="ext_dict">/mydict.dic;</entry> 
-	
-	 <!--用户可以在这里配置自己的扩展停止词字典-->
-	<entry key="ext_stopwords">/ext_stopword.dic</entry> 
-	
-</properties>
\ No newline at end of file
--- a/src/test/java/extended/ik_dict/ext_dict/mydict.dic
+++ b/src/test/java/extended/ik_dict/ext_dict/mydict.dic
--- a/src/test/java/extended/ik_dict/ext_stopwords/ext_stopword.dic
+++ b/src/test/java/extended/ik_dict/ext_stopwords/ext_stopword.dic
-也
-了
-仍
-从
-以
-使
-则
-却
-
-又
-及
-对
-就
-并
-很
-或
-把
-
-是
-的
-着
-给
-而
-被
-让
-在
-还
-比
-等
-当
-与
-于
-但
-
-更好的
-选择
-啊
-阿
-哎
-哎呀
-哎哟
-唉
-俺
-俺们
-按
-按照
-吧
-吧哒
-把
-罢了
-被
-本
-本着
-比
-比方
-比如
-鄙人
-彼
-彼此
-边
-别
-别的
-别说
-并
-并且
-不比
-不成
-不单
-不但
-不独
-不管
-不光
-不过
-不仅
-不拘
-不论
-不怕
-不然
-不如
-不特
-不惟
-不问
-不只
-朝
-朝着
-趁
-趁着
-乘
-冲
-除
-除此之外
-除非
-除了
-此
-此间
-此外
-从
-从而
-打
-待
-但
-但是
-当
-当着
-到
-得
-的
-的话
-等
-等等
-地
-第
-叮咚
-对
-对于
-多
-多少
-而
-而况
-而且
-而是
-而外
-而言
-而已
-尔后
-反过来
-反过来说
-反之
-非但
-非徒
-否则
-嘎
-嘎登
-该
-赶
-个
-各
-各个
-各位
-各种
-各自
-给
-根据
-跟
-故
-故此
-固然
-关于
-管
-归
-果然
-果真
-过
-哈
-哈哈
-呵
-和
-何
-何处
-何况
-何时
-嘿
-哼
-哼唷
-呼哧
-乎
-哗
-还是
-还有
-换句话说
-换言之
-或
-或是
-或者
-极了
-及
-及其
-及至
-即
-即便
-即或
-即令
-即若
-即使
-几
-几时
-己
-既
-既然
-既是
-继而
-加之
-假如
-假若
-假使
-鉴于
-将
-较
-较之
-叫
-接着
-结果
-借
-紧接着
-进而
-尽
-尽管
-经
-经过
-就
-就是
-就是说
-据
-具体地说
-具体说来
-开始
-开外
-靠
-咳
-可
-可见
-可是
-可以
-况且
-啦
-来
-来着
-离
-例如
-哩
-连
-连同
-两者
-了
-临
-另
-另外
-另一方面
-论
-嘛
-吗
-慢说
-漫说
-冒
-么
-每
-每当
-们
-莫若
-某
-某个
-某些
-拿
-哪
-哪边
-哪儿
-哪个
-哪里
-哪年
-哪怕
-哪天
-哪些
-哪样
-那
-那边
-那儿
-那个
-那会儿
-那里
-那么
-那么些
-那么样
-那时
-那些
-那样
-乃
-乃至
-呢
-能
-你
-你们
-您
-宁
-宁可
-宁肯
-宁愿
-哦
-呕
-啪达
-旁人
-呸
-凭
-凭借
-其
-其次
-其二
-其他
-其它
-其一
-其余
-其中
-起
-起见
-起见
-岂但
-恰恰相反
-前后
-前者
-且
-然而
-然后
-然则
-让
-人家
-任
-任何
-任凭
-如
-如此
-如果
-如何
-如其
-如若
-如上所述
-若
-若非
-若是
-啥
-上下
-尚且
-设若
-设使
-甚而
-甚么
-甚至
-省得
-时候
-什么
-什么样
-使得
-是
-是的
-首先
-谁
-谁知
-顺
-顺着
-似的
-虽
-虽然
-虽说
-虽则
-随
-随着
-所
-所以
-他
-他们
-他人
-它
-它们
-她
-她们
-倘
-倘或
-倘然
-倘若
-倘使
-腾
-替
-通过
-同
-同时
-哇
-万一
-往
-望
-为
-为何
-为了
-为什么
-为着
-喂
-嗡嗡
-我
-我们
-呜
-呜呼
-乌乎
-无论
-无宁
-毋宁
-嘻
-吓
-相对而言
-像
-向
-向着
-嘘
-呀
-焉
-沿
-沿着
-要
-要不
-要不然
-要不是
-要么
-要是
-也
-也罢
-也好
-一
-一般
-一旦
-一方面
-一来
-一切
-一样
-一则
-依
-依照
-矣
-以
-以便
-以及
-以免
-以至
-以至于
-以致
-抑或
-因
-因此
-因而
-因为
-哟
-用
-由
-由此可见
-由于
-有
-有的
-有关
-有些
-又
-于
-于是
-于是乎
-与
-与此同时
-与否
-与其
-越是
-云云
-哉
-再说
-再者
-在
-在下
-咱
-咱们
-则
-怎
-怎么
-怎么办
-怎么样
-怎样
-咋
-照
-照着
-者
-这
-这边
-这儿
-这个
-这会儿
-这就是说
-这里
-这么
-这么点儿
-这么些
-这么样
-这时
-这些
-这样
-正如
-吱
-之
-之类
-之所以
-之一
-只是
-只限
-只要
-只有
-至
-至于
-诸位
-着
-着呢
-自
-自从
-自个儿
-自各儿
-自己
-自家
-自身
-综上所述
-总的来看
-总的来说
-总的说来
-总而言之
-总之
-纵
-纵令
-纵然
-纵使
-遵照
-作为
-兮
-呃
-呗
-咚
-咦
-喏
-啐
-喔唷
-嗬
-嗯
-嗳
--- a/src/test/java/extended/ik_dict/main.dic
+++ b/src/test/java/extended/ik_dict/main.dic
--- a/src/test/java/extended/ik_dict/preposition.dic
+++ b/src/test/java/extended/ik_dict/preposition.dic
-不
-也
-了
-仍
-从
-以
-使
-则
-却
-又
-及
-对
-就
-并
-很
-或
-把
-是
-的
-着
-给
-而
-被
-让
-但
\ No newline at end of file
--- a/src/test/java/extended/ik_dict/quantifier.dic
+++ b/src/test/java/extended/ik_dict/quantifier.dic
-丈
-下
-世
-世纪
-两
-个
-中
-串
-亩
-人
-介
-付
-代
-件
-任
-份
-伏
-伙
-位
-位数
-例
-倍
-像素
-元
-克
-克拉
-公亩
-公克
-公分
-公升
-公尺
-公担
-公斤
-公里
-公顷
-具
-册
-出
-刀
-分
-分钟
-划
-列
-则
-刻
-剂
-剑
-副
-加仑
-勺
-包
-匙
-匹
-区
-千克
-千米
-升
-卷
-厅
-厘
-双
-发
-口
-句
-只
-台
-叶
-号
-名
-吨
-听
-员
-周
-周年
-品
-回
-团
-圆
-圈
-地
-场
-块
-坪
-堆
-声
-壶
-处
-夜
-大
-天
-头
-套
-女
-孔
-字
-宗
-室
-家
-寸
-对
-封
-尊
-小时
-尺
-尾
-局
-层
-届
-岁
-师
-帧
-幅
-幕
-幢
-平方
-平方公尺
-平方公里
-平方分米
-平方厘米
-平方码
-平方米
-平方英寸
-平方英尺
-平方英里
-平米
-年
-年代
-年级
-度
-座
-式
-引
-张
-成
-战
-截
-户
-房
-所
-扇
-手
-打
-批
-把
-折
-担
-拉
-拍
-招
-拨
-拳
-指
-掌
-排
-撮
-支
-文
-斗
-斤
-方
-族
-日
-时
-曲
-月
-月份
-期
-本
-朵
-村
-束
-条
-来
-杯
-枚
-枝
-枪
-架
-柄
-柜
-栋
-栏
-株
-样
-根
-格
-案
-桌
-档
-桩
-桶
-梯
-棵
-楼
-次
-款
-步
-段
-毛
-毫
-池
-洲
-派
-海里
-滴
-炮
-点
-点钟
-片
-版
-环
-班
-瓣
-瓶
-生
-男
-画
-界
-盆
-盎司
-盏
-盒
-盘
-相
-眼
-石
-码
-碗
-碟
-磅
-种
-科
-秒
-秒钟
-窝
-立方公尺
-立方分米
-立方厘米
-立方码
-立方米
-立方英寸
-立方英尺
-站
-章
-笔
-等
-筐
-筒
-箱
-篇
-篓
-篮
-簇
-米
-类
-粒
-级
-组
-维
-缕
-缸
-罐
-网
-群
-股
-脚
-船
-艇
-艘
-色
-节
-英亩
-英寸
-英尺
-英里
-行
-袋
-角
-言
-课
-起
-趟
-路
-车
-转
-轮
-辆
-辈
-连
-通
-遍
-部
-里
-重
-针
-钟
-钱
-锅
-门
-间
-队
-阶段
-隅
-集
-页
-顶
-顷
-项
-顿
-颗
-餐
-首
\ No newline at end of file
--- a/src/test/java/extended/ik_dict/stopword.dic
+++ b/src/test/java/extended/ik_dict/stopword.dic
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
\ No newline at end of file
--- a/src/test/java/extended/ik_dict/suffix.dic
+++ b/src/test/java/extended/ik_dict/suffix.dic
-乡
-井
-亭
-党
-区
-厅
-县
-园
-塔
-家
-寺
-局
-巷
-市
-弄
-所
-斯基
-楼
-江
-河
-海
-湖
-省
-维奇
-署
-苑
-街
-觀
-观
-诺夫
-路
-部
-镇
-阁
-山
-子
-娃
\ No newline at end of file
--- a/src/test/java/extended/ik_dict/surname.dic
+++ b/src/test/java/extended/ik_dict/surname.dic
-丁
-万
-万俟
-上官
-东方
-乔
-于
-令狐
-仲孙
-任
-何
-余
-候
-傅
-公冶
-公孙
-公羊
-冯
-刘
-单
-单于
-卢
-史
-叶
-司徒
-司空
-司马
-吕
-吴
-周
-唐
-夏
-夏侯
-太叔
-姚
-姜
-孔
-孙
-孟
-宇文
-宋
-宗政
-尉迟
-尹
-崔
-常
-康
-廖
-张
-彭
-徐
-慕容
-戴
-文
-方
-易
-曹
-曾
-朱
-李
-杜
-杨
-林
-梁
-欧阳
-武
-段
-毛
-江
-汤
-沈
-淳于
-潘
-澹台
-濮阳
-熊
-王
-田
-申屠
-白
-皇甫
-石
-秦
-程
-罗
-肖
-胡
-苏
-范
-董
-蒋
-薛
-袁
-许
-诸葛
-谢
-谭
-贺
-贾
-赖
-赫连
-赵
-轩辕
-邓
-邱
-邵
-邹
-郑
-郝
-郭
-金
-钟
-钟离
-钱
-长孙
-闻人
-闾丘
-阎
-陆
-陈
-雷
-韩
-顾
-马
-高
-魏
-鲜于
-黄
-黎
-龙
-龚
\ No newline at end of file